Open csmangum opened 5 hours ago
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
def analyze_action_distribution(actions_df):
    """
    Relate action types to agent success via correlation and a Chi-Square test.

    :param actions_df: DataFrame with columns ['agent_id', 'action', 'reward', 'lifespan']
    :return: dict with two entries:
        "correlation" - Pearson correlation, taken across action types, between
        the mean reward and the mean lifespan of each action;
        "chi_square" - tuple (chi2_stat, p_val) testing independence between
        the action type and whether the reward exceeds the overall median
    """
    # Aggregate both metrics per action so the two series share the same
    # (action) index. Correlating per-action means against the raw, row-indexed
    # lifespan column would align on unrelated labels and yield NaN.
    per_action = actions_df.groupby('action')[['reward', 'lifespan']].mean()
    action_reward_correlation = per_action['reward'].corr(per_action['lifespan'])

    # "Success" is defined as a reward strictly above the overall median.
    high_reward = actions_df['reward'] > actions_df['reward'].median()
    action_contingency_table = pd.crosstab(actions_df['action'], high_reward)
    chi2_stat, p_val, _, _ = chi2_contingency(action_contingency_table)

    return {"correlation": action_reward_correlation, "chi_square": (chi2_stat, p_val)}
def calculate_reward_efficiency(actions_df):
    """
    Calculate reward per action: total reward earned by each action type
    divided by the number of times that action was taken.

    :param actions_df: DataFrame with columns ['action', 'reward']
    :return: Series indexed by action holding the reward efficiency
    """
    rewards_by_action = actions_df.groupby('action')['reward']
    # Divide the total by the occurrence count exactly once. Dividing the
    # per-action mean by the count again would scale the result by 1/count**2.
    reward_efficiency = rewards_by_action.sum() / rewards_by_action.size()
    return reward_efficiency
from scipy.signal import correlate
from statsmodels.tsa.stattools import acf
def analyze_health_resource_dynamics(health_series, resource_series, nlags=50):
    """
    Cross-correlation analysis of health and resource levels.

    :param health_series: Series of health values over time
    :param resource_series: Series of resource levels over time
    :param nlags: Number of lags requested from the autocorrelation function
        (defaults to 50, the previously hard-coded value)
    :return: dict with two entries:
        "cross_correlation" - raw (unnormalized) cross-correlation of the two
        mean-centered series over all lags (np.correlate, mode='full');
        "acf" - autocorrelation of health_series for the requested lags
    """
    # Center both series so the cross-correlation reflects co-movement around
    # the means rather than the absolute levels.
    health_centered = health_series - health_series.mean()
    resource_centered = resource_series - resource_series.mean()
    cross_corr = np.correlate(health_centered, resource_centered, mode='full')

    acf_results = acf(health_series, nlags=nlags)
    return {"cross_correlation": cross_corr, "acf": acf_results}
from lifelines import KaplanMeierFitter
def analyze_lifespan_distribution(lifespan_series, event_observed=None):
    """
    Perform survival analysis with a Kaplan-Meier estimator.

    :param lifespan_series: Series of agent lifespans (durations)
    :param event_observed: Optional array-like of booleans/0-1 flags, one per
        agent, marking whether the death was actually observed. Use it to mark
        agents still alive when the simulation ended as censored. When None
        (the default) every lifespan is treated as an observed death.
    :return: Kaplan-Meier survival function
    """
    kmf = KaplanMeierFitter()
    kmf.fit(lifespan_series, event_observed=event_observed)
    return kmf.survival_function_
from statsmodels.tsa.seasonal import seasonal_decompose
def decompose_population_dynamics(population_series, period=None, model='additive'):
    """
    Decompose population time series into trend, seasonal, and residual components.

    :param population_series: Series of total population counts over time
    :param period: Length of one seasonal cycle, in observations. It must be
        given when the series is indexed by plain simulation steps; it may stay
        None only if the series has a date-like index with a frequency from
        which the period can be inferred.
    :param model: Decomposition model, 'additive' (default) or 'multiplicative'
    :return: Decomposition results
    """
    decomposition = seasonal_decompose(population_series, model=model, period=period)
    return decomposition
def calculate_gini_coefficient(rewards):
    """
    Calculate the Gini coefficient for reward distribution.

    Uses the rank-based formula on the ascending-sorted values x_1..x_n:
        G = 2 * sum(i * x_i) / (n * sum(x_i)) - (n + 1) / n
    which yields 0 for perfectly equal rewards and (n - 1) / n when a single
    agent holds everything.

    :param rewards: List or array of rewards (expected to be non-negative; the
        coefficient is not meaningful for negative values)
    :return: Gini coefficient as a float; 0.0 for empty input or when the
        rewards sum to zero
    """
    values = np.sort(np.asarray(rewards, dtype=float).ravel())
    n = values.size
    total = values.sum()
    # Nothing to distribute: report perfect equality instead of dividing by zero.
    if n == 0 or total == 0:
        return 0.0

    ranks = np.arange(1, n + 1)
    gini = 2.0 * np.sum(ranks * values) / (n * total) - (n + 1) / n
    return float(gini)
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
def cluster_agent_behaviors(actions_df, n_clusters=3, n_components=2, random_state=None):
    """
    Cluster agent behaviors based on action frequencies.

    Builds an agent x action count table, projects it with PCA and clusters
    the projected points with KMeans.

    :param actions_df: DataFrame with columns ['agent_id', 'action']
    :param n_clusters: Number of KMeans clusters (default 3)
    :param n_components: Number of PCA components kept (default 2)
    :param random_state: Seed forwarded to KMeans; pass an int for reproducible
        cluster labels (default None)
    :return: Tuple (clusters, reduced_data): the cluster label of each agent
        and the PCA-reduced coordinates, both ordered like the rows of the
        agent x action count table
    """
    # One row per agent, one column per action; missing combinations count as 0.
    action_frequencies = actions_df.groupby(['agent_id', 'action']).size().unstack(fill_value=0)

    pca = PCA(n_components=n_components)
    reduced_data = pca.fit_transform(action_frequencies)

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    clusters = kmeans.fit_predict(reduced_data)
    return clusters, reduced_data
from scipy.optimize import curve_fit
def fit_health_age_model(age_series, health_series, p0=None):
    """
    Fit a nonlinear regression model to health vs. age data.

    The model is health = a * exp(-b * age) + c.

    :param age_series: Series of agent ages
    :param health_series: Series of agent health
    :param p0: Optional initial guess (a, b, c). When None, a guess is derived
        from the data: a from the health difference between the youngest and
        the oldest observation, b from the inverse of the age span, and c from
        the health of the oldest observation.
    :return: Fitted curve parameters (a, b, c)
    """
    def health_decay(age, a, b, c):
        return a * np.exp(-b * age) + c

    ages = np.asarray(age_series, dtype=float)
    health = np.asarray(health_series, dtype=float)

    if p0 is None:
        # curve_fit's default start of all ones makes exp(-b * age) underflow
        # to zero for ages in the tens or hundreds, which stalls the optimizer.
        # Start from the scale of the data instead.
        youngest = np.argmin(ages)
        oldest = np.argmax(ages)
        age_span = ages[oldest] - ages[youngest]
        amplitude = health[youngest] - health[oldest]
        p0 = (
            amplitude if amplitude != 0 else 1.0,
            1.0 / age_span if age_span > 0 else 1.0,
            health[oldest],
        )

    params, _ = curve_fit(health_decay, ages, health, p0=p0)
    return params
import networkx as nx
def analyze_strategy_transitions(actions_df):
    """
    Analyze Markov transition probabilities between actions.

    Consecutive actions of the same agent (ordered by step) form one
    transition; transitions never span two different agents.

    :param actions_df: DataFrame with columns ['agent_id', 'action', 'step']
    :return: Square numpy array where entry [i, j] is the observed probability
        of moving from action i to action j. Rows and columns follow the order
        in which actions first appear in the ordered transition list; each row
        sums to 1, except rows for actions that never have a successor, which
        stay all zero. An empty (0, 0) array is returned when there are no
        transitions.
    """
    ordered = actions_df.sort_values(['agent_id', 'step'])
    # The successor of every action within its own agent; the last action of
    # each agent has none and is dropped below.
    following = ordered.groupby('agent_id')['action'].shift(-1)
    has_successor = following.notna()
    sources = ordered.loc[has_successor, 'action'].to_numpy()
    targets = following[has_successor].to_numpy()
    if len(sources) == 0:
        return np.zeros((0, 0))

    # Stack to a common dtype and list states in first-appearance order
    # (source, target, source, target, ...).
    pairs = np.column_stack([sources, targets])
    states = pd.unique(pairs.ravel())

    # Count every occurrence of a transition so repeated transitions weigh in.
    counts = (
        pd.crosstab(pairs[:, 0], pairs[:, 1])
        .reindex(index=states, columns=states, fill_value=0)
        .to_numpy(dtype=float)
    )

    row_totals = counts.sum(axis=1, keepdims=True)
    transition_matrix = np.divide(
        counts, row_totals, out=np.zeros_like(counts), where=row_totals > 0
    )
    return transition_matrix
from scipy.stats import ttest_ind
def compare_cohorts(df, cohort_column, metric_column):
    """
    Compare performance between cohorts.

    Runs an independent two-sample t-test on the metric of the first two
    distinct cohorts, in order of appearance. Rows with a missing cohort label
    are ignored; any cohorts beyond the first two are not compared.

    :param df: DataFrame with cohort and metric data
    :param cohort_column: Column representing cohorts
    :param metric_column: Column representing the performance metric
    :return: t-test results as a dict with keys "t_stat" and "p_val"
    :raises IndexError: if fewer than two non-null cohorts are present
    """
    # A null label can never be matched with ==, so it would produce an empty
    # sample and a NaN test result; drop such rows before picking cohorts.
    labelled = df[df[cohort_column].notna()]
    cohorts = labelled[cohort_column].unique()

    first_sample = labelled.loc[labelled[cohort_column] == cohorts[0], metric_column]
    second_sample = labelled.loc[labelled[cohort_column] == cohorts[1], metric_column]

    t_stat, p_val = ttest_ind(first_sample, second_sample)
    return {"t_stat": t_stat, "p_val": p_val}
The purpose of the Action Type Distribution Analysis is to investigate the relationship between the types of actions agents perform and their overall success or behavior in the simulation. It comprises the following components:
Frequency Analysis
Correlation Analysis
Chi-Square Test
Visualization
This analysis provides critical insights into agent behaviors.
Here’s how you can break this into manageable steps with Python:
Calculate Frequencies
def calculate_action_frequencies(actions_df):
    """
    Count how often each action type occurs.

    :param actions_df: DataFrame with columns ['action']
    :return: Series indexed by action type holding its number of occurrences
    """
    action_column = actions_df['action']
    frequencies = action_column.value_counts()
    return frequencies
Calculate Correlations
def calculate_action_correlations(actions_df):
    """
    Correlate the per-action success metrics with each other.

    Averages reward and lifespan for every action type and returns the
    correlation matrix of those two per-action means.

    :param actions_df: DataFrame with columns ['agent_id', 'action', 'reward', 'lifespan']
    :return: Correlation matrix (DataFrame) over the columns 'reward' and 'lifespan'
    """
    per_action_means = actions_df.groupby('action')[['reward', 'lifespan']].mean()
    correlation_matrix = per_action_means.corr()
    return correlation_matrix
Perform Chi-Square Test
from scipy.stats import chi2_contingency
def chi_square_test(actions_df):
    """
    Test whether action type and success are independent (Chi-Square test).

    Success is defined as a reward strictly above the overall median reward.

    :param actions_df: DataFrame with columns ['action', 'reward']
    :return: dict with keys "chi2", "p_value", "degrees_of_freedom" and "expected"
    """
    median_reward = actions_df['reward'].median()
    is_high_reward = actions_df['reward'] > median_reward
    observed = pd.crosstab(actions_df['action'], is_high_reward)

    statistic, p_value, degrees_of_freedom, expected_counts = chi2_contingency(observed)
    return {
        "chi2": statistic,
        "p_value": p_value,
        "degrees_of_freedom": degrees_of_freedom,
        "expected": expected_counts,
    }
Visualize Results
import matplotlib.pyplot as plt
def plot_action_distribution(actions_df):
    """
    Draw and show a bar chart of how often each action type occurs.

    :param actions_df: DataFrame with columns ['action']
    """
    frequencies = actions_df['action'].value_counts()
    frequencies.plot.bar(color='skyblue', edgecolor='black')

    plt.title("Action Type Distribution")
    plt.xlabel("Action Type")
    plt.ylabel("Frequency")

    plt.show()
This issue focuses on implementing statistical analyses for deeper insights into the simulation results. The goal is to analyze agent behavior, rewards, population dynamics, and other metrics to uncover patterns and optimize the simulation. Each analysis will be modular and reusable for further simulations.
Tasks
Action Type Distribution Analysis
analyze_action_distribution(actions_df)
Reward Efficiency Analysis
calculate_reward_efficiency(actions_df)
Health and Resource Dynamics
analyze_health_resource_dynamics(health_series, resource_series)
Lifespan Analysis
analyze_lifespan_distribution(lifespan_series)
Population Dynamics Analysis
decompose_population_dynamics(population_series)
Reward Inequality
calculate_gini_coefficient(rewards)
Behavioral Clustering
cluster_agent_behaviors(actions_df)
Health vs. Age Interaction
fit_health_age_model(age_series, health_series)
Strategy Evolution
analyze_strategy_transitions(actions_df)
Cohort-Based Analysis
compare_cohorts(df, cohort_column, metric_column)
Acceptance Criteria
Additional Notes