AutoResearch / autora

Automated Research Assistant
https://autoresearch.github.io/autora/
MIT License
36 stars 5 forks source link

docs: Add to README.md basic installation instructions, minimum example, link to contribution guidelines. #529

Closed musslick closed 9 months ago

musslick commented 1 year ago

To-do items to add to the README:

Should only be merged after: https://github.com/AutoResearch/autora-core/issues/57

musslick commented 9 months ago

Notes

Example from Chad:

#Setup: Import modules
import numpy as np
import matplotlib.pyplot as plt
from autora.theorist.bms import BMSRegressor
from autora.experimentalist.random import random_sample #Note that this sampler is embedded within the autora-core module and so does not need to be explicitly installed

#Step 0: Defining variables
def ground_truth(x):
    """Ground truth model that we will attempt to recover - here a sine wave."""
    return np.sin(x)

initial_X = np.linspace(0, 4 * np.pi, 200) #Define initial data

#Step 1: EXPERIMENTALIST: Sample using the experimentalist
new_conditions = random_sample(initial_X, num_samples=20)
new_conditions = np.array(new_conditions).reshape(-1, 1) #Turn variable into a 2D array

#Step 2: EXPERIMENT RUNNER: Define and then obtain observations using the experiment runner
def run_experiment(x):
    """Experiment runner: the ground truth plus Gaussian noise (mean 0, sd 0.1)."""
    return ground_truth(x) + np.random.normal(0, 0.1, size=x.shape)

#Obtain observations from the runner for the conditions proposed by the experimentalist,
#and turn them into a 2D array
new_observations = run_experiment(new_conditions).reshape(-1, 1)

#Step 3: THEORIST: Initiate and fit a model using the theorist
theorist_bms = BMSRegressor(epochs=100) #Initiate the BMS theorist
theorist_bms.fit(new_conditions, new_observations, seed=42) #Fit a model to the data

#Wrap-Up: Plot data and model
#Sort conditions and observations with the same index so the pairs stay aligned
sort_index = np.argsort(new_conditions, axis=0)[:, 0]
new_conditions = new_conditions[sort_index, :]
new_observations = new_observations[sort_index, :]

plt.plot(initial_X, ground_truth(initial_X), label='Ground Truth')
plt.plot(new_conditions, new_observations, 'o', label='Sampled Conditions')
plt.plot(initial_X, theorist_bms.predict(initial_X.reshape(-1,1)), label=f'Bayesian Machine Scientist ({theorist_bms.repr()})')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Sine Function')
plt.legend()
plt.show() #Display the figure when the example is run as a plain script
musslick commented 9 months ago

New Example from Chad:


####################################################################################
## Import statements
####################################################################################

import numpy as np
import pandas as pd
import sympy as sp

from autora.experiment_runner.synthetic.abstract.equation import equation_experiment
from autora.experimentalist.random import random_pool
from autora.state import StandardState, estimator_on_state, on_state
from autora.theorist.bms import BMSRegressor
from autora.variable import ValueType, Variable, VariableCollection

####################################################################################
## Define initial data
####################################################################################

#### Define variable data ####
# Independent variable x: range [0, 2*pi], with 30 evenly spaced allowed values.
x_upper = 2 * np.pi
iv = Variable(
    name="x",
    value_range=(0, x_upper),
    allowed_values=np.linspace(0, x_upper, 30),
)
dv = Variable(name="y", type=ValueType.REAL)
variables = VariableCollection(
    independent_variables=[iv],
    dependent_variables=[dv],
)

#### Define seed condition data ####
conditions = random_pool(variables, num_samples=10, random_state=0)

####################################################################################
## Define experimentalist
####################################################################################

# Wrap the pooler so it operates on a state and writes its result to "conditions".
experimentalist = on_state(random_pool, output=["conditions"])

####################################################################################
## Define experiment runner
####################################################################################

# Synthetic experiment built from the symbolic expression sin(x).
sin_experiment = equation_experiment(
    sp.simplify('sin(x)'),
    variables.independent_variables,
    variables.dependent_variables[0],
)
sin_runner = sin_experiment.experiment_runner

# Wrap the runner so it operates on a state and writes its result to "experiment_data".
experiment_runner = on_state(sin_runner, output=["experiment_data"])

####################################################################################
## Define theorist
####################################################################################

bms_regressor = BMSRegressor(epochs=100)
theorist = estimator_on_state(bms_regressor)

####################################################################################
## Define state
####################################################################################

# Start with an empty data table that has one column per variable.
empty_data = pd.DataFrame(columns=["x", "y"])
s = StandardState(
    variables=variables,
    conditions=conditions,
    experiment_data=empty_data,
)

####################################################################################
## Cycle through the state
####################################################################################

def print_summary(state):
    """Print the number of collected datapoints and the models held by *state*."""
    print(f"Number of datapoints collected: {len(state['experiment_data'])}")
    print(f"Derived models: {state['models']}")
    print('\n')


print('Pre-Defined State:')
print_summary(s)

# Each cycle: propose conditions -> run the experiment -> fit a model.
# NOTE(review): the same random_state (42) is passed in every cycle; confirm that
# repeating the seed across cycles is intended.
for i in range(5):
    s = experimentalist(s, num_samples=10, random_state=42)
    s = experiment_runner(s, added_noise=1.0, random_state=42)
    s = theorist(s)
    print(f"\nCycle {i+1} Results:")
    print_summary(s)