There is an arror in the RSF model in this notebook
Survival Regression with Auton-Survival.ipynb
Here's the reproducible code
import pandas as pd
import sys
sys.path.append('../')
from auton_survival.datasets import load_dataset
Load data and features
# Load the SUPPORT dataset
outcomes, features = load_dataset(dataset='SUPPORT')
# Identify categorical (cat_feats) and continuous (num_feats) features
cat_feats = ['sex', 'dzgroup', 'dzclass', 'income', 'race', 'ca']
num_feats = ['age', 'num.co', 'meanbp', 'wblc', 'hrt', 'resp',
'temp', 'pafi', 'alb', 'bili', 'crea', 'sod', 'ph',
'glucose', 'bun', 'urine', 'adlp', 'adls']
Preprocess
import numpy as np
from sklearn.model_selection import train_test_split
# Split the SUPPORT data into training, validation, and test data
x_tr, x_te, y_tr, y_te = train_test_split(features, outcomes, test_size=0.2, random_state=1)
x_tr, x_val, y_tr, y_val = train_test_split(x_tr, y_tr, test_size=0.25, random_state=1)
print(f'Number of training data points: {len(x_tr)}')
print(f'Number of validation data points: {len(x_val)}')
print(f'Number of test data points: {len(x_te)}')
from auton_survival.preprocessing import Preprocessor
# Fit the imputer and scaler to the training data and transform the training, validation and test data
preprocessor = Preprocessor(cat_feat_strat='ignore', num_feat_strat= 'mean')
transformer = preprocessor.fit(features, cat_feats=cat_feats, num_feats=num_feats,
one_hot=True, fill_value=-1)
x_tr = transformer.transform(x_tr)
x_val = transformer.transform(x_val)
x_te = transformer.transform(x_te)
Fit RSF
from auton_survival.estimators import SurvivalModel
from auton_survival.metrics import survival_regression_metric
from sklearn.model_selection import ParameterGrid
# Define parameters for tuning the model
param_grid = {'n_estimators' : [100, 300],
'max_depth' : [3, 5],
'max_features' : ['sqrt', 'log2']
}
params = ParameterGrid(param_grid)
# Define the times for tuning the model hyperparameters and for evaluating the model
times = np.quantile(y_tr['time'][y_tr['event']==1], np.linspace(0.1, 1, 10)).tolist()
# Perform hyperparameter tuning
models = []
for param in params:
model = SurvivalModel('rsf', random_seed=8, n_estimators=param['n_estimators'], max_depth=param['max_depth'], max_features=param['max_features'])
# The fit method is called to train the model
model.fit(x_tr, y_tr)
# Obtain survival probabilities for validation set and compute the Integrated Brier Score
predictions_val = model.predict_survival(x_val, times)
metric_val = survival_regression_metric('ibs', y_val, predictions_val, times, y_tr)
models.append([metric_val, model])
# Select the best model based on the mean metric value computed for the validation set
metric_vals = [i[0] for i in models]
first_min_idx = metric_vals.index(min(metric_vals))
model = models[first_min_idx][1]
There is an arror in the RSF model in this notebook Survival Regression with Auton-Survival.ipynb
Here's the reproducible code
Load data and features
Preprocess
Fit RSF
AttributeError