Open rflperry opened 4 years ago
When `oob_score=True`, the classification accuracy on the test set doesn't match the accuracy obtained with `oob_score=False`, and it also varies from run to run even with `random_state` set.
# Reproduction script: fit two rerfClassifier forests that differ only in
# `oob_score` (same `random_state`) on the same training split.
import pandas as pd
from rerf.rerfClassifier import rerfClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load dataset
iris = datasets.load_iris()

# NOTE(review): the report uses `data` without ever defining it. It is
# reconstructed here from the iris arrays using the column names the script
# indexes below -- confirm this matches what the reporter actually ran.
data = pd.DataFrame(
    {
        "sepal length": iris.data[:, 0],
        "sepal width": iris.data[:, 1],
        "petal length": iris.data[:, 2],
        "petal width": iris.data[:, 3],
        "species": iris.target,
    }
)

X = data[["sepal length", "sepal width", "petal length", "petal width"]]  # Features
y = data["species"]  # Labels

# Split dataset into training set and test set: 70% training and 30% test.
# NOTE(review): no `random_state` is passed to train_test_split, so the split
# itself changes between runs; that alone can make the accuracies vary from
# run to run, independent of the classifiers' seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Identical configuration except for `oob_score`.
clf1 = rerfClassifier(n_estimators=10, oob_score=False, random_state=2)
clf2 = rerfClassifier(n_estimators=10, oob_score=True, random_state=2)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
rerfClassifier(feature_combinations=1.5, image_height=None, image_width=None, max_depth=None, max_features='auto', min_samples_split=1, n_estimators=10, n_jobs=None, oob_score=True, patch_height_max=None, patch_height_min=1, patch_width_max=None, patch_width_min=1, projection_matrix='RerF', random_state=2)
from sklearn import metrics

# Predict on the same held-out split with both fitted classifiers.
y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)

# Compare test accuracy of the oob_score=False and oob_score=True models.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred1))
print("Accuracy oob:", metrics.accuracy_score(y_test, y_pred2))
Accuracy: 0.9555555555555556
Accuracy oob: 0.9333333333333333
When `oob_score=True`, the classification accuracy on the test set doesn't match the accuracy obtained with `oob_score=False`, and it also varies from run to run even with `random_state` set.