Open maruf78901 opened 4 years ago
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
df = pd.read_csv(r"C:\Users\mdmar\Downloads\Thesis\Data/1024.csv")
Y = df['target'].values
Y = Y.astype('int')
X = df.drop(labels=['target'], axis=1)
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=30)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=70, random_state=40)
model.fit(X_train, Y_train)
prediction_test = model.predict(X_test)
print(prediction_test)
from sklearn import metrics
print("Accuracy:", metrics.accuracy_score(Y_test, prediction_test)*100, '%')
print(model.feature_importances_)
print(model.feature_importances_*100, '%')
Out :
[1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 0 0 1 1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 1 0 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 1]
Accuracy: 97.72727272727273 %
[0.1031516 0.0358877 0.11881306 0.09649581 0.09640051 0.01116345 0.02173903 0.13786201 0.06464796 0.13677327 0.05528801 0.12177758]
[10.31516006 3.58876957 11.88130644 9.64958137 9.64005136 1.11634483 2.17390296 13.78620058 6.46479635 13.67732725 5.52880149 12.17775775] %
How can I do hyperparameter tuning with K-fold cross-validation for this model?
You can probably try the following code:
from sklearn.model_selection import cross_val_score
cross_val_score(model, X, Y, cv=3)
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
df = pd.read_csv(r"C:\Users\mdmar\Downloads\Thesis\Data/1024.csv")
print(df.head())
sizes = df['target'].value_counts(sort=1)
print(sizes)
# Define dependent variable
Y = df['target'].values
Y = Y.astype('int')
# Define independent variable
X = df.drop(labels=['target'], axis=1)
# Split dataset for train and test
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=30)
print(X_train)
# Import Random Forest
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=70, random_state=40)
model.fit(X_train, Y_train)
prediction_test = model.predict(X_test)
print(prediction_test)
from sklearn import metrics
print("Accuracy:", metrics.accuracy_score(Y_test, prediction_test)*100, '%')
print(model.feature_importances_)
print(model.feature_importances_*100, '%')
Out :
[1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 0 0 1 1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 1 0 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 1]
Accuracy: 97.72727272727273 %
[0.1031516 0.0358877 0.11881306 0.09649581 0.09640051 0.01116345 0.02173903 0.13786201 0.06464796 0.13677327 0.05528801 0.12177758]
[10.31516006 3.58876957 11.88130644 9.64958137 9.64005136 1.11634483 2.17390296 13.78620058 6.46479635 13.67732725 5.52880149 12.17775775] %
How can I do hyperparameter tuning with K-fold cross-validation for this model?