Open simplysameer333 opened 3 years ago
import pickle
# Example payload that is written to disk and read back.
your_data = {'foo': 'bar'}

# Serialize the payload in binary mode using the newest protocol this
# interpreter supports.
with open('filename.pickle', 'wb') as handle:
    pickle.dump(your_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Read the payload back from the same file.
# NOTE: only unpickle files you trust; loading a pickle can execute
# arbitrary code.
with open('filename.pickle', 'rb') as handle:
    unserialized_data = pickle.load(handle)
print(your_data == unserialized_data)
# The advantage of HIGHEST_PROTOCOL is that files get smaller, which sometimes makes unpickling much faster.
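# Important notice: with older pickle protocols the maximum size of a pickle file is about 2 GB; protocol 4 (Python 3.4+) and later add support for very large objects.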
import xgboost as xg

# Boosted-tree classifier with a binary logistic objective, trees limited to
# depth 4 and a fixed random seed.
xgb = xg.XGBClassifier(
    objective='binary:logistic',
    max_depth=4,
    random_state=200,
)

# Fit on the training split, then score on the held-out split. The score is
# not stored; presumably it is meant to be shown as notebook cell output.
xgb.fit(X_train, y_train)
xgb.score(X_test, y_test)
# Create the parameter grid based on the results of random search.
# Candidate values for each hyperparameter; the full grid is
# 5 * 5 * 3 * 5 = 375 combinations.
params = {
    'max_depth': [1, 2, 5, 10, 20],
    'min_samples_leaf': [5, 10, 20, 50, 100],
    'max_features': [2, 3, 4],
    'n_estimators': [10, 30, 50, 100, 200],
}
# Instantiate the grid search model.
# Grid search over `params` for `classifier_rf`: 4-fold cross-validation,
# candidates ranked by accuracy, n_jobs=-1 to run fits in parallel, and
# verbose=1 for progress output.
grid_search = GridSearchCV(
    estimator=classifier_rf,
    param_grid=params,
    cv=4,
    n_jobs=-1,
    verbose=1,
    scoring="accuracy",
)
# Import the packages required for visualization.
# NOTE: `sklearn.externals.six` was removed from scikit-learn (deprecated in
# 0.21, removed in 0.23), so `StringIO` is imported from the standalone `six`
# package only.
import graphviz
import pydotplus
from IPython.display import Image
from six import StringIO
from sklearn.tree import export_graphviz
def get_dt_graph(classifier):
    """Build a pydotplus graph for a decision tree.

    The tree is exported as Graphviz DOT text into an in-memory buffer,
    and that text is then parsed by pydotplus.

    Args:
        classifier: Tree estimator passed straight to ``export_graphviz``.

    Returns:
        The object returned by ``pydotplus.graph_from_dot_data``.
    """
    # NOTE(review): feature names are read from the module-level ``X`` and
    # the class labels are hard-coded; confirm the label order matches the
    # target encoding used for training.
    dot_buffer = StringIO()
    export_graphviz(
        classifier,
        out_file=dot_buffer,
        filled=True,
        rounded=True,
        feature_names=X.columns,
        class_names=['Disease', "No Disease"],
    )
    dot_source = dot_buffer.getvalue()
    return pydotplus.graph_from_dot_data(dot_source)
# Build the graph for `sample_tree`, then wrap its PNG rendering in a
# 700x700 IPython image.
gph = get_dt_graph(sample_tree)
Image(gph.create_png(), width=700, height=700)
from sklearn.metrics import confusion_matrix, accuracy_score


def _print_split_report(split_name, y_true, y_pred):
    """Print the accuracy and confusion matrix for one data split."""
    print(split_name + " Accuracy :", accuracy_score(y_true, y_pred))
    print(split_name + " Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))


def evaluate_model(classifier):
    """Print train and test accuracy and confusion matrices for a classifier.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. Predictions are computed once per split and reused for
    both metrics.

    Args:
        classifier: Fitted estimator exposing ``predict``.

    Returns:
        None. The report is written to standard output.
    """
    _print_split_report("Train", y_train, classifier.predict(X_train))
    print("-" * 50)
    _print_split_report("Test", y_test, classifier.predict(X_test))