#!/usr/bin/env python
# coding: utf-8
# In[5]:
# compare hard voting to standalone classifiers
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from matplotlib import pyplot
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.svm import SVC
# NOTE(review): this module-level SVC instance is not referenced by any of the
# code visible in this file (each helper below constructs its own SVC()) --
# confirm whether it is still needed.
svc = SVC()
# get the dataset
def get_dataset():
    """Generate the synthetic classification dataset used for the comparison.

    Calls ``make_classification`` for 1000 samples with 20 features
    (15 informative, 5 redundant) and a fixed ``random_state`` of 2, so
    repeated calls use the same generator seed.

    Returns:
        The ``(X, y)`` pair produced by ``make_classification``.
    """
    features, labels = make_classification(
        n_samples=1000,
        n_features=20,
        n_informative=15,
        n_redundant=5,
        random_state=2,
    )
    return features, labels
# get a voting ensemble of models
def get_voting():
    """Build the hard-voting ensemble over the two base classifiers.

    The ensemble wraps a fresh ``GaussianNB`` (named ``'NaiveBayes'``) and a
    fresh ``SVC`` (named ``'SVC'``), both with default settings.

    Returns:
        An unfitted ``VotingClassifier`` configured with ``voting='hard'``.
    """
    # (name, estimator) pairs, in the order they are handed to the ensemble
    base_estimators = [
        ('NaiveBayes', GaussianNB()),
        ('SVC', SVC()),
    ]
    return VotingClassifier(estimators=base_estimators, voting='hard')
# get a list of models to evaluate
def get_models():
    """Collect every model that should be evaluated, keyed by display name.

    Returns:
        A dict mapping ``'NaiveBayes'`` and ``'SVC'`` to fresh standalone
        classifiers and ``'hard_voting'`` to the ensemble from
        ``get_voting()``, in that insertion order.
    """
    return {
        'NaiveBayes': GaussianNB(),
        'SVC': SVC(),
        'hard_voting': get_voting(),
    }
# evaluate a given model using cross-validation
def evaluate_model(model):
    """Score ``model`` by accuracy under repeated stratified k-fold CV.

    The data is not passed in: this function reads the module-level ``X``
    and ``y``, so they must be defined before it is called.

    Args:
        model: The estimator to hand to ``cross_val_score``.

    Returns:
        Whatever ``cross_val_score`` returns for the 10-split, 3-repeat
        splitter (seeded with ``random_state=1``).
    """
    splitter = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    # error_score='raise' is passed so a failing fit is raised rather than scored
    return cross_val_score(
        model,
        X,
        y,
        scoring='accuracy',
        cv=splitter,
        n_jobs=-1,
        error_score='raise',
    )
# define dataset (kept at module level: evaluate_model reads X and y as globals)
X, y = get_dataset()
# get the models to evaluate
models = get_models()
# evaluate the models and store results
results, names = [], []
for name, model in models.items():
    scores = evaluate_model(model)
    # record the scores and label; without this the box plot below receives
    # two empty lists and draws nothing
    results.append(scores)
    names.append(name)
    print('>%s %.3f (%.3f)' % (name, mean(scores), std(scores)))
# plot model performance for comparison: one box per model, with a marker at the mean
# NOTE(review): newer matplotlib releases prefer `tick_labels` over `labels`;
# `labels` is kept here for compatibility with older versions -- confirm the
# target matplotlib version before changing it.
pyplot.boxplot(results, labels=names, showmeans=True)
# display the figure when this file is run as a plain script
pyplot.show()
# TODO(@mahdi-whip): implement a voting classifier (this should be easy) and
# send me a PR on GitHub when you are done with it.