pwnslinger / Cyberbullying-NLP


Voting classifier #2

Open · pwnslinger opened this issue 4 years ago

pwnslinger commented 4 years ago

@mahdi-whip needs to implement a voting classifier, which should be straightforward. Send me a PR on GitHub when you are done with it.

link: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RANSACRegressor.html
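Note that the linked RANSACRegressor is a robust regression estimator rather than a voting ensemble. For reference, a minimal sketch of the linked API; the synthetic data and all parameters here are illustrative assumptions, not from this thread:

```python
# Minimal sketch of sklearn.linear_model.RANSACRegressor
# (synthetic data; parameters are illustrative, not from this thread).
from sklearn.datasets import make_regression
from sklearn.linear_model import RANSACRegressor

X, y = make_regression(n_samples=200, n_features=1, noise=10.0, random_state=0)
reg = RANSACRegressor(random_state=0)  # default base estimator is LinearRegression
reg.fit(X, y)
print(reg.score(X, y))          # R^2 of the fitted model on the full set
print(reg.inlier_mask_.sum())   # number of samples flagged as inliers
```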

pwnslinger commented 4 years ago

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html
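For quick reference, a minimal hard-voting example of the linked API; the iris dataset and the choice of base estimators are illustrative assumptions:

```python
# Minimal hard-voting sketch of sklearn.ensemble.VotingClassifier
# (dataset and base estimators are illustrative, not from this thread).
from sklearn.datasets import load_iris
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
clf = VotingClassifier(
    estimators=[('nb', GaussianNB()), ('svc', SVC())],
    voting='hard',  # majority vote over the base models' predicted labels
)
clf.fit(X, y)
print(clf.predict(X[:5]))
```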

sogol-golafshan commented 4 years ago


```python
#!/usr/bin/env python
# compare hard voting to standalone classifiers
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.datasets import make_classification
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# create a synthetic binary classification dataset for the comparison
def get_dataset():
    X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=2)
    return X, y

# get a voting ensemble of models
def get_voting():
    # define the base models
    models = list()
    models.append(('NaiveBayes', GaussianNB()))
    models.append(('SVC', SVC()))

    # define the voting ensemble
    ensemble = VotingClassifier(estimators=models, voting='hard')
    return ensemble

# get a list of models to evaluate
def get_models():
    models = dict()
    models['NaiveBayes'] = GaussianNB()
    models['SVC'] = SVC()

    models['hard_voting'] = get_voting()
    return models

# evaluate a given model with repeated stratified k-fold cross-validation
def evaluate_model(model, X, y):
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
    return scores

# define dataset
X, y = get_dataset()
# get the models to evaluate
models = get_models()
# evaluate the models and store results
results, names = list(), list()
for name, model in models.items():
    scores = evaluate_model(model, X, y)
    results.append(scores)
    names.append(name)
    print('>%s %.3f (%.3f)' % (name, mean(scores), std(scores)))
# plot model performance for comparison
pyplot.boxplot(results, labels=names, showmeans=True)
pyplot.show()


```
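A possible follow-up, not part of the original snippet: the same ensemble can use soft voting, which averages the base models' predicted class probabilities rather than counting label votes. SVC exposes predict_proba only when constructed with probability=True; a minimal sketch under that assumption:

```python
# Soft-voting variant of the ensemble above (a sketch, not from the thread).
# SVC needs probability=True so predict_proba is available for averaging.
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

def get_soft_voting():
    models = [('NaiveBayes', GaussianNB()), ('SVC', SVC(probability=True))]
    return VotingClassifier(estimators=models, voting='soft')

# e.g. evaluate_model(get_soft_voting(), X, y) alongside the hard-voting ensemble
```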