"Toward Talent Scientist: Sharing and Learning Together" --- Jingwei Too
`Demo_PSO` provides an example of how to apply PSO on a benchmark dataset. The main function `jfs` is adopted to perform feature selection. You may switch the algorithm by changing the `pso` in `from FS.pso import jfs` to other abbreviations (the shared call pattern is sketched below):

- If you want to use PSO: `from FS.pso import jfs`
- If you want to use DE: `from FS.de import jfs`
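Since every algorithm module exposes the same `jfs` entry point, only the import line needs to change. Below is a minimal sketch of the shared call pattern using toy random data rather than a real benchmark; the `opts` keys shown are the PSO ones from Example 1 further down:

```python
import numpy as np
from sklearn.model_selection import train_test_split
from FS.pso import jfs   # swap 'pso' for 'de', 'ga', ... to change the optimizer

# toy data just to illustrate the interface: 100 instances, 10 features
feat  = np.random.rand(100, 10)
label = np.random.randint(0, 2, 100)

xt, xv, yt, yv = train_test_split(feat, label, test_size=0.3, stratify=label)
fold = {'xt': xt, 'yt': yt, 'xv': xv, 'yv': yv}

# common keys (k, fold, N, T) plus PSO-specific keys (w, c1, c2)
opts = {'k': 5, 'fold': fold, 'N': 10, 'T': 100, 'w': 0.9, 'c1': 2, 'c2': 2}

fmdl = jfs(feat, label, opts)   # identical call for every FS.<alg> module
print(fmdl['sf'], fmdl['nf'])   # selected feature indices and their count
```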
**Input**

- `feat`: feature vector matrix (Instance x Features)
- `label`: label matrix (Instance x 1)
- `opts`: parameter settings
  - `N`: number of solutions / population size (for all methods)
  - `T`: maximum number of iterations (for all methods)
  - `k`: k-value in k-nearest neighbor

**Output**

- `Acc`: accuracy of validation model
- `fmdl`: feature selection model (it contains several results)
  - `sf`: index of selected features
  - `nf`: number of selected features
  - `c`: convergence curve

**Example 1: Particle Swarm Optimization (PSO)**

```python
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from FS.pso import jfs # change this to switch algorithm
import matplotlib.pyplot as plt
# load data
data = pd.read_csv('ionosphere.csv')
data = data.values
feat = np.asarray(data[:, 0:-1]) # feature vector
label = np.asarray(data[:, -1]) # label vector
# split data into train & validation (70 -- 30)
xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.3, stratify=label)
fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}
# parameters
k  = 5      # k-value in KNN
N  = 10     # number of particles
T  = 100    # maximum number of iterations
w  = 0.9    # inertia weight
c1 = 2      # cognitive acceleration factor
c2 = 2      # social acceleration factor
opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'w':w, 'c1':c1, 'c2':c2}
# perform feature selection
fmdl = jfs(feat, label, opts)
sf = fmdl['sf']
# model with selected features
num_train = np.size(xtrain, 0)
num_valid = np.size(xtest, 0)
x_train = xtrain[:, sf]              # keep only the selected features
y_train = ytrain.reshape(num_train)  # flatten labels to 1-D (fixes shape bug)
x_valid = xtest[:, sf]
y_valid = ytest.reshape(num_valid)   # flatten labels to 1-D (fixes shape bug)
mdl = KNeighborsClassifier(n_neighbors = k)
mdl.fit(x_train, y_train)
# accuracy
y_pred = mdl.predict(x_valid)
Acc = np.sum(y_valid == y_pred) / num_valid
print("Accuracy:", 100 * Acc)
# number of selected features
num_feat = fmdl['nf']
print("Feature Size:", num_feat)
# plot convergence
curve = fmdl['c']
curve = curve.reshape(np.size(curve, 1))  # flatten the (1, T) curve to 1-D
x = np.arange(0, opts['T'], 1.0) + 1.0
fig, ax = plt.subplots()
ax.plot(x, curve, 'o-')
ax.set_xlabel('Number of Iterations')
ax.set_ylabel('Fitness')
ax.set_title('PSO')
ax.grid()
plt.show()
```

**Example 2: Genetic Algorithm (GA)**

```python
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from FS.ga import jfs # change this to switch algorithm
import matplotlib.pyplot as plt
# load data
data = pd.read_csv('ionosphere.csv')
data = data.values
feat = np.asarray(data[:, 0:-1])
label = np.asarray(data[:, -1])
# split data into train & validation (70 -- 30)
xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.3, stratify=label)
fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}
# parameters
k  = 5      # k-value in KNN
N  = 10     # number of chromosomes
T  = 100    # maximum number of generations
CR = 0.8    # crossover rate
MR = 0.01   # mutation rate
opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'CR':CR, 'MR':MR}
# perform feature selection
fmdl = jfs(feat, label, opts)
sf = fmdl['sf']
# model with selected features
num_train = np.size(xtrain, 0)
num_valid = np.size(xtest, 0)
x_train = xtrain[:, sf]              # keep only the selected features
y_train = ytrain.reshape(num_train)  # flatten labels to 1-D (fixes shape bug)
x_valid = xtest[:, sf]
y_valid = ytest.reshape(num_valid)   # flatten labels to 1-D (fixes shape bug)
mdl = KNeighborsClassifier(n_neighbors = k)
mdl.fit(x_train, y_train)
# accuracy
y_pred = mdl.predict(x_valid)
Acc = np.sum(y_valid == y_pred) / num_valid
print("Accuracy:", 100 * Acc)
# number of selected features
num_feat = fmdl['nf']
print("Feature Size:", num_feat)
# plot convergence
curve = fmdl['c']
curve = curve.reshape(np.size(curve, 1))  # flatten the (1, T) curve to 1-D
x = np.arange(0, opts['T'], 1.0) + 1.0
fig, ax = plt.subplots()
ax.plot(x, curve, 'o-')
ax.set_xlabel('Number of Iterations')
ax.set_ylabel('Fitness')
ax.set_title('GA')
ax.grid()
plt.show()
```
Use `opts` to set the algorithm-specific parameters (a hedged example follows the table). The available algorithms and their abbreviations are listed below.

| No. | Abbreviation | Name | Year | Extra Parameters |
|---|---|---|---|---|
| 13 | hho | Harris Hawk Optimization | 2019 | No |
| 12 | ssa | Salp Swarm Algorithm | 2017 | No |
| 11 | woa | Whale Optimization Algorithm | 2016 | Yes |
| 10 | sca | Sine Cosine Algorithm | 2016 | Yes |
| 09 | ja | Jaya Algorithm | 2016 | No |
| 08 | gwo | Grey Wolf Optimizer | 2014 | No |
| 07 | fpa | Flower Pollination Algorithm | 2012 | Yes |
| 06 | ba | Bat Algorithm | 2010 | Yes |
| 05 | fa | Firefly Algorithm | 2010 | Yes |
| 04 | cs | Cuckoo Search Algorithm | 2009 | Yes |
| 03 | de | Differential Evolution | 1997 | Yes |
| 02 | pso | Particle Swarm Optimization | 1995 | Yes |
| 01 | ga | Genetic Algorithm | - | Yes |
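For algorithms marked "Yes" in the Extra Parameters column, add the algorithm-specific keys to `opts` alongside the common ones (`k`, `fold`, `N`, `T`). As a hedged sketch for DE, reusing `feat`, `label`, and `fold` from the examples above: the key names `'CR'` and `'F'` are assumptions based on standard DE notation and the GA example, so check `FS/de.py` for the authoritative names.

```python
from FS.de import jfs   # Differential Evolution

# Assumed DE-specific keys (verify against FS/de.py):
#   'CR' -- crossover rate, 'F' -- scaling (weighting) factor
opts = {'k': 5, 'fold': fold, 'N': 10, 'T': 100, 'CR': 0.9, 'F': 0.5}

fmdl = jfs(feat, label, opts)
print("Feature Size:", fmdl['nf'])
```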