Sandy4321 commented 6 years ago

Even after adding sample weights, performance is still low for the basic example from http://srome.github.io/Leveraging-Factorization-Machines-for-Sparse-Data-and-Supervised-Visualization/

performance: Factorization Machine Error: 0.15506159614354575 precision recall f1-score support

      0       0.83      0.99      0.91      2786
      1       0.96      0.41      0.57       948

avg / total 0.86 0.84 0.82 3734

Confusion Matrix Predicted False True all Actual
False 2771 15 2786 True 564 384 948 all 3335 399 3734

simple confusion matrix Predicted 0 1 Actual
0 2771 15 1 564 384 len v = 6 len v[0] = 1048576 len w1 = 1048576 len w0 = 1

code is:

S_May6_NLP_data_Theano_FM_sparse.py

https://github.com/dstein64/PyFactorizationMachines/issues/5

reg = regularizers.L2(0, 0, .01)

fm_classifier = Classifier(X_t.shape[1] , k=2, X_format="csr")

fm_classifier .fit(X_t, y_t, regularizer=reg)

https://github.com/dstein64/PyFactorizationMachines/issues/6

v has the parameters for the interaction terms, w0 has the bias term parameter, and w1 has the first-order term parameters.

These can be accessed with the get_weights method. The example below shows how to access the v parameters,

and also see how many there are, which is a function of 1) the number of dimensions in your dataset and 2) the value of k.

w0, w1, v = fm_classifier.get_weights()

print("v:")

print(v)

print("shape(v)")

print(v.shape)

print("size(v)")

print(v.size)

S_May3_changed_hashing_size_Theano_FM_sparse.py

C:\Windows\system32>conda install theano <- good

conda install m2w64-toolchain <-destroys theano

S_May2_changed_hashing_size_Theano_FM_sparse.py

S_May2_hashing_FM_sparse.py

S_apr25_FM_sparse

code from

https://github.com/dstein64/PyFactorizationMachines

data example from

http://srome.github.io/Leveraging-Factorization-Machines-for-Sparse-Data-and-Supervised-Visualization/

if 0: import os

os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu,floatX=float32"

os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=cuda,floatX=float32"

import pickle import pandas as pd from sklearn.feature_extraction import FeatureHasher

original from PyFactorizationMachines.src.pyfm import FactorizationMachineClassifier

from pyfms import Classifier from pyfms import regularizers from sklearn.model_selection import train_test_split from pandas_ml import ConfusionMatrix import pandas as pd import numpy as np

parameters

flag_regulariser= 'L2' #'L1' # L2 n_V_dimensions = 6 flag_0_use_all_hash_features_or_1_set_number_of_features = 0 number_of_features_for_Hasher = 12345 #1234 # 123#12345

# Toggle: 1 fetches the dataset and refreshes the on-disk cache,
# 0 re-uses the pickle written by an earlier run.
if 1:
    from sklearn.datasets import fetch_20newsgroups
    twenty_train = fetch_20newsgroups(subset='train', shuffle=True, random_state=42)

    # The context manager closes the cache file even if pickling fails.
    with open(b"twenty_data.pkl", "wb") as filehandler:
        pickle.dump(twenty_train, filehandler)
else:
    # Only load pickles this script wrote itself; unpickling runs code.
    with open("twenty_data.pkl", 'rb') as file_name:
        twenty_train = pickle.load(file_name)

twenty_train.target_names

def define_label(x, target_names):
    """Return the binary label for the class index *x*.

    Args:
        x: Index into *target_names*.
        target_names: Sequence of class-name strings.

    Returns:
        1 if the selected class name contains the substring 'comp',
        otherwise 0.
    """
    name = target_names[x]
    return 1 if 'comp' in name else 0

target = pd.Series(twenty_train.target).apply(lambda x : define_label(x,twenty_train.target_names)) target.mean() # Prevalence of the label

q = 3

# Deletion table for the characters removed during basic cleaning.
_CLEAN_TEXT_DELETE = str.maketrans('', '', '\n\t<>|')


def clean_text(text):
    """Apply basic cleaning to *text* and return its tokens.

    Newlines, tabs, '<', '>' and '|' are deleted (not replaced by a space),
    the result is split on single spaces, and only tokens longer than three
    characters are kept.

    Args:
        text: Raw document string.

    Returns:
        List of the surviving tokens, in their original order.
    """
    text = text.translate(_CLEAN_TEXT_DELETE)
    return [x for x in text.split(' ') if len(x) > 3]

X = [clean_text(x) for x in twenty_train.data]

q=1

Hash away!

if flag_0_use_all_hash_features_or_1_set_number_of_features: fh = FeatureHasher(input_type='string',n_features= number_of_features_for_Hasher, non_negative=True) else: fh = FeatureHasher(input_type='string', non_negative=True) # full number of features X_t = fh.transform(X)

Bin the inputs so that the "interaction" terms are more interpretable

X_bin = X_t.copy() X_bin[X_bin >= 1] = 1 X_train, X_test, y_train, y_test = train_test_split(X_bin, target, test_size=0.33, random_state=42)

Re-weight instances so that each class gets equal total weighting.

class_count_lookup = dict(zip(*np.unique(y_train, return_counts=True))) sample_weight = np.array([1.0 / class_count_lookup[_y] for _y in y_train])

if flag_regulariser == 'L2': reg = regularizers.L2(0, 0, .01) if flag_regulariser == 'L1': reg = regularizers.L1(0, 0, .01)

print('regularizer is ', flag_regulariser)
fm_classifier = Classifier(X_t.shape[1], k=n_V_dimensions, X_format="csr")
if 0:
    # Baseline call without regularizer or sample weights, kept for reference.
    fm_classifier.fit(X_train, y_train.values, verbosity=5, nb_epoch=50)  # original
else:
    # Train with the selected regularizer and the per-sample class weights.
    fm_classifier.fit(
        X_train,
        y_train.values,
        verbosity=5,
        nb_epoch=50,
        regularizer=reg,
        sample_weight=sample_weight,
    )

original as in example.py

type(y_train)

<class 'numpy.ndarray'>

original f.fit(X_train, y_train, verbosity=50, nb_epoch=200)

original f.fit(X_train, y_train, verbosity=5, nb_epoch=20)

q = 4
from sklearn.metrics import accuracy_score


def error_score(y_true, y_pred):
    """Return the misclassification rate, i.e. 1 minus the accuracy score.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    """
    return 1.0 - accuracy_score(y_true, y_pred)


print()
print('Factorization Machine Error: {}'.format(
    error_score(y_test, fm_classifier.predict(X_test))))
q = 6

from sklearn.metrics import classification_report print( classification_report( y_test, fm_classifier.predict(X_test) ) )

q=6

cm = ConfusionMatrix(y_test.values, fm_classifier.predict(X_test) ) print('Confusion Matrix') print(cm)

y_actu = pd.Series(y_test.values, name='Actual') y_pred = pd.Series(fm_classifier.predict(X_test), name='Predicted') df_confusion = pd.crosstab(y_actu, y_pred) print(' \n \n simple confusion matrix') print(df_confusion)

cm.print_stats()

q=8 v = fm_classifier.v.eval() print('len v =', len(v)) print('len v[0] =', len(v[0])) w1 = fm_classifier.w1.eval() print('len w1 = ', len(w1) ) w0 = fm_classifier.w0.eval() print('len w0 = ', len(w0) ) q=7 ''' y_actu = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2] y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2] cm = ConfusionMatrix(y_actu, y_pred) cm.print_stats()

q=8

import pandas as pd y_actu = pd.Series([2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2], name='Actual') y_pred = pd.Series([0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2], name='Predicted') df_confusion = pd.crosstab(y_actu, y_pred)

q=7

'''

dstein64 commented 6 years ago

@Sandy4321, I'm not familiar with the data you're working with, and I'm not sure what the expected performance is. To boost performance, it may be worthwhile to tune the hyperparameters, like k and the regularization parameters (e.g., adding regularization to the first-order weights).

Given that this is not a problem I'm familiar with, and there's nothing that suggests a particular bug to be fixed, I'm closing this issue as I have no way to address it.

Sandy4321 commented 6 years ago

Thanks, you are right: changing the regularisation and the size of the hash table helps (please see below). Still, performance is not the best, though we do not know the potential performance for this data...

The question is how you tuned your code to make sure performance meets the potential performance of FM? For example, you may have hidden floating bugs: the code is not crashing, but performance is low? There is one way to check: run this data, or another dataset, on the original FM code... By the way, you wrote "adding regularization to the first-order weights"; does it mean you do not code regularization for the first-order weights?

flag_regulariser= 'L1' n_V_dimensions = 6 flag_0_use_all_hash_features_or_1_set_number_of_features = 1 number_of_features_for_Hasher = 12345 n_epoch = 250 Epoch 245/250 loss: 0.0007158995819372165, min_loss: 0.0004515586029462726 Epoch 250/250 loss: 0.0006929234543094471, min_loss: 0.0004515586029462726

Factorization Machine Error: 0.08810926620246384 precision recall f1-score support

      0       0.92      0.97      0.94      2786
      1       0.90      0.74      0.81       948

avg / total 0.91 0.91 0.91 3734

Confusion Matrix Predicted False True all Actual
False 2704 82 2786 True 247 701 948 all 2951 783 3734

simple confusion matrix Predicted 0 1 Actual
0 2704 82 1 247 701

In any case thank you very much for code and cooperation

dstein64 commented 6 years ago

"The question is how you tuned your code to make sure performance meets potential performance for FM?"

There was no particular tuning. The code implements the algorithm from the paper on factorization machines. However, rather than specifying gradients directly for gradient descent, Theano calculates them automatically (the paper includes the gradient formulas, but these were not used directly). The code itself was not particularly tuned, as it relies on Theano for that. The adam optimization algorithm was added to try to improve gradient descent.

"For example, you may have hidden floating bugs, a code is not crushing but performance is low?"

I have not seen any indication of this.

"There is one way: to run this data or another data on original FM code"

I do not currently have time to do that.

"by the way, you wrote adding regularization to the first-order weights does it mean you do not code regularization to the first-order weights?"

The code supports regularizing the first order weights. The following code adds regularization to the interaction weights. reg = pyfms.regularizers.L2(0, 0, .01) To regularize the first order weights, include a non-zero argument for the second argument. For example: reg = pyfms.regularizers.L2(0, .01, .01) The bias terms can also be regularized with a non-zero argument for the first argument, but I don't recommend using that functionality.

Sandy4321 commented 6 years ago

The code supports regularizing the first order weights. The following code adds regularization to the interaction weights. reg = pyfms.regularizers.L2(0, 0, .01) To regularize the first order weights, include a non-zero argument for the second argument. For example: reg = pyfms.regularizers.L2(0, .01, .01)

Great, thanks, I had not noticed this.

Sandy4321 commented 6 years ago

by the way, it would be helpful to show after each epoch ConfusionMatrix as well

dstein64 commented 6 years ago

"by the way, it would be helpful to show after each epoch ConfusionMatrix as well"

@Sandy4321, the following code would print a confusion matrix every 5000 epochs during training (with 20 iterations of the outer loop). This can be adjusted to accommodate your specific use-case by changing the outer loop number of iterations, the number of epochs, and how and which data is used for calculating the confusion matrix.

# Train in 20 rounds of 5000 epochs each. The evaluation code goes at the end
# of the loop body, so it runs once after every round of training.
for _ in range(20):
    fm_classifier.fit(X_train, y_train, nb_epoch=5000)
    # <INSERT CUSTOM CODE TO GENERATE AND PRINT CONFUSION MATRIX>

Sandy4321 commented 6 years ago

Great, thanks. Just to make it clear: (1) does it mean each call starts not from new weights but from the previously calculated weights? (2) Should I use predict after each run, like this:

for _ in range(20):
    fm_classifier.fit(X_train, y_train, nb_epoch=5000)

    # Report the error after every training round, not once after the loop.
    print('Factorization Machine Error: {}'.format(
        error_score(y_test, fm_classifier.predict(X_test))))

thanks

dstein64 commented 6 years ago

"each call starts not from new weights but from previously calculated weight?"

That's correct. The weights are initialized when the pyfms.Classifier is constructed, not when the fit method is called.

"should I use predict after each run"

That looks good, but I'm not sure what your intended use case is. You had mentioned confusion matrices, which would require more code.

Also, be careful, as tuning your model towards your test set will inflate the final test set accuracies. A workaround could incorporate a validation scheme that doesn't use test data (e.g., cross validation, or holding out some of the training data for validation).

Sandy4321 commented 6 years ago

but how without fm_classifier.predict(X_test) to build confusion matrix?

dstein64 commented 6 years ago

@Sandy4321, I wasn't suggesting that predictions wouldn't be required. Rather, I was suggesting that more code, using the predictions, would be required to calculate confusion matrices.

dstein64 commented 6 years ago

This is getting outside the scope of this issue, or the project, but here's an example that might help.

import numpy as np

# Ground-truth and predicted labels for six samples.
actual = np.array([0,0,1,1,0,1])
predict = np.array([0,0,0,1,1,1])

# Map each label to its row/column position in the matrix.
labels = [0,1]
label_idx = {label: idx for idx, label in enumerate(labels)}
n = len(labels)

# M[i, j] counts the samples whose actual label is labels[i] and whose
# predicted label is labels[j].
M = np.zeros((n, n), dtype=int)
for _actual, _predict in zip(actual, predict):
    M[label_idx[_actual], label_idx[_predict]] += 1

# print() as a function: the Python 2 statement form is a SyntaxError on Python 3.
print(M)

Sandy4321 commented 6 years ago

I see, thanks. There is no difference between the first run and the last run.

Is it possible to use different optimisers, not only Adam as it is for now?

run counter = 1 Epoch 10/30 loss: 0.10203997190742001, min_loss: 0.10203997190742001 Epoch 20/30 loss: 0.0403944697758382, min_loss: 0.0403944697758382 Epoch 30/30 loss: 0.019172531796430914, min_loss: 0.019172531796430914 Confusion Matrix Predicted False True all Actual
False 2681 105 2786 True 389 559 948 all 3070 664 3734 precision recall f1-score support

      0       0.87      0.96      0.92      2786
      1       0.84      0.59      0.69       948

avg / total 0.87 0.87 0.86 3734

run counter = 7 Epoch 10/30 loss: 0.0002019931039399595, min_loss: 0.00015794413641505192 Epoch 20/30 loss: 0.00018336819717181644, min_loss: 0.00015794413641505192 Epoch 30/30 loss: 0.00016183750885412254, min_loss: 0.00015794413641505192 Confusion Matrix Predicted False True all Actual
False 2566 220 2786 True 358 590 948 all 2924 810 3734 precision recall f1-score support

      0       0.88      0.92      0.90      2786
      1       0.73      0.62      0.67       948

avg / total 0.84 0.85 0.84 3734

Sandy4321 commented 6 years ago

full code is below

S_May14_NLP_data_Theano_FM_sparse_weights.py

S_May18_NLP_data_Theano_FM_sparse_weights.py

@Sandy4321, the following code would print a confusion matrix every 5000 epochs during training (with 20 iterations of the outer loop). This can be adjusted to accommodate your specific use-case by changing the outer loop number of iterations, the number of epochs, and how and which data is used for calculating the confusion matrix.

for _ in range(20):

# fm_classifier.fit(X_train, y_train, nb_epoch=5000)
#  <INSERT CUSTOM CODE TO GENERATE AND PRINT CONFUSION MATRIX>

S_May14_NLP_data_Theano_FM_sparse_weights.py

added

w_regularisatoin_coeff = 0.01

v_regularisatoin_coeff = 0.01

S_May14_NLP_data_Theano_FM_sparse_weights.py

added

weights to address unbalanced data

S_May6_NLP_data_Theano_FM_sparse.py

https://github.com/dstein64/PyFactorizationMachines/issues/5

reg = regularizers.L2(0, 0, .01)

fm_classifier = Classifier(X_t.shape[1] , k=2, X_format="csr")

fm_classifier .fit(X_t, y_t, regularizer=reg)

https://github.com/dstein64/PyFactorizationMachines/issues/6

v has the parameters for the interaction terms, w0 has the bias term parameter, and w1 has the first-order term parameters.

These can be accessed with the get_weights method. The example below shows how to access the v parameters,

and also see how many there are, which is a function of 1) the number of dimensions in your dataset and 2) the value of k.

w0, w1, v = fm_classifier.get_weights()

print("v:")

print(v)

print("shape(v)")

print(v.shape)

print("size(v)")

print(v.size)

S_May3_changed_hashing_size_Theano_FM_sparse.py

C:\Windows\system32>conda install theano <- good

conda install m2w64-toolchain <-destroys theano

S_May2_changed_hashing_size_Theano_FM_sparse.py

S_May2_hashing_FM_sparse.py

S_apr25_FM_sparse

code from

https://github.com/dstein64/PyFactorizationMachines

data example from

http://srome.github.io/Leveraging-Factorization-Machines-for-Sparse-Data-and-Supervised-Visualization/

if 0: import os

os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu,floatX=float32"

os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=cuda,floatX=float32"

import pickle import pandas as pd from sklearn.feature_extraction import FeatureHasher

original from PyFactorizationMachines.src.pyfm import FactorizationMachineClassifier

from pyfms import Classifier from pyfms import regularizers from sklearn.model_selection import train_test_split from pandas_ml import ConfusionMatrix import pandas as pd import numpy as np from sklearn.metrics import classification_report from sklearn.metrics import accuracy_score

parameters

flag_regulariser= 'L1' # 'L2' # 'L1' # 'L2' # 'L1' # L2 n_V_dimensions = 6# 20 flag_0_use_all_hash_features_or_1_set_number_of_features = 1 number_of_features_for_Hasher = 12345 # 123456 # 12345 # 1234 # 12345 # 1234 # 123# 12345 n_epoch = 30 # 100# 250 w_regularisatoin_coeff = 0.01 v_regularisatoin_coeff = 0# 0.1 # 0.01

''' L1 flag_regulariser= 'L1' # 'L2' # 'L1' # 'L2' # 'L1' # L2 n_V_dimensions = 6# 20 flag_0_use_all_hash_features_or_1_set_number_of_features = 1 number_of_features_for_Hasher = 12345 # 123456 # 12345 # 1234 # 12345 # 1234 # 123# 12345 n_epoch = 250

Epoch 245/250 loss: 0.0007158995819372165, min_loss: 0.0004515586029462726 Epoch 250/250 loss: 0.0006929234543094471, min_loss: 0.0004515586029462726

Factorization Machine Error: 0.08810926620246384 precision recall f1-score support

      0       0.92      0.97      0.94      2786
      1       0.90      0.74      0.81       948

avg / total 0.91 0.91 0.91 3734

Confusion Matrix Predicted False True all Actual
False 2704 82 2786 True 247 701 948 all 2951 783 3734

simple confusion matrix Predicted 0 1 Actual
0 2704 82 1 247 701

L2 flag_regulariser= 'L2' # 'L1' # 'L2' # 'L1' # L2 n_V_dimensions = 6 flag_0_use_all_hash_features_or_1_set_number_of_features = 1 number_of_features_for_Hasher = 12345 # 123456 # 12345 # 1234 # 12345 # 1234 # 123# 12345 n_epoch = 250 Epoch 245/250 loss: 0.00012438844802074696, min_loss: 0.00012438844802074696 Epoch 250/250 loss: 0.00011682842340260386, min_loss: 0.00011682842340260386

Factorization Machine Error: 0.10283877878950187 precision recall f1-score support

      0       0.89      0.98      0.93      2786
      1       0.92      0.65      0.76       948

avg / total 0.90 0.90 0.89 3734 simple confusion matrix Predicted 0 1 Actual
0 2733 53 1 331 617

'''
# The string above only records earlier L1/L2 results; it is never used.

# Toggle: 1 fetches the dataset and refreshes the on-disk cache,
# 0 re-uses the pickle written by an earlier run.
if 1:
    from sklearn.datasets import fetch_20newsgroups
    twenty_train = fetch_20newsgroups(subset='train', shuffle=True, random_state=42)

    # The context manager closes the cache file even if pickling fails.
    with open(b"twenty_data.pkl", "wb") as filehandler:
        pickle.dump(twenty_train, filehandler)
else:
    # Only load pickles this script wrote itself; unpickling runs code.
    with open("twenty_data.pkl", 'rb') as file_name:
        twenty_train = pickle.load(file_name)

twenty_train.target_names

def define_label(x, target_names):
    """Return the binary label for the class index *x*.

    Args:
        x: Index into *target_names*.
        target_names: Sequence of class-name strings.

    Returns:
        1 if the selected class name contains the substring 'comp',
        otherwise 0.
    """
    name = target_names[x]
    return 1 if 'comp' in name else 0

target = pd.Series(twenty_train.target).apply(lambda x : define_label(x,twenty_train.target_names)) target.mean() # Prevalence of the label

q = 3

# Deletion table for the characters removed during basic cleaning.
_CLEAN_TEXT_DELETE = str.maketrans('', '', '\n\t<>|')


def clean_text(text):
    """Apply basic cleaning to *text* and return its tokens.

    Newlines, tabs, '<', '>' and '|' are deleted (not replaced by a space),
    the result is split on single spaces, and only tokens longer than three
    characters are kept.

    Args:
        text: Raw document string.

    Returns:
        List of the surviving tokens, in their original order.
    """
    text = text.translate(_CLEAN_TEXT_DELETE)
    return [x for x in text.split(' ') if len(x) > 3]

X = [clean_text(x) for x in twenty_train.data]

q=1

Hash away!

if flag_0_use_all_hash_features_or_1_set_number_of_features: fh = FeatureHasher(input_type='string',n_features= number_of_features_for_Hasher, non_negative=True) else: fh = FeatureHasher(input_type='string', non_negative=True) # full number of features X_t = fh.transform(X)

Bin the inputs so that the "interaction" terms are more interpretable

X_bin = X_t.copy() X_bin[X_bin >= 1] = 1 X_train, X_test, y_train, y_test = train_test_split(X_bin, target, test_size=0.33, random_state=42)

Re-weight instances so that each class gets equal total weighting.

class_count_lookup = dict(zip(*np.unique(y_train, return_counts=True))) sample_weight = np.array([1.0 / class_count_lookup[_y] for _y in y_train])

if flag_regulariser == 'L2': reg = regularizers.L2(0, w_regularisatoin_coeff, v_regularisatoin_coeff) if flag_regulariser == 'L1': reg = regularizers.L1(0, w_regularisatoin_coeff, v_regularisatoin_coeff)

print('regularizer is ', flag_regulariser)
fm_classifier = Classifier(X_t.shape[1], k=n_V_dimensions, X_format="csr")
if 0:
    # Baseline call without regularizer or sample weights, kept for reference.
    fm_classifier.fit(X_train, y_train.values, verbosity=5, nb_epoch=n_epoch)  # original
else:
    # Train in seven rounds of n_epoch epochs and report test metrics after
    # each round. The counter is consistently named run_counter: the pasted
    # version assigned `runcounter` but read `run_counter`.
    run_counter = 1
    for _ in range(7):
        print('\n run counter = ', run_counter)
        run_counter += 1
        fm_classifier.fit(
            X_train,
            y_train.values,
            verbosity=10,
            nb_epoch=n_epoch,
            regularizer=reg,
            sample_weight=sample_weight,
        )

        # Predict once per round and reuse the result for both reports.
        predictions = fm_classifier.predict(X_test)
        cm = ConfusionMatrix(y_test.values, predictions)
        print('Confusion Matrix')
        print(cm)
        print(classification_report(y_test, predictions))
        q = 1

original as in example.py

type(y_train)

<class 'numpy.ndarray'>

original f.fit(X_train, y_train, verbosity=50, nb_epoch=200)

original f.fit(X_train, y_train, verbosity=5, nb_epoch=20)

q=4

def error_score(y_true, y_pred):
    """Return the misclassification rate, i.e. 1 minus the accuracy score.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    """
    return 1.0 - accuracy_score(y_true, y_pred)


print()
print('Factorization Machine Error: {}'.format(
    error_score(y_test, fm_classifier.predict(X_test))))
q = 6

print( classification_report( y_test, fm_classifier.predict(X_test) ) )

q=6

cm = ConfusionMatrix(y_test.values, fm_classifier.predict(X_test) ) print('Confusion Matrix') print(cm)

y_actu = pd.Series(y_test.values, name='Actual') y_pred = pd.Series(fm_classifier.predict(X_test), name='Predicted') df_confusion = pd.crosstab(y_actu, y_pred) print(' \n \n simple confusion matrix') print(df_confusion)

cm.print_stats()

q=8 v = fm_classifier.v.eval() print('len v =', len(v)) print('len v[0] =', len(v[0])) w1 = fm_classifier.w1.eval() print('len w1 = ', len(w1) ) w0 = fm_classifier.w0.eval() print('len w0 = ', len(w0) ) q=7

''' y_actu = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2] y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2] cm = ConfusionMatrix(y_actu, y_pred) cm.print_stats()

q=8

import pandas as pd y_actu = pd.Series([2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2], name='Actual') y_pred = pd.Series([0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2], name='Predicted') df_confusion = pd.crosstab(y_actu, y_pred)

q=7

'''

Sandy4321 commented 6 years ago

Sorry, I do not know how to copy-paste plain text; somehow they apply funny formatting.

dstein64 commented 6 years ago

Hi @Sandy4321, to get the code to format properly, add a line with ``` (three backticks) both before and after the code. If you follow the first set of backticks with "python" (no space between backticks and text), the code will be syntax highlighted.

However, please don't paste entire programs (limit the code to the smallest example that can show the issue).

"is it possble to use differnt oprimaisers, not only Adan as it is for now?"

The optimizer can be changed by passing an optimizer argument to the fit function.

import pyfms.optimizers

# Option 1: RMSProp, with its hyperparameters spelled out explicitly.
rmsprop = pyfms.optimizers.RMSProp(lr=0.001, rho=0.9, epsilon=1e-6)
fm_classifier.fit(X_train, y_train.values, optimizer=rmsprop)

# Option 2: SGD. NOTE(review): weights are initialized when the classifier is
# constructed, so calling fit again on the same fm_classifier continues from
# the RMSProp run above -- construct a new Classifier to compare optimizers.
sgd = pyfms.optimizers.SGD(lr=0.001, decay=0.0)
fm_classifier.fit(X_train, y_train.values, optimizer=sgd)

Hyperparameters like lr (learning rate) are not required, as they take defaults, but tuning them may help with your problem.

Sandy4321 commented 6 years ago

rmsprop_optimizer performance is the same as for the default Adam, but sgd_optimizer is much worse, though parameter tuning is needed. The main point is that adding more epochs does not help.

rmsprop_optimizer

run counter = 1 Epoch 10/30 loss: 0.04257191921374652, min_loss: 0.04257191921374652 Epoch 20/30 loss: 0.012045892724266576, min_loss: 0.012045892724266576 Epoch 30/30 loss: 0.005875949723672396, min_loss: 0.005875949723672396 Confusion Matrix Predicted False True all Actual
False 2687 99 2786 True 319 629 948 all 3006 728 3734 precision recall f1-score support

      0       0.89      0.96      0.93      2786
      1       0.86      0.66      0.75       948

avg / total 0.89 0.89 0.88 3734

sgd_optimizer

run counter = 5 Epoch 10/30 loss: 0.47345642163266527, min_loss: 0.4684790639534747 Epoch 20/30 loss: 0.4618976953466651, min_loss: 0.45370914368579585 Epoch 30/30 loss: 0.4390427815838477, min_loss: 0.4390427815838477 Confusion Matrix Predicted False True all Actual
False 2395 391 2786 True 623 325 948 all 3018 716 3734 precision recall f1-score support

      0       0.79      0.86      0.83      2786
      1       0.45      0.34      0.39       948

avg / total 0.71 0.73 0.71 3734

Sandy4321 commented 6 years ago

to get the code to format properly, add a line with ``` (three backticks) both before and after the code. If you follow the first set of backticks with "python" (no space between backticks and text), the code will be syntax highlighted.

Do you mean when copy-pasting into the GitHub page in the HTML browser?

dstein64 commented 6 years ago

"do you mean when copy past to html browser of github"

Yeah. The backticks are interpreted by github if you include them in the text box when pasting code.

dstein64 commented 6 years ago

"The main point that adding more epochs not helps"

Perhaps more epochs is causing overfitting. This can possibly be countered with more regularization.

Sandy4321 commented 6 years ago

interesting idea

dstein64 / pyfms

performance is low for basic example for unbalanced data #8

code is:

S_May6_NLP_data_Theano_FM_sparse.py

reg = regularizers.L2(0, 0, .01)

fm_classifier = Classifier(X_t.shape[1] , k=2, X_format="csr")

fm_classifier .fit(X_t, y_t, regularizer=reg)

v has the parameters for the interaction terms, w0 has the bias term parameter, and w1 has the first-order term parameters.

These can be accessed with the get_weights method. The example below shows how to access the v parameters,

and also see how many there are, which is a function of 1) the number of dimensions in your dataset and 2) the value of k.

w0, w1, v = fm_classifier.get_weights()

print("v:")

print(v)

print("shape(v)")

print(v.shape)

print("size(v)")

print(v.size)

S_May3_changed_hashing_size_Theano_FM_sparse.py

C:\Windows\system32>conda install theano <- good

conda install m2w64-toolchain <-destroys theano

S_May2_changed_hashing_size_Theano_FM_sparse.py

S_May2_hashing_FM_sparse.py

S_apr25_FM_sparse

code from

data example from

os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu,floatX=float32"

original from PyFactorizationMachines.src.pyfm import FactorizationMachineClassifier

parameters

Basic cleaning

Hash away!

Bin the inputs so that the "interaction" terms are more interpretable

Re-weight instances so that each class gets equal total weighting.

original as in example.py

type(y_train)

<class 'numpy.ndarray'>

original f.fit(X_train, y_train, verbosity=50, nb_epoch=200)

original f.fit(X_train, y_train, verbosity=5, nb_epoch=20)

cm.print_stats()

S_May14_NLP_data_Theano_FM_sparse_weights.py

S_May18_NLP_data_Theano_FM_sparse_weights.py

for _ in range(20):

S_May14_NLP_data_Theano_FM_sparse_weights.py

added

w_regularisatoin_coeff = 0.01

v_regularisatoin_coeff = 0.01

S_May14_NLP_data_Theano_FM_sparse_weights.py

added

weights to address unbalanced data

S_May6_NLP_data_Theano_FM_sparse.py

reg = regularizers.L2(0, 0, .01)

fm_classifier = Classifier(X_t.shape[1] , k=2, X_format="csr")

fm_classifier .fit(X_t, y_t, regularizer=reg)

v has the parameters for the interaction terms, w0 has the bias term parameter, and w1 has the first-order term parameters.

These can be accessed with the get_weights method. The example below shows how to access the v parameters,

and also see how many there are, which is a function of 1) the number of dimensions in your dataset and 2) the value of k.

w0, w1, v = fm_classifier.get_weights()

print("v:")

print(v)

print("shape(v)")

print(v.shape)

print("size(v)")

print(v.size)

S_May3_changed_hashing_size_Theano_FM_sparse.py

C:\Windows\system32>conda install theano <- good

conda install m2w64-toolchain <-destroys theano

S_May2_changed_hashing_size_Theano_FM_sparse.py

S_May2_hashing_FM_sparse.py

S_apr25_FM_sparse

code from

data example from

os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu,floatX=float32"

original from PyFactorizationMachines.src.pyfm import FactorizationMachineClassifier

parameters

Basic cleaning

Hash away!

Bin the inputs so that the "interaction" terms are more interpretable

Re-weight instances so that each class gets equal total weighting.

original as in example.py

type(y_train)

<class 'numpy.ndarray'>