Kiyoshika / CppEZML

A work-in-progress C++ machine learning library designed to be very easy to use, with everything written pretty much from scratch.

LogisticRegression.h - Reimplement #5

Closed Kiyoshika closed 3 years ago

Kiyoshika commented 3 years ago

Reimplement the logistic regression class. It's extremely similar to linear regression, so this should take no time.

NOTE: Something I'm also planning for the linear regression class: allow users to pass function pointers to define custom loss functions. I'll probably have to overload the loss function to accept an optional function-pointer parameter.

Kiyoshika commented 3 years ago

Implemented. See the example below comparing the default cross entropy loss to a user-specified squared error loss.

#include <iostream>
#include <vector>
#include <math.h>
#include "data/DataSet.h"
#include "models/classification/LogisticRegression.h"

using namespace std;

double squared_error(double actual_y, double predicted_y) {
    return (actual_y - predicted_y) * (actual_y - predicted_y);
}

int main() {

    // generated data from sklearn make_classification
    // x, y = make_classification(n_samples = 5000, n_classes = 2, n_informative = 8, n_features = 20)
    DataSet xtrain, xtest, ytrain, ytest;
    xtrain.load("xtrain.csv");
    xtest.load("xtest.csv");
    ytrain.load("ytrain.csv");
    ytest.load("ytest.csv");

    Classifier *se_logistic = new LogisticRegression(true, 1000, 0.001, &squared_error);
    Classifier *def_logistic = new LogisticRegression(true);

    se_logistic->fit(xtrain.cast_data_double(), ytrain.cast_target_int());
    cout << "\n";
    def_logistic->fit(xtrain.cast_data_double(), ytrain.cast_target_int());

    vector<int> se_preds = se_logistic->predict(xtest.cast_data_double());
    vector<int> def_preds = def_logistic->predict(xtest.cast_data_double());

    cout << "\n";
    cout << "Squared Error F1 Score: " << se_logistic->get_f1_score(ytest.cast_target_int(), se_preds) << "\n";
    cout << "Cross Entropy (Default) F1 Score: " << def_logistic->get_f1_score(ytest.cast_target_int(), def_preds) << "\n";

    // note: "delete a, b;" only deletes a (comma operator), so delete each separately
    delete se_logistic;
    delete def_logistic;

}

OUTPUT FROM ABOVE:

Total loss at iteration #0: 581.497
Total loss at iteration #100: 475.495

Total loss at iteration #0: 2353.73
Total loss at iteration #100: 2188.07

Squared Error F1 Score: 0.795377
Cross Entropy (Default) F1 Score: 0.795409