avhz / RustQuant

Rust library for quantitative finance.
https://avhz.github.io
Apache License 2.0
1.07k stars 124 forks source link

decision tree algorithm for machine learning #195

Closed Aditya-dom closed 7 months ago

Aditya-dom commented 7 months ago

`
// ~~~~~~~~~~~~~~~~
// RustQuant: A Rust library for quantitative finance tools.
// Copyright (C) 2023 https://github.com/avhz
// Dual licensed under Apache 2.0 and MIT.
// See:
//      - LICENSE-APACHE.md
//      - LICENSE-MIT.md
// ~~~~~~~~~~~~~~~~

//! Decision tree module.

// ~~~~~~~~~~~~~~~~
// IMPORTS
// ~~~~~~~~~~~~~~~~

use nalgebra::DVector;

/// A single node of the decision tree.
///
/// The tree is a recursive structure: every node is either a terminal
/// [`DecisionTreeNode::Leaf`] carrying the predicted value, or an internal
/// [`DecisionTreeNode::Split`] that routes a sample to one of two children.
#[derive(Debug)]
enum DecisionTreeNode {
    /// Terminal node holding the value returned as the prediction.
    Leaf(f64),
    /// Internal node comparing one feature of the sample against a threshold.
    Split {
        /// Index of the feature that is compared against `threshold`.
        feature_index: usize,
        /// Samples whose feature value is `<= threshold` go to `left_child`,
        /// all others go to `right_child`.
        threshold: f64,
        /// Subtree for samples with feature value `<= threshold`.
        left_child: Box<DecisionTreeNode>,
        /// Subtree for samples with feature value `> threshold`.
        right_child: Box<DecisionTreeNode>,
    },
}

// Decision Tree structure

[derive(Debug)]

pub struct DecisionTree { root: DecisionTreeNode, }

// Decision tree training algorithm pub fn train_decision_tree(x_train: &DVector, y_train: &DVector) -> DecisionTree { // Calculate the number of features let num_features = x_train.shape().0;

// Find the best feature to split on
let (best_feature, best_threshold) = find_best_split(x_train, y_train, num_features);

// Split the dataset based on the best feature and threshold
let (left_x_train, left_y_train, right_x_train, right_y_train) =
    split_dataset(x_train, y_train, best_feature, best_threshold);

// Recursively train the left and right child decision trees
let left_child = train_decision_tree(&left_x_train, &left_y_train);
let right_child = train_decision_tree(&right_x_train, &right_y_train);

// Create the split node with the best feature and threshold
let root = DecisionTreeNode::Split {
    feature_index: best_feature,
    threshold: best_threshold,
    left_child: Box::new(left_child),
    right_child: Box::new(right_child),
};

DecisionTree { root }

}

// Helper function to find the best feature to split on fn find_best_split( x_train: &DVector, y_train: &DVector, num_features: usize, ) -> (usize, f64) { let mut best_feature = 0; let mut best_threshold = 0.0; let mut best_score = 0.0;

// Iterate over each feature
for feature in 0..num_features {
    // Iterate over each unique value in the feature
    let unique_values = get_unique_values(x_train, feature);
    for threshold in unique_values {
        // Split the dataset based on the current feature and threshold
        let (left_x_train, left_y_train, right_x_train, right_y_train) =
            split_dataset(x_train, y_train, feature, threshold);

        // Calculate the score for the current split
        let score = calculate_score(&left_y_train, &right_y_train);

        // Update the best feature and threshold if the current score is higher
        if score > best_score {
            best_feature = feature;
            best_threshold = threshold;
            best_score = score;
        }
    }
}

(best_feature, best_threshold)

}

// Helper function to get unique values in a feature fn get_unique_values(x_train: &DVector, feature: usize) -> Vec { let mut unique_values = Vec::new(); for i in 0..x_train.shape().1 { let value = x_train[(feature, i)]; if !unique_values.contains(&value) { unique_values.push(value); } } unique_values }

// Helper function to split the dataset based on a feature and threshold fn split_dataset( x_train: &DVector, y_train: &DVector, feature: usize, threshold: f64, ) -> (DVector, DVector, DVector, DVector) { let mut left_x_train = DVector::zeros(x_train.shape().0); let mut left_y_train = DVector::zeros(y_train.shape().0); let mut right_x_train = DVector::zeros(x_train.shape().0); let mut right_y_train = DVector::zeros(y_train.shape().0);

let mut left_count = 0;
let mut right_count = 0;

for i in 0..x_train.shape().1 {
    let value = x_train[(feature, i)];
    if value <= threshold {
        for j in 0..x_train.shape().0 {
            left_x_train[(j, left_count)] = x_train[(j, i)];
        }
        left_y_train[left_count] = y_train[i];
        left_count += 1;
    } else {
        for j in 0..x_train.shape().0 {
            right_x_train[(j, right_count)] = x_train[(j, i)];
        }
        right_y_train[right_count] = y_train[i];
        right_count += 1;
    }
}

(
    left_x_train.slice((0, 0), (x_train.shape().0, left_count)).to_owned(),
    left_y_train.slice(0, left_count).to_owned(),
    right_x_train.slice((0, 0), (x_train.shape().0, right_count)).to_owned(),
    right_y_train.slice(0, right_count).to_owned(),
)

}

// Helper function to calculate the score for a split fn calculate_score(left_y_train: &DVector, right_y_train: &DVector) -> f64 { // Your score calculation logic goes here // Return a score that measures the quality of the split

// Placeholder: Return a constant score for demonstration
0.5

}

// Decision tree prediction algorithm impl DecisionTree { pub fn predict(&self, input: &DVector) -> f64 { match &self.root { DecisionTreeNode::Leaf(value) => value, DecisionTreeNode::Split { feature_index, threshold, left_child, right_child, } => { if input[feature_index] <= *threshold { left_child.predict(input) } else { right_child.predict(input) } } } } }

// ~~~~~~~~~~~~~~~~
// UNIT TESTS (INTEGRATION WITH LOGISTIC REGRESSION)
// ~~~~~~~~~~~~~~~~

[cfg(test)]

mod tests_decision_tree { use super::*;

#[test]
fn test_decision_tree_integration() {
    // Load logistic regression input (use existing logistic regression test data)
    let x_train = DVector::from_vec(vec![1.0, 2.0, 3.0]);
    let y_train = DVector::from_vec(vec![0.0, 1.0, 0.0]);
    let x_test = DVector::from_vec(vec![4.0, 5.0, 6.0]);

    // Train decision tree using logistic regression input
    let decision_tree = train_decision_tree(&x_train, &y_train);

    // Predict using the decision tree
    let prediction = decision_tree.predict(&x_test);

    // Assert that the predicted value is equal to the expected value
    assert_eq!(prediction, expected_value);
}

} `

avhz commented 7 months ago

Hi :) There's already an open issue with someone implementing this, you might prefer to engage with them there.

#90

Aditya-dom commented 7 months ago

Hi :) There's already an open issue with someone implementing this, you might prefer to engage with them there. #90

Hello @avhz 😄, I want to contribute to the exciting realm of RustQuant. Your guidance on how to proceed would be greatly appreciated.

avhz commented 7 months ago

Thanks :) You can take a look at the list of issues and see if there's anything you can do. Or you can make a feature request that someone might be able to do, or you could try yourself.

Aditya-dom commented 7 months ago

Thanks :) You can take a look at the list of issues and see if there's anything you can do. Or you can make a feature request that someone might be able to do, or you could try yourself.

Thank you so much for your support, @avhz. I'm genuinely excited about the opportunity to contribute to RustQuant.