ChristopherRabotin commented 6 years ago

Proposal

Currently, nabla will call the function f N times, where N is the size of the input vector to the function f. Although this computes the gradient properly, it would probably be advantageous in terms of calling stack to only call the function once. This can be achieved by generating a hyperdual space and calling the provided function with the base-defining matrix. In terms of precise computations, this will should not have any impact: performing math on a matrix instead of doing the same on N vector will indeed require the same number of computations.

Impact

This will require changing the signature of the f function parameter to nabla from Fn(T, &VectorN<Dual<T>, N>) -> VectorN<Dual<T>, N> to Fn(T, &MatrixMN<Dual<T>, N, N>) -> MatrixMN<Dual<T>, N, N> . This is a breaking change.

Proof of concept

It is important to note the difference between functions eom and eom_mat: eom_mat is noticeably more complicated as it requires extracting different parts of the provided state. Further enhancements may hopefully limit such changes (possibly by defining Dual<T> as a na::Scalar (which might not be possible since Dual is itself represented as a Vector2, although mathematically, I think that a Dual is a scalar number)).

extern crate dual_num;
extern crate nalgebra as na;

use dual_num::linalg::norm;
use dual_num::{Dual, Float};
use na::{DimName, Matrix3x6, Matrix6, U3, U6, Vector3, Vector6};

fn diff<F>(x: Vector6<f64>, func: F) -> Matrix6<f64>
where
    F: Fn(&Vector6<Dual<f64>>) -> Vector6<Dual<f64>>,
{
    let mut grad_as_slice = Vec::with_capacity(U6::dim() * U6::dim());
    for coord in 0..U6::dim() {
        let x11 = Vector6::<Dual<f64>>::new(
            Dual::new(x[(0, 0)], if coord == 0 { 1.0 } else { 0.0 }),
            Dual::new(x[(1, 0)], if coord == 1 { 1.0 } else { 0.0 }),
            Dual::new(x[(2, 0)], if coord == 2 { 1.0 } else { 0.0 }),
            Dual::new(x[(3, 0)], if coord == 3 { 1.0 } else { 0.0 }),
            Dual::new(x[(4, 0)], if coord == 4 { 1.0 } else { 0.0 }),
            Dual::new(x[(5, 0)], if coord == 5 { 1.0 } else { 0.0 }),
        );
        println!("eom #{}", coord);
        let df_di = func(&x11);
        for i in 0..6 {
            grad_as_slice.push(df_di[(i, 0)].dual());
        }
    }

    let grad = Matrix6::from_column_slice(&grad_as_slice);
    grad
}

fn diff_mat<F>(x: Vector6<f64>, func: F) -> Matrix6<f64>
where
    F: Fn(&Matrix6<Dual<f64>>) -> Matrix6<Dual<f64>>,
{
    // Create a Matrix for the hyperdual space
    let mut space = Matrix6::zeros();

    for coord in 0..U6::dim() {
        let x11 = Vector6::<Dual<f64>>::new(
            Dual::new(x[(0, 0)], if coord == 0 { 1.0 } else { 0.0 }),
            Dual::new(x[(1, 0)], if coord == 1 { 1.0 } else { 0.0 }),
            Dual::new(x[(2, 0)], if coord == 2 { 1.0 } else { 0.0 }),
            Dual::new(x[(3, 0)], if coord == 3 { 1.0 } else { 0.0 }),
            Dual::new(x[(4, 0)], if coord == 4 { 1.0 } else { 0.0 }),
            Dual::new(x[(5, 0)], if coord == 5 { 1.0 } else { 0.0 }),
        );
        space.set_column(coord, &x11);
    }

    let state_n_grad = func(&space);
    // Extract the dual part
    let mut grad = Matrix6::zeros();
    for i in 0..6 {
        for j in 0..6 {
            grad[(i, j)] = state_n_grad[(i, j)].dual();
        }
    }
    grad
}

fn eom_mat(state: &Matrix6<Dual<f64>>) -> Matrix6<Dual<f64>> {
    let radius = state.fixed_slice::<U3, U6>(0, 0).into_owned();
    let velocity = state.fixed_slice::<U3, U6>(3, 0).into_owned();
    let mut body_acceleration = Matrix3x6::zeros();
    for i in 0..3 {
        let this_norm = norm(&Vector3::new(
            radius[(0, i)],
            radius[(1, i)],
            radius[(2, i)],
        ));
        let body_acceleration_f = Dual::from_real(-398_600.4415) / this_norm.powi(3);
        let this_body_acceleration = Vector3::new(
            radius[(0, i)] * body_acceleration_f,
            radius[(1, i)] * body_acceleration_f,
            radius[(2, i)] * body_acceleration_f,
        );
        body_acceleration.set_column(i, &this_body_acceleration);
    }
    let mut rtn = Matrix6::zeros();
    for i in 0..6 {
        if i < 3 {
            rtn.set_row(i, &velocity.row(i));
        } else {
            rtn.set_row(i, &body_acceleration.row(i - 3));
        }
    }
    rtn
}

fn eom(state: &Vector6<Dual<f64>>) -> Vector6<Dual<f64>> {
    let radius = state.fixed_rows::<U3>(0).into_owned();
    let velocity = state.fixed_rows::<U3>(3).into_owned();
    println!("x={:1.32}", radius);
    let norm = (radius[(0, 0)].powi(2) + radius[(1, 0)].powi(2) + radius[(2, 0)].powi(2)).sqrt();
    let body_acceleration_f = Dual::from_real(-398_600.4415) / norm.powi(3);
    let body_acceleration = Vector3::new(
        radius[(0, 0)] * body_acceleration_f,
        radius[(1, 0)] * body_acceleration_f,
        radius[(2, 0)] * body_acceleration_f,
    );
    Vector6::from_iterator(velocity.iter().chain(body_acceleration.iter()).cloned())
}

fn main() {
    let state = Vector6::new(
        -9042.862233600335,
        18536.333069123244,
        6999.9570694864115,
        -3.28878900377057,
        -2.226285193102822,
        1.6467383807226765,
    );
    let grad = diff(state, eom);
    println!("{}", grad);

    let grad_mat = diff_mat(state, eom_mat);
    println!("{}", grad_mat);

    /*
    The following result is from the analytical computation in `nyx`:

    thread 'stat_od::two_body::ckf_fixed_step_perfect_stations' panicked at '
  ┌                                                                                                                                                                                                                         ┐
  │  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000  1.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000 │
  │  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000  1.00000000000000000000000000000000  0.00000000000000000000000000000000 │
  │  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000  1.00000000000000000000000000000000 │
  │ -0.00000001862839867653828534481953 -0.00000004089774775108091810256206 -0.00000001544439654966730038449371  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000 │
  │ -0.00000004089774775108091810256206  0.00000004525327175187384341105835  0.00000003165839212196756679373275  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000 │
  │ -0.00000001544439654966730038449371  0.00000003165839212196756679373275 -0.00000002662487307533553821390412  0.00000000000000000000000000000000  0.00000000000000000000000000000000  0.00000000000000000000000000000000 │
  └                                                                                                                                                                                                                         ┘
    */

    let mut expected = Matrix6::zeros();
    expected[(0, 3)] = 1.0;
    expected[(1, 4)] = 1.0;
    expected[(2, 5)] = 1.0;
    expected[(3, 0)] = -0.00000001862839867653828534481953;
    expected[(4, 0)] = -0.00000004089774775108091810256206;
    expected[(5, 0)] = -0.00000001544439654966730038449371;
    expected[(3, 1)] = -0.00000004089774775108091810256206;
    expected[(4, 1)] = 0.00000004525327175187384341105835;
    expected[(5, 1)] = 0.00000003165839212196756679373275;
    expected[(3, 2)] = -0.00000001544439654966730038449371;
    expected[(4, 2)] = 0.00000003165839212196756679373275;
    expected[(5, 2)] = -0.00000002662487307533553821390412;

    println!("|delta| = {:e}", (grad - expected).norm());
    println!("|delta| = {:e}", (grad_mat - expected).norm());
}

ChristopherRabotin commented 6 years ago

Wow, this proposed enhancement drastically speeds up the computation of the gradient!

I compared the code above by enabling either the vector computation or the matrix computation by moving the diff or diff_mat code into a #[no_mangle] pub fn, and called that function one billion times. The result from std::time::Instant shows that the matrix computation takes strictly zero seconds to execute all billion calls (albeit there may be drastic optimizations by the compiler here since I'm running with the --release flag). That same piece of code with the Vector6 call took several minutes (I Ctrl-C'd it).

ChristopherRabotin commented 6 years ago

FWTW, I'm working on getting non-square partials working using the hyperdual space. Something is wrong somewhere, and I haven't figure it out yet.

novacrazy / dual_num

Update `nabla` function to call the function only once #4

Proposal

Impact

Proof of concept