georust / netcdf

High-level netCDF bindings for Rust
Apache License 2.0
81 stars 28 forks source link

scale_factor & offset_factor #116

Closed krestomantsi closed 9 months ago

krestomantsi commented 9 months ago

The docs are not very clear on how to use the scale and offset factors. Can you please add these to the docs? Or a simple example? Thank you!

lnicola commented 9 months ago

In the meanwhile, https://help.marine.copernicus.eu/en/articles/5470092-how-to-use-add_offset-and-scale_factor-to-calculate-real-values-of-a-variable#h_a1b3dce0a3 might help.

krestomantsi commented 9 months ago

First of all thank you for the lightning speed reply! I am aware of what those are. I just can not find in the docs how to access those values for a variable using rust netcdf.

lnicola commented 9 months ago

Oh, sorry for that! See https://github.com/georust/netcdf/issues/110.

krestomantsi commented 9 months ago

I am assuming since alot of newcomers (to rust ) and or rustnetcdf will have the same question, can we have a mwe in the docs to show how to use add_offset and scale_factor?

magnusuMET commented 9 months ago

There is definitely a need to address this when this issue as been raised twice. I suggest adding the conversion helpers from https://github.com/georust/netcdf/issues/110#issuecomment-1590652493, adding an example examples/cf-scale-offset.rs (maybe this would be overdoing it?), and adding a small blurb on Variable::values to reference this example. Hopefully that should make it easy for new (and experienced) users to use this library.

krestomantsi commented 9 months ago

after a couple of hours it is still not clear to me how to get the attribute values as floats. So far I have something like this:

use ndarray::prelude::*;

fn main() {
    let now = std::time::Instant::now();
    let fname = "/home/nick/WeatherHindcast/significant_height_of_combined_wind_waves_and_swell_2022_download.nc";
    let file = netcdf::open(fname).unwrap();
    let var = &file.variable("swh").expect("variable not found");
    // How should the below be done properly?
    let scale_factor: f32 = var.attribute("scale_factor").unwrap().value().unwrap();
    let add_offset: f32 = var.attribute("add_offset").unwrap().value().unwrap();
    let mis = var.attribute("missing_value").unwrap().value().unwrap();
    let var = var.values_arr::<f32, _>(..).unwrap();

    println!("scale: {:?}", scale_factor);
    println!("offset: {:?}", add_offset);
    println!("missing: {:?}", mis);
    println!("var size: {:?}", var.shape());
    let var = var.mapv_into(|x| cdfvar_process_scalar(x, scale_factor, add_offset, mis, 0.0f32));
    println!("var: {:?}", var);
    // println!("var max: {:?}", var.fold(0.0f32, |a, &b| a.max(b)));
    // println!("var min: {:?}", var.fold(0.0f32, |a, &b| a.min(b)));
    // println!("var mean: {:?}", var.mean().unwrap());
    println!("Time elapsed {:?}", now.elapsed())
}

fn cdfvar_process_scalar(
    x: f32,
    scale_factor: f32,
    add_offset: f32,
    missing_value: f32,
    fill_value: f32,
) -> f32 {
    if x.abs() == missing_value {
        return fill_value;
    } else {
        return x * scale_factor + add_offset;
    }
}

Any comments would be rly appreciated!

krestomantsi commented 9 months ago

ok this is what I was looking for

use ndarray::prelude::*;

fn main() {
    let now = std::time::Instant::now();
    let fname = "/home/nick/WeatherHindcast/significant_height_of_combined_wind_waves_and_swell_2022_download.nc";
    let file = netcdf::open(fname).unwrap();
    let var = &file.variable("swh").expect("variable not found");
    let scale_factor = var.attribute("scale_factor").unwrap().value().unwrap();
    let scale_factor = attrconvert(&scale_factor).unwrap();
    let add_offset = var.attribute("add_offset").unwrap().value().unwrap();
    let add_offset = attrconvert(&add_offset).unwrap();
    let mis = var.attribute("missing_value").unwrap().value().unwrap();
    let mis = attrconvert(&mis).unwrap();
    let var = var.values_arr::<f32, _>(..).unwrap();

    println!("scale: {:?}", scale_factor);
    println!("offset: {:?}", add_offset);
    println!("missing: {:?}", mis);
    println!("var size: {:?}", var.shape());
    let var = var.mapv_into(|x| cdfvar_process_scalar(x, scale_factor, add_offset, mis, 0.0f32));
    println!("var: {:?}", var);
    println!("var max: {:?}", var.fold(0.0f32, |a, &b| a.max(b)));
    println!("var min: {:?}", var.fold(0.0f32, |a, &b| a.min(b)));
    println!("var mean: {:?}", var.mean().unwrap());
    println!("Time elapsed {:?}", now.elapsed())
}

fn cdfvar_process_scalar(
    x: f32,
    scale_factor: f32,
    add_offset: f32,
    missing_value: f32,
    fill_value: f32,
) -> f32 {
    if (x - missing_value) < (1e-5 as f32) {
        return fill_value;
    } else {
        return x * scale_factor + add_offset;
    }
}
fn attrconvert(a: &netcdf::AttrValue) -> Option<f32> {
    use netcdf::AttrValue::*;
    Some(match a {
        netcdf::AttrValue::Uchar(x) => *x as f32,
        netcdf::AttrValue::Schar(x) => *x as f32,
        netcdf::AttrValue::Short(x) => *x as f32,
        netcdf::AttrValue::Ushort(x) => *x as f32,
        netcdf::AttrValue::Uint(x) => *x as f32,
        netcdf::AttrValue::Int(x) => *x as f32,
        netcdf::AttrValue::Ulonglong(x) => *x as f32,
        netcdf::AttrValue::Longlong(x) => *x as f32,
        netcdf::AttrValue::Float(x) => *x as f32,
        netcdf::AttrValue::Double(x) => *x as f32,
        _ => return None,
    })
}

I will now mark this as closed. If you want me to help with the documentation, please do tell me.

lnicola commented 9 months ago

Hi, I was just writing this out (untested):

use netcdf::{error::Result, AttrValue, Variable};

fn convert(a: &AttrValue) -> Option<f64> {
    use AttrValue::*;
    Some(match a {
        Uchar(x) => *x as f64,
        Schar(x) => *x as f64,
        Short(x) => *x as f64,
        Ushort(x) => *x as f64,
        Uint(x) => *x as f64,
        Int(x) => *x as f64,
        Ulonglong(x) => *x as f64,
        Longlong(x) => *x as f64,
        Float(x) => *x as f64,
        Double(x) => *x as f64,
        _ => return None,
    })
}

trait VariableExt {
    fn attribute_value_f64(&self, name: &str) -> Result<Option<f64>>;
    fn attribute_value_f64_or(&self, name: &str, default: f64) -> Result<f64>;
}

impl<'a> VariableExt for Variable<'a> {
    fn attribute_value_f64(&self, name: &str) -> Result<Option<f64>> {
        let Some(attribute) = self.attribute(name) else {
            return Ok(None);
        };
        let value = attribute.value()?;
        let value = convert(&value);
        Ok(value)
    }

    fn attribute_value_f64_or(&self, name: &str, default: f64) -> Result<f64> {
        Ok(self.attribute_value_f64(name)?.unwrap_or(default))
    }
}

fn main() -> Result<()> {
    let now = std::time::Instant::now();
    let fname = "/home/nick/WeatherHindcast/significant_height_of_combined_wind_waves_and_swell_2022_download.nc";
    let file = netcdf::open(fname)?;
    let var = &file.variable("swh").expect("variable not found");

    let scale_factor = var.attribute_value_f64_or("scale_factor", 1.0)? as f32;
    let add_offset = var.attribute_value_f64_or("add_offset", 0.0)? as f32;
    let missing_value = var.attribute_value_f64("missing_value")?.map(|x| x as f32);

    let var = var.values_arr::<f32, _>(..).unwrap();

    println!("scale: {:?}", scale_factor);
    println!("offset: {:?}", add_offset);
    println!("missing: {:?}", missing_value);
    println!("var size: {:?}", var.shape());

    let var = var
        .mapv_into(|x| cdfvar_process_scalar(x, scale_factor, add_offset, missing_value, 0.0f32));

    println!("var: {:?}", var);
    println!("var max: {:?}", var.fold(0.0f32, |a, &b| a.max(b)));
    println!("var min: {:?}", var.fold(0.0f32, |a, &b| a.min(b)));
    println!("var mean: {:?}", var.mean().unwrap());
    println!("Time elapsed {:?}", now.elapsed());

    Ok(())
}

fn cdfvar_process_scalar(
    x: f32,
    scale_factor: f32,
    add_offset: f32,
    missing_value: Option<f32>,
    fill_value: f32,
) -> f32 {
    if let Some(missing_value) = missing_value {
        if x == missing_value {
            return fill_value;
        }
    }

    return x * scale_factor + add_offset;
}
magnusuMET commented 9 months ago

@krestomantsi The work of #117 is included in netcdf version 0.8.3