Urban-Analytics-Technology-Platform / popgetter-cli

A Rust library and CLI for accessing popgetter data
0 stars 0 forks source link

Adding types from pydantic json schema + metadata loading #10

Closed stuartlynn closed 2 months ago

stuartlynn commented 2 months ago

Work on #1

Some notes

The generated types look like this:

///Identifying codes and names for a single country.
///
///Every field is a required `String`; there are no optional fields on this type.
pub struct CountryMetadata {
    ///The ISO2 code of the country (for example 'BE').
    pub iso2: String,
    ///The ISO3 code of the country (for example 'BEL').
    pub iso3: String,
    ///The official name of the country (for example 'Kingdom of Belgium'). In English if available.
    pub name_official: String,
    ///The short name of the country in English (for example 'Belgium').
    pub name_short_en: String,
}

///Descriptive metadata about an organisation that publishes data.
///
///Holds the publisher's name, description and homepage URL, together with the
///list of countries ([`CountryMetadata`]) for which it has data available.
pub struct DataPublisher {
    ///A list of countries for which the publisher has data available.
    pub countries_of_interest: Vec<CountryMetadata>,
    ///A brief description of the organisation publishing the data, including its mandate.
    pub description: String,
    ///The name of the organisation publishing the data.
    pub name: String,
    ///The URL of the publisher's homepage.
    pub url: String,
}

///Metadata describing a single metric and where its values are stored.
///
///The `Option` fields are annotated with `serde(default, skip_serializing_if = "Option::is_none")`,
///so they deserialize to `None` when absent from the input and are omitted from the output
///when `None`. `potential_denominator_ids` is handled the same way for an empty `Vec`.
pub struct MetricMetadata {
    ///A longer description of the metric, which might include information on the caveats for the metric.
    pub description: String,
    ///A human readable name for the metric, something like "Total Population under 12 years old".
    pub human_readable_name: String,
    ///Field description using the Humanitarian eXchange Language (HXL) standard.
    pub hxl_tag: String,
    ///The location (URL) of the parquet file that contains this metric value.
    pub metric_parquet_file_url: String,
    ///The metric, if any, which is the parent of this one. (Some census data, like the ACS, is organised hierarchically; this can be useful for making the metadata more searchable.)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_metric_id: Option<String>,
    ///Name of the column in the outputted parquet file which contains the metric.
    pub parquet_column_name: String,
    ///Name of the column, if any, that contains the margin of error for the metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parquet_margin_of_error_column: Option<String>,
    ///Location (URL) of the parquet file that contains the margin of error for the metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parquet_margin_of_error_file: Option<String>,
    ///A list of metrics which are suitable denominators for this metric.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub potential_denominator_ids: Vec<String>,
    ///The name of the metric that comes from the source dataset (for example, in the ACS this might be "B001_E001" or something similar).
    pub source_metric_id: String,
}

///Metadata describing one release of source data.
///
///Combines the release's descriptive details (name, description, URL, dates and publisher)
///with its geography information and the list of metrics ([`MetricMetadata`]) it provides.
pub struct SourceDataRelease {
    ///A list of the available metrics.
    pub available_metrics: Vec<MetricMetadata>,
    ///The range of time during which the data was collected. Should be in the format (start_date, end_date). If the data represents a single day snapshot, end_date should be `None`.
    ///
    ///NOTE(review): both tuple elements are non-optional `NaiveDate` values here, so `None` cannot be represented by this type as written; confirm the intended representation of a single day snapshot.
    pub collection_period: (chrono::naive::NaiveDate, chrono::naive::NaiveDate),
    ///A list of the countries for which the data is available.
    pub countries_of_interest: Vec<CountryMetadata>,
    ///The date on which the data was published.
    pub date_published: chrono::naive::NaiveDate,
    ///A description of the data release.
    pub description: String,
    ///The date on which it is expected that an updated edition of the data will be published. In some cases this will be the same as the `reference_period[1]`.
    ///
    ///Deserializes to `None` when absent and is omitted from serialized output when `None`.
    ///
    ///NOTE(review): the field name `except_next_update` looks like a typo for "expected"; presumably it mirrors the upstream schema, so confirm there before renaming.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub except_next_update: Option<chrono::naive::NaiveDate>,
    ///The path of the geography file.
    pub geography_file: String,
    ///The level of the geography.
    pub geography_level: String,
    ///The name of the data release, as given by the publisher.
    pub name: String,
    ///The publisher of the data.
    pub publishing_organisation: DataPublisher,
    ///The range of time for which the data can be assumed to be valid. Should be in the format (start_date, end_date). If the data represents a single day snapshot, end_date should be `None`.
    ///
    ///NOTE(review): as with `collection_period`, the second element is a non-optional `NaiveDate`, so `None` is not representable here; confirm.
    pub reference_period: (chrono::naive::NaiveDate, chrono::naive::NaiveDate),
    ///The URL of the data release.
    pub url: String,
}
yongrenjie commented 2 months ago

Small additions:

None of this really changes the contents of the PR (it mostly just tidies things up), so I will merge now.