Adds a schema.json which was exported from the main popgetter pydantic models
Imports the types using the import_types! macro
Adds an example metadata file for the US
Adds a function and test to load in the metadata to the generated structs
Some notes
The macro already implements serde on the types which is great
It uses chrono for datetimes
We should probably move this into its own module at some point
The generated types look like:
///Identifying codes and names for a single country.
pub struct CountryMetadata {
///The ISO2 code of the country (for example 'BE').
pub iso2: String,
///The ISO3 code of the country (for example 'BEL').
pub iso3: String,
///The official name of the country (for example 'Kingdom of Belgium'). In English if available.
pub name_official: String,
///The short name of the country in English (for example 'Belgium').
pub name_short_en: String,
}
///An organisation that publishes data: its name, description, homepage and the countries it has data for.
pub struct DataPublisher {
///A list of countries for which the publisher has data available.
pub countries_of_interest: Vec<CountryMetadata>,
///A brief description of the organisation publishing the data, including its mandate.
pub description: String,
///The name of the organisation publishing the data.
pub name: String,
///The URL of the publisher's homepage.
pub url: String,
}
///Metadata describing a single metric and the parquet file(s) that hold its values.
///
///Fields marked `#[serde(default, skip_serializing_if = ...)]` fall back to `None` / an empty list
///when absent from the input, and are omitted from the output when they hold that empty value.
pub struct MetricMetadata {
///A longer description of the metric, which might include info on the caveats for the metric.
pub description: String,
///A human readable name for the metric, something like "Total Population under 12 years old".
pub human_readable_name: String,
///Field description using the Humanitarian eXchange Language (HXL) standard.
pub hxl_tag: String,
///The location (URL) of the parquet file that contains this metric value.
pub metric_parquet_file_url: String,
///The metric, if any, which is the parent of this one (some census data, like the ACS, is organised hierarchically; this can be useful for making the metadata more searchable).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parent_metric_id: Option<String>,
///Name of the column in the outputted parquet file which contains the metric.
pub parquet_column_name: String,
///Name of the column, if any, that contains the margin of error for the metric.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parquet_margin_of_error_column: Option<String>,
///Location (URL) of the parquet file that contains the margin of error for the metric.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parquet_margin_of_error_file: Option<String>,
///A list of metrics which are suitable denominators for this metric.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub potential_denominator_ids: Vec<String>,
///The name of the metric that comes from the source dataset (for example, in the ACS this might be "B001_E001" or something similar).
pub source_metric_id: String,
}
///A single release of a source dataset: who published it, when, what period and geography it covers, and which metrics it provides.
pub struct SourceDataRelease {
///A list of the available metrics.
pub available_metrics: Vec<MetricMetadata>,
///The range of time during which the data was collected. Should be in the format (start_date, end_date). If the data represents a single day snapshot, end_date should be `None`.
// NOTE(review): the type is a pair of non-optional dates, so a `None` end_date
// cannot be represented here — confirm against the schema this was generated from.
pub collection_period: (chrono::naive::NaiveDate, chrono::naive::NaiveDate),
///A list of the countries for which the data is available.
pub countries_of_interest: Vec<CountryMetadata>,
///The date on which the data was published.
pub date_published: chrono::naive::NaiveDate,
///A description of the data release.
pub description: String,
///The date on which it is expected that an updated edition of the data will be published. In some cases this will be the same as the `reference_period[1]`.
// NOTE(review): the field name looks like a typo for `expect_next_update`; it presumably
// mirrors the schema, so confirm and fix it there rather than renaming it here.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub except_next_update: Option<chrono::naive::NaiveDate>,
///The path of the geography file.
pub geography_file: String,
///The level of the geography.
pub geography_level: String,
///The name of the data release, as given by the publisher.
pub name: String,
///The publisher of the data.
pub publishing_organisation: DataPublisher,
///The range of time for which the data can be assumed to be valid. Should be in the format (start_date, end_date). If the data represents a single day snapshot, end_date should be `None`.
// NOTE(review): as with `collection_period`, both dates are non-optional in this type,
// so the documented `None` end_date cannot be expressed — confirm against the schema.
pub reference_period: (chrono::naive::NaiveDate, chrono::naive::NaiveDate),
///The URL of the data release.
pub url: String,
}
Work on #1
Some notes
the generated types look like