Living-with-machines / lwmdb

A django-based library for managing the Living with Machines newspapers metadata database schema
https://living-with-machines.github.io/lwmdb/
MIT License
2 stars 0 forks source link

Find a schema definition toolchain that suits our needs #6

Closed ChristinaLast closed 2 years ago

ChristinaLast commented 2 years ago

Currently using diagramDB (link to schema here

kallewesterling commented 2 years ago

Adding myself as an assignee after our handover meeting @ChristinaLast.

ChristinaLast commented 2 years ago

Structure for table creation in DBdiagram.io

Table item {
  id int [pk, not null, unique]
  item_code varchar
  item_type varchar
  title varchar
  language char(2)
  word_count int
  page_number int[] // integer array
  ocr_quality_mean float
  ocr_quality_sd float
  input_filename varchar
  issue_id int [ref: > issue.id]
  data_provider_id int [ref: > data_provider.id]
  digitisation_id int [ref: > digitisation.id]
  ingest_id int [ref: > ingest.id]
}

Table issue {
  id int [pk, not null, unique]
  issue_code varchar
  issue_date date
  input_sub_path varchar
  publication_id int [ref: > publication.id]
}

Table publication {
  id int [pk, not null, unique]
//  publication_code varchar
  nlp_id varchar
  title varchar
  location_raw varchar
  mitchells_publication_id int  [ref: > mitchells_publication.id]
  place_of_publication_id int  [ref: > place_of_publication.id]
}

Table mitchells_publication {
  id int [pk, not null, unique]
  year int
//  nlp_id varchar
  mitchells_id int [ref: > mitchells.id]
}

Table mitchells {
  id int [pk, not null, unique]
  title text
  policital_leaning_1 varchar
  policital_leaning_2 varchar
  policital_leaning_raw JSON[varchar]
  price_1 varchar
  price_2 varchar
  price_raw JSON[varchar]
  year date
  date_raw varchar
  day_of_publication_raw varchar
  place_circulation_raw varchar
  publication_district_raw varchar
  publication_county varchar
  organisations varchar
  persons varchar
  place_of_publication_id int [ref: > place_of_publication.id]
}

Table place_of_publication {
  id int [pk, not null, unique]
  place_wikidata_id varchar
  place_label varchar
  latitude float
  longitude float
  geopoint Point
//  administrative_region Dict[varchar]
  geonames_ids varchar
  historic_county_id int [ref: > historic_county.id]
  admin_county_id int [ref: > admin_county.id]
  country_id int [ref: > country.id]
}

Table historic_county {
  id int [pk, not null, unique]
  hcounty_label varchar
  hcounty_wikidata_id varchar
}

Table admin_county {
  id int [pk, not null, unique]
  admin_county_label varchar
  admin_county_wikidata_id varchar
}

Table country {
  id int [pk, not null, unique]
  country_label varchar
  country_wikidata_id varchar
}
Table digitisation {
  id int [pk, not null, unique]
  xml_flavour varchar
  software varchar
  mets_namespace varchar
  alto_namespace varchar
}

Table ingest {
  id int [pk, not null, unique]
  lwm_tool_name varchar
  lwm_tool_version varchar
  lwm_tool_source varchar
}

Table data_provider {
  id int [pk, not null, unique]
  name varchar
  collection varchar
  source_note varchar
}

Ref: "data_provider"."collection" < "issue"."publication_id"
ChristinaLast commented 2 years ago

@mcollardanuy to create a new "Copy of the diagram" and share new link with unchanged schema

mcollardanuy commented 2 years ago

I have created a copy of the diagram here and updated the readme with the link.

kallewesterling commented 2 years ago

Newest and most up to date diagram of the database models/tables is available here:

https://dbdiagram.io/d/62bb46ba69be0b672c5d2a15