mbari-org / m3-quickstart

A simple and quick method to run MBARI's Video Annotation and References System
2 stars 1 forks source link

Create SQL migration script for postgres from old db to current 1.0 version #12

Open hohonuuli opened 6 months ago

hohonuuli commented 6 months ago

I have a sample db dump from geomar to test with

hohonuuli commented 6 months ago

Set up

  1. start services using docker_start.sh
  2. Copy db dump into temp/postgres
  3. Get a shell in the postgres container using docker exec -it docker-postgres-1 /bin/bash
  4. Get a psgl prompt using psql -U m3 -d M3_VARS
  5. Drop existing tables
DROP SCHEMA public CASCADE;
CREATE SCHEMA public;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO m3;
GRANT ALL ON SCHEMA public TO public;
hohonuuli commented 6 months ago

Load dump file

We're still in the docker container's bash shell from above ...

cd /var/lib/postgresql/data
psql -f ./db_geomar.sql -U m3 -d M3_VARS
hohonuuli commented 6 months ago

Annosaurus: Add AUD tables:

create sequence REVINFO_SEQ start with 1 increment by 50;
create table REVINFO (REV integer not null, REVTSTMP bigint, primary key (REV));
create table associations_AUD (REV integer not null, REVTYPE smallint, observation_uuid uuid, uuid uuid not null, mime_type varchar(64), link_name varchar(128), to_concept varchar(128), link_value varchar(1024), primary key (REV, uuid));
create table observations_AUD (REV integer not null, REVTYPE smallint, duration_millis bigint, observation_timestamp timestamp(6) with time zone, imaged_moment_uuid uuid, uuid uuid not null, activity varchar(128), observation_group varchar(128), observer varchar(128), concept varchar(256), primary key (REV, uuid));
alter table if exists associations_AUD add constraint fk_associations_aud__revinfo foreign key (REV) references REVINFO;
alter table if exists observations_AUD add constraint fk_observations_aud_refinfo foreign key (REV) references REVINFO;
hohonuuli commented 6 months ago

Annosaurus: Get annosaurus working

drop view annotations;
alter table public.imaged_moments
    alter column elapsed_time_millis type bigint using elapsed_time_millis::bigint;
alter table public.observations
    alter column duration_millis type bigint using duration_millis::bigint;
hohonuuli commented 6 months ago

Vampire-squid: Add AUD tables:


create table video_sequences_AUD (
                                     REV integer not null,
                                     REVTYPE smallint,
                                     uuid uuid not null,
                                     camera_id varchar(256),
                                     name varchar(512),
                                     description varchar(2048),
                                     primary key (REV, uuid)
);
alter table if exists video_sequences_AUD add constraint fk__video_seqs_aud__revinfo foreign key (REV) references REVINFO;

create table videos_AUD (
                            REV integer not null,
                            REVTYPE smallint,
                            duration_millis bigint,
                            start_time timestamp(6) with time zone,
                            uuid uuid not null,
                            video_sequence_uuid uuid,
                            name varchar(512),
                            description varchar(2048),
                            primary key (REV, uuid)
);
alter table if exists videos_AUD add constraint fk__videos_aud__revinfo foreign key (REV) references REVINFO;

create table video_references_AUD (
                                      REV integer not null,
                                      REVTYPE smallint,
                                      frame_rate float(53),
                                      height integer,
                                      width integer,
                                      size_bytes bigint,
                                      uuid uuid not null,
                                      video_uuid uuid,
                                      audio_codec varchar(128),
                                      container varchar(128),
                                      sha512 varchar(128),
                                      video_codec varchar(128),
                                      uri varchar(1024),
                                      description varchar(2048),
                                      primary key (REV, uuid)
);
alter table if exists video_references_AUD add constraint fk__video_refs_aud__revinfo foreign key (REV) references REVINFO;
hohonuuli commented 6 months ago

Vampire-squid update

drop view public.unique_videos;
alter table public.video_references
    alter column size_bytes type bigint using size_bytes::bigint;
alter table public.videos
    alter column duration_millis type bigint using duration_millis::bigint;
hohonuuli commented 6 months ago

Add timezones

ALTER TABLE videos ALTER COLUMN start_time TYPE timestamptz
    USING start_time AT TIME ZONE 'UTC';
ALTER TABLE observations ALTER COLUMN observation_timestamp TYPE timestamptz
    USING observation_timestamp AT TIME ZONE 'UTC';
ALTER TABLE imaged_moments ALTER COLUMN recorded_timestamp TYPE timestamptz
    USING recorded_timestamp AT TIME ZONE 'UTC';

re-add views

CREATE VIEW "unique_videos"
AS 
SELECT 
  s.uuid AS video_sequence_uuid,
  s.name AS video_sequence_name,
  v.uuid AS video_uuid,
  v.name AS video_name,
  r.uuid AS video_reference_uuid,
  r.uri,
  s.camera_id,
  v.duration_millis,
  v.start_time,
  r.container,
  r.width,
  r.height,
  r.size_bytes,
  r.last_updated_time,
  r.sha512
FROM 
    video_sequences AS s LEFT JOIN
    videos AS v ON v.video_sequence_uuid = s.uuid LEFT JOIN
    video_references AS r ON r.video_uuid = v.uuid;

CREATE VIEW "annotations"
AS
    SELECT
        im.uuid AS imaged_moment_uuid,
        im.elapsed_time_millis AS index_elapsed_time_millis,
        im.recorded_timestamp AS index_recorded_timestamp,
        im.timecode AS index_timecode,
        obs.uuid AS observation_uuid,
        obs.activity,
        obs.concept,
        obs.duration_millis,
        obs.observation_group,
        obs.observation_timestamp,
        obs.observer,
        ir.uuid AS image_reference_uuid,
        ir.description AS image_description,
        ir.format AS image_format,
        ir.height_pixels AS image_height,
        ir.width_pixels AS image_width,
        ir.url AS image_url,
        ass.link_name,
        ass.link_value,
        ass.to_concept,
        ass.mime_type AS association_mime_type,
        ass.link_name || ' | ' || ass.to_concept || ' | ' || ass.link_value AS associations,
        ad.altitude,
        ad.coordinate_reference_system,
        ad.depth_meters,
        ad.latitude,
        ad.longitude,
        ad.oxygen_ml_per_l,
        ad.phi,
        ad.xyz_position_units,
        ad.pressure_dbar,
        ad.psi,
        ad.salinity,
        ad.temperature_celsius,
        ad.theta,
        ad.x,
        ad.y,
        ad.z,
        ad.light_transmission,
        vr.uuid AS video_reference_uuid,
        vr.audio_codec,
        vr.container AS video_container,
        vr.description AS video_reference_description,
        vr.frame_rate,
        vr.height AS video_height,
        vr.sha512 AS video_sha512,
        vr.size_bytes AS video_size_bytes,
        vr.uri AS video_uri,
        vr.video_codec,
        vr.width AS video_width,
        v.description AS video_description,
        v.duration_millis AS video_duration_millis,
        v.name AS video_name,
        v.start_time AS video_start_timestamp,
        vs.camera_id,
        vs.description AS video_sequence_description,
        vs.name AS video_sequence_name,
        info.mission_contact AS chief_scientist,
        info.mission_id AS dive_number,
        info.platform_name AS camera_platform
    FROM
        imaged_moments im
        LEFT JOIN observations obs ON obs.imaged_moment_uuid = im.uuid
        LEFT JOIN image_references ir ON ir.imaged_moment_uuid = im.uuid
        LEFT JOIN associations ass ON ass.observation_uuid = obs.uuid
        LEFT JOIN ancillary_data  ad ON ad.imaged_moment_uuid = im.uuid
        LEFT JOIN video_references vr ON vr.uuid = im.video_reference_uuid
        LEFT JOIN videos v ON v.uuid = vr.video_uuid
        LEFT JOIN video_sequences vs ON vs.uuid = v.video_sequence_uuid
        LEFT JOIN video_reference_information info ON info.video_reference_uuid = im.video_reference_uuid;
hohonuuli commented 6 months ago

Note that there are a number of non-UTF- characters in the dbdump file. Nearly all these characters are in the VARS KB data. You can find the offending lines using `grep -axv '.' temp/postgres/db_geomar.sql`. They can be stripped out using

iconv -f utf-8 -t utf-8 -c temp/postgres/db_geomar.sql > temp/postgres/db_geomar_utf8.sql

Be aware that just removes the offending character, which appear to be mostly accented letters, it doesn't replace them.

hohonuuli commented 6 months ago

Next step is to completely drop the database, scrub the non UTF-8 chars out of the load file and then do a clean load and update with the following scripts.

00_db_drop.sql

DROP SCHEMA public CASCADE;
CREATE SCHEMA public;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO m3;
GRANT ALL ON SCHEMA public TO public;

01_db_migration.sql

-- anosaurus
drop view annotations;
alter table public.imaged_moments
    alter column elapsed_time_millis type bigint using elapsed_time_millis::bigint;
alter table public.observations
    alter column duration_millis type bigint using duration_millis::bigint;

create sequence REVINFO_SEQ start with 1 increment by 50;
create table REVINFO (REV integer not null, REVTSTMP bigint, primary key (REV));
create table associations_AUD (REV integer not null, REVTYPE smallint, observation_uuid uuid, uuid uuid not null, mime_type varchar(64), link_name varchar(128), to_concept varchar(128), link_value varchar(1024), primary key (REV, uuid));
create table observations_AUD (REV integer not null, REVTYPE smallint, duration_millis bigint, observation_timestamp timestamp(6) with time zone, imaged_moment_uuid uuid, uuid uuid not null, activity varchar(128), observation_group varchar(128), observer varchar(128), concept varchar(256), primary key (REV, uuid));
alter table if exists associations_AUD add constraint fk_associations_aud__revinfo foreign key (REV) references REVINFO;
alter table if exists observations_AUD add constraint fk_observations_aud_refinfo foreign key (REV) references REVINFO;

-- vampire-squid
drop view public.unique_videos;
alter table public.video_references
    alter column size_bytes type bigint using size_bytes::bigint;
alter table public.videos
    alter column duration_millis type bigint using duration_millis::bigint;
ALTER TABLE videos ALTER COLUMN start_time TYPE timestamptz
    USING start_time AT TIME ZONE 'UTC';
ALTER TABLE observations ALTER COLUMN observation_timestamp TYPE timestamptz
    USING observation_timestamp AT TIME ZONE 'UTC';
ALTER TABLE imaged_moments ALTER COLUMN recorded_timestamp TYPE timestamptz
    USING recorded_timestamp AT TIME ZONE 'UTC';

create table video_sequences_AUD (
                                     REV integer not null,
                                     REVTYPE smallint,
                                     uuid uuid not null,
                                     camera_id varchar(256),
                                     name varchar(512),
                                     description varchar(2048),
                                     primary key (REV, uuid)
);
alter table if exists video_sequences_AUD add constraint fk__video_seqs_aud__revinfo foreign key (REV) references REVINFO;

create table videos_AUD (
                            REV integer not null,
                            REVTYPE smallint,
                            duration_millis bigint,
                            start_time timestamp(6) with time zone,
                            uuid uuid not null,
                            video_sequence_uuid uuid,
                            name varchar(512),
                            description varchar(2048),
                            primary key (REV, uuid)
);
alter table if exists videos_AUD add constraint fk__videos_aud__revinfo foreign key (REV) references REVINFO;

create table video_references_AUD (
                                      REV integer not null,
                                      REVTYPE smallint,
                                      frame_rate float(53),
                                      height integer,
                                      width integer,
                                      size_bytes bigint,
                                      uuid uuid not null,
                                      video_uuid uuid,
                                      audio_codec varchar(128),
                                      container varchar(128),
                                      sha512 varchar(128),
                                      video_codec varchar(128),
                                      uri varchar(1024),
                                      description varchar(2048),
                                      primary key (REV, uuid)
);
alter table if exists video_references_AUD add constraint fk__video_refs_aud__revinfo foreign key (REV) references REVINFO;

-- views
CREATE VIEW "unique_videos"
AS 
SELECT 
  s.uuid AS video_sequence_uuid,
  s.name AS video_sequence_name,
  v.uuid AS video_uuid,
  v.name AS video_name,
  r.uuid AS video_reference_uuid,
  r.uri,
  s.camera_id,
  v.duration_millis,
  v.start_time,
  r.container,
  r.width,
  r.height,
  r.size_bytes,
  r.last_updated_time,
  r.sha512
FROM 
    video_sequences AS s LEFT JOIN
    videos AS v ON v.video_sequence_uuid = s.uuid LEFT JOIN
    video_references AS r ON r.video_uuid = v.uuid;

CREATE VIEW "annotations"
AS
    SELECT
        im.uuid AS imaged_moment_uuid,
        im.elapsed_time_millis AS index_elapsed_time_millis,
        im.recorded_timestamp AS index_recorded_timestamp,
        im.timecode AS index_timecode,
        obs.uuid AS observation_uuid,
        obs.activity,
        obs.concept,
        obs.duration_millis,
        obs.observation_group,
        obs.observation_timestamp,
        obs.observer,
        ir.uuid AS image_reference_uuid,
        ir.description AS image_description,
        ir.format AS image_format,
        ir.height_pixels AS image_height,
        ir.width_pixels AS image_width,
        ir.url AS image_url,
        ass.link_name,
        ass.link_value,
        ass.to_concept,
        ass.mime_type AS association_mime_type,
        ass.link_name || ' | ' || ass.to_concept || ' | ' || ass.link_value AS associations,
        ad.altitude,
        ad.coordinate_reference_system,
        ad.depth_meters,
        ad.latitude,
        ad.longitude,
        ad.oxygen_ml_per_l,
        ad.phi,
        ad.xyz_position_units,
        ad.pressure_dbar,
        ad.psi,
        ad.salinity,
        ad.temperature_celsius,
        ad.theta,
        ad.x,
        ad.y,
        ad.z,
        ad.light_transmission,
        vr.uuid AS video_reference_uuid,
        vr.audio_codec,
        vr.container AS video_container,
        vr.description AS video_reference_description,
        vr.frame_rate,
        vr.height AS video_height,
        vr.sha512 AS video_sha512,
        vr.size_bytes AS video_size_bytes,
        vr.uri AS video_uri,
        vr.video_codec,
        vr.width AS video_width,
        v.description AS video_description,
        v.duration_millis AS video_duration_millis,
        v.name AS video_name,
        v.start_time AS video_start_timestamp,
        vs.camera_id,
        vs.description AS video_sequence_description,
        vs.name AS video_sequence_name,
        info.mission_contact AS chief_scientist,
        info.mission_id AS dive_number,
        info.platform_name AS camera_platform
    FROM
        imaged_moments im
        LEFT JOIN observations obs ON obs.imaged_moment_uuid = im.uuid
        LEFT JOIN image_references ir ON ir.imaged_moment_uuid = im.uuid
        LEFT JOIN associations ass ON ass.observation_uuid = obs.uuid
        LEFT JOIN ancillary_data  ad ON ad.imaged_moment_uuid = im.uuid
        LEFT JOIN video_references vr ON vr.uuid = im.video_reference_uuid
        LEFT JOIN videos v ON v.uuid = vr.video_uuid
        LEFT JOIN video_sequences vs ON vs.uuid = v.video_sequence_uuid
        LEFT JOIN video_reference_information info ON info.video_reference_uuid = im.video_reference_uuid;
hohonuuli commented 6 months ago

Tested with VARS:

Put the SQL scripts in m3-quickstart/temp/postgresql so that they're mounted into the postgres container. Then run the following:

cd m3-quickstart
bin/docker_start.sh
docker exec -it  docker-postgres-1 /bin/bash

# We're now in the postgres container
psql -f 00_db_drop.sql -U m3 -d M3_VARS
psql -f 01_db_geomar_utf8.sql -U m3 -d M3_VARS
psql -f 02_db_migration.sql -U m3 -d M3_VARS