Open hohonuuli opened 7 months ago
docker_start.sh
temp/postgres
docker exec -it docker-postgres-1 /bin/bash
psql -U m3 -d M3_VARS
DROP SCHEMA public CASCADE;
CREATE SCHEMA public;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO m3;
GRANT ALL ON SCHEMA public TO public;
We're still in the docker container's bash shell from above ...
cd /var/lib/postgresql/data
psql -f ./db_geomar.sql -U m3 -d M3_VARS
create sequence REVINFO_SEQ start with 1 increment by 50;
create table REVINFO (REV integer not null, REVTSTMP bigint, primary key (REV));
create table associations_AUD (REV integer not null, REVTYPE smallint, observation_uuid uuid, uuid uuid not null, mime_type varchar(64), link_name varchar(128), to_concept varchar(128), link_value varchar(1024), primary key (REV, uuid));
create table observations_AUD (REV integer not null, REVTYPE smallint, duration_millis bigint, observation_timestamp timestamp(6) with time zone, imaged_moment_uuid uuid, uuid uuid not null, activity varchar(128), observation_group varchar(128), observer varchar(128), concept varchar(256), primary key (REV, uuid));
alter table if exists associations_AUD add constraint fk_associations_aud__revinfo foreign key (REV) references REVINFO;
alter table if exists observations_AUD add constraint fk_observations_aud_refinfo foreign key (REV) references REVINFO;
drop view annotations;
alter table public.imaged_moments
alter column elapsed_time_millis type bigint using elapsed_time_millis::bigint;
alter table public.observations
alter column duration_millis type bigint using duration_millis::bigint;
create table video_sequences_AUD (
REV integer not null,
REVTYPE smallint,
uuid uuid not null,
camera_id varchar(256),
name varchar(512),
description varchar(2048),
primary key (REV, uuid)
);
alter table if exists video_sequences_AUD add constraint fk__video_seqs_aud__revinfo foreign key (REV) references REVINFO;
create table videos_AUD (
REV integer not null,
REVTYPE smallint,
duration_millis bigint,
start_time timestamp(6) with time zone,
uuid uuid not null,
video_sequence_uuid uuid,
name varchar(512),
description varchar(2048),
primary key (REV, uuid)
);
alter table if exists videos_AUD add constraint fk__videos_aud__revinfo foreign key (REV) references REVINFO;
create table video_references_AUD (
REV integer not null,
REVTYPE smallint,
frame_rate float(53),
height integer,
width integer,
size_bytes bigint,
uuid uuid not null,
video_uuid uuid,
audio_codec varchar(128),
container varchar(128),
sha512 varchar(128),
video_codec varchar(128),
uri varchar(1024),
description varchar(2048),
primary key (REV, uuid)
);
alter table if exists video_references_AUD add constraint fk__video_refs_aud__revinfo foreign key (REV) references REVINFO;
drop view public.unique_videos;
alter table public.video_references
alter column size_bytes type bigint using size_bytes::bigint;
alter table public.videos
alter column duration_millis type bigint using duration_millis::bigint;
ALTER TABLE videos ALTER COLUMN start_time TYPE timestamptz
USING start_time AT TIME ZONE 'UTC';
ALTER TABLE observations ALTER COLUMN observation_timestamp TYPE timestamptz
USING observation_timestamp AT TIME ZONE 'UTC';
ALTER TABLE imaged_moments ALTER COLUMN recorded_timestamp TYPE timestamptz
USING recorded_timestamp AT TIME ZONE 'UTC';
CREATE VIEW "unique_videos"
AS
SELECT
s.uuid AS video_sequence_uuid,
s.name AS video_sequence_name,
v.uuid AS video_uuid,
v.name AS video_name,
r.uuid AS video_reference_uuid,
r.uri,
s.camera_id,
v.duration_millis,
v.start_time,
r.container,
r.width,
r.height,
r.size_bytes,
r.last_updated_time,
r.sha512
FROM
video_sequences AS s LEFT JOIN
videos AS v ON v.video_sequence_uuid = s.uuid LEFT JOIN
video_references AS r ON r.video_uuid = v.uuid;
CREATE VIEW "annotations"
AS
SELECT
im.uuid AS imaged_moment_uuid,
im.elapsed_time_millis AS index_elapsed_time_millis,
im.recorded_timestamp AS index_recorded_timestamp,
im.timecode AS index_timecode,
obs.uuid AS observation_uuid,
obs.activity,
obs.concept,
obs.duration_millis,
obs.observation_group,
obs.observation_timestamp,
obs.observer,
ir.uuid AS image_reference_uuid,
ir.description AS image_description,
ir.format AS image_format,
ir.height_pixels AS image_height,
ir.width_pixels AS image_width,
ir.url AS image_url,
ass.link_name,
ass.link_value,
ass.to_concept,
ass.mime_type AS association_mime_type,
ass.link_name || ' | ' || ass.to_concept || ' | ' || ass.link_value AS associations,
ad.altitude,
ad.coordinate_reference_system,
ad.depth_meters,
ad.latitude,
ad.longitude,
ad.oxygen_ml_per_l,
ad.phi,
ad.xyz_position_units,
ad.pressure_dbar,
ad.psi,
ad.salinity,
ad.temperature_celsius,
ad.theta,
ad.x,
ad.y,
ad.z,
ad.light_transmission,
vr.uuid AS video_reference_uuid,
vr.audio_codec,
vr.container AS video_container,
vr.description AS video_reference_description,
vr.frame_rate,
vr.height AS video_height,
vr.sha512 AS video_sha512,
vr.size_bytes AS video_size_bytes,
vr.uri AS video_uri,
vr.video_codec,
vr.width AS video_width,
v.description AS video_description,
v.duration_millis AS video_duration_millis,
v.name AS video_name,
v.start_time AS video_start_timestamp,
vs.camera_id,
vs.description AS video_sequence_description,
vs.name AS video_sequence_name,
info.mission_contact AS chief_scientist,
info.mission_id AS dive_number,
info.platform_name AS camera_platform
FROM
imaged_moments im
LEFT JOIN observations obs ON obs.imaged_moment_uuid = im.uuid
LEFT JOIN image_references ir ON ir.imaged_moment_uuid = im.uuid
LEFT JOIN associations ass ON ass.observation_uuid = obs.uuid
LEFT JOIN ancillary_data ad ON ad.imaged_moment_uuid = im.uuid
LEFT JOIN video_references vr ON vr.uuid = im.video_reference_uuid
LEFT JOIN videos v ON v.uuid = vr.video_uuid
LEFT JOIN video_sequences vs ON vs.uuid = v.video_sequence_uuid
LEFT JOIN video_reference_information info ON info.video_reference_uuid = im.video_reference_uuid;
Note that there are a number of non-UTF- characters in the dbdump file. Nearly all these characters are in the VARS KB data. You can find the offending lines using `grep -axv '.' temp/postgres/db_geomar.sql`. They can be stripped out using
iconv -f utf-8 -t utf-8 -c temp/postgres/db_geomar.sql > temp/postgres/db_geomar_utf8.sql
Be aware that just removes the offending character, which appear to be mostly accented letters, it doesn't replace them.
Next step is to completely drop the database, scrub the non UTF-8 chars out of the load file and then do a clean load and update with the following scripts.
DROP SCHEMA public CASCADE;
CREATE SCHEMA public;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO m3;
GRANT ALL ON SCHEMA public TO public;
-- anosaurus
drop view annotations;
alter table public.imaged_moments
alter column elapsed_time_millis type bigint using elapsed_time_millis::bigint;
alter table public.observations
alter column duration_millis type bigint using duration_millis::bigint;
create sequence REVINFO_SEQ start with 1 increment by 50;
create table REVINFO (REV integer not null, REVTSTMP bigint, primary key (REV));
create table associations_AUD (REV integer not null, REVTYPE smallint, observation_uuid uuid, uuid uuid not null, mime_type varchar(64), link_name varchar(128), to_concept varchar(128), link_value varchar(1024), primary key (REV, uuid));
create table observations_AUD (REV integer not null, REVTYPE smallint, duration_millis bigint, observation_timestamp timestamp(6) with time zone, imaged_moment_uuid uuid, uuid uuid not null, activity varchar(128), observation_group varchar(128), observer varchar(128), concept varchar(256), primary key (REV, uuid));
alter table if exists associations_AUD add constraint fk_associations_aud__revinfo foreign key (REV) references REVINFO;
alter table if exists observations_AUD add constraint fk_observations_aud_refinfo foreign key (REV) references REVINFO;
-- vampire-squid
drop view public.unique_videos;
alter table public.video_references
alter column size_bytes type bigint using size_bytes::bigint;
alter table public.videos
alter column duration_millis type bigint using duration_millis::bigint;
ALTER TABLE videos ALTER COLUMN start_time TYPE timestamptz
USING start_time AT TIME ZONE 'UTC';
ALTER TABLE observations ALTER COLUMN observation_timestamp TYPE timestamptz
USING observation_timestamp AT TIME ZONE 'UTC';
ALTER TABLE imaged_moments ALTER COLUMN recorded_timestamp TYPE timestamptz
USING recorded_timestamp AT TIME ZONE 'UTC';
create table video_sequences_AUD (
REV integer not null,
REVTYPE smallint,
uuid uuid not null,
camera_id varchar(256),
name varchar(512),
description varchar(2048),
primary key (REV, uuid)
);
alter table if exists video_sequences_AUD add constraint fk__video_seqs_aud__revinfo foreign key (REV) references REVINFO;
create table videos_AUD (
REV integer not null,
REVTYPE smallint,
duration_millis bigint,
start_time timestamp(6) with time zone,
uuid uuid not null,
video_sequence_uuid uuid,
name varchar(512),
description varchar(2048),
primary key (REV, uuid)
);
alter table if exists videos_AUD add constraint fk__videos_aud__revinfo foreign key (REV) references REVINFO;
create table video_references_AUD (
REV integer not null,
REVTYPE smallint,
frame_rate float(53),
height integer,
width integer,
size_bytes bigint,
uuid uuid not null,
video_uuid uuid,
audio_codec varchar(128),
container varchar(128),
sha512 varchar(128),
video_codec varchar(128),
uri varchar(1024),
description varchar(2048),
primary key (REV, uuid)
);
alter table if exists video_references_AUD add constraint fk__video_refs_aud__revinfo foreign key (REV) references REVINFO;
-- views
CREATE VIEW "unique_videos"
AS
SELECT
s.uuid AS video_sequence_uuid,
s.name AS video_sequence_name,
v.uuid AS video_uuid,
v.name AS video_name,
r.uuid AS video_reference_uuid,
r.uri,
s.camera_id,
v.duration_millis,
v.start_time,
r.container,
r.width,
r.height,
r.size_bytes,
r.last_updated_time,
r.sha512
FROM
video_sequences AS s LEFT JOIN
videos AS v ON v.video_sequence_uuid = s.uuid LEFT JOIN
video_references AS r ON r.video_uuid = v.uuid;
CREATE VIEW "annotations"
AS
SELECT
im.uuid AS imaged_moment_uuid,
im.elapsed_time_millis AS index_elapsed_time_millis,
im.recorded_timestamp AS index_recorded_timestamp,
im.timecode AS index_timecode,
obs.uuid AS observation_uuid,
obs.activity,
obs.concept,
obs.duration_millis,
obs.observation_group,
obs.observation_timestamp,
obs.observer,
ir.uuid AS image_reference_uuid,
ir.description AS image_description,
ir.format AS image_format,
ir.height_pixels AS image_height,
ir.width_pixels AS image_width,
ir.url AS image_url,
ass.link_name,
ass.link_value,
ass.to_concept,
ass.mime_type AS association_mime_type,
ass.link_name || ' | ' || ass.to_concept || ' | ' || ass.link_value AS associations,
ad.altitude,
ad.coordinate_reference_system,
ad.depth_meters,
ad.latitude,
ad.longitude,
ad.oxygen_ml_per_l,
ad.phi,
ad.xyz_position_units,
ad.pressure_dbar,
ad.psi,
ad.salinity,
ad.temperature_celsius,
ad.theta,
ad.x,
ad.y,
ad.z,
ad.light_transmission,
vr.uuid AS video_reference_uuid,
vr.audio_codec,
vr.container AS video_container,
vr.description AS video_reference_description,
vr.frame_rate,
vr.height AS video_height,
vr.sha512 AS video_sha512,
vr.size_bytes AS video_size_bytes,
vr.uri AS video_uri,
vr.video_codec,
vr.width AS video_width,
v.description AS video_description,
v.duration_millis AS video_duration_millis,
v.name AS video_name,
v.start_time AS video_start_timestamp,
vs.camera_id,
vs.description AS video_sequence_description,
vs.name AS video_sequence_name,
info.mission_contact AS chief_scientist,
info.mission_id AS dive_number,
info.platform_name AS camera_platform
FROM
imaged_moments im
LEFT JOIN observations obs ON obs.imaged_moment_uuid = im.uuid
LEFT JOIN image_references ir ON ir.imaged_moment_uuid = im.uuid
LEFT JOIN associations ass ON ass.observation_uuid = obs.uuid
LEFT JOIN ancillary_data ad ON ad.imaged_moment_uuid = im.uuid
LEFT JOIN video_references vr ON vr.uuid = im.video_reference_uuid
LEFT JOIN videos v ON v.uuid = vr.video_uuid
LEFT JOIN video_sequences vs ON vs.uuid = v.video_sequence_uuid
LEFT JOIN video_reference_information info ON info.video_reference_uuid = im.video_reference_uuid;
Tested with VARS:
Put the SQL scripts in m3-quickstart/temp/postgresql
so that they're mounted into the postgres container. Then run the following:
cd m3-quickstart
bin/docker_start.sh
docker exec -it docker-postgres-1 /bin/bash
# We're now in the postgres container
psql -f 00_db_drop.sql -U m3 -d M3_VARS
psql -f 01_db_geomar_utf8.sql -U m3 -d M3_VARS
psql -f 02_db_migration.sql -U m3 -d M3_VARS
I have a sample db dump from geomar to test with