meerapatelmd / metathesaurus

Setup a local Postgres UMLS Metathesaurus.
https://meerapatelmd.github.io/metathesaurus
0 stars 1 forks source link

Add MRHIER pivoting feature #1

Open mpatel-cai opened 2 years ago

mpatel-cai commented 2 years ago

Create a schema containing all the possible classifications within the MRHIER table in the a depivoted format. This script is driven by the sab in the MRIHIER table.

Please include a minimal reproducible example (AKA a reprex). If you've never heard of a reprex before, start by reading https://www.tidyverse.org/help/#reprex.

Brief description of the problem

do
$$
declare
    f record;
    tbl varchar(255);
    sab varchar(255);
begin
    for f in 
      SELECT 
        h.sab AS hierarchy_sab, 
        REGEXP_REPLACE(h.sab, '[[:punct:]]', '_') AS hierarchy_table,
        COUNT(*) 
      FROM mth.mrhier h
      INNER JOIN mth.mrconso c 
      ON c.aui = h.aui 
      WHERE c.lat = 'ENG' 
      GROUP BY h.sab, hierarchy_table
      HAVING COUNT(*) > 1 
      ORDER BY COUNT(*) 
    loop 
      tbl := f.hierarchy_table;
      sab := f.hierarchy_sab;
      raise notice '% (% rows)', sab, f.count;
      execute format(
  'DROP TABLE IF EXISTS mth_hier.%I; 
  CREATE TABLE  mth_hier.%I (
    ptr_id INTEGER NOT NULL,
    aui varchar(12), 
    code varchar(100), 
    str text, 
    rela varchar(100), 
    relative_aui varchar(12) NOT NULL,
    relative_code varchar(100),
    relative_str text,
    relative_level INTEGER NOT NULL
  );
    WITH relatives0 AS (
    SELECT ROW_NUMBER() OVER() AS rowid, r0.*
    FROM 
        (
            SELECT DISTINCT s1.aui, s1.code, s1.str, m.rela, m.ptr --ptr: Path To Root
            FROM mth.mrhier m
            INNER JOIN (SELECT * FROM mth.mrconso WHERE sab = ''%s'') s1 
            ON s1.aui = m.aui
        ) r0
  ) ,
  relatives1 AS (
    SELECT rowid, aui, code, str, rela, ptr, unnest(string_to_array(ptr, ''.'')) AS relative_aui
    FROM relatives0 r0 
    ORDER BY rowid
  ),
  relatives2 AS (
    SELECT r1.*, ROW_NUMBER() OVER (PARTITION BY rowid) AS relative_level
    FROM relatives1 r1 
  ),
  relatives3 AS (
    SELECT r2.*, m.code AS relative_code, m.str AS relative_str 
    FROM relatives2 r2
    LEFT JOIN mth.mrconso m 
    ON m.aui = r2.relative_aui
  )

  INSERT INTO mth_hier.%I  
  SELECT DISTINCT
    rowid AS ptr_id,
    aui, 
    code, 
    str, 
    rela, 
    relative_aui,
    relative_code,
    relative_str,
    relative_level   
  FROM relatives3
  ;
  ',tbl, tbl, sab,tbl);
    end loop;
end;
$$
;
mpatel-cai commented 2 years ago

change '%I' to '%s'.

meerapatelmd commented 2 years ago
do
$$
declare 
    dest_schema varchar(255) := 'rxnorm_classifications'; 
    f record;
    sab varchar(255);
    tbl varchar(255);
begin 
    for f in 
      SELECT 
        h.sab AS hierarchy_sab, 
        REGEXP_REPLACE(h.sab, '[[:punct:]]', '_') AS hierarchy_table,
        COUNT(*) 
      FROM mth.mrconso rx 
      LEFT JOIN mth.mrrel rel 
      ON rel.aui1 = rx.aui 
      LEFT JOIN mth.mrconso other 
      ON other.aui = rel.aui2  
      LEFT JOIN mth.mrhier h  
      ON h.aui = rel.aui2 
      WHERE 
        rx.sab = 'RXNORM' 
        AND rx.tty = 'IN' 
        AND h.sab IS NOT NULL  
      GROUP BY hierarchy_sab, hierarchy_table 
      ORDER BY COUNT(*) DESC 
    LOOP
      tbl := f.hierarchy_table;
      sab := f.hierarchy_sab;
      RAISE NOTICE '% (% classifications)', sab, f.count;
      EXECUTE 
        format(
          '
          DROP TABLE IF EXISTS %s.%s;
          CREATE TABLE %s.%s AS (
          SELECT 
            mth.aui AS rx_ingr_aui, 
            mth.code AS rx_ingr_code, 
            mth.str  AS rx_ingr_str, 
            rel.rel AS relationship_type, 
            rel.rela AS relationship,
            h.aui    AS related_aui, 
            h.code  AS related_code, 
            h.str   AS related_str, 
            h.ptr_id,
            h.rela  AS hierarchical_rel, 
            h.relative_level,
            h.relative_aui,
            h.relative_code, 
            h.relative_str 
          FROM mth.mrconso mth 
          LEFT JOIN mth.mrrel rel 
          ON rel.aui1 = mth.aui 
          LEFT JOIN mth.mrconso mth2 
          ON mth2.aui = rel.aui2 
          LEFT JOIN mth_hier.%I h 
          ON h.aui = mth2.aui 
          WHERE 
            mth.sab = ''RXNORM'' 
            AND mth.tty = ''IN'' 
            AND mth2.sab = ''%s'' 
            -- only when there is a hierarchy present 
            AND h.aui IS NOT NULL
          ORDER BY mth.aui, mth.code, h.ptr_id, h.relative_level
          )',
            dest_schema,
            tbl,
            dest_schema,
            tbl,
            tbl, 
            sab
          );
    end loop;
end;
$$
;

DROP SCHEMA rxnorm_classifications2 CASCADE;
CREATE SCHEMA rxnorm_classifications2;

-- Create a reference table to split a second time on relationship 

DROP TABLE IF EXISTS rxnorm_classifications2.tmp_md;
CREATE TABLE rxnorm_classifications2.tmp_md (
  table_name varchar(25), 
  relationship_type varchar(10), 
  relationship varchar(255)
)
;

DROP TABLE IF EXISTS rxnorm_classifications2.md;
CREATE TABLE rxnorm_classifications2.md (
  table_name varchar(25), 
  relationship_type varchar(10), 
  relationship varchar(255)
)
;

do 
$$
declare 
  src_schema varchar(255) := 'rxnorm_classifications'; 
  target_schema varchar(255) := 'rxnorm_classifications2';
  f record; 
  src_tbl varchar(25);
  rel_type varchar(10); 
  relationship varchar(255);
begin  
  for f in  
    SELECT ic.table_name 
    FROM information_schema.tables  it 
    INNER JOIN information_schema.columns ic 
    ON ic.table_schema = it.table_schema
    WHERE it.table_schema = 'rxnorm_classifications'   
  LOOP 
    src_tbl := f.table_name; 
    -- RAISE NOTICE '% (% classifications)', sab, f.count;
      EXECUTE 
        format(
          '
          INSERT INTO %s.tmp_md  
          SELECT DISTINCT 
            ''%s'' AS table_name, 
            relationship_type, 
            relationship
          FROM %s.%s mth 
          ;',
            target_schema,
            src_tbl,
            src_schema,
            src_tbl
          );
    end loop;
end;
$$
;

INSERT INTO rxnorm_classifications2.md 
SELECT DISTINCT * FROM rxnorm_classifications2.tmp_md;
DROP TABLE rxnorm_classifications2.tmp_md;

select * 
from rxnorm_classifications2.md;

do 
$$
declare 
  src_schema varchar(255) := 'rxnorm_classifications'; 
  target_schema varchar(255) := 'rxnorm_classifications2';
  f record; 
  src_tbl varchar(25);
  rel_type varchar(10); 
  relationship varchar(255);
  target_table varchar(63);
begin  
  for f in  
    SELECT DISTINCT * FROM rxnorm_classifications2.md  
  LOOP 
    src_tbl := f.table_name; 
    IF 
      f.relationship IS NULL THEN  target_table := CONCAT(f.table_name, '_', f.relationship_type);
      ELSE target_table := CONCAT(f.table_name, '_', f.relationship_type, '_', f.relationship); 
      END if;
    target_table := SUBSTRING(target_table, 1, 63);
    RAISE NOTICE '%', target_table;

    IF 
      f.relationship IS NULL THEN 
          EXECUTE 
    format(
      '
      DROP TABLE IF EXISTS %s.%s; 
      CREATE TABLE %s.%s AS (
        SELECT distinct
            rx_ingr_aui,
            rx_ingr_code,
            rx_ingr_str,
            related_aui,
            related_code,
            related_str,
            ptr_id,
            relative_level,
            relative_aui,
            relative_code,
            relative_str 
        FROM %s.%s 
        WHERE 
          relationship_type = ''%s'' 
          AND relationship IS NULL
      ); 
      ', 
      target_schema,
      target_table,
      target_schema,
      target_table,
      src_schema,
      src_tbl,
      f.relationship_type);
    ELSE 
    EXECUTE 
    format(
      '
      DROP TABLE IF EXISTS %s.%s; 
      CREATE TABLE %s.%s AS (
        SELECT distinct
            rx_ingr_aui,
            rx_ingr_code,
            rx_ingr_str,
            related_aui,
            related_code,
            related_str,
            ptr_id,
            relative_level,
            relative_aui,
            relative_code,
            relative_str
        FROM %s.%s 
        WHERE 
          relationship_type = ''%s'' 
          AND relationship = ''%s''  
        ORDER BY rx_ingr_aui, ptr_id, relative_level
      ); 
      ', 
      target_schema,
      target_table,
      target_schema,
      target_table,
      src_schema,
      src_tbl,
      f.relationship_type, 
      f.relationship);
      END if;

    end loop;
end;
$$
; 

-- Collapse `related_*` fields as the last `relative_level`
DROP SCHEMA rxnorm_classifications3 CASCADE;
CREATE SCHEMA rxnorm_classifications3;

DROP SCHEMA rxnorm_classifications3 CASCADE;
CREATE SCHEMA rxnorm_classifications3;

DROP TABLE IF EXISTS rxnorm_classifications3.md;
CREATE TABLE  rxnorm_classifications3.md AS (   
SELECT DISTINCT ic.table_name 
    FROM information_schema.tables  it 
    INNER JOIN information_schema.columns ic 
    ON ic.table_schema = it.table_schema
    WHERE it.table_schema = 'rxnorm_classifications2' AND ic.table_name <> 'md')
;

select * 
from rxnorm_classifications3.md;

do 
$$
declare 
  src_schema varchar(255) := 'rxnorm_classifications2'; 
  src_tbl     varchar(255);
  target_schema varchar(255) := 'rxnorm_classifications3';
  target_tbl varchar(255);
  f record; 
begin  
  for f in  
    SELECT * FROM rxnorm_classifications3.md 
  LOOP  
    src_tbl := f.table_name;
    target_tbl := f.table_name;  
    EXECUTE
    format(
      '
      DROP TABLE IF EXISTS %s.%s; 
      CREATE TABLE %s.%s (
        rx_ingr_aui varchar(100),
        rx_ingr_code varchar(100),
        rx_ingr_str varchar(255),
        ptr_id integer NOT NULL, 
        relative_level integer NOT NULL,
        relative_aui varchar(100),
        relative_code varchar(100),
        relative_str varchar(255)
      );

      WITH related_level AS (
        SELECT 
          rx_ingr_aui, 
          rx_ingr_code, 
          rx_ingr_str,
          ptr_id, 
          MAX(relative_level)+1 AS relative_level,
          related_aui AS relative_aui, 
          related_code AS relative_code, 
          related_str AS relative_str
        FROM %s.%s 
        GROUP BY rx_ingr_aui, rx_ingr_code, rx_ingr_str, ptr_id, related_aui, related_code, related_str
        ) 

       INSERT INTO %s.%s 
        SELECT 
         rx_ingr_aui, 
         rx_ingr_code,
         rx_ingr_str, 
         ptr_id, 
         relative_level,
         relative_aui,
         relative_code,
         relative_str 
        FROM %s.%s m
        UNION 
        SELECT 
         rx_ingr_aui, 
         rx_ingr_code,
         rx_ingr_str, 
         ptr_id, 
         relative_level,
         relative_aui,
         relative_code,
         relative_str 
        FROM related_level 
        ORDER BY rx_ingr_aui, ptr_id, relative_level
        ;     
      ', 
      target_schema,
      target_tbl,
      target_schema,
      target_tbl,
      src_schema,
      src_tbl,
      target_schema,
      target_TBL,
      src_schema,
      src_tbl
    );

    end loop;
end;
$$
;