TAMULib / Vireo

Vireo is a turnkey Electronic Thesis and Dissertation (ETD) Management System.
http://vireoetd.org/vireo/
1 stars 0 forks source link

Sprint 2 #97

Closed wwelling closed 1 month ago

wwelling commented 1 month ago

Sprint 2 deployment migration

  1. run node.js migration scripts below
  2. run sql migration scripts below
  3. build & deploy new image

Here is a node.js script to update Vireo 4 weaver_users netid to be institutionalidentifier (UIN) from Vireo 3. If UIN not available, it will use netid (unique hash) or email.

// npm install pg
// mkdir logs
// node index.js > logs/index.log
const { Client } = require('pg');

const vireo4db = {
  user: process.env.VIREO4_DB_USER || 'vireo',
  host: process.env.VIREO4_DB_HOST || 'localhost',
  database: process.env.VIREO4_DB_NAME || 'vireo',
  password: process.env.VIREO4_DB_PASSWORD || 'vireo',
  port: process.env.VIREO4_DB_PORT || 5432,
};

const vireo3db = {
  user: process.env.VIREO3_DB_USER || 'etduser',
  host: process.env.VIREO3_DB_HOST || 'localhost',
  database: process.env.VIREO3_DB_NAME || 'vireo',
  password: process.env.VIREO3_DB_PASSWORD || 'etduser',
  port: process.env.VIREO3_DB_PORT || 5433,
};

async function queryDatabase(config, query) {
  const client = new Client(config);

  try {
    await client.connect();
    const res = await client.query(query);
    return res.rows;
  } catch (err) {
    console.log('Error querying database:', err);
  } finally {
    await client.end();
  }
}

async function updateDatabase(config, query, values) {
  const client = new Client(config);

  try {
    await client.connect();
    const res = await client.query(query, values);
    return res.rowCount;
  } catch (err) {
    console.log('Error updating database:', err);
  } finally {
    await client.end();
  }
}

async function main() {
  const selectQuery = 'SELECT coalesce(institutionalidentifier, netid, email) AS primary, coalesce(netid, email) AS secondary FROM person';
  const updateQuery = 'UPDATE weaver_users SET netid = $1 WHERE netid = $2 OR email = $2';

  try {
    for (const person of await queryDatabase(vireo3db, selectQuery)) {
      const rowsUpdated = await updateDatabase(vireo4db, updateQuery, [person.primary, person.secondary]);
      if (rowsUpdated) {
        console.log(`SUCCCESS: UPDATE weaver_users SET netid = ${person.primary} WHERE netid = ${person.secondary} OR email = ${person.secondary}`);
      } else {
        console.log(`FAILURE: UPDATE weaver_users SET netid = ${person.primary} WHERE netid = ${person.secondary} OR email = ${person.secondary}`);
      }
    }
  } catch (err) {
    console.log('Error during main execution:', err);
  }
}

main();

Here is a node.js script to request individual record from directory search ID office provides to retrieve UIN to update netid.

https://docs.security.tamu.edu/docs/identity-security/attribute-services/api/directory_search/

// provide client identifier and shared secret
// npm install pg axios crypto
// mkdir logs
// node uin.js > logs/uin.log
const { Client } = require('pg');
const axios = require('axios');
const crypto = require('crypto');

const vireo4db = {
  user: process.env.VIREO4_DB_USER || 'vireo',
  host: process.env.VIREO4_DB_HOST || 'localhost',
  database: process.env.VIREO4_DB_NAME || 'vireo',
  password: process.env.VIREO4_DB_PASSWORD || 'vireo',
  port: process.env.VIREO4_DB_PORT || 5432,
};

const directoryClient = {
  identifier: process.env.CLIENT_IDENTIFIER || '',
  sharedSecret: process.env.CLIENT_SHARED_SECRET || '',
  throttleEvery: process.env.CLIENT_THROTTLE_EVERY || 10,
  throttleInMilliseconds: process.env.CLIENT_THROTTLE_IN_MILLISECONDS || 2000
}

async function queryDatabase(config, query) {
  const client = new Client(config);

  try {
    await client.connect();
    const res = await client.query(query);
    return res.rows;
  } catch (err) {
    console.log('Error querying database:', err);
  } finally {
    await client.end();
  }
}

async function updateDatabase(config, query, values) {
  const client = new Client(config);

  try {
    await client.connect(); 
    const res = await client.query(query, values);
    return res.rowCount;
  } catch (err) {
    console.log('Error updating database:', err);
  } finally {
    await client.end();
  }
}

async function getPerson(netid) {
  const api = netid.length === 32 ? 'uid' : 'netid';
  const requestUri = `/rest/directory/${api}/${netid}/json/`;
  const baseUrl = 'https://mqs.tamu.edu';
  const url = baseUrl + requestUri;
  const date = new Date().toUTCString();

  const authenticationString = `${requestUri}\n${date}\n${directoryClient.identifier}`;

  const signature = crypto
    .createHmac('sha256', directoryClient.sharedSecret)
    .update(authenticationString)
    .digest('base64');

  const authorizationHeader = `TAM ${directoryClient.identifier}:${signature}`;

  try {
    console.log(url);
    const response = await axios.get(url, {
      headers: {
        'Authorization': authorizationHeader,
        'Date': date
      }
    });
    return response.data;
  } catch (error) {
    console.log('Error making request:', error);
  }
}

async function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

async function main() {
  const selectQuery = 'SELECT netid FROM weaver_users WHERE length(netid) != 9 AND netid !~ \'^\\d+$\'';
  const updateQuery = 'UPDATE weaver_users SET netid = $1 WHERE netid = $2';

  try {
    let i = 0;
    for (const user of await queryDatabase(vireo4db, selectQuery)) {
      console.log(user);
      if (user.netid.indexOf('@') >= 0) {
        continue;
      }
      const person = await getPerson(user.netid);
      console.log(person);
      if (!!person && person.tamuEduPersonUIN && person.tamuEduPersonUIN.length > 0) {
        const uin = person.tamuEduPersonUIN[0]
        const rowsUpdated = await updateDatabase(vireo4db, updateQuery, [uin, user.netid]);
        if (rowsUpdated) {
          console.log(`SUCCCESS: UPDATE weaver_users SET netid = ${uin} WHERE netid = ${user.netid}`);
        } else {
          console.log(`FAILURE: UPDATE weaver_users SET netid = ${uin} WHERE netid = ${user.netid}`);
        }
      }
      i++;
      if (i % directoryClient.throttleEvery === 0) {
        await sleep(directoryClient.throttleInMilliseconds);
      }
    }
  } catch (err) {
    console.log('Error during main execution:', err);
  }
}

main();

The following SQL migration scripts are intended to be run against a newly migrated Vireo 3 to Vireo 4.

-- https://github.com/TAMULib/Vireo/pull/93

-- update weaver_users with duplicate netid after uin to netid migration

WITH duplicate_netids AS (
    SELECT netid, COUNT(*) AS count FROM weaver_users GROUP BY netid HAVING COUNT(*) > 1
)
UPDATE weaver_users SET netid = NULL FROM duplicate_netids WHERE weaver_users.netid = duplicate_netids.netid AND weaver_users.email NOT LIKE '%tamu.edu';

UPDATE weaver_users SET netid = NULL WHERE id IN (2818, 152, 351, 762, 1329, 3115, 3142, 853, 2794);

-- add unique constraint on user netid

ALTER TABLE weaver_users ADD UNIQUE (netid);

-- https://github.com/TAMULib/Vireo/pull/88

-- remove unused default submission list columns

WITH submissions AS (
    SELECT * FROM submission_list_column WHERE (title = 'ORCID' AND predicate = 'dc.creator.orcid') OR
    (title = 'Major' AND predicate = 'thesis.degree.major')
)
DELETE FROM submission_list_column_value_path WHERE submission_list_column_id IN (SELECT id FROM submissions);

WITH submissions AS (
    SELECT * FROM submission_list_column WHERE (title = 'ORCID' AND predicate = 'dc.creator.orcid') OR
    (title = 'Major' AND predicate = 'thesis.degree.major')
)
DELETE FROM weaver_users_submission_view_columns WHERE submission_view_columns_id IN (SELECT id FROM submissions);

WITH submissions AS (
    SELECT * FROM submission_list_column WHERE (title = 'ORCID' AND predicate = 'dc.creator.orcid') OR
    (title = 'Major' AND predicate = 'thesis.degree.major')
)
DELETE FROM submission_list_column WHERE id IN (SELECT id FROM submissions);

-- update existing organization workflow field profiles to use TAMU custom predicates

WITH field_predicates AS (
    SELECT * FROM field_predicate WHERE value IN ('thesis.degree.major', 'local.etdauthor.orcid')
),
field_profiles AS (
    SELECT * FROM abstract_field_profile WHERE field_predicate_id IN (SELECT id FROM field_predicates)
)
DELETE FROM workflow_step_aggregate_field_profiles WHERE aggregate_field_profiles_id IN (SELECT id FROM field_profiles);

WITH field_predicates AS (
    SELECT * FROM field_predicate WHERE value IN ('thesis.degree.major', 'local.etdauthor.orcid')
)
DELETE FROM abstract_field_profile WHERE field_predicate_id IN (SELECT id FROM field_predicates);

DELETE FROM field_predicate WHERE value IN ('thesis.degree.major', 'local.etdauthor.orcid');

UPDATE field_predicate SET value = 'local.etdauthor.orcid' WHERE value = 'dc.creator.orcid';

-- add ORCID submission list column back, update since latest vireo 3 migration

INSERT INTO submission_list_column (predicate, status, title, input_type_id) VALUES ('local.etdauthor.orcid', NULL, 'ORCID', 1);

WITH submission AS (
    SELECT id
    FROM submission_list_column
    WHERE predicate = 'local.etdauthor.orcid'
)
INSERT INTO submission_list_column_value_path (submission_list_column_id, value_path, value_path_order)
SELECT id, 'fieldValues', 0 FROM submission UNION ALL SELECT id, 'value', 1 FROM submission;

-- https://github.com/TAMULib/Vireo/pull/87

-- add Graduation Semester (List) submission list column

INSERT INTO submission_list_column (predicate, status, title, input_type_id) VALUES ('dc.date.issued', NULL, 'Graduation Semester (List)', 7);

WITH submission AS (
    SELECT id
    FROM submission_list_column
    WHERE title = 'Graduation Semester (List)'
)
INSERT INTO submission_list_column_value_path (submission_list_column_id, value_path, value_path_order)
SELECT id, 'fieldValues', 0 FROM submission UNION ALL SELECT id, 'value', 1 FROM submission;

-- update the unique constraint on the submission_list_column table

DO $$
DECLARE
    v_constraint_name TEXT;
BEGIN
    WITH constraint_info AS (
        SELECT
            conname AS constraint_name,
            conrelid::regclass AS table_name,
            ARRAY(SELECT a.attname
                  FROM unnest(conkey) AS k
                  JOIN pg_attribute AS a ON a.attnum = k AND a.attrelid = conrelid) AS column_names
        FROM
            pg_constraint
        WHERE
            conrelid = 'submission_list_column'::regclass
    ),
    constraints AS (
        SELECT
            ci.constraint_name,
            ci.table_name
        FROM
            constraint_info ci
        WHERE
            ci.column_names::text[] @> ARRAY['title', 'predicate', 'input_type_id']::text[]
    )
    SELECT constraint_name INTO v_constraint_name FROM constraints;

    IF v_constraint_name IS NOT NULL THEN
        EXECUTE format('ALTER TABLE %I DROP CONSTRAINT %I', 'submission_list_column', v_constraint_name);
    END IF;

    EXECUTE format('ALTER TABLE %I ADD CONSTRAINT %I UNIQUE (%I)', 'submission_list_column', v_constraint_name, 'title');
END $$;

-- https://github.com/TAMULib/Vireo/pull/89

-- add Student Name submission list column

INSERT INTO submission_list_column (predicate, status, title, input_type_id) VALUES ('last_name, first_name middle_name', NULL, 'Student Name', 1);

WITH submission AS (
    SELECT id
    FROM submission_list_column
    WHERE predicate = 'last_name, first_name middle_name'
)
INSERT INTO submission_list_column_value_path (submission_list_column_id, value_path, value_path_order)
SELECT id, 'fieldValues', 0 FROM submission UNION ALL SELECT id, 'value', 1 FROM submission;

-- https://github.com/TAMULib/Vireo/pull/94

-- update configuration repo for shibboleth mappings

UPDATE managed_configuration SET name = 'auth.shib.enabled' WHERE name = 'APPLICATION_AUTH_SHIB_ENABLED';
UPDATE managed_configuration SET name = 'auth.shib.visible' WHERE name = 'APPLICATION_AUTH_SHIB_VISIBLE';
UPDATE managed_configuration SET name = 'auth.shib.name' WHERE name = 'APPLICATION_AUTH_SHIB_NAME';
UPDATE managed_configuration SET name = 'auth.shib.description' WHERE name = 'APPLICATION_AUTH_SHIB_DESCRIPTION';
UPDATE managed_configuration SET name = 'auth.shib.login.forceSSL' WHERE name = 'APPLICATION_AUTH_SHIB_LOGIN_FORCE_SSL';
UPDATE managed_configuration SET name = 'auth.shib.login.url' WHERE name = 'APPLICATION_AUTH_SHIB_LOGIN_URL';
UPDATE managed_configuration SET name = 'auth.shib.logout.url' WHERE name = 'APPLICATION_AUTH_SHIB_LOGOUT_URL';
UPDATE managed_configuration SET name = 'auth.shib.logout.enabled' WHERE name = 'APPLICATION_AUTH_SHIB_LOGOUT_ENABLED';
UPDATE managed_configuration SET name = 'auth.shib.primaryIdentifier' WHERE name = 'APPLICATION_AUTH_SHIB_PRIMARY_IDENTIFIER';
UPDATE managed_configuration SET name = 'auth.shib.mock' WHERE name = 'APPLICATION_AUTH_SHIB_MOCK';
UPDATE managed_configuration SET name = 'auth.shib.log' WHERE name = 'APPLICATION_AUTH_SHIB_LOG';
UPDATE managed_configuration SET name = 'auth.shib.attribute.netid' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_NETID';
UPDATE managed_configuration SET name = 'auth.shib.attribute.email' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_EMAIL';
UPDATE managed_configuration SET name = 'auth.shib.attribute.firstName' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_FIRST_NAME';
UPDATE managed_configuration SET name = 'auth.shib.attribute.lastName' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_LAST_NAME';
UPDATE managed_configuration SET name = 'auth.shib.attribute.institutionIdentifier' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_INSTITUTION_IDENTIFIER';
UPDATE managed_configuration SET name = 'auth.shib.attribute.institutionalIdentifier' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_INSTITUTIONAL_IDENTIFIER';
UPDATE managed_configuration SET name = 'auth.shib.attribute.middleName' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_MIDDLE_NAME';
UPDATE managed_configuration SET name = 'auth.shib.attribute.birthYear' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_BIRTH_YEAR';
UPDATE managed_configuration SET name = 'auth.shib.attribute.affiliations' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_AFFILIATIONS';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentPhoneNumber' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_PHONE_NUMBER';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentPostalAddress' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_POSTAL_ADDRESS';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentEmailAddress' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_EMAIL_ADDRESS';
UPDATE managed_configuration SET name = 'auth.shib.attribute.permanentPhoneNumber' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_PERMANENT_PHONE_NUMBER';
UPDATE managed_configuration SET name = 'auth.shib.attribute.permanentPostalAddress' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_PERMANENT_POSTAL_ADDRESS';
UPDATE managed_configuration SET name = 'auth.shib.attribute.permanentEmailAddress' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_PERMANENT_EMAIL_ADDRESS';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentDegree' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_DEGREE';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentDepartment' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_DEPARTMENT';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentCollege' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_COLLEGE';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentMajor' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_MAJOR';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentGraduationYear' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_GRADUATION_YEAR';
UPDATE managed_configuration SET name = 'auth.shib.attribute.currentGraduationMonth' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_CURRENT_GRADUATION_MONTH';
UPDATE managed_configuration SET name = 'auth.shib.attribute.orcid' WHERE name = 'APPLICATION_AUTH_SHIB_ATTRIBUTE_ORCID';

-- add managed_configuration to map shibboleth UIN to netid

INSERT INTO managed_configuration (name, type, value) VALUES ('auth.shib.attribute.netid', 'shibboleth', 'uin');

-- https://github.com/TAMULib/Vireo/pull/98

-- update submission last action

WITH submissions AS (
  SELECT * 
  FROM submission 
  WHERE last_action_id IS NULL
),
last_action_logs AS (
  SELECT DISTINCT ON (s.id) al.id AS aid, s.id AS sid 
  FROM action_log al
  JOIN submissions s ON al.action_logs_id = s.id
  ORDER BY s.id, al.action_date DESC
)
UPDATE submission s
SET last_action_id = las.aid
FROM last_action_logs las
WHERE s.id = las.sid;

-- update the submission list columns

UPDATE submission_list_column SET input_type_id = 8 WHERE id = 57;
UPDATE submission_list_column_value_path SET value_path = 'lastAction' WHERE submission_list_column_id = 56;
INSERT INTO submission_list_column_value_path (submission_list_column_id, value_path, value_path_order) VALUES (56, 'entry', 1), (57, 'lastAction', 0), (57, 'actionDate', 1);

-- update all auto-generated ids sequences

SELECT setval('abstract_email_recipient_id_seq', (SELECT MAX(id) FROM abstract_email_recipient) + 1);
SELECT setval('abstract_field_profile_id_seq', (SELECT MAX(id) FROM abstract_field_profile) + 1);
SELECT setval('abstract_formatter_id_seq', (SELECT MAX(id) FROM abstract_formatter) + 1);
SELECT setval('abstract_note_id_seq', (SELECT MAX(id) FROM abstract_note) + 1);
SELECT setval('abstract_packager_id_seq', (SELECT MAX(id) FROM abstract_packager) + 1);
SELECT setval('action_log_id_seq', (SELECT MAX(id) FROM action_log) + 1);
SELECT setval('address_id_seq', (SELECT MAX(id) FROM address) + 1);
SELECT setval('contact_info_id_seq', (SELECT MAX(id) FROM contact_info) + 1);
SELECT setval('controlled_vocabulary_id_seq', (SELECT MAX(id) FROM controlled_vocabulary) + 1);
SELECT setval('custom_action_definition_id_seq', (SELECT MAX(id) FROM custom_action_definition) + 1);
SELECT setval('custom_action_value_id_seq', (SELECT MAX(id) FROM custom_action_value) + 1);
SELECT setval('degree_id_seq', (SELECT MAX(id) FROM degree) + 1);
SELECT setval('degree_level_id_seq', (SELECT MAX(id) FROM degree_level) + 1);
SELECT setval('deposit_location_id_seq', (SELECT MAX(id) FROM deposit_location) + 1);
SELECT setval('document_type_id_seq', (SELECT MAX(id) FROM document_type) + 1);
SELECT setval('email_template_id_seq', (SELECT MAX(id) FROM email_template) + 1);
SELECT setval('email_workflow_rule_id_seq', (SELECT MAX(id) FROM email_workflow_rule) + 1);
SELECT setval('embargo_id_seq', (SELECT MAX(id) FROM embargo) + 1);
SELECT setval('field_predicate_id_seq', (SELECT MAX(id) FROM field_predicate) + 1);
SELECT setval('field_value_id_seq', (SELECT MAX(id) FROM field_value) + 1);
SELECT setval('filter_criterion_id_seq', (SELECT MAX(id) FROM filter_criterion) + 1);
SELECT setval('graduation_month_id_seq', (SELECT MAX(id) FROM graduation_month) + 1);
SELECT setval('input_type_id_seq', (SELECT MAX(id) FROM input_type) + 1);
SELECT setval('language_id_seq', (SELECT MAX(id) FROM language) + 1);
SELECT setval('managed_configuration_id_seq', (SELECT MAX(id) FROM managed_configuration) + 1);
SELECT setval('named_search_filter_group_id_seq', (SELECT MAX(id) FROM named_search_filter_group) + 1);
SELECT setval('named_search_filter_id_seq', (SELECT MAX(id) FROM named_search_filter) + 1);
SELECT setval('organization_category_id_seq', (SELECT MAX(id) FROM organization_category) + 1);
SELECT setval('organization_id_seq', (SELECT MAX(id) FROM organization) + 1);
SELECT setval('submission_id_seq', (SELECT MAX(id) FROM submission) + 1);
SELECT setval('submission_list_column_id_seq', (SELECT MAX(id) FROM submission_list_column) + 1);
SELECT setval('submission_status_id_seq', (SELECT MAX(id) FROM submission_status) + 1);
SELECT setval('submission_workflow_step_id_seq', (SELECT MAX(id) FROM submission_workflow_step) + 1);
SELECT setval('vocabulary_word_id_seq', (SELECT MAX(id) FROM vocabulary_word) + 1);
SELECT setval('weaver_users_id_seq', (SELECT MAX(id) FROM weaver_users) + 1);
SELECT setval('workflow_step_id_seq', (SELECT MAX(id) FROM workflow_step) + 1);
wwelling commented 1 month ago

I really wish we limited out languages for the migration scripts. We already have Ruby. We have many of the fixes via SQL. Adding Node makes this just more complicated.

I have discussed with James S regarding merging migration scripts as much as possible. The node script from Vireo 3 to Vireo 4 database can and should be an update in TAMU customized ruby scripts. The second node script can be added to the ruby script as to make requests to the directory search API for retrieving UIN by netid when not in Vireo 3 database but will require additional dependencies and more complicated logic than the external node script.