joshuaye1024 / covidApp

covid-data
0 stars 0 forks source link

Read and write to trileaf database covid tables #3

Closed ogallagher closed 3 years ago

ogallagher commented 3 years ago
ogallagher commented 3 years ago

@joshuaye1024 check out data_collector/process/update_db.py to see how the data collector updates the different tables, and as a general guide for how your methods can work.

Note in particular the uses of the trileaf_db.db_client.DBClient.query(sql,args) method, and how trileaf_db.db_api will contain constants describing the tables and columns that you’ll be updating.

ogallagher commented 3 years ago

I’m thinking your insert code will look something like this, once I update trileaf_db.db_api and the database to include the covid tables.

from typing import Tuple

from pandas import DataFrame

import logging
from logging import Logger

from trileaf_db.db_client import DBClient
from trileaf_db import db_api as api

log:Logger = logging.getLogger(__name__)

def insert_covid_region(dbclient:DBClient, region_id:str) -> bool:
    """Insert new covid region.

    Returns:
        True if the region was inserted.
    """

    inserts:int = dbclient.query(
        sql='insert into {t_cr}({id}) values (%s)'.format(
            t_cr=api.TABLE_COVID_REGIONS,
            id=api.COVID_REGIONS_ID
        ),
        args=region_id
    )

    if inserts > 0:
        log.info('added region {} to the database'.format(region_id))
        return True

    else:
        log.warning('region {} not added to db'.format(region_id))
        return False
# end insert_covid_region

# add region stats
def insert_covid_region_stats(dbclient:DBClient,
    region_id:str,
    region_stats:DataFrame) -> int:
    """Insert new covid region stats.

    Args:
        dbclient = DBClient instance.

        region_id = covid region code (US state/territory abbreviation).

        region_stats = DataFrame of covid stats, where each row corresponds to a datetime.

    Returns:
        inserted = number of new rows inserted into the covid_region_stats table. Ideally, this
        is equal to region_stats.shape[0].
    """

    # check if region already in database
    # exists will be [[1]] if found in db, [] if not
    exists:Tuple = dbclient.query(
        sql='select 1 from {t_cr} where {id}=%s'.format(
            t_cr=api.TABLE_COVID_REGIONS,
            id=api.COVID_REGIONS_ID
        ),
        args=region_id
    )

    if len(exists) == 0:
        # region is new; add to database
        insert_covid_region(dbclient, region_id)

    # else, region already in database
    log.debug('region {} in database'.format(region_id))

    inserts:int = dbclient.multiquery(
        sql='insert into {t_crs}({id},{dt},{pos},{etc}) values (%s,%s,%s)'.format(
            t_crs=api.TABLE_COVID_REGION_STATS,
            id=api.COVID_REG_STAT_REGION_ID,
            dt=api.COVID_REG_STAT_DATETIME,
            pos=api.COVID_REG_STAT_POSITIVES,
            etc='...'
        ),
        # assumes columns in region_stats align with the order of the column names
        # enumerated above
        args=region_stats
    )

    # as an alternative to dbclient.multiquery, dbclient.query with each row in
    # region_stats.itertuples() is what I did for data_collector

    return inserts
# end insert_covid_region_stats

def main():
    # load db credentials from ./res/secrets/db_credentials.txt
    db_client.init()

    # create connection client
    dbclient:DBClient = DBClient()

    if dbclient.connected():
        # fetch historical region stats from covid api
        # this doesn't show how to skip older data that's already collected
        region_code:str = 'xx'
        region_stats:DataFrame = covid_api_fetch_region_stats(region_code)

        inserts:int = insert_covid_region_stats(
            dbclient, 
            region_code, 
            region_stats
        )

        log.info('inserted {} new rows for region {}'.format(
            inserts,
            region_code
        ))

    else:
        log.error('failed to connect to trileaf database')
# end main