al-obrien / farrago

GNU General Public License v3.0
3 stars 0 forks source link

Add more and update sqlite helpers #3

Open al-obrien opened 3 weeks ago

al-obrien commented 3 weeks ago

Add and update based on the following for various helpers and ability to control the busy time out...

#' Quickly create and write to a SQLite database
#'
#' \code{write2sqlite} is a light wrapper function around various \code{RSQLite} and \code{DBI} functions to make it easier to
#' quickly create a database, tables, and append to existing tables.
#'
#' This function requires some of the more recent features of SQLite, including 'extended types' and date formatting capabilities.
#' To ensure date formatting is preserved, \code{write2sqlite} checks column types for the table being added. Using this function also
#' controls the connection open/closing automatically (given the provided DB name). Converting dates to characters can be a slow process,
#' with more recent versoin of SQLite, this conversion is not usually necessary and \code{date2text} can be set to \code{FALSE} and the same
#' effect may be seen with just \code{extended_types} being used. In the past, without these options, dates would be saved and returned in
#' a numeric format that required parsing after the fact.
#'
#' An optional logging feature is also available. This creates a column called 'log_book' and will track when a table was updated.
#'
#' @param db_name Database name (either path to existing, or name of new database (e.g. name.sqlite, name.db)).
#' @param data Dataset to add to database.
#' @param tbl_name Name to assign to table associated with the provided data.
#' @param extended_types RSQLite parameter for additional data types such as dates (default: \code{TRUE}).
#' @param date2text Detect which columns are dates and parse them to characters (instead of numeric) to store in SQLite (default: \code{TRUE}).
#' @param logging Boolean, determines if logging table named 'log_book' is used in the writing operation (default: \code{FALSE}).
#' @param busy_timeout Integer value to set the PRAGMA timeout for the SQLite connectio; no action if set to \code{NULL}.
#' @param ... Additional parameters to be passed to \code{\link[DBI]{dbWriteTable}}.
#'
#' @examples
#' \dontrun{
#' # Use an existing dataset to add data
#' write2sqlite('path/to/my/dbfile.sqlite', mtcars, 'boringcars')
#'
#' # Replace an existing dataset
#' newmtcars <- mtcars[1:10]
#' write2sqlite('path/to/my/dbfile.sqlite', newmtcars, 'boringcars', overwrite = TRUE)
#'
#' # Create a new DB and add first table in one swoop
#' write2sqlite('mydb.sqlite', newmtcars, 'firsttable')
#' }
#' @seealso read_sqlite
#' @export
write2sqlite <- function(db_name, data, tbl_name, extended_types = TRUE, date2text = TRUE, logging = FALSE, busy_timeout = NULL, ...) {

  db_con <- DBI::dbConnect(RSQLite::SQLite(), db_name, extended_types = extended_types)

  if(!is.null(busy_timeout)) RSQLite::sqliteSetBusyHandler(db_con, busy_timeout)

  if(date2text) {
    # Detect date fields, preferably in ISO standard, and pass as character with DATE field.types to SQLite
    date_loc <- vapply(data, FUN = function(x) inherits(x, 'Date'), FUN.VALUE = logical(1))
    date_loc <- names(date_loc[date_loc])
    lngth <- length(date_loc)
    datev <- setNames(rep('DATE', lngth), date_loc)

    if(length(date_loc > 0 )) {
      data <- AHRtools::convert_format(data, as.character, date_loc)
      DBI::dbWriteTable(db_con, tbl_name, data,  field.types = datev, ...)
    } else DBI::dbWriteTable(db_con, tbl_name, data, ...)

  } else {
    DBI::dbWriteTable(db_con, tbl_name, data, ...)
  }

  if(logging) {write_sql_log(db_name, message = glue::glue('Table written: {tbl_name}'), extended_types)}

  on.exit({DBI::dbDisconnect(db_con)}, add = TRUE)
}

#' Quickly query a SQLite database
#'
#' Access a SQLite file and pass a standard query. This function deals with all the {DBI} work and will open/close the connection
#' automatically. If you need to perform multiple operations it is advised to work with {DBI} directly.
#'
#' @param db_name Database name (full or relative path must be part of the name).
#' @param query Character vector containing the SQL query.
#' @param extended_types RSQLite parameter for additional data types such as dates (default: \code{TRUE})
#' @param ... Additional parameters to be passed to \code{\link[DBI]{dbGetQuery}}.
#'
#' @examples
#' \dontrun{
#' # Create a new DB and add first table in one swoop
#' write2sqlite('mydb.sqlite', mtcars, 'firsttable')
#'
#' read_sqlite('mydb.sqlite', "SELECT * FROM firsttable")
#' read_sqlite('mydb.sqlite', "SELECT gear, count(*) AS N FROM firsttable GROUP BY gear")
#' }
#' @seealso write2sqlite
#' @export
read_sqlite <- function(db_name, query, extended_types = TRUE, ...) {

  tmp_con <- DBI::dbConnect(RSQLite::SQLite(), db_name, extended_types = extended_types)

  output <- DBI::dbGetQuery(tmp_con, query, ...)

  on.exit({DBI::dbDisconnect(tmp_con)}, add = TRUE)

  return(output)
}

#' Check status of SQLite lock
#'
#' Run a command against the provided SQLite database to check if it is locked..
#'
#' @inheritParams read_sqlite
#'
#' @returns Message about lock status.
#'
#' @examples
#' \dontrun{
#' check_sqlite_lock('path/to/db.sqlite')
#' }
#'
#' @export
check_sqlite_lock <- function(db_name, ...) {
  db_con <- DBI::dbConnect(RSQLite::SQLite(), db_name, ...)
  on.exit({DBI::dbDisconnect(db_con)}, add = TRUE)
  tryCatch({
    DBI::dbExecute(db_con, "BEGIN IMMEDIATE")
    print("Database is not locked")
    DBI::dbExecute(db_con, "ROLLBACK")
  }, error = function(e) {
    print("Database is locked")
  })
}

#' Retrieve and/or kill PID linked to SQLite database
#'
#' Provide a SQLite file to determine associated PIDs using it and possibly kill the processes.
#'
#' @inheritParams read_sqlite
#' @param kill Kill the process linked to SQLite file?
#' @param verbose Return verbose results?
#' @returns PID value.
#'
#' @examples
#' \dontrun{
#' sqlite_fuser('path/to/db.sqlite') # List PIDs, if any
#' sqlite_fuser('path/to/db.sqlite', kill = TRUE) # Kill any PIDs it can
#' }
#'
#' @export
sqlite_fuser <- function(db_name, kill = FALSE, verbose = FALSE) {

  stopifnot(is_sqlite(db_name))

  # Argument vector
  arg_vec <- c(db_name, '-i', '-u')
  if(kill) arg_vec <- c(arg_vec, '-k')
  if(verbose) arg_vec <- c(arg_vec, '-v')

  system2('fuser', args = arg_vec)
}

#' Determine if file is a SQLite database
#'
#' Check if a provided file is a SQLite database.
#'
#' Must have a familiar SQLite extension, if so, it will attempt a connection.
#'
#' @inheritParams read_sqlite
#' @returns Logical value.
#'
#' @examples
#' \dontrun{
#' is_sqlite('path/to/db.sqlite') # Passes
#' is_sqlite('path/to/notsqlite') # Fails
#' }
#'
#' @export
is_sqlite <- function(db_name) {

  # Ensure database has an informative extension to avoid issues
  db_ext <- tolower(tools::file_ext(db_name))
  match.arg(db_ext, c('sqlite', 'db', 'sql', 'sqlite3', 'db3'))

  # Check if SQLite
  db_con <- DBI::dbConnect(RSQLite::SQLite(), db_name)
  on.exit({try(DBI::dbDisconnect(db_con))}, add = TRUE)

  if(is.null(db_con)) return(FALSE) else return(TRUE)

}

#' Extract schema details from SQLite database
#'
#' Helper function to quickly pull schema table from SQLite dateabase.
#'
#' @inheritParams read_sqlite
#' @format Boolean value, if set to \code{TRUE}, will attempt to format content nicer.
#'
#' @return A data.frame
#'
#' @examples
#' \dontrun{
#' read_sqlite_schema('path/to/db.sqlite')
#' }
#'
#' @export
read_sqlite_schema <- function(db_name, format = FALSE) {

  tbl_schema <- read_sqlite(db_name, 'SELECT * FROM sqlite_schema')

  if(format) {

    # Extractr within (), clean up white spacing and split on ,
    split_val <- gsub(x = tbl_schema$sql, pattern =  '(.*)\\((.*)\\)$', replacement= '\\2') %>%
      gsub(x = ., pattern = '\\n[ ]?', replacement = '') %>%
      trimws() %>%
      gsub(x = ., pattern = ',[ ]?', replacement = ',') %>%
      strsplit(split = ',')

    # may need to split more intelligenty if names have spaces
    split_val_2 <- lapply(split_val, function(x) strsplit(x = x, '\\s+'))

    # Create formatted df
    lt <- lengths(split_val)
    fulllist <- unlist(split_val_2)
    lt_lst <- length(fulllist)

    data.frame(idx = rep.int(seq_along(split_val), lt),
               type = rep(tbl_schema$type, lt),
               tbl_name = rep(tbl_schema$tbl_name, lt),
               col_name = fulllist[seq(1, lt_lst, by = 2)],
               col_type = fulllist[seq(2, lt_lst, by = 2)])

  } else {
    return(tbl_schema)
  }
}

#' Write to logging column in SQLite
#'
#' Used internally by write2sqlite
#'
#' @param db_name Database name (full or relative path must be part of the name).
#' @param message Logging message to write to log_book table.
#' @param extended_types RSQLite parameter for additional data types such as dates (default: \code{TRUE})
#' @param ... Additional parameters to be passed to \code{\link[DBI]{dbWriteTable}}.
#'
write_sql_log <- function(db_name, message = NA_character_, extended_types = TRUE,  ...) {

  tmp_con_log <- DBI::dbConnect(RSQLite::SQLite(), db_name, extended_types = extended_types)

  content <- data.frame(CRT_TIME = Sys.time(), CRT_DT = Sys.Date(), MESSAGE = message)

  DBI::dbWriteTable(tmp_con_log, 'log_book', content, append = TRUE, ...)

  on.exit({DBI::dbDisconnect(tmp_con_log)}, add = TRUE)

}

#' Quickly list tables in a SQLite database
#'
#' A helper function to quickly connect to a file-based SQLite database, list the tables, and close the connection.
#'
#' @inheritParams read_sqlite
#' @export
list_sqlite_tbls <- function(db_name, extended_types = TRUE, ...) {

  tmp_con <- DBI::dbConnect(RSQLite::SQLite(), db_name, extended_types = extended_types)

  output <- DBI::dbListTables(tmp_con, ...)

  on.exit({DBI::dbDisconnect(tmp_con)}, add = TRUE)

  return(output)
}