metrumresearchgroup / review

helpful tools for organizing and performing quality control (QC) tasks
https://metrumresearchgroup.github.io/review/
2 stars 0 forks source link

New dirSummary function #28

Closed michaelmcd18 closed 9 months ago

michaelmcd18 commented 9 months ago

Edited to include updated version:

#' Directory Summary Function
#'
#' Generates a summary of files in a given directory by examining their
#' SVN statuses, last edited dates, authors, and other related metadata.
#'
#' The directory is scanned recursively and only files whose extension is one
#' of `R`, `Rmd`, `yaml`, `yml`, `ctl`, `cpp`, `cp`, `mod` or `stan` are kept.
#' Each kept file is joined to the output of `review::logSummary()` by `file`
#' and then looked up with `review::svnLog()`; a file whose lookup errors (or
#' returns no log entries) is reported as "Not in SVN".
#'
#' @param .dir A character string indicating the directory to be summarized.
#'
#' @return A list containing the following elements:
#'   \itemize{
#'     \item \code{project}: A character string of the project name (the
#'           base name of the QC log root).
#'     \item \code{data}: A dataframe with columns 'File', 'Author', 'Latest edit',
#'           'Latest rev', 'Status', and 'QCer', summarizing the relevant files.
#'           'Status' is one of "Not in SVN", "Not in QC log",
#'           "In QC log, needs QC" or "QC up to date".
#'     \item \code{directory}: A character string of the directory passed to the function.
#'   }
#'
#' @details Stops with "No QC log found" when the QC log root cannot be
#'   located.
#'
#' @examples
#' # Assuming you have the relevant data and packages loaded
#' # dirSummary("path/to/directory")
#'
dirSummary <- function(.dir) {

  project_name <- tryCatch(basename(review:::logRoot()), error = identity)

  if (inherits(project_name, "error")) {
    stop("No QC log found")
  }

  returnList <- list(
    project = project_name
  )

  # Gather files to scan ----------------------------------------------------
  all_files <- list.files(.dir, full.names = TRUE, recursive = TRUE)

  relevant_file_types <- c("R", "Rmd", "yaml", "yml", "ctl", "cpp", "cp", "mod", "stan")

  extensions <- tools::file_ext(all_files)

  relevant_files <- review:::pathFromLogRoot(
    all_files[extensions %in% relevant_file_types]
  )

  relevant_files_df <- dplyr::tibble(
    file = relevant_files,
    lastauthor = NA_character_,
    # Base R has no NA_POSIXct_ constant (that one lives in lubridate), so
    # build the typed missing value directly; UTC matches lubridate's constant.
    lastedit = as.POSIXct(NA, tz = "UTC"),
    lastrev = NA_real_,
    insvn = NA_character_
  )

  # Determine current log state ---------------------------------------------
  log_summary <- review::logSummary()

  # Build data --------------------------------------------------------------
  # The join brings in the QC-log columns used below (reviewer, headf, revf,
  # heado, revo); files absent from the log get NA in those columns.
  relevant_files_df <- dplyr::left_join(relevant_files_df, log_summary, by = "file")

  n_iter <- nrow(relevant_files_df)

  # Only create a progress bar when there is something to iterate over.
  pb <- NULL
  if (n_iter > 0) {
    pb <- progress::progress_bar$new(total = n_iter)
  }

  # seq_len() yields an empty sequence for zero rows, unlike 1:n_iter.
  for (i in seq_len(n_iter)) {

    log_i <- tryCatch(
      review::svnLog(relevant_files_df$file[i]),
      error = identity
    )

    # NOTE(review): an empty log is treated the same as a failed lookup,
    # since there is no committed revision to report -- confirm this matches
    # what review::svnLog() returns for unversioned files.
    if (inherits(log_i, "error") || nrow(log_i) == 0) {

      relevant_files_df$insvn[i] <- "No"

    } else {

      # Take exactly one row for the most recent entry. order() breaks ties
      # by position and sorts NA last, so the scalar assignments below cannot
      # fail on duplicated or missing timestamps.
      newest <- order(log_i$datetime, decreasing = TRUE)[1]

      relevant_files_df$lastauthor[i] <- log_i$author[newest]
      relevant_files_df$lastedit[i] <- log_i$datetime[newest]
      relevant_files_df$lastrev[i] <- log_i$rev[newest]
      relevant_files_df$insvn[i] <- "Yes"
    }

    # Tick on every iteration, including files that are not in SVN, so the
    # bar always reaches completion.
    pb$tick()
  }

  # Final cleanup -----------------------------------------------------------
  relevant_files_df <- dplyr::transmute(
    relevant_files_df,
    File = file,
    Author = lastauthor,
    `Latest edit` = lastedit,
    `Latest rev` = lastrev,
    # Conditions are evaluated in order; the first match wins.
    Status = dplyr::case_when(
      insvn == "No" ~ "Not in SVN",
      is.na(reviewer) ~ "Not in QC log",
      headf > revf | heado > revo ~ "In QC log, needs QC",
      TRUE ~ "QC up to date"
    ),
    QCer = reviewer
  )

  returnList[["data"]] <- relevant_files_df
  returnList[["directory"]] <- .dir

  returnList
}