Closed trinker closed 7 years ago
# Directory of example transcripts looked up inside the installed textreadr
# package.
path <- system.file("docs/transcripts", package = "textreadr")

# `list_files` is taken from textreadr's internal namespace (hence `:::`).
list_files <- textreadr:::list_files

#' Read every transcript file in a directory and combine the results
#'
#' Lists the files under `path`, reads each one with
#' `textreadr::read_transcript()`, and combines the files that were read
#' successfully with `textshape::tidy_list()`. Files that fail to read are
#' reported in a single warning and left out of the result.
#'
#' @param path Directory containing the transcript files.
#' @param col.names Character vector of column names. The first element is
#'   handed to `textshape::tidy_list()`; the remaining elements are passed to
#'   `read_transcript()` as its `col.names`.
#' @param pattern Currently unused by this function.
#'   NOTE(review): presumably meant to filter the file listing -- confirm
#'   whether `list_files()` accepts a `pattern` argument before wiring it in.
#' @param all.files,recursive Passed on to `list_files()`.
#' @param skip Value passed to `read_transcript()` as `skip`. Either a single
#'   value (used for every file) or one value per file found in `path`.
#' @param ... Further arguments forwarded unchanged to `read_transcript()`
#'   for every file.
#' @return The value of `textshape::tidy_list()` applied to the successfully
#'   read transcripts, which are named by file name without extension.
read_dir_transcript <- function(path,
                                col.names = c("Document", "Person", "Dialogue"),
                                pattern = NULL,
                                all.files = FALSE,
                                recursive = FALSE,
                                skip = 0,
                                ...) {
  to_read_in <- list_files(
    path,
    all.files = all.files,
    full.names = TRUE,
    recursive = recursive
  )

  if (length(to_read_in) == 0L) {
    stop("The following location does not appear to contain files:\n -", path)
  }

  # A single `skip` applies to every file; otherwise there must be exactly one
  # value per file so that `Map()` never recycles it silently.
  if (length(skip) == 1L) {
    skip <- rep(skip, length(to_read_in))
  }
  if (length(skip) != length(to_read_in)) {
    stop(
      "`skip` must be either length 1 or equal in length to documents in ",
      "`path`"
    )
  }

  # `try()` keeps one unreadable file from aborting the whole directory; the
  # failures are collected and reported together below.
  reads <- Map(
    function(x, y) {
      try(textreadr::read_transcript(x, col.names = col.names[-1], skip = y, ...))
    },
    to_read_in,
    skip
  )
  names(reads) <- tools::file_path_sans_ext(basename(to_read_in))

  # `vapply()` guarantees a logical vector regardless of what was read.
  goods <- !vapply(reads, inherits, logical(1), what = "try-error")

  if (any(!goods)) {
    warning(paste0(
      "The following files did not read in correctly:\n",
      paste0(" - ", to_read_in[!goods], collapse = "\n")
    ))
  }

  textshape::tidy_list(reads[goods], col.names[1])
}

pacman::p_load(tidyverse)
peek <- textreadr::peek

# Note from the issue: the separator needs to be repeated, or a
# separator-detection step is needed.

# One `skip` value per transcript file in `path`.
skips <- c(0, 1, 1, 0, 0, 1)
path <- system.file("docs/transcripts", package = "textreadr")

# Read all transcripts, filter rows on `Person` matching "^\\[", then strip
# leading "/" and trailing ":" markers from the text columns.
path %>%
  read_dir_transcript(skip = skips) %>%
  textclean::filter_row("Person", "^\\[") %>%
  mutate(
    Person = stringi::stri_replace_all_regex(
      Person, "(^/\\s*)|(:\\s*$)", ""
    ) %>%
      trimws(),
    Dialogue = stringi::stri_replace_all_regex(Dialogue, "(^/\\s*)", "")
  ) %>%
  peek(Inf)

# The same read without the cleaning steps, for comparison.
textreadr::peek(read_dir_transcript(path, skip = skips), Inf)