BigelowLab / edna-dada2

Maine eDNA dada2
0 stars 0 forks source link

sample.names number problem #24

Closed robinsleith closed 3 years ago

robinsleith commented 3 years ago

I think this is caused by a sample being dropped at the cutadapt step, which then makes the pipeline that starts with the paired_quality_scores call fail. It looks like sample Gloeo-134 gets dropped from input_files but not from sample.names, so the two no longer have the same length (49 vs. 50).

Output from cutadapt (all run interactively)

  1 small filepairs encountered, dropping: Gloeo-134_S160_L001_R1_001.fastq

Output from the paired_quality_scores pipeline (the error itself is raised in paired_ee_threshold)

Error: Problem with `mutate()` column `sample`.
ℹ `sample = sample_names`.
ℹ `sample` must be size 49 or 1, not 50.
Run `rlang::last_error()` to see where the error occurred.

traceback()

22: stop(fallback)
21: signal_abort(cnd)
20: abort(bullets, class = c("dplyr:::mutate_error", "dplyr_error"), 
        error_name = error_name, error_expression = error_expression, 
        parent = e, bullets = bullets)
19: (function (e) 
    {
        local_call_step(dots = dots, .index = i, .fn = "mutate", 
            .dot_data = inherits(e, "rlang_error_data_pronoun_not_found"))
        call_step_envir <- peek_call_step()
        error_name <- call_step_envir$error_name
        error_expression <- call_step_envir$error_expression
        show_group_details <- TRUE
        if (inherits(e, "dplyr:::mutate_incompatible_size")) {
            size <- vec_size(rows[[mask$get_current_group()]])
            x_size <- e$x_size
            bullets <- c(i = cnd_bullet_column_info(), i = glue("`{error_name}` must be size {or_1(size)}, not {x_size}."), 
                i = cnd_bullet_rowwise_unlist())
        }
        else if (inherits(e, "dplyr:::mutate_mixed_null")) {
            show_group_details <- FALSE
            bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must return compatible vectors across groups."), 
                i = "Cannot combine NULL and non NULL results.", 
                i = cnd_bullet_rowwise_unlist())
        }
        else if (inherits(e, "dplyr:::mutate_not_vector")) {
            bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must be a vector, not {friendly_type_of(e$result)}."), 
                i = cnd_bullet_rowwise_unlist())
        }
        else if (inherits(e, "dplyr:::error_mutate_incompatible_combine")) {
            show_group_details <- FALSE
            bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must return compatible vectors across groups"), 
                i = cnd_bullet_combine_details(e$parent$x, e$parent$x_arg), 
                i = cnd_bullet_combine_details(e$parent$y, e$parent$y_arg))
        }
        else {
            bullets <- c(i = cnd_bullet_column_info(), x = conditionMessage(e))
        }
        bullets <- c(cnd_bullet_header(), bullets, i = if (show_group_details) cnd_bullet_cur_group_label())
        abort(bullets, class = c("dplyr:::mutate_error", "dplyr_error"), 
            error_name = error_name, error_expression = error_expression, 
            parent = e, bullets = bullets)
    })(structure(list(message = "", trace = structure(list(calls = list(
        paired_quality_scores(input_files) %>% paired_ee_per_read() %>% 
            paired_ee_threshold(sample_names = sample.names, filename = file.path(CFG$output_path, 
                "EE_thresholds.csv")), dadautils::paired_ee_threshold(., 
            sample_names = sample.names, filename = file.path(CFG$output_path, 
                "EE_thresholds.csv")), base::sapply(xx, function(x, 
            sample_names = "unknown") {
            dplyr::mutate(x, sample = sample_names) %>% dplyr::relocate(sample, 
                .before = 1)
        }, simplify = FALSE, sample_names = sample_names), base::lapply(X = X, 
            FUN = FUN, ...), dadautils:::FUN(X[[i]], ...), dplyr::mutate(x, 
            sample = sample_names) %>% dplyr::relocate(sample, .before = 1), 
        dplyr::relocate(., sample, .before = 1), dplyr::mutate(x, 
            sample = sample_names), dplyr:::mutate.data.frame(x, 
            sample = sample_names), dplyr:::mutate_cols(.data, ..., 
            caller_env = caller_env()), base::withCallingHandlers({
            for (i in seq_along(dots)) {
                mask$across_cache_reset()
                context_poke("column", old_current_column)
                quosures <- expand_across(dots[[i]])
                quosures_results <- vector(mode = "list", length = length(quosures))
                for (k in seq_along(quosures)) {
                    quo <- quosures[[k]]
                    quo_data <- attr(quo, "dplyr:::data")
                    if (!is.null(quo_data$column)) {
                      context_poke("column", quo_data$column)
                    }
                    chunks <- NULL
                    result <- NULL
                    if (quo_is_symbol(quo)) {
                      name <- as_string(quo_get_expr(quo))
                      if (name %in% names(new_columns)) {
                        result <- new_columns[[name]]
                        chunks <- mask$resolve(name)
                      }
                      else if (name %in% names(.data)) {
                        result <- .data[[name]]
                        chunks <- mask$resolve(name)
                      }
                      if (inherits(.data, "rowwise_df") && vec_is_list(result)) {
                        sizes <- list_sizes(result)
                        wrong <- which(sizes != 1)
                        if (length(wrong)) {
                          group <- wrong[1L]
                          mask$set_current_group(group)
                          abort(x_size = sizes[group], class = "dplyr:::mutate_incompatible_size")
                        }
                      }
                    }
                    if (is.null(chunks)) {
                      chunks <- mask$eval_all_mutate(quo)
                    }
                    if (is.null(chunks)) {
                      next
                    }
                    if (is.null(result)) {
                      if (length(rows) == 1) {
                        result <- chunks[[1]]
                      }
                      else {
                        result <- withCallingHandlers(vec_unchop(chunks <- vec_cast_common(!!!chunks), 
                          rows), vctrs_error_incompatible_type = function(cnd) {
                          abort(class = "dplyr:::error_mutate_incompatible_combine", 
                            parent = cnd)
                        })
                      }
                    }
                    quosures_results[[k]] <- list(result = result, 
                      chunks = chunks)
                }
                for (k in seq_along(quosures)) {
                    quo <- quosures[[k]]
                    quo_data <- attr(quo, "dplyr:::data")
                    quo_result <- quosures_results[[k]]
                    if (is.null(quo_result)) {
                      if (quo_data$is_named) {
                        name <- quo_data$name_given
                        new_columns[[name]] <- zap()
                        mask$remove(name)
                      }
                      next
                    }
                    result <- quo_result$result
                    chunks <- quo_result$chunks
                    if (!quo_data$is_named && is.data.frame(result)) {
                      new_columns[names(result)] <- result
                      mask$add_many(result, chunks)
                    }
                    else {
                      name <- quo_data$name_auto
                      new_columns[[name]] <- result
                      mask$add_one(name, chunks)
                    }
                }
            }
        }, error = function(e) {
            local_call_step(dots = dots, .index = i, .fn = "mutate", 
                .dot_data = inherits(e, "rlang_error_data_pronoun_not_found"))
            call_step_envir <- peek_call_step()
            error_name <- call_step_envir$error_name
            error_expression <- call_step_envir$error_expression
            show_group_details <- TRUE
            if (inherits(e, "dplyr:::mutate_incompatible_size")) {
                size <- vec_size(rows[[mask$get_current_group()]])
                x_size <- e$x_size
                bullets <- c(i = cnd_bullet_column_info(), i = glue("`{error_name}` must be size {or_1(size)}, not {x_size}."), 
                    i = cnd_bullet_rowwise_unlist())
            }
            else if (inherits(e, "dplyr:::mutate_mixed_null")) {
                show_group_details <- FALSE
                bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must return compatible vectors across groups."), 
                    i = "Cannot combine NULL and non NULL results.", 
                    i = cnd_bullet_rowwise_unlist())
            }
            else if (inherits(e, "dplyr:::mutate_not_vector")) {
                bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must be a vector, not {friendly_type_of(e$result)}."), 
                    i = cnd_bullet_rowwise_unlist())
            }
            else if (inherits(e, "dplyr:::error_mutate_incompatible_combine")) {
                show_group_details <- FALSE
                bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must return compatible vectors across groups"), 
                    i = cnd_bullet_combine_details(e$parent$x, e$parent$x_arg), 
                    i = cnd_bullet_combine_details(e$parent$y, e$parent$y_arg))
            }
            else {
                bullets <- c(i = cnd_bullet_column_info(), x = conditionMessage(e))
            }
            bullets <- c(cnd_bullet_header(), bullets, i = if (show_group_details) cnd_bullet_cur_group_label())
            abort(bullets, class = c("dplyr:::mutate_error", "dplyr_error"), 
                error_name = error_name, error_expression = error_expression, 
                parent = e, bullets = bullets)
        }, warning = function(w) {
            if (check_muffled_warning(w)) {
                maybe_restart("muffleWarning")
            }
            local_call_step(dots = dots, .index = i, .fn = "mutate")
            warn(c(cnd_bullet_header(), i = cnd_bullet_column_info(), 
                i = conditionMessage(w), i = cnd_bullet_cur_group_label(what = "warning")))
            maybe_restart("muffleWarning")
        }), mask$eval_all_mutate(quo), dplyr:::abort_glue(character(0), 
            list(x_size = 50L), "dplyr:::mutate_incompatible_size"), 
        rlang::exec(abort, class = class, !!!data)), parents = c(0L, 
    0L, 2L, 3L, 4L, 5L, 0L, 5L, 5L, 9L, 10L, 10L, 0L, 13L), indices = 1:14), class = "rlang_trace", version = 1L), 
        parent = NULL, x_size = 50L), class = c("dplyr:::mutate_incompatible_size", 
    "rlang_error", "error", "condition")))
18: signalCondition(cnd)
17: signal_abort(cnd)
16: (function (message = NULL, class = NULL, ..., trace = NULL, parent = NULL, 
        .subclass = deprecated()) 
    {
        validate_signal_args(.subclass)
        if (is_null(trace) && is_null(peek_option("rlang:::disable_trace_capture"))) {
            with_options(`rlang:::disable_trace_capture` = TRUE, 
                {
                    trace <- trace_back()
                    if (is_null(parent)) {
                      context <- trace_length(trace)
                    }
                    else {
                      context <- trace_capture_depth(trace)
                    }
                    trace <- trace_trim_context(trace, context)
                })
        }
        message <- validate_signal_message(message, class)
        message <- collapse_cnd_message(message)
        cnd <- error_cnd(class, ..., message = message, parent = parent, 
            trace = trace)
        signal_abort(cnd)
    })(class = "dplyr:::mutate_incompatible_size", x_size = 50L)
15: exec(abort, class = class, !!!data)
14: abort_glue(character(0), list(x_size = 50L), "dplyr:::mutate_incompatible_size")
13: .Call(dplyr_mask_eval_all_mutate, quo, private)
12: mask$eval_all_mutate(quo)
11: withCallingHandlers({
        for (i in seq_along(dots)) {
            mask$across_cache_reset()
            context_poke("column", old_current_column)
            quosures <- expand_across(dots[[i]])
            quosures_results <- vector(mode = "list", length = length(quosures))
            for (k in seq_along(quosures)) {
                quo <- quosures[[k]]
                quo_data <- attr(quo, "dplyr:::data")
                if (!is.null(quo_data$column)) {
                    context_poke("column", quo_data$column)
                }
                chunks <- NULL
                result <- NULL
                if (quo_is_symbol(quo)) {
                    name <- as_string(quo_get_expr(quo))
                    if (name %in% names(new_columns)) {
                      result <- new_columns[[name]]
                      chunks <- mask$resolve(name)
                    }
                    else if (name %in% names(.data)) {
                      result <- .data[[name]]
                      chunks <- mask$resolve(name)
                    }
                    if (inherits(.data, "rowwise_df") && vec_is_list(result)) {
                      sizes <- list_sizes(result)
                      wrong <- which(sizes != 1)
                      if (length(wrong)) {
                        group <- wrong[1L]
                        mask$set_current_group(group)
                        abort(x_size = sizes[group], class = "dplyr:::mutate_incompatible_size")
                      }
                    }
                }
                if (is.null(chunks)) {
                    chunks <- mask$eval_all_mutate(quo)
                }
                if (is.null(chunks)) {
                    next
                }
                if (is.null(result)) {
                    if (length(rows) == 1) {
                      result <- chunks[[1]]
                    }
                    else {
                      result <- withCallingHandlers(vec_unchop(chunks <- vec_cast_common(!!!chunks), 
                        rows), vctrs_error_incompatible_type = function(cnd) {
                        abort(class = "dplyr:::error_mutate_incompatible_combine", 
                          parent = cnd)
                      })
                    }
                }
                quosures_results[[k]] <- list(result = result, chunks = chunks)
            }
            for (k in seq_along(quosures)) {
                quo <- quosures[[k]]
                quo_data <- attr(quo, "dplyr:::data")
                quo_result <- quosures_results[[k]]
                if (is.null(quo_result)) {
                    if (quo_data$is_named) {
                      name <- quo_data$name_given
                      new_columns[[name]] <- zap()
                      mask$remove(name)
                    }
                    next
                }
                result <- quo_result$result
                chunks <- quo_result$chunks
                if (!quo_data$is_named && is.data.frame(result)) {
                    new_columns[names(result)] <- result
                    mask$add_many(result, chunks)
                }
                else {
                    name <- quo_data$name_auto
                    new_columns[[name]] <- result
                    mask$add_one(name, chunks)
                }
            }
        }
    }, error = function(e) {
        local_call_step(dots = dots, .index = i, .fn = "mutate", 
            .dot_data = inherits(e, "rlang_error_data_pronoun_not_found"))
        call_step_envir <- peek_call_step()
        error_name <- call_step_envir$error_name
        error_expression <- call_step_envir$error_expression
        show_group_details <- TRUE
        if (inherits(e, "dplyr:::mutate_incompatible_size")) {
            size <- vec_size(rows[[mask$get_current_group()]])
            x_size <- e$x_size
            bullets <- c(i = cnd_bullet_column_info(), i = glue("`{error_name}` must be size {or_1(size)}, not {x_size}."), 
                i = cnd_bullet_rowwise_unlist())
        }
        else if (inherits(e, "dplyr:::mutate_mixed_null")) {
            show_group_details <- FALSE
            bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must return compatible vectors across groups."), 
                i = "Cannot combine NULL and non NULL results.", 
                i = cnd_bullet_rowwise_unlist())
        }
        else if (inherits(e, "dplyr:::mutate_not_vector")) {
            bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must be a vector, not {friendly_type_of(e$result)}."), 
                i = cnd_bullet_rowwise_unlist())
        }
        else if (inherits(e, "dplyr:::error_mutate_incompatible_combine")) {
            show_group_details <- FALSE
            bullets <- c(i = cnd_bullet_column_info(), x = glue("`{error_name}` must return compatible vectors across groups"), 
                i = cnd_bullet_combine_details(e$parent$x, e$parent$x_arg), 
                i = cnd_bullet_combine_details(e$parent$y, e$parent$y_arg))
        }
        else {
            bullets <- c(i = cnd_bullet_column_info(), x = conditionMessage(e))
        }
        bullets <- c(cnd_bullet_header(), bullets, i = if (show_group_details) cnd_bullet_cur_group_label())
        abort(bullets, class = c("dplyr:::mutate_error", "dplyr_error"), 
            error_name = error_name, error_expression = error_expression, 
            parent = e, bullets = bullets)
    }, warning = function(w) {
        if (check_muffled_warning(w)) {
            maybe_restart("muffleWarning")
        }
        local_call_step(dots = dots, .index = i, .fn = "mutate")
        warn(c(cnd_bullet_header(), i = cnd_bullet_column_info(), 
            i = conditionMessage(w), i = cnd_bullet_cur_group_label(what = "warning")))
        maybe_restart("muffleWarning")
    })
10: mutate_cols(.data, ..., caller_env = caller_env())
9: mutate.data.frame(x, sample = sample_names)
8: dplyr::mutate(x, sample = sample_names)
7: dplyr::relocate(., sample, .before = 1)
6: dplyr::mutate(x, sample = sample_names) %>% dplyr::relocate(sample, 
       .before = 1)
5: FUN(X[[i]], ...)
4: lapply(X = X, FUN = FUN, ...)
3: sapply(xx, function(x, sample_names = "unknown") {
       dplyr::mutate(x, sample = sample_names) %>% dplyr::relocate(sample, 
           .before = 1)
   }, simplify = FALSE, sample_names = sample_names)
2: paired_ee_threshold(., sample_names = sample.names, filename = file.path(CFG$output_path, 
       "EE_thresholds.csv"))
1: paired_quality_scores(input_files) %>% paired_ee_per_read() %>% 
       paired_ee_threshold(sample_names = sample.names, filename = file.path(CFG$output_path, 
           "EE_thresholds.csv"))

btupper commented 3 years ago

So, after running any operation that removes 1 or more filepairs, shouldn't the user run extract_sample_name() again?

https://github.com/BigelowLab/dadautils/blob/main/R/misc.R#L125

robinsleith commented 3 years ago

Perfect, yep, that makes sense; I've folded it into the workflow.