BioinformaticsFMRP / TCGAbiolinks

TCGAbiolinks
http://bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/index.html
289 stars 110 forks source link

GDCprepare output ERROR #506

Closed liujilei156231 closed 2 years ago

liujilei156231 commented 2 years ago
library(TCGAbiolinks)
library(TCGAbiolinksGUI.data)
library(dplyr)
library(DT)

query <- GDCquery(
  project = "CPTAC-3",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
    workflow.type = "STAR - Counts"
)
GDCdownload(query)
data <- GDCprepare(query)

--------------------------------------

o GDCquery: Searching in GDC database

--------------------------------------

Genome of reference: hg38

--------------------------------------------

oo Accessing GDC. This might take a while...

--------------------------------------------

ooo Project: CPTAC-3

--------------------

oo Filtering results

--------------------

ooo By data.type

ooo By workflow.type

----------------

oo Checking data

----------------

ooo Check if there are duplicated cases

ooo Check if there results for the query

-------------------

o Preparing output

-------------------

Downloading data for project CPTAC-3

Of the 1275 files for download 1275 already exist.

All samples have been already downloaded

|====================================================|100%                      Completed after 1 m 

When I type `data`, the output is a function???

data

function (..., list = character(), package = NULL, lib.loc = NULL, 
    verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE) 
{
    fileExt <- function(x) {
        db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
        ans <- sub(".*\\.", "", x)
        ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", 
            x[db])
        ans
    }
    my_read_table <- function(...) {
        lcc <- Sys.getlocale("LC_COLLATE")
        on.exit(Sys.setlocale("LC_COLLATE", lcc))
        Sys.setlocale("LC_COLLATE", "C")
        read.table(...)
    }
    names <- c(as.character(substitute(list(...))[-1L]), list)
    if (!is.null(package)) {
        if (!is.character(package)) 
            stop("'package' must be a character string or NULL")
        if (FALSE) {
            if (any(package %in% "base")) 
                warning("datasets have been moved from package 'base' to package 'datasets'")
            if (any(package %in% "stats")) 
                warning("datasets have been moved from package 'stats' to package 'datasets'")
            package[package %in% c("base", "stats")] <- "datasets"
        }
    }
    paths <- find.package(package, lib.loc, verbose = verbose)
    if (is.null(lib.loc)) 
        paths <- c(path.package(package, TRUE), if (!length(package)) getwd(), 
            paths)
    paths <- unique(normalizePath(paths[file.exists(paths)]))
    paths <- paths[dir.exists(file.path(paths, "data"))]
    dataExts <- tools:::.make_file_exts("data")
    if (length(names) == 0L) {
        db <- matrix(character(), nrow = 0L, ncol = 4L)
        for (path in paths) {
            entries <- NULL
            packageName <- if (file_test("-f", file.path(path, 
                "DESCRIPTION"))) 
                basename(path)
            else "."
            if (file_test("-f", INDEX <- file.path(path, "Meta", 
                "data.rds"))) {
                entries <- readRDS(INDEX)
            }
            else {
                dataDir <- file.path(path, "data")
                entries <- tools::list_files_with_type(dataDir, 
                  "data")
                if (length(entries)) {
                  entries <- unique(tools::file_path_sans_ext(basename(entries)))
                  entries <- cbind(entries, "")
                }
            }
            if (NROW(entries)) {
                if (is.matrix(entries) && ncol(entries) == 2L) 
                  db <- rbind(db, cbind(packageName, dirname(path), 
                    entries))
                else warning(gettextf("data index for package %s is invalid and will be ignored", 
                  sQuote(packageName)), domain = NA, call. = FALSE)
            }
        }
        colnames(db) <- c("Package", "LibPath", "Item", "Title")
        footer <- if (missing(package)) 
            paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")), 
                "\n", "to list the data sets in all *available* packages.")
        else NULL
        y <- list(title = "Data sets", header = NULL, results = db, 
            footer = footer)
        class(y) <- "packageIQR"
        return(y)
    }
    paths <- file.path(paths, "data")
    for (name in names) {
        found <- FALSE
        for (p in paths) {
            tmp_env <- if (overwrite) 
                envir
            else new.env()
            if (file_test("-f", file.path(p, "Rdata.rds"))) {
                rds <- readRDS(file.path(p, "Rdata.rds"))
                if (name %in% names(rds)) {
                  found <- TRUE
                  if (verbose) 
                    message(sprintf("name=%s:\t found in Rdata.rds", 
                      name), domain = NA)
                  thispkg <- sub(".*/([^/]*)/data$", "\\1", p)
                  thispkg <- sub("_.*$", "", thispkg)
                  thispkg <- paste0("package:", thispkg)
                  objs <- rds[[name]]
                  lazyLoad(file.path(p, "Rdata"), envir = tmp_env, 
                    filter = function(x) x %in% objs)
                  break
                }
                else if (verbose) 
                  message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n", 
                    name, paste(names(rds), collapse = ",")), 
                    domain = NA)
            }
            if (file_test("-f", file.path(p, "Rdata.zip"))) {
                warning("zipped data found for package ", sQuote(basename(dirname(p))), 
                  ".\nThat is defunct, so please re-install the package.", 
                  domain = NA)
                if (file_test("-f", fp <- file.path(p, "filelist"))) 
                  files <- file.path(p, scan(fp, what = "", quiet = TRUE))
                else {
                  warning(gettextf("file 'filelist' is missing for directory %s", 
                    sQuote(p)), domain = NA)
                  next
                }
            }
            else {
                files <- list.files(p, full.names = TRUE)
            }
            files <- files[grep(name, files, fixed = TRUE)]
            if (length(files) > 1L) {
                o <- match(fileExt(files), dataExts, nomatch = 100L)
                paths0 <- dirname(files)
                paths0 <- factor(paths0, levels = unique(paths0))
                files <- files[order(paths0, o)]
            }
            if (length(files)) {
                for (file in files) {
                  if (verbose) 
                    message("name=", name, ":\t file= ...", .Platform$file.sep, 
                      basename(file), "::\t", appendLF = FALSE, 
                      domain = NA)
                  ext <- fileExt(file)
                  if (basename(file) != paste0(name, ".", ext)) 
                    found <- FALSE
                  else {
                    found <- TRUE
                    zfile <- file
                    zipname <- file.path(dirname(file), "Rdata.zip")
                    if (file.exists(zipname)) {
                      Rdatadir <- tempfile("Rdata")
                      dir.create(Rdatadir, showWarnings = FALSE)
                      topic <- basename(file)
                      rc <- .External(C_unzip, zipname, topic, 
                        Rdatadir, FALSE, TRUE, FALSE, FALSE)
                      if (rc == 0L) 
                        zfile <- file.path(Rdatadir, topic)
                    }
                    if (zfile != file) 
                      on.exit(unlink(zfile))
                    switch(ext, R = , r = {
                      library("utils")
                      sys.source(zfile, chdir = TRUE, envir = tmp_env)
                    }, RData = , rdata = , rda = load(zfile, 
                      envir = tmp_env), TXT = , txt = , tab = , 
                      tab.gz = , tab.bz2 = , tab.xz = , txt.gz = , 
                      txt.bz2 = , txt.xz = assign(name, my_read_table(zfile, 
                        header = TRUE, as.is = FALSE), envir = tmp_env), 
                      CSV = , csv = , csv.gz = , csv.bz2 = , 
                      csv.xz = assign(name, my_read_table(zfile, 
                        header = TRUE, sep = ";", as.is = FALSE), 
                        envir = tmp_env), found <- FALSE)
                  }
                  if (found) 
                    break
                }
                if (verbose) 
                  message(if (!found) 
                    "*NOT* ", "found", domain = NA)
            }
            if (found) 
                break
        }
        if (!found) {
            warning(gettextf("data set %s not found", sQuote(name)), 
                domain = NA)
        }
        else if (!overwrite) {
            for (o in ls(envir = tmp_env, all.names = TRUE)) {
                if (exists(o, envir = envir, inherits = FALSE)) 
                  warning(gettextf("an object named %s already exists and will not be overwritten", 
                    sQuote(o)))
                else assign(o, get(o, envir = tmp_env, inherits = FALSE), 
                  envir = envir)
            }
            rm(tmp_env)
        }
    }
    invisible(names)
}
()
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19044)

Matrix products: default

locale:
[1] LC_COLLATE=Chinese (Simplified)_China.936 
[2] LC_CTYPE=Chinese (Simplified)_China.936   
[3] LC_MONETARY=Chinese (Simplified)_China.936
[4] LC_NUMERIC=C                              
[5] LC_TIME=Chinese (Simplified)_China.936    

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] TCGAbiolinksGUI.data_1.15.1 Biobase_2.52.0             
[3] BiocGenerics_0.38.0         DT_0.20                    
[5] dplyr_1.0.7                 TCGAbiolinks_2.23.12       

loaded via a namespace (and not attached):
 [1] bitops_1.0-7                matrixStats_0.60.1         
 [3] bit64_4.0.5                 filelock_1.0.2             
 [5] progress_1.2.2              httr_1.4.2                 
 [7] GenomeInfoDb_1.28.4         repr_1.1.3                 
 [9] tools_4.1.0                 utf8_1.2.2                 
[11] R6_2.5.1                    DBI_1.1.1                  
[13] colorspace_2.0-2            tidyselect_1.1.1           
[15] prettyunits_1.1.1           bit_4.0.4                  
[17] curl_4.3.2                  compiler_4.1.0             
[19] rvest_1.0.2                 xml2_1.3.2                 
[21] DelayedArray_0.18.0         scales_1.1.1               
[23] readr_2.0.2                 rappdirs_0.3.3             
[25] pbdZMQ_0.3-6                stringr_1.4.0              
[27] digest_0.6.27               XVector_0.32.0             
[29] base64enc_0.1-3             pkgconfig_2.0.3            
[31] htmltools_0.5.2             MatrixGenerics_1.4.3       
[33] dbplyr_2.1.1                fastmap_1.1.0              
[35] htmlwidgets_1.5.4           rlang_0.4.11               
[37] RSQLite_2.2.9               generics_0.1.1             
[39] jsonlite_1.7.2              crosstalk_1.2.0            
[41] RCurl_1.98-1.5              magrittr_2.0.1             
[43] GenomeInfoDbData_1.2.6      Matrix_1.3-4               
[45] Rcpp_1.0.7                  IRkernel_1.2               
[47] munsell_0.5.0               S4Vectors_0.30.2           
[49] fansi_0.5.0                 lifecycle_1.0.1            
[51] stringi_1.7.4               SummarizedExperiment_1.22.0
[53] zlibbioc_1.38.0             plyr_1.8.6                 
[55] BiocFileCache_2.0.0         grid_4.1.0                 
[57] blob_1.2.2                  crayon_1.4.2               
[59] lattice_0.20-44             IRdisplay_1.0              
[61] Biostrings_2.60.2           hms_1.1.1                  
[63] KEGGREST_1.32.0             knitr_1.36                 
[65] pillar_1.6.4                GenomicRanges_1.44.0       
[67] uuid_1.0-3                  biomaRt_2.48.3             
[69] stats4_4.1.0                XML_3.99-0.8               
[71] glue_1.6.2                  evaluate_0.14              
[73] downloader_0.4              data.table_1.14.0          
[75] png_0.1-7                   vctrs_0.3.8                
[77] tzdb_0.1.2                  gtable_0.3.0               
[79] purrr_0.3.4                 tidyr_1.1.3                
[81] assertthat_0.2.1            cachem_1.0.6               
[83] ggplot2_3.3.5               xfun_0.26                  
[85] tibble_3.1.4                AnnotationDbi_1.54.1       
[87] memoise_2.0.1               IRanges_2.26.0             
[89] ellipsis_0.3.2   
tiagochst commented 2 years ago

I ran it on both Linux and Mac; this is what you should get.

Screen Shot 2022-04-21 at 11 04 39 AM

Also, some libraries are not the same version as on my computer. I have data.table_1.14.2 and dplyr_1.0.8.

Could you please update those?

liujilei156231 commented 2 years ago

A new error appears after updating these two packages.

image

> query <- GDCquery(
+     project = "CPTAC-3",
+     data.category = "Transcriptome Profiling",
+     data.type = "Gene Expression Quantification",
+     workflow.type = "STAR - Counts"
+ )
--------------------------------------
o GDCquery: Searching in GDC database
--------------------------------------
Genome of reference: hg38
--------------------------------------------
oo Accessing GDC. This might take a while...
--------------------------------------------
ooo Project: CPTAC-3
--------------------
oo Filtering results
--------------------
ooo By data.type
ooo By workflow.type
----------------
oo Checking data
----------------
ooo Check if there are duplicated cases
ooo Check if there results for the query
-------------------
o Preparing output
-------------------
> GDCdownload(query)
Downloading data for project CPTAC-3
Of the 1275 files for download 1275 already exist.
All samples have been already downloaded
> data <- GDCprepare(query)
|=============================================|100%                      Completed after 1 m 
Error: cannot allocate vector of size 590.1 Mb

> sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19044)

Matrix products: default

locale:
[1] LC_COLLATE=Chinese (Simplified)_China.936 
[2] LC_CTYPE=Chinese (Simplified)_China.936   
[3] LC_MONETARY=Chinese (Simplified)_China.936
[4] LC_NUMERIC=C                              
[5] LC_TIME=Chinese (Simplified)_China.936    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] DT_0.20                     data.table_1.14.2          
[3] dplyr_1.0.8                 TCGAbiolinksGUI.data_1.15.1
[5] TCGAbiolinks_2.23.12       
liujilei156231 commented 2 years ago

The problem was solved by adding `memory.limit(size=56000)`.