BioinformaticsFMRP / TCGAbiolinks

286 stars 109 forks source link

GDCprepare: Error in if (query$data.type == "Masked Intensities" | query$data.category == : missing value where TRUE/FALSE needed #580

Closed OlegsBorodins closed 1 year ago

OlegsBorodins commented 1 year ago

Dear Sir / Madam,

I am currently working with TCGA data and I have run into an error.

tcga_HNSC_data <- GDCprepare(query_TCGA, summarizedExperiment = TRUE)
Error in if (query$data.type == "Masked Intensities" | query$data.category == : missing value where TRUE/FALSE needed

I have tried TCGAbiolinks 2.28.2 and TCGAbiolinks 2.28.1.

It works for some of the data in TCGA-HNSC (see both examples).

You had a similar issue in

I hope you can help me. All the best, OB

Output workflow:

> query_TCGA <- GDCquery(project = 'TCGA-HNSC',
+                        data.category = 'Transcriptome Profiling',
+                        experimental.strategy = 'RNA-Seq',
+                        workflow.type = 'STAR - Counts',
+                        access = 'open',
+                        barcode = c('TCGA-BB-4217-01A-11R-2081-07'
+                        ))
o GDCquery: Searching in GDC database
Genome of reference: hg38
oo Accessing GDC. This might take a while...
ooo Project: TCGA-HNSC
oo Filtering results
ooo By access
ooo By experimental.strategy
ooo By workflow.type
ooo By barcode
oo Checking data
ooo Checking if there are duplicated cases
ooo Checking if there are results for the query
o Preparing output
> getResults(query_TCGA)
                                      id data_format                        cases access
474 75b8e891-29bb-4993-b867-cf65aa85c8ed         TSV TCGA-BB-4217-01A-11R-2081-07   open
                                                                      file_name                         submitter_id           data_category
474 33f30e2c-ec0d-469d-a23d-0941645139e5.rna_seq.augmented_star_gene_counts.tsv 6b29e8e8-72a0-4819-b657-3aa3114c9d52 Transcriptome Profiling
               type file_size                 created_datetime                           md5sum                 updated_datetime
474 gene_expression   4247546 2021-12-13T18:42:20.012845-06:00 9975b18b72e6cfa0445c8b6845606d53 2022-01-19T12:38:14.995174-06:00
                                 file_id                      data_type    state experimental_strategy version data_release   project
474 75b8e891-29bb-4993-b867-cf65aa85c8ed Gene Expression Quantification released               RNA-Seq       1  32.0 - 37.0 TCGA-HNSC
                             analysis_id analysis_state                             analysis_submitter_id
474 b9b81bc9-293b-4d3c-b302-d6f5a6617116       released 33f30e2c-ec0d-469d-a23d-0941645139e5_star__counts
    analysis_workflow_type                analysis_workflow_version   sample_type is_ffpe cases.submitter_id sample.submitter_id
474          STAR - Counts 5d8c131bbff59fb0c969217fc1d44e6d1503cd1f Primary Tumor      NA       TCGA-BB-4217    TCGA-BB-4217-01A
> destfile_1<- "C:/Users/0.13/Desktop/TCGA_raw_data/RNA_Expression/"
> GDCdownload(query_TCGA,directory = destfile_1)
Downloading data for project TCGA-HNSC
Of the 1 files for download 1 already exist.
All samples have been already downloaded
> tcga_brca_data <- GDCprepare(query_TCGA, summarizedExperiment = TRUE)
Error in if (query$data.type == "Masked Intensities" | query$data.category ==  : 
  missing value where TRUE/FALSE needed
> gdcprojects <- getGDCprojects()
> getProjectSummary('TCGA-HNSC')
[1] 26901

  file_count case_count               data_category
1       3709        528            Sequencing Reads
2       2858        528                 Biospecimen
3       4340        526       Copy Number Variation
4       8263        528 Simple Nucleotide Variation
5       2270        528     Transcriptome Profiling
6       1740        528             DNA Methylation
7       1103        528                    Clinical
8       2264        521        Structural Variation
9        354        354          Proteome Profiling

[1] 528

[1] 7.174608e+13

> query_TCGA <- GDCquery(project = 'TCGA-HNSC',
+                        data.category = 'Transcriptome Profiling',
+                        experimental.strategy = 'RNA-Seq',
+                        workflow.type = 'STAR - Counts',
+                        access = 'open',
+                        barcode = c('TCGA-BB-4217-01A-11R-2081-07'
+                        ))
o GDCquery: Searching in GDC database
Genome of reference: hg38
oo Accessing GDC. This might take a while...
ooo Project: TCGA-HNSC
oo Filtering results
ooo By access
ooo By experimental.strategy
ooo By workflow.type
ooo By barcode
oo Checking data
ooo Checking if there are duplicated cases
ooo Checking if there are results for the query
o Preparing output
> getResults(query_TCGA)
                                      id data_format                        cases access
474 75b8e891-29bb-4993-b867-cf65aa85c8ed         TSV TCGA-BB-4217-01A-11R-2081-07   open
                                                                      file_name                         submitter_id           data_category
474 33f30e2c-ec0d-469d-a23d-0941645139e5.rna_seq.augmented_star_gene_counts.tsv 6b29e8e8-72a0-4819-b657-3aa3114c9d52 Transcriptome Profiling
               type file_size                 created_datetime                           md5sum                 updated_datetime
474 gene_expression   4247546 2021-12-13T18:42:20.012845-06:00 9975b18b72e6cfa0445c8b6845606d53 2022-01-19T12:38:14.995174-06:00
                                 file_id                      data_type    state experimental_strategy version data_release   project
474 75b8e891-29bb-4993-b867-cf65aa85c8ed Gene Expression Quantification released               RNA-Seq       1  32.0 - 37.0 TCGA-HNSC
                             analysis_id analysis_state                             analysis_submitter_id
474 b9b81bc9-293b-4d3c-b302-d6f5a6617116       released 33f30e2c-ec0d-469d-a23d-0941645139e5_star__counts
    analysis_workflow_type                analysis_workflow_version   sample_type is_ffpe cases.submitter_id sample.submitter_id
474          STAR - Counts 5d8c131bbff59fb0c969217fc1d44e6d1503cd1f Primary Tumor      NA       TCGA-BB-4217    TCGA-BB-4217-01A

> GDCdownload(query_TCGA,directory = destfile_1)
Downloading data for project TCGA-HNSC
GDCdownload will download: 4.247546 MB
Downloading as: 33f30e2c-ec0d-469d-a23d-0941645139e5.rna_seq.augmented_star_gene_counts.tsv
  |===========================================================================================================================================| 100%
> tcga_HNSC_data <- GDCprepare(query_TCGA, summarizedExperiment = TRUE)
Error in if (query$data.type == "Masked Intensities" | query$data.category ==  : 
  missing value where TRUE/FALSE needed

xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

> query_TCGA <- GDCquery(project = 'TCGA-HNSC',
+                        data.category = 'Transcriptome Profiling',
+                        experimental.strategy = 'RNA-Seq',
+                        workflow.type = 'STAR - Counts',
+                        access = 'open',
+                        barcode = c('TCGA-CV-7177-11A-01R-2016-07'
+                        ))
o GDCquery: Searching in GDC database
Genome of reference: hg38
oo Accessing GDC. This might take a while...
ooo Project: TCGA-HNSC
oo Filtering results
ooo By access
ooo By experimental.strategy
ooo By workflow.type
ooo By barcode
oo Checking data
ooo Checking if there are duplicated cases
ooo Checking if there are results for the query
o Preparing output
> getResults(query_TCGA)
                                    id data_format                        cases access
1 a40e28c3-b3ed-4edf-bfce-71dfb740e8a6         TSV TCGA-CV-7177-11A-01R-2016-07   open
                                                                    file_name                         submitter_id           data_category
1 d8bb9aab-ed89-4087-b0dc-0a43d06e3256.rna_seq.augmented_star_gene_counts.tsv beb324c4-c014-40fa-9b47-117b47f71079 Transcriptome Profiling
             type file_size                 created_datetime                           md5sum                 updated_datetime
1 gene_expression   4230911 2021-12-13T19:11:03.296681-06:00 33ef0d6b659ccc743526d4593be7a8ca 2022-01-19T12:38:14.995174-06:00
                               file_id                      data_type    state experimental_strategy version data_release   project
1 a40e28c3-b3ed-4edf-bfce-71dfb740e8a6 Gene Expression Quantification released               RNA-Seq       1  32.0 - 37.0 TCGA-HNSC
                           analysis_id analysis_state                             analysis_submitter_id
1 a1beed3e-adac-4a25-9dee-d3a841e9ae1a       released d8bb9aab-ed89-4087-b0dc-0a43d06e3256_star__counts
  analysis_workflow_type                analysis_workflow_version         sample_type is_ffpe cases.submitter_id sample.submitter_id
1          STAR - Counts 5d8c131bbff59fb0c969217fc1d44e6d1503cd1f Solid Tissue Normal      NA       TCGA-CV-7177    TCGA-CV-7177-11A
> GDCdownload(query_TCGA,directory = destfile_1)
Downloading data for project TCGA-HNSC
GDCdownload will download: 4.230911 MB
Downloading as: d8bb9aab-ed89-4087-b0dc-0a43d06e3256.rna_seq.augmented_star_gene_counts.tsv
  |===========================================================================================================================================| 100%
> tcga_HNSC_data <- GDCprepare(query_TCGA, summarizedExperiment = TRUE)
|=========================================================================================================================|100%                      Completed after 0 s 
Starting to add information to samples
 => Add clinical information to samples
 => Adding TCGA molecular information from marker papers
 => Information will have prefix 'paper_' 
hnsc subtype information from:doi:10.1038/nature14129
Available assays in SummarizedExperiment : 
  => unstranded
  => stranded_first
  => stranded_second
  => tpm_unstrand
  => fpkm_unstrand
  => fpkm_uq_unstrand
OlegsBorodins commented 1 year ago

Additionally, I have also checked: devtools::install_github("BioinformaticsFMRP/TCGAbiolinks")

TCGAbiolinks (2.29.0)

And my version of R: 4.3.0

tiagochst commented 1 year ago

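In GDCprepare, you need to specify the same directory that you used in GDCdownload. Otherwise GDCprepare looks in its default directory ("GDCdata") rather than the folder the files were downloaded to.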

GDCdownload(query_TCGA,directory = destfile_1)
tcga_HNSC_data <- GDCprepare(query_TCGA, summarizedExperiment = TRUE,directory = destfile_1)
OlegsBorodins commented 1 year ago
