BioinformaticsFMRP / TCGAbiolinks

TCGAbiolinks
http://bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/index.html
286 stars 109 forks source link

Error with TCGAanalyze_DEA #542

Open sagarutturkar opened 1 year ago

sagarutturkar commented 1 year ago

Below is my code:

# Differential expression workflow for TARGET-OS STAR counts (TCGAbiolinks).
# NOTE: `high` and `low` are not created in this snippet; they are used below
# as column selectors on `dataFilt` and must already exist in the session.

# ---- Project and file locations ----
CancerProject <- "TARGET-OS"
DataDirectory <- paste0(
  "../GDC/",
  gsub("-", "_", CancerProject, fixed = TRUE)
)
FileNameData <- paste0(DataDirectory, "_", "STAR_Counts", ".rda")

# ---- Query, download and prepare the expression data ----
query <- GDCquery(
  project = CancerProject,
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts"
)

GDCdownload(query = query)

# Prepared object for the queried files.
data <- GDCprepare(query = query)

# ---- Preprocess, normalize and filter ----
dataPrep <- TCGAanalyze_Preprocessing(
  object = data,
  cor.cut = 0.6
)

dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataPrep,
  geneInfo = geneInfoHT,
  method = "gcContent"
)

dataFilt <- TCGAanalyze_Filtering(
  tabDF = dataNorm,
  method = "quantile",
  qnt.cut = 0.25
)

# ---- Differential expression: "High" vs "Low" sample groups ----
dataDEGs <- TCGAanalyze_DEA(
  mat1 = dataFilt[, high],
  mat2 = dataFilt[, low],
  Cond1type = "High",
  Cond2type = "Low",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT",
  pipeline = "edgeR"
)

In the last step, TCGAanalyze_DEA, I manually defined the two sample groups (`high` and `low`) based on a specific condition:

> high
 [1] "TARGET-40-PAKZZK-01A-01R" "TARGET-40-PALFYN-01A-01R" "TARGET-40-PARBGW-01A-01R" "TARGET-40-PAKUZU-01A-01R" "TARGET-40-PANXSC-01A-01R"
 [6] "TARGET-40-PATJVI-01A-01R" "TARGET-40-PAMJXS-01A-01R" "TARGET-40-PALKGN-01A-01R" "TARGET-40-PALZGU-01A-01R" "TARGET-40-PAVECB-01A-01R"
[11] "TARGET-40-PANZHX-01A-01R" "TARGET-40-PANGRW-01A-01R" "TARGET-40-PASUUH-01A-01R" "TARGET-40-PAVCLP-01A-01R" "TARGET-40-PAUTWB-01A-01R"
[16] "TARGET-40-PAUTYB-01A-01R" "TARGET-40-PALWWX-01A-01R" "TARGET-40-0A4I0S-01A-01R" "TARGET-40-PAMEKS-01A-01R" "TARGET-40-PARJXU-01A-01R"
[21] "TARGET-40-PARGTM-01A-01R" "TARGET-40-PARKAF-01A-01R" "TARGET-40-PASFCV-01A-01R" "TARGET-40-0A4I3S-01A-01R" "TARGET-40-PANVJJ-01A-01R"
[26] "TARGET-40-PAPIJR-01A-01R" "TARGET-40-PAPNVD-01A-01R" "TARGET-40-0A4I5B-01A-01R" "TARGET-40-0A4HY5-01A-01R" "TARGET-40-0A4HLD-01A-01R"
[31] "TARGET-40-PASYUK-01A-01R" "TARGET-40-0A4HXS-01A-01R" "TARGET-40-PASEBY-01A-01R" "TARGET-40-PAPXGT-01A-01R" "TARGET-40-0A4HMC-01A-01R"
[36] "TARGET-40-0A4I48-01A-01R" "TARGET-40-PASNZV-01A-01R" "TARGET-40-PASRNE-01A-01R" "TARGET-40-0A4I4M-01A-01R" "TARGET-40-0A4I6O-01A-01R"
[41] "TARGET-40-PATPBS-01A-01R" "TARGET-40-PATMIF-01A-01R" "TARGET-40-PATEEM-01A-01R"

> low
 [1] "TARGET-40-PALECC-01A-01R" "TARGET-40-PARDAX-01A-01R" "TARGET-40-0A4I9K-01A-01R" "TARGET-40-PAPKWD-01A-01R" "TARGET-40-PAKXLD-01A-01R"
 [6] "TARGET-40-PALKDP-01A-01R" "TARGET-40-PAVALD-01A-01R" "TARGET-40-PAPFLB-01A-01R" "TARGET-40-PATMPU-01A-01R" "TARGET-40-0A4I0Q-01A-01R"
[11] "TARGET-40-PATKSS-01A-01R" "TARGET-40-PAMRHD-01A-01R" "TARGET-40-PAMYYJ-01A-01R" "TARGET-40-PAMLKS-01A-01R" "TARGET-40-PAMHLF-01A-01R"
[16] "TARGET-40-PAMHYN-01A-01R" "TARGET-40-PASEFS-01A-01R" "TARGET-40-PASKZZ-01A-01R" "TARGET-40-0A4I42-01A-01R" "TARGET-40-0A4I65-01A-01R"
[21] "TARGET-40-PATAWV-01A-01R" "TARGET-40-0A4I0W-01A-01R" "TARGET-40-PAUBIT-01A-01R" "TARGET-40-PAUXPZ-01A-01R" "TARGET-40-PAUYTT-01A-01R"
[26] "TARGET-40-PAUUML-01A-01R" "TARGET-40-PAUVUL-01A-01R" "TARGET-40-0A4HX8-01A-01R" "TARGET-40-PAMTCM-01A-01R" "TARGET-40-PARFTG-01A-01R"
[31] "TARGET-40-PAPWWC-01A-01R" "TARGET-40-PAVDTY-01A-01R" "TARGET-40-PANMIG-01A-01R" "TARGET-40-0A4I4O-01A-01R" "TARGET-40-PANPUM-01A-01R"
[36] "TARGET-40-PASSLM-01A-01R" "TARGET-40-PALHRL-01A-01R" "TARGET-40-PAKFVX-01A-01R" "TARGET-40-PATUXZ-01A-01R" "TARGET-40-PANSEN-01A-01R"
[41] "TARGET-40-0A4I4E-01A-01R" "TARGET-40-PANGPE-01A-01R" "TARGET-40-0A4I8U-01A-01R" "TARGET-40-PANZZJ-01A-01R" "TARGET-40-PATMXR-01A-01R"

I get the following error:

Error in names(x) <- value : 'names' attribute [7] must be the same length as the vector [5]

Can you please help me figure out what is causing this error?

tiagochst commented 1 year ago

The code was initially written to support only TCGA samples. I fixed the part giving problems for non-TCGA samples. It should be working now.