pachterlab / sleuth

Differential analysis of RNA-Seq
http://pachterlab.github.io/sleuth
GNU General Public License v3.0
305 stars 95 forks source link

DGE Kallisto / Sleuth #211

Closed shahab178 closed 5 years ago

shahab178 commented 5 years ago

I am doing RNA-seq (Comparative Transcriptome, DETs and DEGs) and I am using your tools (Kallisto, Sleuth).

My question is: 1- If in the ltr or wt with the condition of q value =<0.05 we have 0 transcripts or gene It means there is no any statistically different between my conditions (DTEs, DGEs)? (~treatment_s) In this case, I will not be able to continue my analysis with the b >1 and b <1 for up and down regulated? As you can see in the print screen I have 0 (zero) transcript and a gene with the q value =<0.05 but at the end when I do up and down regulate considering the b value I have some result about 5000 up and 10000 down genes that I now It is not intuitive. 2- Is there any necessity that the number of my samples form each condition (~treatment) be the same or not? For instance, I have 3 samples as control, 3 samples as Vaccinal and 6 samples as Sylvatic.

Thank you very much in advance

library(devtools) library(sleuth) library(ggplot2) library(biomaRt) library(DESeq2)

Set the working directory from RStudio(where all the folders of runs are)

setwd("~/Downloads/tra")

Set the working directory

base_dir <- "~/Downloads/tra"

Where Kallisto result are ( make a folder with the name result, put all the result inside)

sample_id <- dir(file.path(base_dir,"results_S_V_C")) sample_id

A list of paths to the kallisto results indexed by the sample IDs

Get the directories where the kallisto runs live

kal_dirs <- sapply(sample_id, function(id) file.path(base_dir, "results_S_V_C", id)) kal_dirs

Fill in metadata about the samples

Auxillary table describes the experimental design and the relationship between the kallisto directories and the samples

s2c <- read.table(file.path(base_dir, "sample_table_S_V_C.csv"), sep = ',', header = TRUE, stringsAsFactors = FALSE) s2c <- dplyr::select(s2c, sample ='Run_s', 'gender_s', 'age_s', 'tissue_region_s', 'treatment_s') s2c

Add file paths to the column (s2c table)

s2c <- dplyr::mutate(s2c, path = kal_dirs) print(s2c)

so <- sleuth_prep(s2c, extra_bootstrap_summary = TRUE,read_bootstrap_tpm=TRUE)

so <- sleuth_fit(so, ~treatment_s, 'full')

so <- sleuth_fit(so, ~1, 'reduced')

so <- sleuth_lrt(so, 'reduced', 'full')

models(so)

transcript_ltr <- sleuth_results(so, 'reduced:full', 'lrt', show_all = FALSE) significant_transcript_ltr <- dplyr::filter(transcript_ltr, qval <= 0.05) head(significant_transcript_ltr, 20)

so <- sleuth_wt(so, 'treatment_sVaccine') transcript_wt <- sleuth_results(so, 'treatment_sVaccine', test_type = 'wt') significant_transcript_wt <- dplyr::filter(transcript_wt, qval <= 0.05) head(significant_transcript_wt, 20)

write.csv(transcript_ltr, "/home/shahab/Desktop/transcript_ltr", row.names = F, quote = F) write.csv(transcript_wt, "/home/shahab/Desktop/transcript_wt", row.names = F, quote = F)

write.csv(significant_transcript_ltr, "/home/shahab/Desktop/significant_transcript_ltr", row.names = F, quote = F) write.csv(significant_transcript_wt, "/home/shahab/Desktop/significant_transcript_wt", row.names = F, quote = F)

final_results_transcript <- merge(transcript_ltr, transcript_wt, by="target_id") final_results_transcript <- final_results_transcript[,c("target_id", "qval.x", "qval.y", "b")] colnames(final_results_transcript) <- c("target_id", "qval_lrt", "qval_wt", "b") head (final_results_transcript)

final_results_significant_transcript <- merge(significant_transcript_ltr, significant_transcript_wt, by="target_id") final_results_significant_transcript <- final_results_significant_transcript[,c("target_id", "qval.x", "qval.y", "b")] colnames(final_results_significant_transcript) <- c("target_id", "qval_lrt", "qval_wt", "b") head (final_results_significant_transcript)

write.csv(final_results_transcript, "/home/shahab/Desktop/final_transcript_results", row.names = F, quote = F) write.csv(final_results_significant_transcript, "/home/shahab/Desktop/final_results_transcript_significant", row.names = F, quote = F)

Obtaining consistent transcript-level differential expression results

sleuth_table_tx <- sleuth_results(so, 'reduced:full', 'lrt', show_all = FALSE, pval_aggregate = FALSE) sleuth_table_tx <- dplyr::filter(sleuth_table_tx, qval <= 0.05) head(sleuth_table_tx, 20)

up/down regulation

sleuth_up_trans<- dplyr::filter(final_results_transcript, b > 1) nrow(sleuth_up_trans) sleuth_down_trans <- dplyr::filter(final_results_transcript, b < 1) nrow(sleuth_down_trans) write.table(sleuth_down_trans,"/home/shahab/Desktop/sleuth_down_trans.tab", sep = "\t") write.table(sleuth_up_trans,"/home/shahab/Desktop/sleuth_up_trans.tab", sep = "\t")

include gene names into transcript-level analysys

mart <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl",

host = "mar2015.archive.ensembl.org",

                     host = "uswest.ensembl.org",
                     path = "/biomart/martservice")

ttg <- biomaRt::getBM (attributes = c("ensembl_transcript_id", "transcript_version", "ensembl_gene_id", "external_gene_name", "description", "transcript_biotype"), mart = mart) head(ttg)

ttg <- dplyr::rename(ttg, target_id = ensembl_transcript_id, ens_gene = ensembl_gene_id, ext_gene = external_gene_name)

ttg$ens_gene<- paste(ttg$ens_gene, ttg$transcript_version, sep = ".") ttg$target_id<- paste(ttg$target_id, ttg$transcript_version, sep = ".")

head(ttg)

so <- sleuth_prep(sample_to_covariates = s2c, ~treatment_s, target_mapping = ttg, aggregation_column = 'ens_gene', gene_mode= T, extra_bootstrap_summary = T, read_bootstrap_tpm = TRUE, transformation_function = function(x) log2(x + 0.5))

so <- sleuth_fit(so, ~treatment_s, 'full')

so <- sleuth_fit(so, ~1, 'reduced')

so <- sleuth_lrt(so, 'reduced', 'full')

models (so)

gene_ltr <- sleuth_results(so, 'reduced:full', 'lrt', show_all = FALSE) significant_gene_ltr <- dplyr::filter(gene_ltr, qval <= 0.05) head(significant_gene_ltr, 20)

so <- sleuth_wt(so, 'treatment_sVaccine' ) gene_wt <- sleuth_results(so, 'treatment_sVaccine', test_type = 'wt', show_all = F) significant_gene_wt <- dplyr::filter(gene_wt, qval <= 0.05) head(significant_gene_wt, 20)

write.csv(gene_ltr, "/home/shahab/Desktop/gene_ltr", row.names = F, quote = F) write.csv(gene_wt, "/home/shahab/Desktop/gene_wt", row.names = F, quote = F)

final_results_gene <- merge(gene_ltr, gene_wt, by="target_id") final_results_gene <- final_results_gene[,c("target_id", "qval.x", "qval.y", "b")] colnames(final_results_gene) <- c("target_id", "qval_lrt", "qval_wt", "b") head (final_results_gene )

write.csv(final_results_gene, "/home/shahab/Desktop/final_results_gene", row.names = F, quote = F)

up/down regulation

sleuth_up_gene<- dplyr::filter(final_results_gene, b > 1) nrow(sleuth_up_gene) sleuth_down_gene <- dplyr::filter(final_results_gene, b < 1) nrow(sleuth_down_gene)

write.table(sleuth_down_gene,"/home/shahab/Desktop/sleuth_down_gene.tab", sep = "\t") write.table(sleuth_up_gene,"/home/shahab/Desktop/sleuth_up_gene.tab", sep = "\t")

write.table(sleuth_down_gene,"/home/shahab/Desktop/sleuth_down_gene.tab", sep = "\t", row.names = FALSE, quote = FALSE)

write.table(sleuth_up_gene,"/home/shahab/Desktop/sleuth_up_gene.tab", sep = "\t", row.names = FALSE, quote = FALSE)

sleuth_live(so)

warrenmcg commented 5 years ago

This is better suited for the google groups, since this doesn't seem related to a bug or a feature request with sleuth, but rather why your data is not yielding results. As you have already cross-posted in the google groups, I'm going to close this issue.

In general, it's not a good idea to cross-post (see here and here for why). For the future, think about what each forum is intended to do before posting, and if you're going to cross-post, make it clear at the top of your post that you are doing so, and why you are doing so.

shahab178 commented 5 years ago

Ok. Close or Open still I didn't get my answer and maybe Use another software freely available for doing the same things and you will lose the citation.

warrenmcg commented 5 years ago

Fair, I should have responded to this after I answered your question on the forum. Sorry for the mistake there!