arnesmits / DEP

DEP package
26 stars 12 forks source link

P adjusted bigger than p value? #39

Open mavino opened 5 months ago

mavino commented 5 months ago

Hi there, I am attaching my code, my inputs and my results after using DEP. I do not understand how, for one condition, the p value (column E of the output file "data_results.csv") is bigger than its corresponding adjusted p value (column H), which in the end gives me an inflated number of significant proteins (column K). Thank you.

# Differential protein-expression workflow built on the DEP package.
# Reads a proteinGroups table and an experimental design, makes protein names
# unique, builds the DEP data object, filters / normalises / imputes the LFQ
# columns, tests every contrast (type = "all") and writes results and plots.

# NOTE(review): setwd() ties the script to one machine; it is kept because
# every relative path below is resolved against this directory.
setwd("/media/mariano/Elements1/Jean/12022024_Jesus/")
library(DEP)
library(tidyverse)
library(data.table)
# Fixed seed so repeated runs give the same output (presumably needed for the
# "MinProb" imputation further down -- confirm).
set.seed(123)

# ---- Load the protein table ----
data <- read.table(
  "Jesus 06-01-2024 proteinGroups.csv",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, dec = "."
)
# Decoy / contaminant filtering is currently disabled:
#data <- dplyr::filter(data, Reverse != "+",is.na(Potential.contaminant))

# ---- Make protein names unique ----
# Does any gene name occur more than once? (auto-printed at top level)
data$Gene.names %>% duplicated() %>% any()
# Gene names that occur more than once, most frequent first.
data %>%
  group_by(Gene.names) %>%
  summarize(frequency = n()) %>%
  arrange(desc(frequency)) %>%
  filter(frequency > 1)
data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
# Verify uniqueness on the output of make_unique(). The previous check read
# `data$name`; `data` was never given a `name` column in this script, so that
# expression evaluated on NULL and always printed FALSE.
data_unique$name %>% duplicated() %>% any()

# ---- Prepare LFQ columns and experimental design ----
# LFQ_data is not used again in this script; it is kept because save.image()
# at the end stores it in the workspace file.
LFQ_data <- dplyr::select(data_unique, contains("LFQ"))
colnames(LFQ_data) <- gsub(
  "LFQ.intensity.", "", as.character(colnames(LFQ_data))
)
colnames(LFQ_data) <- gsub("^0", "", as.character(colnames(LFQ_data)))
# Column positions must be captured BEFORE the "LFQ.intensity." prefix is
# stripped below; the rename does not reorder columns, so they stay valid.
LFQ_columns <- grep("LFQ", colnames(data_unique))
exp_design <- read.table(file = "Exp_design.csv", sep = "\t", header = TRUE)
exp_design$label <- as.character(exp_design$label)
# Strip the prefix and a single leading "0" from every column name
# (presumably so the names line up with exp_design$label -- confirm).
colnames(data_unique) <- gsub(
  "LFQ.intensity.", "", as.character(colnames(data_unique))
)
colnames(data_unique) <- gsub("^0", "", as.character(colnames(data_unique)))
data_se <- DEP::make_se(data_unique, columns = LFQ_columns, exp_design)

# ---- QC, filtering, normalisation, imputation ----
plot_frequency(data_se)
plot_numbers(data_se)
plot_coverage(data_se)
data_filt <- filter_missval(data_se, thr = 2)
plot_coverage(data_filt)
plot_numbers(data_filt)
data_norm <- normalize_vsn(data_filt)
plot_normalization(data_filt, data_norm)
plot_missval(data_filt)
plot_detect(data_filt)
data_imp <- impute(data_norm, fun = "MinProb", q = 0.01)
plot_imputation(data_norm, data_imp)

# ---- Differential testing ----
data_diff_all_contrasts <- test_diff(data_imp, type = "all")
# NOTE(review): this run reportedly produced adjusted p-values smaller than
# the raw p-values; check how DEP derives p.adj (DEP issues #7 and #21)
# before relying on the `significant` column.
dep <- add_rejections(data_diff_all_contrasts, alpha = 0.05)
# NOTE(review): n = 7669 is hard-coded; presumably the number of proteins in
# `dep` -- confirm it still matches after any change to the filtering.
plot_pca(
  dep,
  x = 1, y = 2, n = 7669, point_size = 3,
  label = FALSE, indicate = "condition"
) +
  geom_text(label = exp_design$sample_name)

# ---- Export results and figures ----
data_results <- get_results(dep)
# Number of rows flagged significant (auto-printed).
data_results %>% filter(significant) %>% nrow()
colnames(data_results)
# Tab-separated despite the .csv extension.
write.table(
  data_results, "data_results.csv",
  sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE
)
plot_heatmap(
  dep,
  type = "centered", kmeans = FALSE,
  col_limit = 4, show_row_names = FALSE,
  indicate = c("condition"), row_font_size = 3,
  column_labels = exp_design$sample_name
)
plot_volcano(dep, contrast = "WT_vs_KO", label_size = 2, add_names = TRUE)
# Persist the whole workspace (all objects created above).
save.image(file = "Data.RData")

data_results.csv Jesus 06-01-2024 proteinGroups.csv Exp_design.csv

ericloud commented 5 months ago

Check #7 and #21