edyp-lab / DaparToolshed

0 stars 0 forks source link

[FEATURE DaparToolshed] group t-test #3

Open samWieczorek opened 2 years ago

samWieczorek commented 2 years ago

Describe the feature Following, a code allowing to make a t-test by grouping all the specific peptides (we forget the shared ones). However, the code is currently not very generic (only works for 2 3vs3 conditions), so some refactoring is needed...


# peptide-level t-test

## Build design matrix X
X <- as.matrix(BuildAdjacencyMatrix(data, 'Protein.group.IDs', unique = FALSE))
X.spec <- as.matrix(BuildAdjacencyMatrix(data, 'Protein.group.IDs', unique = TRUE))
 
y <- exprs(data)
y <- y[rownames(X), ]
n <- n1+n2 # number of samples in c1 and c2
 
## Keep track of proteins that are lost in aggregation step
unsup.prot <- !(colnames(X) %in% colnames(X.spec))
q <- nrow(X) # Number of peptides
p <- ncol(X) # Number of proteins
 
 
# Two ways to compute it (function groupttest below)
peptide.spec.based.tmp <- apply(X.spec, 2, FUN=function(mask) t.test(x=as.vector(y[mask == 1, 1:n1]), y=as.vector(y[mask == 1, -(1:n1)]), var.equal=TRUE)$p.value)
peptide.spec.based.tmp <- groupttest(X.spec,y)
 
# then:
peptide.spec.based.pv <- rep(1, ncol(X))
peptide.spec.based.pv[!unsup.prot] <- peptide.spec.based.tmp
 
#######################
### attention, à rendre générique: pour l'instant ne marche qu'avec n1=3 (car codé en dur)
groupttest <- function(MatAdj, expr){
nProt <- dim(MatAdj)[2]
res <- rep(0,nProt)
for(i in 1:nProt){
#print(i)
index <- names(which(MatAdj[,i]==1))
if(length(index)== 0){
res[i] <- 1
} else{
peptidestotest <- expr[which(rownames(expr)%in% index),]
if(length(index)== 1){
res[i] <- t.test(x=peptidestotest[1:3], y=peptidestotest[-(1:3)], var.equal=TRUE)$p.value
} else{
res[i] <- t.test(x=peptidestotest[,1:3], y=peptidestotest[,-(1:3)], var.equal=TRUE)$p.value
}
}
}
return(res)
}

# peptide-level t-test
##########
## Build design matrix X
X <- as.matrix(BuildAdjacencyMatrix(data, 'Protein.group.IDs', unique = FALSE))
X.spec <- as.matrix(BuildAdjacencyMatrix(data, 'Protein.group.IDs', unique = TRUE))
y <- exprs(data)
y <- y[rownames(X), ]
n <- n1+n[2](https://bioproj.extra.cea.fr/redmine/attachments/30010#L2) # number of samples in c1 and c2
## Keep track of proteins that are lost in aggregation step
unsup.prot <- !(colnames(X) %in% colnames(X.spec))
q <- nrow(X) # Number of peptides
p <- ncol(X) # Number of proteins
# Two ways to compute it (function groupttest below)
peptide.spec.based.tmp <- apply(X.spec, 2, FUN=function(mask) t.test(x=as.vector(y[mask == 1, 1:n1]), y=as.vector(y[mask == 1, -(1:n1)]), var.equal=TRUE)$p.value)
peptide.spec.based.tmp <- groupttest(X.spec,y)
# then:
peptide.spec.based.pv <- rep(1, ncol(X))
peptide.spec.based.pv[!unsup.prot] <- peptide.spec.based.tmp
#######################
### attention, à rendre générique: pour l'instant ne marche qu'avec n1=[3](https://bioproj.extra.cea.fr/redmine/attachments/30010#L3) (car codé en dur)
groupttest <- function(MatAdj, expr){
  nProt <- dim(MatAdj)[2]
  res <- rep(0,nProt)
  for(i in 1:nProt){
    #print(i)
    index <- names(which(MatAdj[,i]==1))
    if(length(index)== 0){
      res[i] <- 1
    } else{
      peptidestotest <- expr[which(rownames(expr)%in% index),]
      if(length(index)== 1){
        res[i] <- t.test(x=peptidestotest[1:3], y=peptidestotest[-(1:3)], var.equal=TRUE)$p.value
      } else{
        res[i] <- t.test(x=peptidestotest[,1:3], y=peptidestotest[,-(1:3)], var.equal=TRUE)$p.value
      }

    }
  }
  return(res)
}
samWieczorek commented 2 years ago

for the moment, integration only in DAPAR. Once this integration is done, lower the priority of the ticket but do not close it: it will have to be integrated in prostar 2.