PoonLab / tn

Optimization of genetic clustering methods by predictive modeling
GNU General Public License v3.0
0 stars 0 forks source link

Changes in repo on Erasmas staged but not committed #36

Closed ArtPoon closed 3 years ago

ArtPoon commented 3 years ago
cchato@Erasmas:~/git/tn$ git diff --cached
diff --git a/packaging/analysis.R b/packaging/analysis.R
index 388323f..37ac59f 100644
--- a/packaging/analysis.R
+++ b/packaging/analysis.R
@@ -1,12 +1,11 @@
 #' Runs a given clustering method over a range of parameters values.
 multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, rangeID=0) {
-  #'@param t: The input tree file, annotated with vertex and edge information
-  #'@param param.list: A named list of parameter sets. Each must correspond to the clustering method used. 
+  #'@param param.list: A named list of parameter sets. Each must correspond to the clustering method used.
   #'@param rangeID: If several different parameter ranges are used, the rangeID can identify them.
   #'@param mc.cores: A parallel option
   #'@param verbose: An output monitoring option
   #'@return: A larger data.table with parameter sets noted
-  
+
   #Cluster method loop
   cluster.range <- parallel::mclapply(1:length(param.list), function(i){
     x <- param.list[[i]]
@@ -17,10 +16,10 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, ran
     }
     do.call(cluster.method, x)
   }, mc.cores=mc.cores)
-  
+
   cluster.range <- dplyr::bind_rows(cluster.range)
   suppressWarnings(cluster.range[,"RangeID" := rangeID])
-  
+
   return(cluster.range)
 }

@@ -28,7 +27,7 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, ran
 #' The AIC obtained is based on a predictive model of cluster growth
 #' NOTE: The default additional parameter for this analysis is "Time". This may or may not be a row in inputted cluster.range data
 #' The default outcome variable is growth. This generally means that this function is expecting a cluster with annotated growth data.
-fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.formula=Growth~Size+Time, 
+fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.formula=Growth~Size+Time,
                          predictor.model=function(f, x){glm(formula=f, data = x, family="poisson")},
                          predictor.transformations=list("Time"=function(x){mean(x)})) {
   #' @param cluster.data: Inputted set(s) of cluster data. May or may not be sorted into ranges
@@ -37,7 +36,7 @@ fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, ful
   #' @param full.formula: The full model for the prediction of growth. This will be compared to a null Growth~Size model
   #' @param predictor.transformations: A named list of transformation functions for each predictor variable
   #' @return: A data.table of analysis results. Several important summary values such as null and full AIC are proposed here
-  
+
   #Check inputs
   predictors <- names(predictor.transformations)
   setIDs <- unique(cluster.data[,SetID])
@@ -56,24 +55,24 @@ fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, ful
     warning("No Growth information from clusters. By default this will be set to 0 for all sets")
     cluster.data[,"Growth" := 0]
   }
-  
+
   #Transform cluster data for modelling based on inputs
   model.data <- cluster.data[, c("Header", "Size", "Growth", "SetID", "RangeID")]
   model.data[, (predictors) := lapply(predictors, function(x){
     sapply(cluster.data[, get(x)], function(z){(predictor.transformations[[x]])(z)})
   })]
-    
+
   #Obtain fit data for each cluster set
   cluster.analysis <- dplyr::bind_rows(
     parallel::mclapply(setIDs, function(id) {
       DT <- model.data[SetID==id, ]
       suppressWarnings(null.fit <- predictor.model(null.formula, DT))
       suppressWarnings(full.fit <- predictor.model(full.formula, DT))
-      
+
       res <- data.table::data.table("NullFit"=list(null.fit), "FullFit"=list(full.fit), "SetID"=DT[1,SetID], "RangeID"=DT[1,RangeID])
       return(res)
     }, mc.cores=mc.cores))
-  
+
   return(cluster.analysis)
 }

@@ -83,22 +82,22 @@ fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, ful
 plot.aic.diff <- function(res){
   #'@param res: The result of a fit.analysis() run.
   #'@return: A set of AIC differences.
-  
+
   #Check inputs
   if(!all(c("NullFit", "FullFit")%in%colnames(res))){
     stop("NullFit and FullFit are not names in result output. Ensure that fit.analysis() was run to
          obtain the result plotted here.")
   }
-  
-  #Get AIC info and create plot 
+
+  #Get AIC info and create plot
   null.aic <- sapply(res$NullFit, function(x){x$aic})
   full.aic <- sapply(res$FullFit, function(x){x$aic})
   aic.diff <- full.aic-null.aic
-  
+
   par(mfrow=c(2, 1), mar = c(0,4.2,1,2), cex.lab=1.2)
   plot(x=res$SetID, type="n", ylim=c(0, max(c(null.aic,full.aic))),
        xlab="", ylab="Akaike's Information Criterion", xaxt='n')
-  
+
   #Background
   bg <- par('usr')
   rect(xl=bg[1], yb=bg[3], xr=bg[2], yt=bg[4], col='blanchedalmond', border=NA)
diff --git a/packaging/analysis.R b/packaging/analysis.R
index 388323f..37ac59f 100644
--- a/packaging/analysis.R
+++ b/packaging/analysis.R
@@ -1,12 +1,11 @@
 #' Runs a given clustering method over a range of parameters values.
 multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, rangeID=
0) {
-  #'@param t: The input tree file, annotated with vertex and edge information
-  #'@param param.list: A named list of parameter sets. Each must correspond to the cl
ustering method used. 
+  #'@param param.list: A named list of parameter sets. Each must correspond to the cl
ustering method used.
   #'@param rangeID: If several different parameter ranges are used, the rangeID can i
dentify them.
   #'@param mc.cores: A parallel option
   #'@param verbose: An output monitoring option
   #'@return: A larger data.table with parameter sets noted
-  
+
   #Cluster method loop
   cluster.range <- parallel::mclapply(1:length(param.list), function(i){
     x <- param.list[[i]]
@@ -17,10 +16,10 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, 
verbose=T, ran
     }
     do.call(cluster.method, x)
   }, mc.cores=mc.cores)
-  
+
   cluster.range <- dplyr::bind_rows(cluster.range)
   suppressWarnings(cluster.range[,"RangeID" := rangeID])
-  
+
   return(cluster.range)
 }

@@ -28,7 +27,7 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, ve
rbose=T, ran
 #' The AIC obtained is based on a predictive model of cluster growth
 #' NOTE: The default additional parameter for this analysis is "Time". This may or ma
y not be a row in inputted cluster.range data
 #' The default outcome variable is growth. This generally means that this function is
 expecting a cluster with annotated growth data.
-fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.for
mula=Growth~Size+Time, 
+fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.for
mula=Growth~Size+Time,
                          predictor.model=function(f, x){glm(formula=f, data = x, fami
ly="poisson")},
ArtPoon commented 3 years ago

Looks like all of these are trivial changes, e.g., whitespace cleanup and removal of extraneous documentation for a deprecated function argument.