cchato@Erasmas:~/git/tn$ git diff --cached
diff --git a/packaging/analysis.R b/packaging/analysis.R
index 388323f..37ac59f 100644
--- a/packaging/analysis.R
+++ b/packaging/analysis.R
@@ -1,12 +1,11 @@
#' Runs a given clustering method over a range of parameters values.
multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, rangeID=0) {
- #'@param t: The input tree file, annotated with vertex and edge information
- #'@param param.list: A named list of parameter sets. Each must correspond to the clustering method used.
+ #'@param param.list: A named list of parameter sets. Each must correspond to the clustering method used.
#'@param rangeID: If several different parameter ranges are used, the rangeID can identify them.
#'@param mc.cores: A parallel option
#'@param verbose: An output monitoring option
#'@return: A larger data.table with parameter sets noted
-
+
#Cluster method loop
cluster.range <- parallel::mclapply(1:length(param.list), function(i){
x <- param.list[[i]]
@@ -17,10 +16,10 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, ran
}
do.call(cluster.method, x)
}, mc.cores=mc.cores)
-
+
cluster.range <- dplyr::bind_rows(cluster.range)
suppressWarnings(cluster.range[,"RangeID" := rangeID])
-
+
return(cluster.range)
}
@@ -28,7 +27,7 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, ran
#' The AIC obtained is based on a predictive model of cluster growth
#' NOTE: The default additional parameter for this analysis is "Time". This may or may not be a row in inputted cluster.range data
#' The default outcome variable is growth. This generally means that this function is expecting a cluster with annotated growth data.
-fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.formula=Growth~Size+Time,
+fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.formula=Growth~Size+Time,
predictor.model=function(f, x){glm(formula=f, data = x, family="poisson")},
predictor.transformations=list("Time"=function(x){mean(x)})) {
#' @param cluster.data: Inputted set(s) of cluster data. May or may not be sorted into ranges
@@ -37,7 +36,7 @@ fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, ful
#' @param full.formula: The full model for the prediction of growth. This will be compared to a null Growth~Size model
#' @param predictor.transformations: A named list of transformation functions for each predictor variable
#' @return: A data.table of analysis results. Several important summary values such as null and full AIC are proposed here
-
+
#Check inputs
predictors <- names(predictor.transformations)
setIDs <- unique(cluster.data[,SetID])
@@ -56,24 +55,24 @@ fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, ful
warning("No Growth information from clusters. By default this will be set to 0 for all sets")
cluster.data[,"Growth" := 0]
}
-
+
#Transform cluster data for modelling based on inputs
model.data <- cluster.data[, c("Header", "Size", "Growth", "SetID", "RangeID")]
model.data[, (predictors) := lapply(predictors, function(x){
sapply(cluster.data[, get(x)], function(z){(predictor.transformations[[x]])(z)})
})]
-
+
#Obtain fit data for each cluster set
cluster.analysis <- dplyr::bind_rows(
parallel::mclapply(setIDs, function(id) {
DT <- model.data[SetID==id, ]
suppressWarnings(null.fit <- predictor.model(null.formula, DT))
suppressWarnings(full.fit <- predictor.model(full.formula, DT))
-
+
res <- data.table::data.table("NullFit"=list(null.fit), "FullFit"=list(full.fit), "SetID"=DT[1,SetID], "RangeID"=DT[1,RangeID])
return(res)
}, mc.cores=mc.cores))
-
+
return(cluster.analysis)
}
@@ -83,22 +82,22 @@ fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, ful
plot.aic.diff <- function(res){
#'@param res: The result of a fit.analysis() run.
#'@return: A set of AIC differences.
-
+
#Check inputs
if(!all(c("NullFit", "FullFit")%in%colnames(res))){
stop("NullFit and FullFit are not names in result output. Ensure that fit.analysis() was run to
obtain the result plotted here.")
}
-
- #Get AIC info and create plot
+
+ #Get AIC info and create plot
null.aic <- sapply(res$NullFit, function(x){x$aic})
full.aic <- sapply(res$FullFit, function(x){x$aic})
aic.diff <- full.aic-null.aic
-
+
par(mfrow=c(2, 1), mar = c(0,4.2,1,2), cex.lab=1.2)
plot(x=res$SetID, type="n", ylim=c(0, max(c(null.aic,full.aic))),
xlab="", ylab="Akaike's Information Criterion", xaxt='n')
-
+
#Background
bg <- par('usr')
rect(xl=bg[1], yb=bg[3], xr=bg[2], yt=bg[4], col='blanchedalmond', border=NA)
diff --git a/packaging/analysis.R b/packaging/analysis.R
index 388323f..37ac59f 100644
--- a/packaging/analysis.R
+++ b/packaging/analysis.R
@@ -1,12 +1,11 @@
#' Runs a given clustering method over a range of parameters values.
multi.cluster <- function(cluster.method, param.list, mc.cores=1, verbose=T, rangeID=
0) {
- #'@param t: The input tree file, annotated with vertex and edge information
- #'@param param.list: A named list of parameter sets. Each must correspond to the cl
ustering method used.
+ #'@param param.list: A named list of parameter sets. Each must correspond to the cl
ustering method used.
#'@param rangeID: If several different parameter ranges are used, the rangeID can i
dentify them.
#'@param mc.cores: A parallel option
#'@param verbose: An output monitoring option
#'@return: A larger data.table with parameter sets noted
-
+
#Cluster method loop
cluster.range <- parallel::mclapply(1:length(param.list), function(i){
x <- param.list[[i]]
@@ -17,10 +16,10 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1,
verbose=T, ran
}
do.call(cluster.method, x)
}, mc.cores=mc.cores)
-
+
cluster.range <- dplyr::bind_rows(cluster.range)
suppressWarnings(cluster.range[,"RangeID" := rangeID])
-
+
return(cluster.range)
}
@@ -28,7 +27,7 @@ multi.cluster <- function(cluster.method, param.list, mc.cores=1, ve
rbose=T, ran
#' The AIC obtained is based on a predictive model of cluster growth
#' NOTE: The default additional parameter for this analysis is "Time". This may or ma
y not be a row in inputted cluster.range data
#' The default outcome variable is growth. This generally means that this function is
expecting a cluster with annotated growth data.
-fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.for
mula=Growth~Size+Time,
+fit.analysis <- function(cluster.data, mc.cores=1, null.formula=Growth~Size, full.for
mula=Growth~Size+Time,
predictor.model=function(f, x){glm(formula=f, data = x, fami
ly="poisson")},