SixiangHu / DataMan

R package for data cleaning, preliminary data analysis and modeling assessing with visualisation.
3 stars 0 forks source link

Need a prediction compare view #37

Closed SixiangHu closed 6 years ago

SixiangHu commented 8 years ago
library(RColorBrewer)

modelCompPlot <- function(x,act,pred,by=NULL,weights=NULL,newGroupNum=10){
  if (is.null(x)) stop("x provided is blank.")
  if (is.null(act)) stop("act provided is blank.")
  if (is.null(pred)) stop("pred provided is blank.")

  str_pred <- NULL
  num_pred <- NULL
  if (length(x) != length(act)) stop("x and act don't have the same length")
  if (is.vector(pred)){
    str_pred <- "pred"
    num_pred <- 1
    if(length(x) != length(pred)) stop("x and pred don't have the same length")
  }
  else {
    str_pred <- colnames(pred)
    if (is.null(str_pred)) str_pred <- paste("Pred",1:nrow(pred),sep="")
    num_pred <- ncol(pred)
    if(length(x) != nrow(pred)) stop("x and pred don't have the same length")
  }

  if(is.null(weights)) {weights <- rep(1,length(x))}

  #New Group for data which has too much levels.
  if ( (is.numeric(x) || is.integer(x)) && nlevels(as.factor(x))>100 ) {
    if ( is.null(newGroupNum) ) newGroupNum <- 10

    new_band <- seq(min(x, na.rm = TRUE),max(x, na.rm = TRUE),length.out=newGroupNum)
    x <- as.character(cut(x,new_band,include.lowest = TRUE))
  }

  if(!is.null(by)){
    data.plot <- data.table::as.data.table(as.data.frame(cbind(xvar=x,by=by,act,pred,weights),stringsAsFactors=FALSE))
    setkey(data.plot,xvar,by)

    dp_name_str <- c("act",str_pred,"weights")
    data.plot <- data.plot[,lapply(.SD,as.numeric),by=list(xvar,by),.SDcols=dp_name_str]
    data.agg  <- data.plot[,lapply(.SD,weighted.mean,w=weights),by=list(xvar,by),.SDcols=dp_name_str]

    p1 <- rbokeh::figure(xlab="x",ylab="",height=500) %>% 
      rbokeh::ly_lines(xvar,act,color=by,type=list(2),
                      width=2,data=data.agg) 

    for (i in num_pred){
      p1 <- p1 %>% rbokeh::ly_lines(xvar,deparse(substitute(str_pred[i])),color=by,type=list(2),
                                    width=2,data=data.agg) 
    }
  }
  else {
    data.plot <- data.table::as.data.table(as.data.frame(cbind(xvar=x,act,pred,weights),stringsAsFactors=FALSE))
    setkey(data.plot,xvar)

    dp_name_str <- c("act",str_pred,"weights")
    data.plot <- data.plot[,lapply(.SD,as.numeric),by=xvar,.SDcols=dp_name_str]
    data.agg  <- data.plot[,lapply(.SD,weighted.mean,w=weights),by=xvar,.SDcols=dp_name_str]

    p1 <- rbokeh::figure(xlab="x",ylab="",width=700,height=800) %>% 
      rbokeh::ly_lines(xvar,act,color="#CC3399",
                       width=2,data=data.agg,legend = "Actual") 

    str_col <- brewer.pal(max(num_pred,3), "Spectral")
    for (i in 1:num_pred){
      print(str_pred[i])
      p1 <- p1 %>% rbokeh::ly_lines(xvar,str_pred[i],color=str_col[i],width=2,data=data.agg,
                                    legend=str_pred[i]) 
    }
  }

  p1
}
SixiangHu commented 8 years ago

Check the issue on #38 which may have similar idea.

SixiangHu commented 6 years ago

this has the same functionality as compPlot function