Closed trinker closed 12 years ago
Right now v.outer works with matrix
data.frame
or unequal length list
.
p_load(qdap)
proximaty <- function(x, y) {
act <- which(x != 0)
len <- length(act)
comp <- which(y != 0)
nums <- sapply(list(x, y), is.numeric)
if (sum(nums) != 2) {
stop("all variables must be numeric")
}
if (identical(act, integer(0)) | identical(comp, integer(0))) {
return(0)
}
lapply(1:len, function(i) {
ming <- act[i] - comp
min(abs(ming))
}
)
}
cm_describe <- function(code, grouping.var = NULL) {
if (!is.null(grouping.var)) {
if (is.list(grouping.var)) {
m <- unlist(as.character(substitute(grouping.var))[-1])
m <- sapply(strsplit(m, "$", fixed = TRUE),
function(x) x[length(x)])
NAME <- paste(m, collapse = "&")
} else {
G <- as.character(substitute(grouping.var))
NAME <- G[length(G)]
}
cname <- strsplit(as.character(substitute(code)), "&")
NAME <- paste0(cname[[length(cname)]], "&", NAME)
group.var <- if (is.list(grouping.var) & length(grouping.var)>1) {
apply(data.frame(grouping.var), 1, function(x){
if (any(is.na(x))){
NA
} else {
paste(x, collapse = ".")
}
}
)
} else {
grouping.var
}
v <- do.call(data.frame, rle(paste2(list(code, group.var))))
} else {
v <- do.call(data.frame, rle(code))
}
v$end<- cumsum(v[, 1])
colnames(v)[1] <- "duration"
v$start <- c(0, c(v$end)[-c(length(v$end))])
v$center <- (v$start + v$end)/2
v2 <- v[, c("values", "center", "duration", "start", "end")]
if (!is.null(grouping.var)) {
nl <- if (is.list(grouping.var)) {
grouping.var
} else {
list(grouping.var)
}
L2 <- lapply(1:(length(nl) + 1), function(i) {
x <- strsplit(as.character(v2[, "values"]), "\\.")
sapply(1:length(x), function(j)x[[j]][i])
}
)
v3 <- data.frame(do.call(cbind, L2))
colnames(v3) <- unlist(strsplit(NAME, "\\&"))
v2 <- data.frame(v3, v2[, -1, drop=FALSE])
} else {
cname <- strsplit(as.character(substitute(code)), "&")
colnames(v2)[1] <- cname[[length(cname)]]
}
return(v2)
}
#next is to make a distance function
#may not be able to supply the grouping vars to cm_describe directly
cm_bidist <- function(code_x, code_y, grouping.var = NULL) {
x <- cm_describe(code_x, grouping.var)
x <- x[as.numeric(as.character(x[, "code_x"])) > 0, ]
y <- cm_describe(code_y, grouping.var)
y <- y[as.numeric(as.character(y[, "code_y"])) > 0, ]
Dnc <- sapply(1:nrow(x), function(i) {
yind <- 1:nrow(y)
if (sum(y[, "start"] >= x[i, "start"] & y[, "start"] <= x[i, "end"]) > 0 |
sum(y[, "end"] >= x[i, "start"] & y[, "end"] <= x[i, "start"]) > 0 |
sum(sapply(yind, function(j) {
y[j, "start"] < x[i, "start"] & y[j, "end"] > x[i, "end"]
}
)) > 0) {
return(0)
}
sdif <- c(y[, "start"], y[, "end"]) - x[i, "start"]
edif <- c(y[, "start"], y[, "end"]) - x[i, "end"]
min(abs(c(sdif, edif)))
}
)
Dc <- sapply(1:nrow(x), function(i) {
FUN <- function(xstart, xend, ystart){
max(0, min(ystart[ystart > xstart] - xend))
}
suppressWarnings(FUN(x[i, "start"], x[i, "end"], y[, "start"]))
}
)
Dc[is.infinite(Dc)] <- NA
list(associated_distance = Dnc, mean.sd_assoc_dist = c(mean(Dnc), sd(Dnc)),
causal_distance = Dc, mean.sd_causal_dist = c(mean(na.omit(Dc)), sd(na.omit(Dc))))
}
#outers the above
#may have to use diags to put 0s in diagonals
#cm_distance <- function(..., grouping.var = NULL, causal = FALSE)
set.seed(12)
fart <- rbinom(50, 1, .5)
burp <- rbinom(50, 1, .3)
sneeze <- rbinom(50, 1, .5)
hiccup <- rbinom(50, 1, .3)
cm_bidist(fart, burp)
cm_bidist(burp, fart)[[2]][1]
L2 <- data.frame(fart, burp, sneeze, hiccup)
v.outer(L2, cdm1)
L <- list(fart, burp, sneeze, hiccup) #make it work with lists
names(L) <- qcv(fart, burp, sneeze, hiccup)
v.outer(L, cdm1)
cdm1m <- function(x, y) cm_bidist(x, y)[[2]][1]
v.outer(L2, cdm1m)
cdm1s <- function(x, y) cm_bidist(x, y)[[2]][2]
v.outer(L2, cdm1s)
cdm1cm <- function(x, y) cm_bidist(x, y)[[4]][1]
v.outer(L2, cdm1cm)
cdm1cs <- function(x, y) cm_bidist(x, y)[[4]][2]
v.outer(L2, cdm1cs)
Uploaded cm_bidist, cm_describe and proximity: Here's the latest script:
p_load(qdap)
set.seed(12)
fart <- rbinom(50, 1, .5)
burp <- rbinom(50, 1, .3)
sneeze <- rbinom(50, 1, .5)
hiccup <- rbinom(50, 1, .3)
cm_bidist(fart, burp)
cm_bidist(burp, fart)[[2]][1]
cdm1 <- function(x, y) cm_bidist(x, y)[[2]][1]
(L2 <- data.frame(fart, burp, sneeze, hiccup))
v.outer(L2, cdm1)
(L <- list(fart, burp, sneeze, hiccup) #make it work with lists)
names(L) <- qcv(fart, burp, sneeze, hiccup)
v.outer(L, cdm1)
cdm1m <- function(x, y) cm_bidist(x, y)[[2]][1]
v.outer(L2, cdm1m)
cdm1s <- function(x, y) cm_bidist(x, y)[[2]][2]
v.outer(L2, cdm1s)
cdm1cm <- function(x, y) cm_bidist(x, y)[[4]][1]
v.outer(L2, cdm1cm)
cdm1cs <- function(x, y) cm_bidist(x, y)[[4]][2]
v.outer(L2, cdm1cs)
#the start:
cm_distance <- function(..., grouping.var = NULL, causal = FALSE, digits = 3)
#has to grab the start and end columns and create vectors that can be fed to bidist
#create a list of mean, sd and combined and a class of cm.distance, print the combined
Create a function that takes two columns (start and end times) and makes a dummy coded vector representing this:
cm_se2vect <- function(start, end, n.words = NULL) {
dat <- data.frame(start =start, end = end)
if (is.null(n)){
n <- max(dat$end) - 1
}
x <- rep(0, n)
x[unlist(lapply(1:nrow(dat), function(i) dat$start[i]:(dat$end[i]-1)))] <- 1
x
}
dat <- data.frame(start=c(2, 7, 12, 15), end=c(3, 10, 13, 17))
cm_se2vect(dat$start, dat$end)
put in a safety for if start and end are passed that increase and decrease that throws up a stop
Current development of cm_distance function:
p_load(qdap)
foo <- list(
AA = qcv(terms='1'),
BB = qcv(terms='1:2, 3:10'),
CC = qcv(terms='1:9, 100:150')
)
foo2 <- list(
AA = qcv(terms='40'),
BB = qcv(terms='50:90'),
CC = qcv(terms='60:90, 100:120, 150'),
DD = qcv(terms='')
)
dat <- cm_range2long(foo, foo2, v.name = "time")
cm_distance <- function(dataframe, time.var = NULL, code.var = "code",
causal = FALSE, start.var = "start", end.var = "end", digits = 3) {
if (!is.null(time.var)) {
L1 <- split(dataframe, dataframe[, time.var])
} else {
L1 <- dataframe
}
L2 <- lapply(L1, function(x) split(x, x[, code.var]))
NMS <- lapply(L2, names)
NMS <- unlist(lapply(seq_along(NMS), function(i) {
paste(names(NMS)[i], NMS[[i]], sep=".")
}))
lens <- sapply(L2, length)
mlens <- sapply(L2, function(x) {
max(do.call(rbind, x)[, end.var])
})
nt <- rep(mlens, lens)
v <- unlist(L2, recursive=FALSE)
L3 <- lapply(seq_along(v), function(i) {
cm_se2vect(v[[i]][, start.var], v[[i]][, end.var])
})
L4 <- lapply(seq_along(L3), function(i){
c(L3[[i]], rep(0, nt[i] - length(L3[[i]])))
})
dat <- do.call(cbind, L4)
colnames(dat) <- NMS
#various parmeters for casual here (sd, mean, pasted, and mean/sd provided
#make a print method (mean/sd)
cdm1 <- function(x, y) cm_bidist(x, y)[[2]][1]
v.outer(dat, cdm1, digits = digits)
#lapply(L1, )
}
cm_distance(dat, time.var = "time")
finished
Work in progress: