Closed bbbruce closed 2 years ago
We do not see any error with this example on our side. This is a pretty basic example that we test all the time.
First, can you confirm which version of randomForestSRC you are using?
3.1.1 on Windows 10 compiled from source (although this is an R file so doubt that should be related to issue)
> library(randomForestSRC)
randomForestSRC 3.1.1
Type rfsrc.news() to see new features, changes, and bug fixes.
> data(pbc, package = "randomForestSRC")
> pbc.obj <- rfsrc(Surv(days, status) ~ ., pbc)
Error in parseFormula(formula, data, ytry) :
Survival formula incorrectly specified.
> randomForestSRC:::parseFormula
function (f, data, ytry = NULL, coerce.factor = NULL)
{
if (!inherits(f, "formula")) {
stop("'formula' is not a formula object.")
}
if (is.null(data)) {
stop("'data' is missing.")
}
if (!is.data.frame(data)) {
stop("'data' must be a data frame.")
}
fmly <- all.names(f, max.names = 1e+07)[2]
all.names <- all.vars(f, max.names = 1e+07)
yvar.names <- all.vars(formula(paste(as.character(f)[2],
"~ .")), max.names = 1e+07)
yvar.names <- yvar.names[-length(yvar.names)]
subj.names <- NULL
coerce.factor.org <- coerce.factor
coerce.factor <- vector("list", 2)
names(coerce.factor) <- c("xvar.names", "yvar.names")
if (!is.null(coerce.factor.org)) {
coerce.factor$yvar.names <- intersect(yvar.names, coerce.factor.org)
if (length(coerce.factor$yvar.names) == 0) {
coerce.factor$yvar.names <- NULL
}
coerce.factor$xvar.names <- intersect(setdiff(colnames(data),
yvar.names), coerce.factor.org)
}
if (fmly == "Surv") {
if ((sum(is.element(yvar.names, names(data))) != 2) &&
(sum(is.element(yvar.names, names(data))) != 4)) {
stop("Survival formula incorrectly specified.")
}
else {
if (sum(is.element(yvar.names, names(data))) == 4) {
subj.names <- yvar.names[1]
yvar.names <- yvar.names[-1]
}
}
family <- "surv"
ytry <- 0
}
else if ((fmly == "Multivar" || fmly == "cbind") &&
length(yvar.names) > 1) {
if (sum(is.element(yvar.names, names(data))) < length(yvar.names)) {
stop("Multivariate formula incorrectly specified: y's listed in formula are not in data.")
}
Y <- data[, yvar.names, drop = FALSE]
logical.names <- unlist(lapply(Y, is.logical))
if (sum(logical.names) > 0) {
Y[, logical.names] <- 1 * Y[, logical.names, drop = FALSE]
}
if ((sum(unlist(lapply(Y, is.factor))) + length(coerce.factor$yvar.names)) ==
length(yvar.names)) {
family <- "class+"
}
else if ((sum(unlist(lapply(Y, is.factor))) + length(coerce.factor$yvar.names)) ==
0) {
family <- "regr+"
}
else if (((sum(unlist(lapply(Y, is.factor))) + length(coerce.factor$yvar.names)) >
0) && ((sum(unlist(lapply(Y, is.factor))) + length(coerce.factor$yvar.names)) <
length(yvar.names))) {
family <- "mix+"
}
else {
stop("y-outcomes must be either real or factors in multivariate forests.")
}
if (!is.null(ytry)) {
if ((ytry < 1) || (ytry > length(yvar.names))) {
stop("invalid value for ytry: ", ytry)
}
}
else {
ytry <- length(yvar.names)
}
}
else if (fmly == "Unsupervised") {
if (length(yvar.names) != 0) {
stop("Unsupervised forests require no y-responses")
}
family <- "unsupv"
yvar.names <- NULL
temp <- gsub(fmly, "", as.character(f)[2])
temp <- gsub("\\(|\\)", "", temp)
ytry <- as.integer(temp)
if (is.na(ytry)) {
ytry <- 1
}
else {
if (ytry <= 0) {
stop("Unsupervised forests require positive ytry value")
}
}
}
else {
if (sum(is.element(yvar.names, names(data))) != 1) {
stop("formula is incorrectly specified.")
}
Y <- data[, yvar.names]
if (is.logical(Y)) {
Y <- as.numeric(Y)
}
if (!(is.factor(Y) | is.numeric(Y))) {
stop("the y-outcome must be either real or a factor.")
}
if (is.factor(Y) || length(coerce.factor$yvar.names) ==
1) {
family <- "class"
}
else {
family <- "regr"
}
ytry <- 1
}
return(list(all.names = all.names, family = family, subj.names = subj.names,
yvar.names = yvar.names, ytry = ytry, coerce.factor = coerce.factor))
}
<bytecode: 0x000001cf15a0eee8>
<environment: namespace:randomForestSRC>
Problem solved... was simultaneously using a package that imports the formula.tools package which contains an as.character.formula that behaves differently than base R's as.character.default (as no as.character.formula exists in base R) and does not report masking another function when loaded because there was no such function before. Sorry about thinking the issue was on your side, but hopefully if others experience this behavior this will help them figure it out!
Explicitly calling base::as.character.default would be a simple change on your side that would prevent another package from interfering in this way if you were willing to make that change.
From help of package:
I think problem arises in parseFormula line 14
as.character(f)[2] should be as.character(f[2])