PoonLab / bayroot

Bayesian root-to-tip regression for dating reservoir sequences
MIT License
5 stars 3 forks source link

Error when running example code #27

Open ArtPoon opened 5 months ago

ArtPoon commented 5 months ago

I'm running through the example code in the README:

setwd("~/git/bayroot")
source("bayroot.R")
require(ape)
require(lubridate)

# 50 trees generated from a simulation of cell dynamics
trees <- read.tree("data/testdata-latent2.nwk")
times <- read.csv("data/testdata-latent2.csv")

# export one of the replicates to new files
rep <- 1
tf <- "data/example.nwk"
write.tree(trees[[rep]], file=tf)

cf <- "data/example.csv"
int.times <- times$int.time[times$rep==rep]
names(int.times) <- times$sample[times$rep==rep]
write.csv(as.data.frame(int.times), file=cf)

phy <- read.tree(tf)
settings <- list(
  seq.len=1233,  # AY772699
  format="%Y-%m-%d",
  # we arbitrarily assigned 2000-01-01 as the actual origin date
  mindate=as.Date("1999-12-01"),  # one month before origin
  maxdate=as.Date("2000-02-01"),  # one month after origin
  meanlog=-5, sdlog=2,  # hyperparameters for lognormal prior on rate
  root.delta=0.01,  # proposal on root location
  date.sd=10,  # days, origin proposal
  rate.delta=0.01,  # proposal on clock rate
  censored = phy$tip.label[grepl("^Latent", phy$tip.label)]
)

source("scripts/validate.R")  # loads package chemCal
set.seed(1)  # make this reproducible
res <- fit.bayroot(tf, cf, settings=settings, nstep=2e4, skip=20, 
                   max.date=as.Date("2001-04-01"), echo=TRUE)
plot(res$chain, burnin=100)

rt <- root2tip(phy)
true.vals <- get.true.values(rt, cf)
est <- get.estimates(res, rt)

This is throwing the followng error on my system (Ubuntu 22.04, R version 4.3.2):

> rt <- root2tip(phy)
Error in cor(y, x) : incompatible dimensions
In addition: There were 50 or more warnings (use warnings() to see the first 50)

I get the same error if I manually step through the root2tip code:

>   phy <- multi2di(phy)
>   tip.dates <- sapply(phy$tip.label, function(x) as.integer(strsplit(x, "_")[[1]][3]))
>   censored <- ifelse(grepl("Active", names(tip.dates)), tip.dates, NA)
>   names(censored) <- names(tip.dates)
>   rooted <- ape::rtt(t=phy, tip.dates=censored)
Error in cor(y, x) : incompatible dimensions
In addition: There were 50 or more warnings (use warnings() to see the first 50)

This indicates that the problem is associated with the rtt function. However if I step through the rtt code, no error gets thrown:

opt.tol <- .Machine$double.eps^0.25
objective <- function(x, y) {
  cor(y, x)
}

ut <- unroot(phy)
dist <- dist.nodes(ut)[, seq_len(Ntip(ut))]
X <- matrix(1, Ntip(ut), 2)
f <- function(x, parent, child) {
  edge.dist <- x * dist[parent, ] + (1 - x) * dist[child, 
  ]
  objective(tip.dates, edge.dist)
}
obj.edge <- apply(ut$edge, 1, function(e) {
  opt.fun <- function(x) f(x, e[1], e[2])
  optimize(opt.fun, c(0, 1), maximum = TRUE, tol = opt.tol)$objective
  })

best.edge <- which.max(obj.edge)
best.edge.parent <- ut$edge[best.edge, 1]
best.edge.child <- ut$edge[best.edge, 2]
best.edge.length <- ut$edge.length[best.edge]

opt.fun <- function(x) f(x, best.edge.parent, best.edge.child)
best.pos <- optimize(opt.fun, c(0, 1), maximum = TRUE, tol = opt.tol)$maximum
new.root <- list(edge = matrix(c(2L, 1L), 1, 2), tip.label = "new.root", 
                 edge.length = 1, Nnode = 1L, root.edge = 1)
class(new.root) <- "phylo"
ut <- bind.tree(ut, new.root, where = best.edge.child, position = best.pos * 
                  best.edge.length)
ut <- collapse.singles(ut)
ut <- root(ut, "new.root")

rooted <- drop.tip(ut, "new.root")
ekankaka commented 5 months ago

In rtt(t, tip.dates, ncpu = 1, objective = correlation, opt.tol = .Machine$double.eps^0.25), could changing the measure for goodness of fit resolve this problem - e.g. objective="rms" or objective="rsquared?