hmatsu1226 / HyPhyTree

1 stars 2 forks source link

scope wrong for example scripts #1

Open nick-youngblut opened 3 years ago

nick-youngblut commented 3 years ago

It appears that certain objects R not passed to functions in the example scripts, resulting in errors. For example:

Poincare_dist <- function(u,v){
    if(sum(u**2)>=1 || sum(v**2)>=1){
        return(Inf)
    }else{
        return(acosh(1+2*(sum((u-v)**2))/(1-sum(u**2))/(1-sum(v**2))))
    }
}

objfn_H1 <- function(tmpz){
    ret <- 0
    for(i in 1:Nleaves){
        a <- Poincare_dist(Z1[i,], tmpz)
        ret <- ret + (a - Dall[i,Nleaves+n])**2
    }
    return(ret)
}

objfn_H2 <- function(tmpz){
    ret <- 0
    for(i in 1:Nleaves){
        a <- Poincare_dist(Z2[i,], tmpz)
        ret <- ret + (log(cosh(a)) - Dall[i,Nleaves+n])**2
    }
    return(ret)
}

objfn_MDS <- function(tmpz){
    ret <- 0
    for(i in 1:Nleaves){
        a <- sqrt(sum((X1.mds$points[i,]-tmpz)**2))
        ret <- ret + (a - Dall[i,Nleaves+n])**2
    }
    return(ret)
}

embed_tree = function(tree, Ms=c(10,20)){
    Nleaves <- length(tree$tip.label)

    tree$mrca <- mrca(tree)
    Dall <- dist.nodes(tree)
    Dleaves <- cophenetic(tree)

    Nall <- dim(Dall)[1]
    X1 <- Dleaves
    X2 <- acosh(exp(Dleaves))

    MSE_H1 <- rep(0, length(Ms))
    MSE_H2 <- rep(0, length(Ms))
    MSE_MDS <- rep(0, length(Ms))

    for(M in Ms){
        #general hyperbolic embeddings
        X1.hydra <- hydraPlus(X1, dim=M, curvature=1, alpha=1, equi.adj=0, control=list(return.dist=1, isotropic.adj=FALSE))
        Z1 <- X1.hydra$r * X1.hydra$directional

        Z1inter <- matrix(0,Nall - Nleaves,M)
        for(n in 1:(Nall-Nleaves)){
            tmp <- optim(rep(0,M),objfn_H1,gr=NULL,method="BFGS")
            Z1inter[n,] <- tmp$par
        }

        Z1inter_dist <- matrix(0, Nall, Nall)
        for(i in 1:Nall){
            for(j in 1:Nall){
                if(i > Nleaves){
                    tmpz1 <- Z1inter[i-Nleaves,]
                }else{
                    tmpz1 <- Z1[i,]
                }

                if(j > Nleaves){
                    tmpz2 <- Z1inter[j-Nleaves,]
                }else{
                    tmpz2 <- Z1[j,]
                }

                Z1inter_dist[i,j] <- Poincare_dist(tmpz1, tmpz2)
            }
        }

        #our hyperbolic embeddings
        X2.hydra <- hydraPlus(X2, dim=M, curvature=1, alpha=1, equi.adj=0, control=list(return.dist=1, isotropic.adj=FALSE))
        Z2 <- X2.hydra$r * X2.hydra$directional

        Z2inter <- matrix(0,Nall-Nleaves,M)
        for(n in 1:(Nall-Nleaves)){
            tmp <- optim(rep(0,M),objfn_H2,gr=NULL,method="BFGS")
            Z2inter[n,] <- tmp$par
        }

        Z2inter_dist <- matrix(0, Nall, Nall)
        for(i in 1:Nall){
            for(j in 1:Nall){
                if(i > Nleaves){
                    tmpz1 <- Z2inter[i-Nleaves,]
                }else{
                    tmpz1 <- Z2[i,]
                }

                if(j > Nleaves){
                    tmpz2 <- Z2inter[j-Nleaves,]
                }else{
                    tmpz2 <- Z2[j,]
                }

                Z2inter_dist[i,j] <- Poincare_dist(tmpz1, tmpz2)
            }
        }

        #Euclidean embeddings
        X1.mds <- sammon(X1, k=M)

        Zmdsinter <- matrix(0,Nall-Nleaves,M)
        for(n in 1:(Nall-Nleaves)){
            tmp <- optim(rep(0,M),objfn_MDS,gr=NULL,method="BFGS")
            Zmdsinter[n,] <- tmp$par
        }

        Zmdsinter_dist <- matrix(0, Nall, Nall)
        for(i in 1:Nall){
            for(j in 1:Nall){
                if(i > Nleaves){
                    tmpz1 <- Zmdsinter[i-Nleaves,]
                }else{
                    tmpz1 <- X1.mds$points[i,]
                }

                if(j > Nleaves){
                    tmpz2 <- Zmdsinter[j-Nleaves,]
                }else{
                    tmpz2 <- X1.mds$points[j,]
                }

                Zmdsinter_dist[i,j] <- sqrt(sum((tmpz1-tmpz2)**2))
            }
        }

        #MSE
        MSE_H1[which(Ms==M)] <- sum((c(Dall[upper.tri(Dall)])-c(Z1inter_dist[upper.tri(Dall)]))**2) / choose(Nall,2)
        MSE_H2[which(Ms==M)] <- sum((c(Dall[upper.tri(Dall)])-c(log(cosh(Z2inter_dist[upper.tri(Dall)]))))**2) / choose(Nall,2)
        MSE_MDS[which(Ms==M)] <- sum((c(Dall[upper.tri(Dall)])-c(Zmdsinter_dist[upper.tri(Dall)]))**2) / choose(Nall,2)
    }

    output <- rbind(MSE_MDS, MSE_H1, MSE_H2)
    rownames(output) <- c("MDS","H1","H2")
    colnames(output) <- Ms
    #write.csv(output, paste("out/MSE_Dint_",nt,".csv",sep=""))
}

embed_tree(ape::rtree(10))

produces the output:

iter   10 value 0.021336
iter   20 value 0.001870
iter   30 value 0.000279
iter   40 value 0.000080
iter   50 value 0.000034
iter   60 value 0.000019
iter   70 value 0.000013
iter   80 value 0.000009
iter   90 value 0.000006
iter  100 value 0.000004
iter  110 value 0.000003
iter  120 value 0.000002
iter  130 value 0.000001
iter  140 value 0.000001
iter  150 value 0.000000
iter  160 value 0.000000
iter  170 value 0.000000
final  value 0.000000 
converged
Error in Poincare_dist(Z1[i, ], tmpz): object 'Z1' not found
Traceback:

1. embed_tree(ape::rtree(10))
2. optim(rep(0, M), objfn_H1, gr = NULL, method = "BFGS")   # at line 58 of file <text>
3. (function (par) 
 . fn(par, ...))(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
4. fn(par, ...)
5. Poincare_dist(Z1[i, ], tmpz)   # at line 12 of file <text>

Z1 isn't actually passed to objfn_H1() in optim(rep(0,M),objfn_MDS,gr=NULL,method="BFGS")

nick-youngblut commented 3 years ago

Also, why hard-code Nleaves instead of getting it from the tree object via length(tree$tip.label)?

hmatsu1226 commented 3 years ago

I think hydraPlus is not working well. The hydraPlus embedd the distance matrix into M dimensional space (now M=10), but M=10 is too large compared to the number of leaves in the tree you are giving, rtree(10). I think this may be the cause of error, so could you please check with M=3 to try?

hmatsu1226 commented 3 years ago

I investigated trees with outgroup in some situations, and Nleaves is length(tree$tip.label)-1 in this situation. The reason of hard-code Nleaves is a remnant of that. So, as pointed out, it would have been better to use length(tree$tip.label) in this case.

nick-youngblut commented 3 years ago

Yeah, the number of dimensions was too high, but the same error was generated with less dimensions. Passing Z1, Nleaves, and n to the intended function through optim() fixed the issue

hmatsu1226 commented 3 years ago

Sorry, I misread the situation. In the original script, those values are treated as global variables. It's not a complimentary implementation, but it's in the process of being improved through trial and error. I'm glad you were able to fix the error.