statnet / tergm

Fit, Simulate and Diagnose Models for Network Evolution Based on Exponential-Family Random Graph Models
Other
27 stars 8 forks source link

CMLE run produces simulation warning #25

Closed chad-klumb closed 3 years ago

chad-klumb commented 3 years ago
require(tergm)
data(samplk)
samp <- list(samplk1, samplk2, samplk3)
samp.fit <- tergm(samp ~ Form(~edges+mutual+cyclicalties+transitiveties) + 
                         Diss(~edges+mutual+cyclicalties+transitiveties),
                  estimate = "CMLE",
                  times = 1:3)

gives

Warning message:
In simulate_formula.network(object, nsim = nsim, seed = seed, ...,  :
  For dynamic simulation in ‘tergm’ you must pass ‘dynamic=TRUE’.  Attempting ‘ergm’ simulation instead...

Converting it to an error, the traceback is

> traceback()
24: doWithOneRestart(return(expr), restart)
23: withOneRestart(expr, restarts[[1L]])
22: withRestarts({
        .Internal(.signalCondition(simpleWarning(msg, call), msg, 
            call))
        .Internal(.dfltWarn(msg, call))
    }, muffleWarning = function() NULL)
21: .signalSimpleWarning("For dynamic simulation in ‘tergm’ you must pass ‘dynamic=TRUE’.  Attempting ‘ergm’ simulation instead...", 
        base::quote(simulate_formula.network(object, nsim = nsim, 
            seed = seed, ..., basis = attr(object, ".Basis"))))
20: warning("For dynamic simulation in ", sQuote("tergm"), " you must pass ", 
        sQuote("dynamic=TRUE"), ".  Attempting ", sQuote("ergm"), 
        " simulation instead...")
19: simulate_formula.network(object, nsim = nsim, seed = seed, ..., 
        basis = attr(object, ".Basis"))
18: simulate_formula(object, nsim = nsim, seed = seed, ..., basis = attr(object, 
        ".Basis"))
17: simulate.formula_lhs_network(object = NetSeries ~ Form(~edges + 
        mutual + cyclicalties + transitiveties) + Diss(~edges + mutual + 
        cyclicalties + transitiveties) + edges, nsim = 1, coef = from, 
        reference = reference, constraints = list(constraints, obs.constraints), 
        observational = FALSE, output = "ergm_state", verbose = max(verbose - 
            1, 0), control = gen_control(FALSE, "first"), return.args = "ergm_state")
16: stats::simulate(object = NetSeries ~ Form(~edges + mutual + cyclicalties + 
        transitiveties) + Diss(~edges + mutual + cyclicalties + transitiveties) + 
        edges, nsim = 1, coef = from, reference = reference, constraints = list(constraints, 
        obs.constraints), observational = FALSE, output = "ergm_state", 
        verbose = max(verbose - 1, 0), control = gen_control(FALSE, 
            "first"), return.args = "ergm_state")
15: eval(cl, parent.frame())
14: eval(cl, parent.frame())
13: simulate.formula(object, coef = from, nsim = 1, reference = reference, 
        constraints = list(constraints, obs.constraints), observational = FALSE, 
        output = "ergm_state", verbose = max(verbose - 1, 0), basis = basis, 
        control = gen_control(FALSE, "first"), ..., return.args = "ergm_state")
12: simulate(object, coef = from, nsim = 1, reference = reference, 
        constraints = list(constraints, obs.constraints), observational = FALSE, 
        output = "ergm_state", verbose = max(verbose - 1, 0), basis = basis, 
        control = gen_control(FALSE, "first"), ..., return.args = "ergm_state")
11: ergm.bridge.llr(form.aug, constraints = constraints, obs.constraints = obs.constraints, 
        from = coef.aug, to = coef.dind, basis = basis, target.stats = target.stats, 
        control = control, verbose = verbose)
10: ergm.bridge.dindstart.llk(formula, reference = reference, constraints = constraints, 
        obs.constraints = obs.constraints, coef = coef(object), target.stats = object$target.stats, 
        control = control.bridge, llkonly = FALSE, ...)
9: eval(substitute(expr), data, enclos = parent.frame())
8: eval(substitute(expr), data, enclos = parent.frame())
7: with.default(object, {
       if (!eval.loglik) 
           stop(NO_LOGLIK_MESSAGE)
       if (estimate == "MPLE" || (is.dyad.independent(object, term.options = control$term.options) && 
           is.null(object$sample) && !is.valued(object))) 
           structure(-glm$deviance/2 - -glm.null$deviance/2, vcov = 0)
       else if (is.dyad.independent(object$constrained, object$constrained.obs) && 
           !is.valued(object)) 
           ergm.bridge.dindstart.llk(formula, reference = reference, 
               constraints = constraints, obs.constraints = obs.constraints, 
               coef = coef(object), target.stats = object$target.stats, 
               control = control.bridge, llkonly = FALSE, ...)
       else ergm.bridge.0.llk(formula, reference = reference, constraints = constraints, 
           obs.constraints = obs.constraints, coef = coef(object), 
           target.stats = object$target.stats, control = control.bridge, 
           llkonly = FALSE, basis = object$network, ...)
   })
6: with(object, {
       if (!eval.loglik) 
           stop(NO_LOGLIK_MESSAGE)
       if (estimate == "MPLE" || (is.dyad.independent(object, term.options = control$term.options) && 
           is.null(object$sample) && !is.valued(object))) 
           structure(-glm$deviance/2 - -glm.null$deviance/2, vcov = 0)
       else if (is.dyad.independent(object$constrained, object$constrained.obs) && 
           !is.valued(object)) 
           ergm.bridge.dindstart.llk(formula, reference = reference, 
               constraints = constraints, obs.constraints = obs.constraints, 
               coef = coef(object), target.stats = object$target.stats, 
               control = control.bridge, llkonly = FALSE, ...)
       else ergm.bridge.0.llk(formula, reference = reference, constraints = constraints, 
           obs.constraints = obs.constraints, coef = coef(object), 
           target.stats = object$target.stats, control = control.bridge, 
           llkonly = FALSE, basis = object$network, ...)
   })
5: logLik.ergm(mainfit, add = TRUE, control = control$loglik, verbose = verbose)
4: logLik(mainfit, add = TRUE, control = control$loglik, verbose = verbose)
3: ergm(formula, ..., control = control$CMLE.ergm)
2: tergm.CMLE(formula = formula, times = times, constraints = constraints, 
       estimate = "MLE", offset.coef = offset.coef, target.stats = target.stats, 
       eval.loglik = eval.loglik, control = control, verbose = verbose, 
       ...)
1: tergm(samp ~ Form(~edges + mutual + cyclicalties + transitiveties) + 
       Diss(~edges + mutual + cyclicalties + transitiveties), estimate = "CMLE", 
       times = 1:3)

Apparently it's reaching tergm's simulate_formula.network, which expects a dynamic argument (which ergm doesn't properly know anything about).

chad-klumb commented 3 years ago

In principle, if ... got propagated all the way through, we might be able to address this by passing dynamic = FALSE to the ergm() call in tergm.CMLE. At the moment it does not appear to make it to ergm.bridge.llr though.

chad-klumb commented 3 years ago

Although it does also happen with a direct ergm() call when tergm is loaded, so passing an argument in tergm.CMLE wouldn't eliminate the basic issue.

martinamorris commented 3 years ago

@krivit Since this is the first example in the tergm workshop, I think we need to come up with a solution. The package shouldn't spit out a warning for the most basic call to a CMLE. And it's not a warning that will make sense to the naive user, who didn't call simulate.

If there is no way to suppress the warning, then it needs to be less frightening and more informative.

martinamorris commented 3 years ago

Also, why is the more helpful text we used to get:

This model was fit using MCMC.  To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.

no longer printed?

krivit commented 3 years ago

If tergm::simulate_formula.network() is going to intercept all of the calls with a network on the LHS, then it needs to exercise a bit more discernment for when to fire that warning.

My thinking is that we should assume that the user wants cross-sectional simulation unless there are strong indications to the contrary, such as

  1. networkDynamic on the LHS,
  2. a lasttoggle data structure attached to the network, or
  3. dynamic operators (Form, Diss, etc.) without a combined_networks subclass of network (which indicates CMLE).

Cases 1 and 2 should result in a warning, and Case 3 an error. Otherwise (i.e., plain network or combined_networks), maybe a message should be printed the first time the user runs simulate().

Otherwise, we get silly behaviour such as every call to simulate() an ordinary ergm() fit producing a warning whenever tergm is loaded---which will be the case if the user runs library(statnet).

Does this make sense, or am I missing something crucial?

Also, why is the more helpful text we used to get:

This model was fit using MCMC.  To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.

no longer printed?

Are you sure? Could you please give me a reprex?

martinamorris commented 3 years ago

Sure:

library(tergm)
data(samplk)
samplist <- list(samplk1,samplk2,samplk3)

samp.fit <- tergm(
  samplist ~
    Form(~edges+mutual+cyclicalties+transitiveties) +
    Diss(~edges+mutual+cyclicalties+transitiveties),
  estimate = "CMLE",
  times = c(1:3)
    )

For me, the only output I get is:

For dynamic simulation in �tergm� you must pass �dynamic=TRUE�.  Attempting �ergm� simulation instead...  

And yes, the weird icons are what appear on my Rstudio console. They also appear to break the formatting here in markdown.

other attached packages:
 [1] latticeExtra_0.6-29      lattice_0.20-44          htmlwidgets_1.5.3       
 [4] ndtv_0.13.0              sna_2.6                  statnet.common_4.5.0-355
 [7] animation_2.6            tsna_0.3.3               tergm_4.0.0-2177        
[10] networkDynamic_0.10.2    ergm_4.0-6365            network_1.17.0-682      
[13] knitr_1.33  
martinamorris commented 3 years ago

Ok, it looks like the above is an artifact of running a command from a chunk in an .Rmd file.

That said, when run in the console, this is the output I get. The helpful mcmc.diagnostics text is pretty hard to see, b/c that unhelpful warning comes after it.

Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.7130.
Estimating equations are not within tolerance region.
Iteration 2 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0877.
Convergence test p-value: 0.8014. Not converged with 99% confidence; increasing sample size.
Iteration 3 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0883.
Convergence test p-value: 0.1327. Not converged with 99% confidence; increasing sample size.
Iteration 4 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0537.
Convergence test p-value: 0.2608. Not converged with 99% confidence; increasing sample size.
Iteration 5 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0593.
Convergence test p-value: 0.0076. Converged with 99% confidence.
Finished MCMLE.
Evaluating log-likelihood at the estimate. Fitting the dyad-independent submodel...
Bridging between the dyad-independent submodel and the full model...
Setting up bridge sampling...
Using 16 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 .
Bridging finished.
This model was fit using MCMC.  To examine model diagnostics and check for
degeneracy, use the mcmc.diagnostics() function.
Warning message:
In simulate_formula.network(object, nsim = nsim, seed = seed, ...,  :
  For dynamic simulation in ‘tergm’ you must pass ‘dynamic=TRUE’.  Attempting ‘ergm’ simulation instead...
krivit commented 3 years ago

@chad-klumb , @martinamorris , does the behaviour described at the top of https://github.com/statnet/tergm/issues/25#issuecomment-855231658 seem reasonable? Since the spurious warning is currently triggered by all calls to simulate(nw~...) including those performed by the end-user, the fix has to be in tergm.

Regarding the MCMC message getting lost, I guess we could 1) reduce the default verbosity and/or 2) put a blank line before it, to separate it from the rest of the output.

martinamorris commented 3 years ago

My thinking is that we should assume that the user wants cross-sectional simulation unless there are strong indications to the contrary, such as

  1. networkDynamic on the LHS,
  2. a lasttoggle data structure attached to the network, or
  3. dynamic operators (Form, Diss, etc.) without a combined_networks subclass of network (which indicates CMLE).

Cases 1 and 2 should result in a warning, and Case 3 an error. Otherwise (i.e., plain network or combined_networks), maybe a message should be printed the first time the user runs simulate().


I'm not 100% sure I understand what you're proposing. Is it:

If so, then yes, that sounds reasonable. Not sure if there needs to be a message the first time an ergm user runs simulate(). It would just be confusing for most folks.

chad-klumb commented 3 years ago

As long as the user can control the behavior and avoid the warning/error by explicitly specifying dynamic (as they can now), then I guess that's fine.

Do people ever call ergm() with networkDynamic left hand sides and tergm loaded? (Or fit an ergm to the result of a tergm simulation, which would have lasttoggle attached?)

krivit commented 3 years ago

Implementing should be straightforward:

Also, we might want to add a dynamic= argument to simulate.tergm(), as well, because someone fitting CMLE might want to simulate what the conditional networks might have been (e.g., for GoF purposes). This should also be straightforward, since we know whether a tergm fit was CMLE/CMPLE or EGMME.

krivit commented 3 years ago

@chad-klumb, under those circumstances, a warning would be warranted, I think. We should make sure we don't trigger one during the ergm() call to compute EDApprox, but otherwise, I don't foresee any other corner cases.

krivit commented 3 years ago

I've implemented a solution that I am now testing. To avoid initialising the model twice, I decided to have terms that require "dynamic" facilities run a helper function to check that a term option dynamic=TRUE and stop with an error otherwise. For now, this should do.

One thing I am finding troublesome is the error message: right now, it's "This term given a single network can only be used in dynamic mode, e.g., from 'simulate(..., dynamic=TRUE)'.", but it feels insufficient. Any suggestions?

krivit commented 3 years ago

Update: the *E terms have been renamed to * (dynamic) (e.g., Form (dynamic)) because in R, function names with spaces are perfectly OK with the power of backticks.

The updated message is now something like this:

Error: In term ‘Form (dynamic)’ (called from term ‘Form’ in package ‘tergm’): This term requires either last-toggle data or dynamic mode, e.g., from ‘simulate(..., dynamic=TRUE)’.
krivit commented 3 years ago

I've committed a candidate fix to the simulate_warning_fix. The main piece of bad news is that summary() when using dynamic terms on networks with no lasttoggle structure now produces an error, unless passed dynamic=TRUE:

library(tergm)
data(florentine)
summary(flomarriage~Form(~edges))
#> Error: In term 'Form (dynamic)' (called from term 'Form' in package 'tergm'): This term requires either last-toggle data or dynamic mode, e.g., from 'simulate(..., dynamic=TRUE)'.

Created on 2021-06-10 by the reprex package (v2.0.0)

library(tergm)
data(florentine)
summary(flomarriage~Form(~edges), dynamic=TRUE)
#> Form~edges 
#>         20

Created on 2021-06-10 by the reprex package (v2.0.0)

We could also change it to a warning.

martinamorris commented 3 years ago

ack. why is just summary having this behavior? it's such a basic and commonly used function.

but i'm also not sure what this call is supposed to mean, for a single static network

summary(flomarriage~Form(~edges), dynamic=TRUE)

what is the use case for a temporal operator in this context?

krivit commented 3 years ago

simulate(flomarriage~Form(~edges)) does it too, but it's intended behaviour there.

but i'm also not sure what this call is supposed to mean, for a single static network

summary(flomarriage~Form(~edges), dynamic=TRUE)

what is the use case for a temporal operator in this context?

The way that's interpreted is that all the edges and nonedges in flomarraige have been around forever. In some sense, the error is a good thing, since passing dynamic=TRUE forces to use to make the originally implicit assumption explicit.

That said, we could add a package option (e.g., options(tergm.require_lasttoggle=FALSE)) or similar that suppresses the check.

krivit commented 3 years ago

@martinamorris , what do you think? Should I leave it as it is, change to a warning, and/or add an option?

martinamorris commented 3 years ago

i think this is an edge case, so a warning is appropriate. the warning text could be a bit more user friendly ... something like "Temporal operators require dynamic network data"

krivit commented 3 years ago

It's not just operators, though. For example, if you try summary(flomarriage~edges.ageinterval(...)), it'll produce a similar error. How about something like:

Error: In term ‘Form (dynamic)’ (called from term ‘Form’ in package ‘tergm’): This term requires either dynamic data (network series or network dynamic or last toggle information) or must be set to dynamic mode (typically by passing ‘dynamic=TRUE)’ to ‘summary()’, ‘simulate()’, etc..

Error: In term ‘edges.ageinterval’ in package ‘tergm’: This term requires either dynamic data (network dynamic or last toggle information) or must be set to dynamic mode (typically by passing ‘dynamic=TRUE)’ to ‘summary()’, ‘simulate()’, etc..

martinamorris commented 3 years ago

how about

Error: In term ‘edges.ageinterval’ in package ‘tergm’: This term requires either dynamic data (network dynamic or last toggle information) or must be set to dynamic mode (typically by passing the argument ‘dynamic=TRUE)’
krivit commented 3 years ago

On further thought, the grammar of that sentence is atrocious and I need to rewrite it.