MesserLab / SLiM

SLiM is a genetically explicit forward simulation software package for population genetics and evolutionary biology. It is highly flexible, with a built-in scripting language, and has a cross-platform graphical modeling environment called SLiMgui.
https://messerlab.org/slim/
GNU General Public License v3.0
160 stars 30 forks source link

tsk_treeseq_init mutation time error #208

Closed elissasoroj closed 2 years ago

elissasoroj commented 3 years ago

Hi there,

I am running an alternation of generations model in the development version of SLiM (installed around Sep 1, prior to the newest update with the new version of tskit). I have been running similar scripts on both my personal macOS machine and a remote server running Linux for the past few weeks. Recently, I updated my script to include user-defined functions and made a change to the survival callback portion and I started getting this error:

Screen Shot 2021-09-10 at 4 57 52 PM

The error highlights this portion of the code, but it is unclear whether that portion of the code is actually causing the problem. I have successfully used survival() callbacks in past scripts with no issues, but this is the first time I've used return T (sometimes this the portion of the code that is highlighted, sometimes not - depends on the seed):

Screen Shot 2021-09-10 at 5 10 58 PM

I've copied the SLiM code below that reproduces the error on both my mac and the server. Changing the GAM_CLONE_RATE to 0.0 makes the problem disappear, as does removing checkCoalescence=T

initialize() {
    //set seed
    setSeed(123);

    // model type
    initializeSLiMModelType("nonWF");

    // config
    initializeRecombinationRate(1e-08, 900000);
    initializeMutationRate(1e-07);
    initializeTreeSeq(checkCoalescence=T);

    // MutationType init
    initializeMutationType('m1', 0.5, 'f', 0.0);
    m1.convertToSubstitution = T;

    // ElementType init
    initializeGenomicElementType('g3', m1, 1);

    // Chromosome (GenomicElement init)
    initializeGenomicElement(g3, 0, 900000);

    // constants (Population sizes and others)
    defineConstant('SPO_POP_SIZE', 200);
    defineConstant('GAM_POP_SIZE', 400);
    defineConstant('SPO_MUTATION_RATE', 5e-08);
    defineConstant('GAM_MUTATION_RATE', 5e-08);
    defineConstant('GAM_CLONE_RATE', 0.6);
    defineConstant('GAM_CLONES_PER', 20);
    defineConstant('SPO_CLONE_RATE', 0);
    defineConstant('SPO_CLONES_PER', 1);
    defineConstant('SPO_SELF_RATE', 0.0);
    defineConstant('GAM_ARCHEGONIA_PER', 10);
    defineConstant('RS_MEGASPORANGIA_PER', 36);
    defineConstant('RS_MICROSPORANGIA_PER', 6);
    defineConstant('MEGASPORANGIA_MEGASPORES_PER', 1);
    defineConstant('MICROSPORANGIA_MICROSPORES_PER', 100);

    // extra scripts (Optional)

}

// shadie DEFINITIONS
//p0 = haploid population
//p1 = diploid population

//0 = hermaphrodite
//1 = female
//2 = male
//4 = gametophyte clone

//shadie-defined functions
// generates gametes from sporophytes
function (void)make_microspores(object<Individual>$ ind, integer$ reps) {
    for (rep in 1:reps){
        //4 microspores per meiosis rep
        breaks1 = sim.chromosome.drawBreakpoints(individual);
        breaks2 = sim.chromosome.drawBreakpoints(individual);
        child1 = p0.addRecombinant(individual.genome1, individual.genome2, breaks1, NULL, NULL, NULL);
        child2 = p0.addRecombinant(individual.genome2, individual.genome1, breaks1, NULL, NULL, NULL);
        child3 = p0.addRecombinant(individual.genome1, individual.genome2, breaks2, NULL, NULL, NULL);
        child4 = p0.addRecombinant(individual.genome2, individual.genome1, breaks2, NULL, NULL, NULL);

        children = c(child1, child2, child3, child4);
        children.tag = 2;   
    }
}

function (void)make_eggs(object<Individual>$ ind, integer$ reps) {
    for (rep in 1:reps){
        breaks = sim.chromosome.drawBreakpoints(individual);
        child1 = p0.addRecombinant(individual.genome1, individual.genome2, breaks, NULL, NULL, NULL);
        child1.tag = 1;
    }
}

// generates gametes from sporophytes
s5 reproduction(p1 ) {
    if (individual.tag == 0){
        strobilus_female_ratio = RS_MEGASPORANGIA_PER/(RS_MICROSPORANGIA_PER+RS_MEGASPORANGIA_PER);
        if (runif(1) < strobilus_female_ratio){
            meiosis_reps = asInteger(MEGASPORANGIA_MEGASPORES_PER*RS_MEGASPORANGIA_PER);
            make_eggs(individual, meiosis_reps);
        }
        else{
            meiosis_reps = asInteger((RS_MICROSPORANGIA_PER*MICROSPORANGIA_MICROSPORES_PER)/4);
            make_microspores(individual, meiosis_reps);
        }
    }
}

// generates gametes from sporophytes
s6 reproduction(p0 ) {
    // find the megaspores
    if (individual.tag == 1) {
        eggs = GAM_ARCHEGONIA_PER;
        //fertilize each egg
        for (rep in 1:eggs) {
            sperm = p0.sampleIndividuals(1, tag=2); //find a microspore
            //NOTE: each microspore gives rise to a male gametophyte, which will
            //produce many antheridia, giving rise to thousands of clonal sperm
            //because of this, sperm is not removed from the mating pool when used

            if (sperm.size() == 1) {
                child = p1.addRecombinant(individual.genome1, NULL, NULL, sperm.genome1, NULL, NULL);
                child.tag=0;

            }
        }
    }
    else if (individual.tag == 4) { //add new gametophyte clones to p1 as haploids
        for (i in 1:GAM_CLONES_PER)
            p0.addRecombinant(individual.genome1, NULL, NULL, NULL, NULL, NULL).tag = 4;
    }
}

// define subpops: p1=diploid sporophytes, p0=haploid gametophytes
1 early() {
    sim.addSubpop('p1', SPO_POP_SIZE);
    sim.addSubpop('p0', 0);
    p1.individuals.tag = 0;
}

// alternation of generations
early() {
    // diploids (p1) just generated haploid gametophytes
    if (sim.generation % 2 == 0) {

        // fitness affects gametophyte survival
        p0.fitnessScaling = (GAM_POP_SIZE / (p0.individualCount));;

        // p0 and p1 survival callbacks
        s1.active = 1;
        s2.active = 0;
        s3.active = 1;
        s4.active = 0;

        // haploids reproduce, diploids don't
        s5.active = 0;
        s6.active = 1;

        // haploids get modified fitness, without dominance
        s7.active = 1;
    }
    // odd generations = gametophytes (p0) just generated sporophytes
    else {

        // fitness affects sporophytes
        p1.fitnessScaling = SPO_POP_SIZE / p1.individualCount;

        // turn off p0 survival callbacks
        // turn on p1 survival callbacks
        s1.active = 0;
        s2.active = 1;
        s3.active = 0;
        s4.active = 1;

        // diploids reproduce, haploids don't
        s5.active = 1;
        s6.active = 0;

        // diploids get SLiM's standard fitness calculation, with dominance
        s7.active = 0;
    }
}

// maternal effects and survival
// remove p1 individuals during even generations
s1 survival(p1) {
    return F;
}

// remove p1s by random chance of death and apply maternal effects to fitness
s2 survival(p1) {   
    return NULL;
}

// remove p0s by random chance of death and apply maternal effects to fitness
s3 survival(p0) {
    return NULL;
}

// remove p0 individuals during odd generations
s4 survival(p0) {
    if ((individual.tag == 4) | (individual.tag == 6)) {
        individual.tag = 0;
        return T;
    }
    else
        return F;
}

// gametophytes have no dominance effects
s7 fitness(m1) {
    return 1 + mut.selectionCoeff;
}

// fixes mutations in haploid gen
late() {
    // gametophytes have just undergone fitness selection
    if (sim.generation % 2 == 0) {
        muts7 = sim.mutationsOfType(m1);
        freqs7 = sim.mutationFrequencies(NULL, muts7);
        if (any(freqs7 == 0.5))
            sim.subpopulations.genomes.removeMutations(muts7[freqs7 == 0.5], T);

        //tag gametophytes that will clone
        p0_size = length(p0.individuals);
        clones = p0.sampleIndividuals(asInteger(p0_size*GAM_CLONE_RATE));
        clones.tag = 4; //tag clones;
    }
}

// end of sim; save .trees file
200 late() {
    sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);
    sim.treeSeqOutput('slim.trees');
}
bhaller commented 3 years ago

Hi Elissa. Thanks for the report, it's an interesting one :->. The bug reproduces 100% of the time on my machine with the latest SLiM including the newly merged tskit.

@petrelharp I'm going to need your help on this one I think! In fact we may need to rope in Ben Jeffrey as well, but let's wait on that for a bit, maybe? So I've looked into this a little, and here's what I've found so far.

First of all, let's start with what can be deduced just from looking at the code, without the debugger. This error message is generated in only one place in SLiM, in SLiMSim::CheckCoalescenceAfterSimplification(). This method checks, after simplification has just occurred, whether the tree sequence has fully coalesced or not. Note that the model requests coalescence checks with checkCoalescence=T; if that is removed, the bug no longer manifests.

The coalescence check is done by creating a new tree sequence, from the tables that just got simplified, by calling tsk_table_collection_copy(), adding a population table (IIRC, SLiM does this on demand, rather than maintaining it on the fly), calling tsk_table_collection_build_index(), and then calling tsk_treeseq_init(). That is the call that generates the error, inside tsk_table_collection_check_mutation_integrity() (that is the only place where tskit generates this specific error, I think). That function is called by tsk_table_collection_check_integrity(), which is called directly by tsk_treeseq_init().

The error position in the SLiM script indicated in SLiMgui is a red herring; it's just the last chunk of Eidos code that was executed before auto-simplification and then coalescence checking happened. The error position apparently didn't get zeroed out by somebody, and so the error got misattributed to the return statement, which is a minor bug orthogonal to the real problem here.

OK, now let's get into the debugger. I set a breakpoint in tsk_table_collection_check_mutation_integrity() where it raises this error, and the backtrace is as expected. The relevant code at the breakpoint is:

if (mutation_time < node_time[mutations.node[j]]) {
    ret = TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE;
    goto out;
}

I examined values in the debugger:

(lldb) p mutation_time
(double) $0 = -124
(lldb) p j
(tsk_size_t) $1 = 9
(lldb) p node_time[mutations.node[j]]
(const double) $2 = -121

So the values don't look malformed (none of them is some huge value indicative of overflow or something), but the mutation time is indeed less than the node time – several generations less, in fact, so this is not some kind of off-by-one error.

But here's the really odd thing. Looking around the neighboring code, I notice that CheckAutoSimplification() is being called at slim_sim.cpp:4278, and immediately before that is a call to CheckTreeSeqIntegrity() that happens whenever we're running a DEBUG build (which I am, right now). So CheckTreeSeqIntegrity() was called just before CheckAutoSimplification(). And CheckTreeSeqIntegrity() is just a wrapper for... drum roll...

tsk_table_collection_check_integrity(&tables_, TSK_NO_CHECK_POPULATION_REFS);

In other words, it appears to me that:

  1. tsk_table_collection_check_integrity() was called, checked the mutation/node times, and said they were all fine
  2. auto-simplification simplified the tree sequence
  3. simplification completed without errors
  4. CheckCoalescenceAfterSimplification() was then called to check for coalescence
  5. that tried to create a tree sequence using the tables just emitted by simplification
  6. and that raised the error when it called tsk_table_collection_check_integrity().

Initially I thought this pointed the finger at simplification having corrupted previously good data in the tables, introducing the mutation/node timing error somehow. To confirm that, I added a call to CheckTreeSeqIntegrity() right at the top of CheckCoalescenceAfterSimplification(), so now it checks the integrity of the tables immediately after simplification has occurred... and that check passes, and then the check done by tsk_treeseq_init() just below fails. So it looks like simplification is off the hook.

The other interesting fact is, as I noted above, that if checkCoalescence=T is removed the model runs to completion without errors. Even if coalescence checking is turned off, CheckTreeSeqIntegrity() is getting called in every generation, and so the mutation times versus node times are passing the check done by tsk_table_collection_check_integrity() in every generation, both before and after the point where the error would occur if coalescence checking were enabled. That, again, suggests that there is nothing wrong with the data being recorded by SLiM.

So I think this suggests that the problem is inside CheckCoalescenceAfterSimplification(). It doesn't perform the check directly on the existing tables, because we didn't want to modify those tables mid-simulation. Instead, it copies them, then adds a population table, calls tsk_table_collection_build_index(), and then calls tsk_treeseq_init(). So it appears that perhaps the "real" tables being used by SLiM are fine, but there's something about that copying/building procedure that produces a new set of tables that is corrupted. But... how? I don't know what tsk_table_collection_build_index() does; could it be the culprit? WritePopulationTable() just adds the population table, I don't think it touches nodes/mutations at all.

The other possibility that occurs to me is that the integrity check done by CheckTreeSeqIntegrity() fails to catch the problem because it sets TSK_NO_CHECK_POPULATION_REFS; maybe the problem is then caught by tsk_treeseq_init() because when it calls tsk_table_collection_check_integrity() it does not pass that flag. But even with TSK_NO_CHECK_POPULATION_REFS set, tsk_table_collection_check_integrity() should still check the mutation/node times, right? It certainly looks to me like it does, looking at the code. So this explanation doesn't seem to hold water.

Where does that leave us? Maybe the next step would be to write out the tables to a .trees file, at the top of CheckTreeSeqIntegrity() and then again with the copied/built tables that it constructs? I can get the files written out pretty easily I think, but I would have no idea how to then diagnose the problem using them, so that's where you come in, @petrelharp. What do you think? If I hand you two .trees files, and tell you the mutation ID of the mutation that ultimately triggers the error, can you make progress from there, do you think? Or do you see a better way to explore this?

petrelharp commented 3 years ago

Wow, well that's a confusing one.

Where does that leave us? Maybe the next step would be to write out the tables to a .trees file, at the top of CheckTreeSeqIntegrity() and then again with the copied/built tables that it constructs? I can get the files written out pretty easily I think, but I would have no idea how to then diagnose the problem using them, so that's where you come in, @petrelharp. What do you think? If I hand you two .trees files, and tell you the mutation ID of the mutation that ultimately triggers the error, can you make progress from there, do you think? Or do you see a better way to explore this?

Yep - this is just what I was going to suggest - send me the files! (and just to make sure, where in the code you wrote them out at?)

bhaller commented 3 years ago

Wow, well that's a confusing one.

:->

Yep - this is just what I was going to suggest - send me the files! (and just to make sure, where in the code you wrote them out at?)

OK, so now that I'm getting into it I'm realizing that even just writing out the two .trees files is much more complicated than I thought. :-< Looking at SLiMSim::WriteTreeSequence(), it does a whole bunch of stuff before it writes out, even if pre-write simplification is turned off, like:

That stuff modifies the state of the tables, of course, so if we use WriteTreeSequence() to do the writing we won't be writing out the actual table state as it exists in SLiMSim::CheckCoalescenceAfterSimplification(), we'll be writing out some white-washed fiction that may well obscure the bug. (Incidentally – WriteTreeSequence() calls tsk_table_collection_sort() and tsk_table_collection_deduplicate_sites() on the simulation's tables, before it makes a copy of the simulation's tables with tsk_table_collection_copy(), when simplify=F, so calling treeSeqOutput(simplify=F) in Eidos has side effects on the simulation's tables; do we really want to be doing that before we copy the tables, or is that a separate bug?)

We could just call tsk_table_collection_dump() directly instead, but I think some of that stuff is probably essential for the write to work at all – without DerivedStatesToAscii(), for example, all the derived state info would be in binary and probably wouldn't write/read correctly at all. Ugh. And probably more of that stuff is needed for you to be able to work with the .trees files on the other side, right?

I'm starting to think I ought to just hand this whole thing over to you, since I don't even know how to write out the tables in the right way for your purposes. :-O I'm kind of thinking that we need a new debugging function that dumps out a given set of tree-seq tables to a given path on disk, making a copy of them first and then doing the absolute bare minimum of alterations to them – maybe just DerivedStatesToAscii()? – before writing them out with tsk_table_collection_dump(). I'm actually rather surprised we don't already have such a debugging-write function; how did we survive this long without it??

Alternatively, maybe we need to give up on this idea of writing out the two files, and instead try to tackle it further in the debugger somehow. I'm not sure how to approach it, but at least we have a totally reproducible case. We could do a zoom and share my screen and bang away at it in the debugger until we understand them problem, but yikes.

Or perhaps if you stare at the code in SLiMSim::CheckCoalescenceAfterSimplification() for a little while the answer will just come to you? I think the state of the actual simulation tables at the top of that function is almost certainly fine; they pass the check with CheckTreeSeqIntegrity() and if coalescence checking is turned off the whole model runs to completion just fine, including the write of a .trees file at the end, strongly suggesting that there is nothing wrong with the actual simulation state. I very strongly suspect that the problem is in the way that CheckCoalescenceAfterSimplification() copies the simulation tables and builds them into a tree sequence object that it can query for coalescence. It's probably just missing some important cleanup step before the call to tsk_treeseq_init() – perhaps one of the steps performed by WriteTreeSequence(). If you can "just" (ha) see what the problem is by staring at it, that would be by far the best solution. :->

petrelharp commented 3 years ago

Well, one thing I notice is that tsk_table_collection_check_integrity by default doesn't check mutation ordering, and so a call to CheckTreeSeqIntegrity would not catch this (note: the comment in CheckTreeSeqIntegrity is out of date).

In fact, changing this line to

    int ret = tsk_table_collection_check_integrity(&tables_, TSK_NO_CHECK_POPULATION_REFS | TSK_CHECK_MUTATION_ORDERING);

and inserting a call to CheckTreeSeqIntegrity at the start of SimplifyTreeSequence shows that the problem has already occurred coming in to SimplifyTreeSequence. (And, it's affecting a lot of nodes: instead inserting this code:

std::cout << "start check" << std::endl;
for (size_t j = 0; j < tables_.mutations.num_rows; j++) {
        double mutation_time = tables_.mutations.time[j];
                if (mutation_time < tables_.nodes.time[tables_.mutations.node[j]]) {
                    std::cout << "OH NO! " << j << " " << mutation_time << " >= " << tables_.nodes.time[tables_.mutations.node[j]] << std::endl;
                }
}
    CheckTreeSeqIntegrity();
std::cout << "end check" << std::endl;

tells us that there's actually quite a few mutations (looks like mutations numbers 5012 to 5351?) with invalid times.

I'm not sure where to go backwards from here, though?

bhaller commented 3 years ago

Well, one thing I notice is that tsk_table_collection_check_integrity by default doesn't check mutation ordering, and so a call to CheckTreeSeqIntegrity would not catch this (note: the comment in CheckTreeSeqIntegrity is out of date).

Aha, good catch! I guess that function ought to be updated to perform a more thorough check.

...the problem has already occurred coming in to SimplifyTreeSequence...

So the problem exists even before simplification has occurred, if I'm understanding correctly?

tells us that there's actually quite a few mutations (looks like mutations numbers 5012 to 5351?) with invalid times.

I'm not sure where to go backwards from here, though?

Well, I'm still sick, so my brain at the moment is a diseased, drugged-up fuzzball. :-> But I guess maybe the next step is to sprinkle calls to (a fixed version of) CheckTreeSeqIntegrity(), to try to detect the very first moment when the problem manifests? We need to know whether the issue is that SLiM is recording invalid information in the first place, or whether the tables are becoming invalid at some later point, presumably due to simplification since that's the only thing that touches the tables besides SLiM itself.

It is possible that the SLiM model actually breaks some rule, because of the very strange reproductive processes it implements, and that causes the mut/node mismatch problem. I don't see how that would happen, though; when we record a new mutation in SLiM, it always belongs to a specific node, and so the node it belongs to simply must already exist, so I don't see how the timing mismatch could ever occur in the raw data recorded by SLiM. (Famous last words, obvs. :->)

Anyhow, once I'm feeling better I think we probably ought to have a zoom debugging session; if you're making progress then great, but if you feel like you're beating your head against a brick wall then feel free to wait for me. :->

petrelharp commented 3 years ago

I've inserted some testing code here to tell me when illegal mutations are inserted. Observations:

... and, most importantly maybe:

I think that this should not happen, but I'm at a loss as to how this might have happened.

Here the testing code:

    if (ret < 0) handle_error("tsk_mutation_table_add_row", ret);
if (time < tables_.nodes.time[genomeTSKID]) 
std::cout << "UH OH! in " << Generation() << " genome " << genomeTSKID << " id " << p_genome->genome_id_ << " with time " << time << " >= " << tables_.nodes.time[genomeTSKID] << std::endl;

which gets me output like

UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 53 id 4019903 with time -144 >= -141
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 121 id 4028359 with time -144 >= -142
UH OH! in 144 genome 167 id 4033327 with time -144 >= -142
UH OH! in 144 genome 167 id 4033327 with time -144 >= -142
UH OH! in 144 genome 167 id 4033327 with time -144 >= -142
UH OH! in 144 genome 167 id 4033327 with time -144 >= -142
UH OH! in 144 genome 167 id 4033327 with time -144 >= -142
UH OH! in 144 genome 167 id 4033327 with time -144 >= -142
# ... etcetera
petrelharp commented 3 years ago

So, I think I'm to the "wait for you" stage! Hope you feel better soon!

petrelharp commented 3 years ago

Ah HA! If I comment out this line:

            sim.subpopulations.genomes.removeMutations(muts7[freqs7 == 0.5], T);

then the bug does not occur! So it appears to be some rare interaction between removeMutations and clonal reproduction?

bhaller commented 3 years ago

Hi @elissasoroj. OK, Peter and I have figured this out. It boils down to the same issue as in #205, and so when #205 is fixed this will be fixed. I'm therefore going to discuss some details regarding the proposed fix over there.

Here I will just summarize why these two bugs are basically the same. The problem stems from the fact that your model (a) has haploids, and (b) those haploids live for more than one generation in some cases. When the removeMutations() call is made to remove fixed mutations from the model, the fixed mutations get removed and a new Substitution object gets created. Substitutions are considered to basically exist in all genomes, by definition; so creating a new Substitution therefore means that the mutation that fixed is removed, and then (conceptually) is added to all genomes – including the empty second genomes of your haploids. Normally, this is fine (if a little fishy), because the haploid models people have been playing with have non-overlapping generations and so the current generation are all newborns; and it is legal to add mutations to the genomes of newborns. However, in your model the haploids can be older, and so a mutation can end up being added to the empty genome of a haploid that is not newborn. And that is illegal, according to the rules of tskit: you can't add a mutation to a genome after the point in time when the genome was created. That is a general rule that SLiM usually enforces; calling addNewMutation() to add a mutation to a genome of an individual that is not newborn will raise an error, for example. In this case, though, it slipped through the cracks of SLiM's design.

Even worse – and I guess this is the problem that tskit detects and raises an error about – the mutation can be added to a genome that was created before the creation time of the mutation itself. This sort of implies, in tskit terms, that the mutation got created at time T, and then somehow teleported backwards in time to get itself introduced into a genome that existed before the mutation existed. Kind of a grandfather paradox. (The error message from tskit says "A mutation's time must be >= the node time", but that is in tskit time not SLiM time, and tskit time runs backwards from the present, so in SLiM terms this error message would translate to "A mutation's time must be <= the node time", to avoid the grandfather paradox.)

So. If turning off coalescence checking is working for you, Peter and I believe that that means things are pretty much fine for your purposes. Probably some fixed mutations do occur that create that sort of grandfather paradox in the treeseq tables, but that does no harm, and the inconsistent state gets swept away by simplification the next time it happens, so the errors get corrected without consequence. It is possible that a run of the model could happen where the inconsistency does not get swept away by simplification; in that case, we believe you would get a tskit error either when you write out the .trees file (probably) or when you try to create a tree sequence from the .trees in Python (definitely). So if you're not seeing errors later on, that should mean everything worked out fine in the end; we believe you should not see errors in your analysis results as a consequence of this bug.

Note that a trivial model of haploids + overlapping generations + treeseq triggers the same error:

initialize() { initializeSLiMModelType(“nonWF”); initializeTreeSeq(checkCoalescence=T); defineConstant(“K”, 500); // carrying capacity initializeMutationType(“m1”, 0.5, “f”, 0.0); initializeGenomicElementType(“g1”, m1, 1.0); initializeGenomicElement(g1, 0, 99999); initializeMutationRate(1e-7); initializeRecombinationRate(1e-8); } reproduction() { subpop.addRecombinant(genome1, NULL, NULL, NULL, NULL, NULL); } 1 early() { sim.addSubpop(“p1", 500); } early() { p1.fitnessScaling = K / p1.individualCount; } late() { // remove neutral mutations in the haploid genomes that have fixed muts = sim.mutationsOfType(m1); freqs = sim.mutationFrequencies(NULL, muts);

if (any(freqs == 0.5))
    sim.subpopulations.genomes.removeMutations(muts[freqs == 0.5], T);

} 50000 late() { sim.outputFixedMutations(); }

As SLiM's code stands, however, the error is only detected if coalescence checking is turned on, because there's a bug in SLiM's internal consistency checking that allows the inconsistent state to evade detection otherwise – which is the only reason this bug was not found 2+ years ago! So we'll be fixing the consistency-check code to be more robust as well.

Post here or email if you have any further questions about this issue. #205 is going to be the highest-priority thing on my plate now, and I'll be in touch once it's fixed to let you know about changes you will want to make to your model in response to that fix.

Thanks for reporting this!

bhaller commented 2 years ago

OK, the fix for this has been committed. @elissasoroj I would strongly recommend that you base your published recipe on this latest version of SLiM, which will require some revisions as mentioned above. Email me with your current model code and I'll be happy to revise it for you. This version of SLiM will be released as SLiM 3.7 soon. Please let me know what your timeframe for publication is, so I know how much I need to rush that; there are a couple of minor things I'd like to roll in to 3.7, but I'll push them to 3.8 if you need me to. Let me know if you see any problems with this (beyond the known/intentional breaks with backward compatibility).