tskit-dev / pyslim

Tools for dealing with tree sequences coming to and from SLiM.
MIT License
27 stars 23 forks source link

Discrepancies in diversity after recapitation and mutation overlay #340

Closed AudeCaizergues closed 2 weeks ago

AudeCaizergues commented 7 months ago

Hello,

I am running two simulations on slim which differ only by the number of burn in generations, one has 500,000 and the other 10,000, and my tree recording starts after this period for another 5,100 generations. The code is the following (same code for both except for the burn-in):

`////////////////////// // PARAMETERIZATION // ////////////////////// initialize() { initializeSLiMModelType("nonWF"); // non wright-fisher model initializeSLiMOptions(keepPedigrees=T, dimensionality="xy"); initializeTreeSeq();

// Model Parameters
defineConstant("W", 50.0);  // width of the simulated area 
defineConstant("rad",12.0); // radius of urban greenspace (20x20=5, 50x50=12) 
defineConstant("K", 1);  // carrying-capacity per unit square (roughly)
defineConstant("SD", 1);  // sigma_D, offspring dispersal distance
defineConstant("SI", 0.5);  // sigma_I, the spatial competition interaction distance
defineConstant("SM", 2);  // sigma_M, the mate choice distance
defineConstant("L", 5);    // mean lifetime at stationarity (see Battey et al 2020)
defineConstant("FECUN", 1/L); // mean fecundity**
defineConstant("G", 1e6);  // genome length (bp)
defineConstant("p", 1); // quality of urban habitat
defineConstant("g", 4); // quality of greenspace habitat
defineConstant("n", 4); // quality of nonurban habitat
defineConstant("REPRO", "outcrossing"); // reproductive scenario
// Number of time steps simulation will run (burn in = 500000)
defineConstant("maxgen",505001); 

// print constants
catn(c("W =", W));
catn(c("K =", K));
catn(c("SD =", SD));
catn(c("SI =", SI));
catn(c("SM =", SM));
catn(c("L =", L));
catn(c("FECUN =", FECUN));

// constant in spatial competition function, as derived in Battey et al 2020
// competition strength depends on fecundity (which is inversely related to lifespan)
defineConstant("RHO", FECUN/((1+FECUN) * K));

initializeMutationType("m1", 0.5, "f", 0.0); // define mutation type (neutral noncoding regions)
initializeGenomicElementType("g1", m1, 1.0); // define genome subject to m1
initializeGenomicElement(g1, 0, G-1); // define first and last base positions of genome g1
initializeMutationRate(0.0); // expected # mutations/bp per generation (overlayed later in Python)
// 
initializeRecombinationRate(1e-9); // 1 cM/Mbp = 1e-6 cM/bp = 1e-8 crossover prob/gen
// probability any 2 adjacent bps will cross over (breakpoint) per generation

// Define spatial competition interaction (i.e., density dependent mortality)
// draw strength from gaussian distribution, max 1/(2*PI*SI^2), standard deviation SI
initializeInteractionType(1, "xy", reciprocal=T, maxDistance=SI * 3);
i1.setInteractionFunction("n", 1.0/(2*PI*SI^2), SI);

// Define mate choice interaction
// interaction strength 0 beyond 3 standard deviations
// draw strength from gaussiandistribution , max 1/(2*PI*SI^2), standard deviation SM
initializeInteractionType(2, "xy", reciprocal=T, maxDistance=SM * 3);
i2.setInteractionFunction("n", 1.0/(2*PI*SM^2), SM);

}

////////////////////// // BEGIN SIMULATION // ////////////////////// // look for mates, evaluate reproduction interaction first() { i2.evaluate(sim.subpopulations); }

// --- DEFINE REPRODUCTIVE SCENARIO --- // Outcrossing reproduction() {
// choose one mate per individual, based on interaction strength mate = i2.drawByStrength(individual, 1); //mate = i2.nearestNeighbors(individual, 1); // if mate is found, otherwise no offspring produced if (mate.size()) { // number of offspring drawn from poisson distribution // mean of 1/L, larger L = less likely reproduction = more overlapping generations**
nOff = rpois(1, FECUN); // record # of offspring per individual, running tally individual.tag = individual.tag + nOff;

        for (i in seqLen(nOff)) {
            offspring = subpop.addCrossed(individual,mate);         
            do pos = individual.spatialPosition + rnorm(2, 0, SD); // disperse SD away 
            // reprising boundaries         
            while (!p1.pointInBounds(pos));
            offspring.setSpatialPosition(pos);
            offspring.tag=0;                    
        }   
    }
    return;
}

// --- SET UP INITIAL SPATIAL POPULATION --- 1 early() { sim.addSubpop("p1", asInteger(KWW)); // initialize population size (max) p1.setSpatialBounds(c(0, 0, W, W)); // set spatial bounds on p1 // random initial positions, loop over all individuals for (ind in p1.individuals) { ind.setSpatialPosition(p1.pointUniform()); ind.tag = 0; // initialize a tag value per individual (records # offspring) } // make the initial map (all nonurban): defineConstant("mapValues",matrix(repEach(c(n),asInteger(W*W)),ncol=50)); p1.defineSpatialMap("Kmap", "xy", mapValues, interpolate=T, valueRange=c(p, g), colors=c("black", "white"));

}

// --- RESCALE FITNESS DUE TO SPATIAL VARIATION IN DENSITY DEPENDENCE --- // (Competition + Mortality) 1: early() { i1.evaluate(sim.subpopulations); inds = p1.individuals; // calculate competition strengths of all neighbours competition = i1.localPopulationDensity(inds); inds.tagF = competition;

// apply competition adjusted for local habitat quality
for (ind in inds) {
    // determine local carrying capacity
    Klocal = p1.spatialMapValue("Kmap", ind.spatialPosition);
    // calculate probability of survival, see derivation in Battey et al 2020 appendix
    // capped at 95% to avoid very old individuals in sparse areas
    ind.fitnessScaling = pmin(0.98, 1/(1 + RHO * ind.tagF/Klocal)); 
}

//print(p1.selfingRate);

}

// --- RECORD TREE SEQUENCES & SET EXTINCTION STOP CONDITIONS --- 500000: late(){ //sim.treeSeqRememberIndividuals(p1.individuals);
// remember all individuals every 10 generations if(sim.cycle % 10 == 1){ sim.treeSeqRememberIndividuals(p1.individuals); } // Extinction stop conditions (simulation ends if p1 contains no individuals) if(p1.individualCount == 0){ sim.treeSeqOutput("CO_25_outcrossing_UinfR_Disp1_LifSp5.trees"); sim.simulationFinished(); } }

// --- OUTPUT TREE SEQUENCES --- 505101 late() { sim.treeSeqOutput("CO_25_outcrossing_UinfR_Disp1_LifSp5_10e6_nourb_2.trees"); sim.simulationFinished(); } `

And that I use pyslim to recapitate and overlay mutations: `genmean = np.nanmean(pyslim.individual_ages_at(orig_ts,0)) recap_ts = pyslim.recapitate(orig_ts,recombination_rate=1e-9, ancestral_Ne=5000, random_seed=42) ts = msprime.sim_mutations( recap_ts, rate=(1e-8)/genmean, model=msprime.SLiMMutationModel(type=0), keep=True)

sum([t.num_roots == 1 for t in ts.trees()]) print(f"The tree sequence now has {ts.num_mutations} mutations,\n" f"and mean pairwise nucleotide diversity is {ts.diversity():0.3e}.")`

My problem is that the levels of diversity I obtain in the two scenarios is quite different, with the 10,000 burn-in one displaying lower pi (the 500,000 burn-in has about 115,000 mutations whereas the 10,000gen burn-in has about 95,000). Is that a normal difference or is there something odd in that results?

Thanks for your help,

Aude

bhaller commented 7 months ago

Just to keep things in sync: I think (?) this is the same question as was asked on slim-discuss, and Peter has answered there: https://groups.google.com/g/slim-discuss/c/3OxxzD1Ogjc/m/EgZqf5BjAAAJ.