cesaraustralia / Dispersal.jl

Tools for simulating organism dispersal
Other
21 stars 3 forks source link

Create a new type for Population #72

Open virgile-baudrot opened 3 years ago

virgile-baudrot commented 3 years ago

Following discussion on Population type in https://github.com/cesaraustralia/Dispersal.jl/issues/70#issuecomment-737756996_

I start reviewing the possibilities, given quite big "Comment". There are 2 parts: one about Genetics, a second about Phenotypes

Genetics

For the Genetics, we have to define Locus, a Zygote_unit and a Zygote (an individual) and Deme (Population).

I use Locus definition like https://en.wikipedia.org/wiki/Locus_(genetics). The argument ploidy which is additional to the definition is to facilitate the handling of polysomy (like trisomy) but don't know if it's relevant for know.

struct Locus # internal
    chromosome::Int32 # to specify the number of the chromosome
    arm::Bool # 0 and 1 are short and long arm (i.e., p-arm and q-arm)
    position::Float32 # for distance and sub-band
    ploidy::Int32 # ploidy for this chormosome number
end
# no operation are possible on Locus

chromosome(l::Locus) = l.chromosome
arm(l::Locus) = l.arm
position(l::Locus) = l.position
ploidy(l::Locus) = l.ploidy

L1a = Locus(1,0,1,2) # locus on diploid
chromosome(L1a)
arm(L1a)
position(L1a)
ploidy(L1a)

L1b = Locus(1,0,1,2) # it can have many locus on chromosome 1
L1a == L1b # TRUE
L2 = Locus(2,0,1,2) 
L1a == L2 # FALSE

The Zygote_unit is used to define the zygosity for a single Locus

struct Zygote_unit
    locus::Locus
    zygosity::Array{Char, 1} # Char should be enough since we have Locus!
    # check Ploidy
    function Zygote_unit(l,z)
        if length(z) != ploidy(l)
            throw(ArgumentError("zygosity does not match locus ploidy"))
        end
        new(l,z)
    end
end

locus(zu::Zygote_unit) = zu.locus
zygosity(zu::Zygote_unit) = zu.zygosity
ploidy(zu::Zygote_unit) = ploidy(locus(zu))
chromosome(zu::Zygote_unit) = chromosome(locus(zu))

Lz = Locus(1,0,1,2)
Zygote_unit(Lz,['R','S']) # ok
Zygote_unit(Lz,['R']) # error
Zygote_unit(Lz,['R', 'S', 'R']) # error

Zz = Zygote_unit(Lz,['R','S']) # ok
locus(Zz)
zygosity(Zz)
ploidy(Zz)
chromosome(Zz)

import Base: push!
function push!(zu1::Zygote_unit, zu2::Zygote_unit) 
    init = [zu1]
    if chromosome(zu1) == chromosome(zu2)
        if ploidy(zu1) == ploidy(zu2)
            push!(init,zu2)
        else
            throw(ArgumentError("ploidies does not match"))
        end
    else
        push!(init,zu2)
    end
end

L1 = Locus(1,0,1,2)
L2 = Locus(2,0,1,2) 
L3 = Locus(1,0,1,3) 

Z1 = Zygote_unit(L1,['R','R'])
Z2 = Zygote_unit(L2,['R','S'])
push!(Z1,Z2) # should be validated
Z3 = Zygote_unit(L3,['R','S', 'A'])
push!(Z1,Z3) # should be error

Then, a Zygote is a pool of Zygote_unit use to define an individual

abstract type AbstractIndividual end
# Individual can be add in a population, multiply (reproduction?), remove from a population, ...

struct Zygote <: AbstractIndividual
    genotype::Array{Zygote_unit, 1}
    # internal constructor
    # # TODO: (1) respect ploidy of chromosome
    # # TODO: (2) respect unicity of Locus
    # function Zygote(g)
    #     ... 
    # end
end

# What is it to make a dispersal of an individual?
# Base.:+(z::Zygote, x::Number) = ... # no sense
# Base.:+(x::Number, z::Zygote) = ... # no sense
# Base.:+(z1::Zygote, z2::Zygote) = ... # could be concat, but concat in Julia is with '*'
# Base.:-(z::Zygote, x::Number) = ... # like +
# Base.:-(x::Number, z::Zygote) = ...
# Base.:-(z1::Zygote, z2::Zygote) = ...
# Base.:*(z::Deme, x::Number) = ... # like reproduction ?
# Base.:*(x::Number, z::Deme) = ... # no sense
# Base.:*(x::Deme, z::Deme) = ... # no sense
# Base.:/(z::Deme, x::Number) = ... # no sense... in-dividual
# Base.:/(x::Number, z::Deme) = ... # no sense... in-dividual
# Base.:/(x::Deme, z::Deme) = ... # no sense... in-dividual

Z1RR = Zygote_unit(L1,['R','R'])
Z1RS = Zygote_unit(L1,['R','S'])
Z1SS = Zygote_unit(L1,['S','S'])
Z2RR = Zygote_unit(L2,['R','R'])
Z2RS = Zygote_unit(L2,['R','S'])
Z2SS = Zygote_unit(L2,['S','S'])

# From this 3 zygote_unit we can have 9 genotypes, each defining a Zygote
Zygote_1RR_2RR =  Zygote([Z1RR,Z2RR])
Zygote_1RR_2RS =  Zygote([Z1RR,Z2RS])
Zygote_1RR_2SS =  Zygote([Z1RR,Z2SS])
Zygote_1RS_2RR =  Zygote([Z1RS,Z2RR])
Zygote_1RS_2RS =  Zygote([Z1RS,Z2RS])
Zygote_1RS_2SS =  Zygote([Z1RS,Z2SS])
Zygote_1SS_2RR =  Zygote([Z1SS,Z2RR])
Zygote_1SS_2RS =  Zygote([Z1SS,Z2RS])
Zygote_1SS_2SS =  Zygote([Z1SS,Z2SS])

# indexin(chromosome(zu),chromosome(z))[1] != nothing 
# function push!(z::Zygote,zu::Zygote_unit) 
#     index = indexin(chromosome(zu),chromosome(z))[1]
#     if index != nothing
#         if ploidy(zu) == ploidy(z[index])
#             push!(zu1,zu2)
#         else
#             throw(ArgumentError("ploidy does not match"))
#         end
#     else
#         push!(zu1,zu2)
#     end
# end

Finally, come the definition for Population. A Deme is a pool of Zygote.

# We should be able to define all operation (+,-,*,/, zero and dot) on AbstractPopulation object in order to be used with DynamicGrids.

abstract type AbstractPopulation end

struct Deme <: AbstractPopulation
    genotypes::Array{Zygote, 1}
end

Deme([Zygote([Z1RR]), Zygote([Z1RS]), Zygote([Z1SS]), Zygote([Z1SS])])

import Base

# Base.:+(d::Deme, x::Number) = ... # no sense
# Base.:+(x::Number, d::Deme) = ... # no sense
# Base.:+(d1::Deme, d2::Deme) = ... # could be concat, but concat in Julia is with '*'
# Base.:-(d::Deme, x::Number) = ... 
# Base.:-(x::Number, d::Deme) = ...
# Base.:-(d1::Deme, d2::Deme) = ...

# Base.:*(d::Deme, x::Number) = ... # Concatenation ?
# Base.:*(x::Number, d::Deme) = ... # a repeat concatenation, would be more relevant with '^'
# Base.:*(x::Deme, d::Deme) = ... # a repeat concatenation, would be more relevant with '^'

# Base.zero(::Type{<:Deme{T}}) where {T} = Deme(zero(T)) # should look like this

Since some difficulties on the Deme object, I found usefull to define a StatisticsPopulation. The StatisticsPopulation is then super easy to use for phenotypes.

# Maybe every thing make sense for a StatisticsPopulation rather than an AbstractPopulation

abstract type  StatisticsPopulation <: AbstractPopulation end

struct StatsDeme <: StatisticsPopulation
    size::Float64 # size of the population. Could be a Int64 but hard to use frequency/statistics on it
    genotypes::Array{Zygote, 1}
    frequency::Array{Float64,1}
    # TODO: 1. elements in `genotype` must be unique!
    # TODO: 2. length of `genotype` must = length frequency => and used to defined size
    # TODO: 3. frequency is not neccessary sum to 1, but scaling as to be done!
    function StatsDeme(s,g,f)
        new(s,g,f./sum(f))
    end
end

size(sd::StatsDeme) = sd.size
genotypes(sd::StatsDeme) = sd.genotypes
frequency(sd::StatsDeme) = sd.frequency

SD1 = StatsDeme(
    3,
    [Zygote([Z1RR]), Zygote([Z1RS]), Zygote([Z1SS])], # the possible genotypes
    [1, 2, 7] # the frequency of each
 )

size(SD1)
frequency(SD1)
genotypes(SD1)

import Base
Base.:*(a::Number, sd::StatsDeme) = StatsDeme(a*sd.size, sd.genotypes, sd.frequency)
Base.:*(sd::StatsDeme, a::Number) = StatsDeme(a*sd.size, sd.genotypes, sd.frequency)

size(SD1*4)
size(SD1*0.2)

# Base.:+(sd::StatsDeme, a::Number) = ... # no sense ?
# Base.:+(a::Number, sd::StatsDeme) = ... # no sense ?
# Base.:+(sd1::StatsDeme, sd2::StatsDeme) = ... # Concat... but in Julia it's '*' 
# Base.:-(sd::StatsDeme, a::Number) = ... # no sense ?
# Base.:-(a::Number, sd::StatsDeme) = ... # no sense ?
# Base.:-(sd1::StatsDeme, sd2::StatsDeme) = ... # Concat... but in Julia it's '*' 

# Base.zero(::Type{<:StatsDeme{T1,T2,T3}}) where {T1,T2,T3} = StatsDeme(zero(T1), zero(T2), zero(T3))
virgile-baudrot commented 3 years ago

and the second part on Phenotypes...

Phenotype

Phenotype is easier. First with a Phenotype_unitt giving the phenotype value for a single individual

struct Phenotype_unit
    name::String
    value::Float64 # value of the trait for 1 individual
end

name(pu::Phenotype_unit) = Phenotype_unit.name
value(pu::Phenotype_unit) = Phenotype_unit.value

Base.:*(p::Phenotype_unit, x::Number) = Phenotype_unit(x*p.phenotype) 
Base.:*(x::Number, p::Phenotype_unit) = Phenotype_unit(x*p.phenotype) 
Base.:+(p::Phenotype_unit, x::Number) = Phenotype_unit(p.phenotype + x)  
Base.:+(x::Number, p::Phenotype_unit) = Phenotype_unit(p.phenotype + x)  
# Base.:+(p1::Phenotype_unit, p2::Phenotype_unit) =  ... sense ? 
Base.:-(p::Phenotype_unit, x::Number) = Phenotype_unit(p.phenotype - x) 
Base.:-(x::Number, p::Phenotype_unit) = Phenotype_unit(p.phenotype - x) 
# Base.:-(p1::Phenotype_unit, p2::Phenotype_unit) = ... # sense ?
Base.zero(::Type{<:Phenotype_unit{T}}) where {T} = Phenotype_unit(zero(T))

Then there is several possibility


struct Phenotype <: AbstractIndividual
    phenotype::Array{Phenotype_unit, 1} # set of values of trait for 1 individual
    # TODO: 1. Each name of Phenotype_unit has to be unique within the phenotype vector
end

# Using genericity of Phenotype Unit
# Base.:*(p::Phenotype, x::Number) = ...
# Base.:*(x::Number, p::Phenotype) = ...

struct Population <: AbstractPopulation
    size::Float64 # Size of the population. Again an Integer could be relevant
    phenotypes::Array{Phenotype, 1} # set of values of trait for 1 individual
    # TODO: size is the length of phenotypes vector
end

size(pp::Population) = pp.size
phenotypes(pp::Population) = pp.phenotypes

Then, converting a Population to a StatsPopulation allow to make some operation by assuming Normal distribution of phenotypes, we can develop: X ~ N(\mu, \sigma^2 ) => a X + b ~ N(a \mu + b, a^2 \sigma^2) X ~ N(\mu_x, \sigma_x^2 ) and Y ~ N(\mu_y, \sigma_y^2 ) => X + Y ~ N(\mu_x + \mu_y, \sigma_x^2 + \sigma_y^2)


struct StatsPopulation <: StatisticsPopulation
    size::Int64 
    phenotypeNames::Array{String, 1} # retrieve from Phenotype_unit
    phenotypeMean::Array{Float64, 1}
    phenotypeVariance::Array{Float64, 1}
    # TODO: INTERNAL CONSTRUCTOR based on 
end

size(sp::StatsPopulation) = sp.size
names(sp::StatsPopulation) = sp.phenotypeNames
means(sp::StatsPopulation) = sp.phenotypeMean
variances(sp::StatsPopulation) = sp.phenotypeVariance