Open thibautjombart opened 5 months ago
Do you have profiles for this, especially from a case where this was a limiting factor? The generated odin code should be fairly easy to modify to swap in a proposed sparse matrix implementation for comparison that would help prioritise this.
Thanks for your reply. I have not gone all the way to proposing a C implementation (bit out of my depth) but here is a reprex illustrating the scaling issue. Sorry it is long but I included odin code for 2 models:
Reprex below:
library(odin)
if (!require(peakRAM)) install.packages("peakRAM")
#> Loading required package: peakRAM
# odin model: metapopulation, frequency-dependent SIR model
#
# Arguments passed to .$new() include:
#
# n_patches: the total number of patches
# delta: a matrix of spatial diffusion of FOI from/to patches (dim: n_patches x n_patches)
# pop_sizes: total population sizes (length: n_patches)
# ini_infected: initial number of infected (length: n_patches)
spatial_sir_txt <-
"
# compartment updates
update(S[]) <- S[i] - n_SI[i]
update(I[]) <- I[i] + n_SI[i] - n_IR[i]
update(R[]) <- R[i] + n_IR[i]
# compute forces of infection
# note: if delta is sparse, wasted computational and RAM allocation time here
lambda_prod[, ] <- delta[i, j] * I[i] / N[i]
lambda[] <- sum(lambda_prod[, i]) * beta # sum by column
# individual probabilities of transition
p_SI[] <- 1 - exp(-lambda[i]) # S to I
p_IR[] <- 1 - exp(-gamma) # I to R
# draws from binomial distributions for numbers changing between
# compartments:
n_SI[] <- rbinom(S[i], p_SI[i])
n_IR[] <- rbinom(I[i], p_IR[i])
# Total population size
N[] <- S[i] + I[i] + R[i]
# Initial states:
initial(S[]) <- pop_size[i] - ini_infected[i]
initial(I[]) <- ini_infected[i]
initial(R[]) <- 0
## Initialisations and dimensions
pop_size[] <- user()
ini_infected[] <- user()
beta <- user(0.2)
gamma <- user(0.1)
delta[,] <- user()
n_patches <- user(1)
dim(pop_size) <- n_patches
dim(ini_infected) <- n_patches
dim(S) <- n_patches
dim(I) <- n_patches
dim(R) <- n_patches
dim(N) <- n_patches
dim(p_SI) <- n_patches
dim(p_IR) <- n_patches
dim(n_SI) <- n_patches
dim(n_IR) <- n_patches
dim(delta) <- c(n_patches, n_patches)
dim(lambda_prod) <- c(n_patches, n_patches)
dim(lambda) <- n_patches
"
basic_sir_txt <-
"
# compartment updates
update(S[]) <- S[i] - n_SI[i]
update(I[]) <- I[i] + n_SI[i] - n_IR[i]
update(R[]) <- R[i] + n_IR[i]
# compute forces of infection
lambda[] <- beta * I[i] / N[i]
# individual probabilities of transition
p_SI[] <- 1 - exp(-lambda[i]) # S to I
p_IR[] <- 1 - exp(-gamma) # I to R
# draws from binomial distributions for numbers changing between
# compartments:
n_SI[] <- rbinom(S[i], p_SI[i])
n_IR[] <- rbinom(I[i], p_IR[i])
# Total population size
N[] <- S[i] + I[i] + R[i]
# Initial states:
initial(S[]) <- pop_size[i] - ini_infected[i]
initial(I[]) <- ini_infected[i]
initial(R[]) <- 0
## Initialisations and dimensions
pop_size[] <- user()
ini_infected[] <- user()
beta <- user(0.2)
gamma <- user(0.1)
n_patches <- user(1)
dim(pop_size) <- n_patches
dim(ini_infected) <- n_patches
dim(S) <- n_patches
dim(I) <- n_patches
dim(R) <- n_patches
dim(N) <- n_patches
dim(p_SI) <- n_patches
dim(p_IR) <- n_patches
dim(n_SI) <- n_patches
dim(n_IR) <- n_patches
dim(lambda) <- n_patches
"
# model generators
spatial_sir <- odin(spatial_sir_txt)
#> Loading required namespace: pkgbuild
#> Generating model in c
#> ℹ Re-compiling odin96e90a8a (debug build)
#> ── R CMD INSTALL ───────────────────────────────────────────────────────────────
#> ─ installing *source* package 'odin96e90a8a' ...
#> ** using staged installation
#> ** libs
#> using C compiler: 'gcc.exe (GCC) 12.3.0'
#> gcc -I"C:/PROGRA~1/R/R-43~1.3/include" -DNDEBUG -I"C:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/include" -O2 -Wall -gdwarf-2 -mfpmath=sse -msse2 -mstackrealign -UNDEBUG -Wall -pedantic -g -O0 -c odin.c -o odin.o
#> gcc -I"C:/PROGRA~1/R/R-43~1.3/include" -DNDEBUG -I"C:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/include" -O2 -Wall -gdwarf-2 -mfpmath=sse -msse2 -mstackrealign -UNDEBUG -Wall -pedantic -g -O0 -c registration.c -o registration.o
#> gcc -shared -static-libgcc -o odin96e90a8a.dll tmp.def odin.o registration.o -LC:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/lib/x64 -LC:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/lib -LC:/PROGRA~1/R/R-43~1.3/bin/x64 -lR
#> installing to C:/Users/thiba/AppData/Local/Temp/RtmpQdYUmU/devtools_install_77c73eb784b/00LOCK-file77c67194186/00new/odin96e90a8a/libs/x64
#> ─ DONE (odin96e90a8a)
#>
#> ℹ Loading odin96e90a8a
basic_sir <- odin(basic_sir_txt)
#> Generating model in c
#> ℹ Re-compiling odin88290f56 (debug build)
#> ── R CMD INSTALL ───────────────────────────────────────────────────────────────
#> ─ installing *source* package 'odin88290f56' ...
#> ** using staged installation
#> ** libs
#> using C compiler: 'gcc.exe (GCC) 12.3.0'
#> gcc -I"C:/PROGRA~1/R/R-43~1.3/include" -DNDEBUG -I"C:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/include" -O2 -Wall -gdwarf-2 -mfpmath=sse -msse2 -mstackrealign -UNDEBUG -Wall -pedantic -g -O0 -c odin.c -o odin.o
#> gcc -I"C:/PROGRA~1/R/R-43~1.3/include" -DNDEBUG -I"C:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/include" -O2 -Wall -gdwarf-2 -mfpmath=sse -msse2 -mstackrealign -UNDEBUG -Wall -pedantic -g -O0 -c registration.c -o registration.o
#> gcc -shared -static-libgcc -o odin88290f56.dll tmp.def odin.o registration.o -LC:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/lib/x64 -LC:/RBuildTools/4.3/x86_64-w64-mingw32.static.posix/lib -LC:/PROGRA~1/R/R-43~1.3/bin/x64 -lR
#> installing to C:/Users/thiba/AppData/Local/Temp/RtmpQdYUmU/devtools_install_77c2766711/00LOCK-file77c13472594/00new/odin88290f56/libs/x64
#> ─ DONE (odin88290f56)
#>
#> ℹ Loading odin88290f56
Helper functions to create SIR models of different sizes and time the model creation. Note we use 2 separate functions as model inputs differ.
@param
n number of patches of the model
@param
t number of time steps to run the model for
time_spatial_sir <- function(n, t = 10) {
peakRAM::peakRAM(
spatial_sir$new(
n_patches = n,
pop_size = rep(1000, n),
ini_infected = rep(1, n),
delta = diag(n)
)$run(t)
)
}
time_basic_sir <- function(n, t = 10) {
peakRAM::peakRAM(
basic_sir$new(
n_patches = n,
pop_size = rep(1000, n),
ini_infected = rep(1, n)
)$run(t)
)
}
# Size of the models in number of patches
model_sizes <- c(10, 100, 1000, 2000, 10000, 15000)
# note: size of the delta matrix for larger models:
# 2e4 patches: 3 Gb
# 3e4 patches: 6.7 Gb
# 4e4 patches: 11.9 Gb
gc()
#> used (Mb) gc trigger (Mb) max used (Mb)
#> Ncells 948871 50.7 1811591 96.8 1447368 77.3
#> Vcells 1708552 13.1 8388608 64.0 2699841 20.6
results_spatial <- lapply(
model_sizes,
time_spatial_sir
)
gc()
#> used (Mb) gc trigger (Mb) max used (Mb)
#> Ncells 955403 51.1 1811591 96.8 1811591 96.8
#> Vcells 1722866 13.2 210634364 1607.1 228977115 1747.0
results_basic <- lapply(
model_sizes,
time_basic_sir
)
# arrange and plot results
results_spatial <- Reduce(rbind.data.frame, results_spatial)
results_spatial$type <- "Spatial"
results_basic <- Reduce(rbind.data.frame, results_basic)
results_basic$type <- "Basic"
results <- rbind(results_spatial, results_basic)
results$size <- rep(model_sizes, 2)
library(ggplot2)
ggplot(results, aes(x = size, y = Elapsed_Time_sec, color = type)) +
geom_point(size = 3) +
geom_line() +
theme_bw() +
labs(
x = "Number of patches",
y = "Runtime (s)",
title = "Runtimes of the 2 implementations"
)
ggplot(results, aes(x = size, y = Peak_RAM_Used_MiB, color = type)) +
geom_point(size = 3) +
geom_line() +
theme_bw() +
labs(
x = "Number of patches",
y = "Peak RAM use (MiB)",
title = "RAM use of the 2 implementations"
)
Created on 2024-05-20 with reprex v2.1.0
I am not sure if R has a canonical data representation for sparse matrices; the 'Matrix' package seems like the likely candidate according to this post. It seems Matrix's matrix multiplication is implemented in src/matmult.c. Worth cloning if only to remember that subversion and R-forge are a thing.
Meta-population models with connectivity between patches typically require a matrix specifying proportions of the forces of infection going from patches to patches (example here). Such matrices can grow rather large as they scale as the square of the number of patches, so that memory allocation alone can take substantial time compared to running simulations, and can become a limiting factor. A solution would be to implement support for sparse matrices. Has this been considered before? How difficult do you think this would be to implement?