miaSim 1.6.0
miaSim
implements tools for simulating microbial community data
based on various ecological models. These can be used to simulate
species abundance matrices, including time series. A detailed
function documentation can be viewed at the function reference
page
Install the Bioconductor release version with
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
Load the library
library(miaSim)
Some of the models rely on interaction matrices that represents interaction heterogeneity between species. The interaction matrix can be generated with different distributional assumptions.
Generate interactions from normal distribution:
A_normal <- powerlawA(n_species = 4, alpha = 3)
Generate interactions from uniform distribution:
A_uniform <- randomA(n_species = 10, diagonal = -0.4, connectance = 0.5, interactions = runif(n = 10^2, min = -0.8, max = 0.8))
The generalized Lotka-Volterra simulation model generates time-series assuming microbial population dynamics and interaction.
glvmodel <- simulateGLV(n_species = 4, A = A_normal, t_start = 0,
t_store = 100, t_end=100, stochastic = FALSE, norm = FALSE)
miaViz::plotSeries(glvmodel, "time")
Ricker model is a discrete version of the gLV:
rickermodel <- simulateRicker(n_species=4, A = A_normal, t_end=100, norm = FALSE)
The number of species specified in the interaction matrix must be the same as the species used in the models.
Hubbell Neutral simulation model characterizes diversity and relative abundance of species in ecological communities assuming migration, births and deaths but no interactions. Losses become replaced by migration or birth.
hubbellmodel <- simulateHubbell(n_species = 8, M = 10, carrying_capacity = 1000,
k_events = 50, migration_p = 0.02, t_end = 100)
One can also simulate parameters for the Hubbell model.
hubbellmodelRates <- simulateHubbellRates(x0 = c(0,5,10),
migration_p = 0.1, metacommunity_probability = NULL, k_events = 1,
growth_rates = NULL, norm = FALSE, t_end=100)
miaViz::plotSeries(hubbellmodelRates, "time")
The Self-Organised Instability (SOI) model generates time series for communities and accelerates stochastic simulation.
soimodel <- simulateSOI(n_species = 4, carrying_capacity = 1000,
A = A_normal, k_events=5, x0 = NULL, t_end = 150, norm = TRUE)
Stochastic logistic model is used to determine dead and alive counts in community.
logisticmodel <- simulateStochasticLogistic(n_species = 5)
miaViz::plotSeries(logisticmodel, x = "time")
model_transformed <- mia::transformCounts(logisticmodel, method = "relabundance")
The consumer resource model requires the use of the randomE
function, which returns a matrix containing the production rates and
consumption rates of each species. The resulting matrix is used as a
determination of resource consumption efficiency.
crmodel <- simulateConsumerResource(n_species = 2,
n_resources = 4,
E = randomE(n_species = 2, n_resources = 4))
miaViz::plotSeries(crmodel, "time")
# example to get relative abundance and relative proportion of resources
#'norm = TRUE' can be added as a parameter.
# convert to relative abundance
ExampleCR <- mia::transformCounts(crmodel, method = "relabundance")
miaViz::plotSeries(ExampleCR, "time")
#Recommended standard way to generate a set of n simulations (n=2 here) from a given model
simulations <- lapply(seq_len(2), function (i) {do.call(simulateConsumerResource, params)})
# Visualize the model for the first instance
miaViz::plotSeries(simulations[[1]], "time")
# List state for each community (instance) at its last time point;
# this results in instances x species matrix; means and variances per species can be computed col-wise
communities <- t(sapply(simulations, function (x) {assay(x, "counts")[, which.max(x$time)]}))
# Some more advanced examples for hardcore users:
# test leave-one-out in CRM
.replaceByZero <- function(input_list) { # params_iter$x0 as input_list
if (!all(length(input_list) == unlist(unique(lapply(input_list, length))))) {
stop("Length of input_list doesn't match length of element in it.")
}
for (i in seq_along(input_list)) {
input_list[[i]][[i]] <- 0
}
return(input_list)
}
.createParamList <- function(input_param, n_repeat, replace_by_zero = FALSE) {
res_list <- vector(mode = "list", length = n_repeat)
for (i in seq_len(n_repeat)) {
res_list[[i]] <- input_param
}
res_list <- lapply(seq_len(n_repeat), function (i) {input_param})
}
# example of generateSimulations
# FIXME: reduce computational load by lowering the number of species and timesteps in the demo
params <- list(
n_species = 10,
n_resources = 5,
E = randomE(
n_species = 10, n_resources = 5,
mean_consumption = 1, mean_production = 3
),
x0 = rep(0.001, 10),
resources = rep(1000, 5),
monod_constant = matrix(rbeta(10 * 5, 10, 10), nrow = 10, ncol = 5),
inflow_rate = .5,
outflow_rate = .5,
migration_p = 0,
stochastic = TRUE,
t_start = 0,
t_end = 20,
t_store = 100,
growth_rates = runif(10),
norm = FALSE
)
# Test overwrite params
.createParamList <- function(input_param, n_repeat, replace_by_zero = FALSE) {
res_list <- unname(as.list(data.frame(t(matrix(rep(input_param, n_repeat), nrow = n_repeat)))))
}
paramx0 <- .createParamList(input_param = rep(0.001, 10), n_repeat = 10,
replace_by_zero = TRUE)
paramresources <- .createParamList(input_param = rep(1000, 5), n_repeat = 10)
params_iter <- list(x0 = paramx0, resources = paramresources)
simulations <- lapply(seq_len(2), function (i) {do.call(simulateConsumerResource, params)})
simulations_2 <- .generateSimulations(
model = "simulateConsumerResource",
params_list = params, param_iter = params_iter, n_instances = 1, t_end = 20
)
estimatedA <- .estimateAFromSimulations(simulations, simulations_2, n_instances = 1,
scale_off_diagonal = 1, diagonal = -0.5, connectance = 0.2
) / 1000
# Using these parameters with a specified simulator
m <- simulateGLV(n_species = 10, x0 = params$x0,
A = estimatedA, growth_rates = params$growth_rates, t_end = 20, t_store = 100)
miaViz::plotSeries(m, "time") # Plotting
The simulation functions gives TreeSummarizedExperiment
[@TreeSE]
object.
This provides access to a broad range of tools for microbiome analysis that support this format (see microbiome.github.io). More examples on can be found at OMA Online Manual. Other fields, such as rowData containing information about the samples, and colData, consisting of sample metadata describing the samples, or phylogenetic trees, can be added as necessary.
For instance, we can use the miaViz
R/Bioconductor package to
visualize the microbial community time series.
sessionInfo()
## R version 4.3.0 RC (2023-04-13 r84269)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] miaSim_1.6.0 TreeSummarizedExperiment_2.8.0
## [3] Biostrings_2.68.0 XVector_0.40.0
## [5] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.0
## [7] Biobase_2.60.0 GenomicRanges_1.52.0
## [9] GenomeInfoDb_1.36.0 IRanges_2.34.0
## [11] S4Vectors_0.38.0 BiocGenerics_0.46.0
## [13] MatrixGenerics_1.12.0 matrixStats_0.63.0
## [15] cluster_2.1.4 philentropy_0.7.0
## [17] dplyr_1.1.2 ape_5.7-1
## [19] sna_2.7-1 statnet.common_4.8.0
## [21] network_1.18.1 GGally_2.1.2
## [23] colourvalues_0.3.9 ggplot2_3.4.2
## [25] BiocStyle_2.28.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 jsonlite_1.8.4
## [3] MultiAssayExperiment_1.26.0 magrittr_2.0.3
## [5] magick_2.7.4 ggbeeswarm_0.7.1
## [7] farver_2.1.1 rmarkdown_2.21
## [9] zlibbioc_1.46.0 vctrs_0.6.2
## [11] memoise_2.0.1 DelayedMatrixStats_1.22.0
## [13] RCurl_1.98-1.12 ggtree_3.8.0
## [15] htmltools_0.5.5 BiocNeighbors_1.18.0
## [17] deSolve_1.35 gridGraphics_0.5-1
## [19] sass_0.4.5 pracma_2.4.2
## [21] bslib_0.4.2 plyr_1.8.8
## [23] DECIPHER_2.28.0 cachem_1.0.7
## [25] igraph_1.4.2 lifecycle_1.0.3
## [27] pkgconfig_2.0.3 rsvd_1.0.5
## [29] Matrix_1.5-4 R6_2.5.1
## [31] fastmap_1.1.1 GenomeInfoDbData_1.2.10
## [33] aplot_0.1.10 digest_0.6.31
## [35] ggnewscale_0.4.8 colorspace_2.1-0
## [37] reshape_0.8.9 patchwork_1.1.2
## [39] scater_1.28.0 irlba_2.3.5.1
## [41] RSQLite_2.3.1 vegan_2.6-4
## [43] beachmat_2.16.0 labeling_0.4.2
## [45] fansi_1.0.4 mgcv_1.8-42
## [47] polyclip_1.10-4 compiler_4.3.0
## [49] bit64_4.0.5 withr_2.5.0
## [51] BiocParallel_1.34.0 viridis_0.6.2
## [53] DBI_1.1.3 highr_0.10
## [55] ggforce_0.4.1 MASS_7.3-59
## [57] poweRlaw_0.70.6 DelayedArray_0.26.0
## [59] permute_0.9-7 tools_4.3.0
## [61] vipor_0.4.5 beeswarm_0.4.0
## [63] glue_1.6.2 nlme_3.1-162
## [65] grid_4.3.0 mia_1.8.0
## [67] reshape2_1.4.4 generics_0.1.3
## [69] gtable_0.3.3 tidyr_1.3.0
## [71] BiocSingular_1.16.0 tidygraph_1.2.3
## [73] ScaledMatrix_1.8.0 utf8_1.2.3
## [75] ggrepel_0.9.3 pillar_1.9.0
## [77] stringr_1.5.0 yulab.utils_0.0.6
## [79] splines_4.3.0 tweenr_2.0.2
## [81] treeio_1.24.0 lattice_0.21-8
## [83] bit_4.0.5 tidyselect_1.2.0
## [85] DirichletMultinomial_1.42.0 scuttle_1.10.0
## [87] knitr_1.42 gridExtra_2.3
## [89] bookdown_0.33 xfun_0.39
## [91] graphlayouts_0.8.4 miaViz_1.8.0
## [93] stringi_1.7.12 ggfun_0.0.9
## [95] lazyeval_0.2.2 yaml_2.3.7
## [97] evaluate_0.20 codetools_0.2-19
## [99] ggraph_2.1.0 tibble_3.2.1
## [101] BiocManager_1.30.20 ggplotify_0.1.0
## [103] cli_3.6.1 munsell_0.5.0
## [105] jquerylib_0.1.4 Rcpp_1.0.10
## [107] coda_0.19-4 parallel_4.3.0
## [109] blob_1.2.4 sparseMatrixStats_1.12.0
## [111] bitops_1.0-7 decontam_1.20.0
## [113] viridisLite_0.4.1 tidytree_0.4.2
## [115] scales_1.2.1 purrr_1.0.1
## [117] crayon_1.5.2 rlang_1.1.0