3. Example Data

The HITChip Atlas dataset contains genus-level microbiota profiling with HITChip for 1006 western adults with no reported health complications, reported in (Lahti et al. 2014). The dataset is available via the microbiome R package (Lahti et al. 2017) in phyloseq (McMurdie and Holmes 2013) format.

data(atlas1006, package = "microbiome")
tse = mia::makeTreeSummarizedExperimentFromPhyloseq(atlas1006)

# subset to baseline
tse = tse[, tse$time == 0]

# Re-code the bmi group
tse$bmi = recode(tse$bmi_group,
                 obese = "obese",
                 severeobese = "obese",
                 morbidobese = "obese")
# Subset to lean, overweight, and obese subjects
tse = tse[, tse$bmi %in% c("lean", "overweight", "obese")]

# Create the region variable
tse$region = recode(as.character(tse$nationality),
                    Scandinavia = "NE", UKIE = "NE", SouthEurope = "SE", 
                    CentralEurope = "CE", EasternEurope = "EE",
                    .missing = "unknown")

# Discard "EE" as it contains only 1 subject
# Discard subjects with missing values of region
tse = tse[, ! tse$region %in% c("EE", "unknown")]

print(tse)

class: TreeSummarizedExperiment 
dim: 130 873 
metadata(0):
assays(1): counts
rownames(130): Actinomycetaceae Aerococcus ... Xanthomonadaceae
  Yersinia et rel.
rowData names(3): Phylum Family Genus
colnames(873): Sample-1 Sample-2 ... Sample-1005 Sample-1006
colData names(12): age sex ... bmi region
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
rowLinks: NULL
rowTree: NULL
colLinks: NULL
colTree: NULL

4. Run SECOM on a Single Ecosystem

4.1 Run secom functions

set.seed(123)
# Linear relationships
res_linear = secom_linear(data = list(tse), assay_name = "counts",
                          tax_level = "Phylum", pseudo = 0, 
                          prv_cut = 0.5, lib_cut = 1000, corr_cut = 0.5, 
                          wins_quant = c(0.05, 0.95), method = "pearson", 
                          soft = FALSE, thresh_len = 20, n_cv = 10, 
                          thresh_hard = 0.3, max_p = 0.005, n_cl = 2)

# Nonlinear relationships
res_dist = secom_dist(data = list(tse), assay_name = "counts",
                      tax_level = "Phylum", pseudo = 0, 
                      prv_cut = 0.5, lib_cut = 1000, corr_cut = 0.5, 
                      wins_quant = c(0.05, 0.95), R = 1000, 
                      thresh_hard = 0.3, max_p = 0.005, n_cl = 2)

4.2 Visualizations

Pearson correlation with thresholding

corr_linear = res_linear$corr_th
cooccur_linear = res_linear$mat_cooccur

# Filter by co-occurrence
overlap = 10
corr_linear[cooccur_linear < overlap] = 0

df_linear = data.frame(get_upper_tri(corr_linear)) %>%
  rownames_to_column("var1") %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  filter(!is.na(value)) %>%
  mutate(value = round(value, 2))

tax_name = sort(union(df_linear$var1, df_linear$var2))
df_linear$var1 = factor(df_linear$var1, levels = tax_name)
df_linear$var2 = factor(df_linear$var2, levels = tax_name)

heat_linear_th = df_linear %>%
  ggplot(aes(var2, var1, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey",
                       midpoint = 0, limit = c(-1,1), space = "Lab", 
                       name = NULL) +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  geom_text(aes(var2, var1, label = value), color = "black", size = 4) +
  labs(x = NULL, y = NULL, title = "Pearson (Thresholding)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, 
                                   face = "italic"),
        axis.text.y = element_text(size = 12, face = "italic"),
        strip.text.x = element_text(size = 14),
        strip.text.y = element_text(size = 14),
        legend.text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5, size = 15),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none") +
  coord_fixed()

heat_linear_th

Pearson correlation with p-value filtering

corr_linear = res_linear$corr_fl
cooccur_linear = res_linear$mat_cooccur

# Filter by co-occurrence
overlap = 10
corr_linear[cooccur_linear < overlap] = 0

df_linear = data.frame(get_upper_tri(corr_linear)) %>%
  rownames_to_column("var1") %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  filter(!is.na(value)) %>%
  mutate(value = round(value, 2))

tax_name = sort(union(df_linear$var1, df_linear$var2))
df_linear$var1 = factor(df_linear$var1, levels = tax_name)
df_linear$var2 = factor(df_linear$var2, levels = tax_name)

heat_linear_fl = df_linear %>%
  ggplot(aes(var2, var1, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey",
                       midpoint = 0, limit = c(-1,1), space = "Lab", 
                       name = NULL) +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  geom_text(aes(var2, var1, label = value), color = "black", size = 4) +
  labs(x = NULL, y = NULL, title = "Pearson (Filtering)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, 
                                   face = "italic"),
        axis.text.y = element_text(size = 12, face = "italic"),
        strip.text.x = element_text(size = 14),
        strip.text.y = element_text(size = 14),
        legend.text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5, size = 15),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none") +
  coord_fixed()

heat_linear_fl

Distance correlation with p-value filtering

corr_dist = res_dist$dcorr_fl
cooccur_dist = res_dist$mat_cooccur

# Filter by co-occurrence
overlap = 10
corr_dist[cooccur_dist < overlap] = 0

df_dist = data.frame(get_upper_tri(corr_dist)) %>%
  rownames_to_column("var1") %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  filter(!is.na(value)) %>%
  mutate(value = round(value, 2))

tax_name = sort(union(df_dist$var1, df_dist$var2))
df_dist$var1 = factor(df_dist$var1, levels = tax_name)
df_dist$var2 = factor(df_dist$var2, levels = tax_name)

heat_dist_fl = df_dist %>%
  ggplot(aes(var2, var1, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey",
                       midpoint = 0, limit = c(-1,1), space = "Lab", 
                       name = NULL) +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  geom_text(aes(var2, var1, label = value), color = "black", size = 4) +
  labs(x = NULL, y = NULL, title = "Distance (Filtering)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, 
                                   face = "italic"),
        axis.text.y = element_text(size = 12, face = "italic"),
        strip.text.x = element_text(size = 14),
        strip.text.y = element_text(size = 14),
        legend.text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5, size = 15),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none") +
  coord_fixed()

heat_dist_fl

5. Run SECOM on Multiple Ecosystems

5.1 Data manipulation

To compute correlations whithin and across different ecosystems, one needs to make sure that there are samples in common across these ecosystems.

# Select subjects from "CE" and "NE"
tse1 = tse[, tse$region == "CE"]
tse2 = tse[, tse$region == "NE"]

# Rename samples to ensure there is an overlap of samples between CE and NE
colnames(tse1) = paste0("Sample-", seq_len(ncol(tse1)))
colnames(tse2) = paste0("Sample-", seq_len(ncol(tse2)))

print(tse1)

class: TreeSummarizedExperiment 
dim: 130 578 
metadata(0):
assays(1): counts
rownames(130): Actinomycetaceae Aerococcus ... Xanthomonadaceae
  Yersinia et rel.
rowData names(3): Phylum Family Genus
colnames(578): Sample-1 Sample-2 ... Sample-577 Sample-578
colData names(12): age sex ... bmi region
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
rowLinks: NULL
rowTree: NULL
colLinks: NULL
colTree: NULL

print(tse2)

class: TreeSummarizedExperiment 
dim: 130 181 
metadata(0):
assays(1): counts
rownames(130): Actinomycetaceae Aerococcus ... Xanthomonadaceae
  Yersinia et rel.
rowData names(3): Phylum Family Genus
colnames(181): Sample-1 Sample-2 ... Sample-180 Sample-181
colData names(12): age sex ... bmi region
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
rowLinks: NULL
rowTree: NULL
colLinks: NULL
colTree: NULL

5.2 Run secom functions

set.seed(123)
# Linear relationships
res_linear = secom_linear(data = list(CE = tse1, NE = tse2), 
                          assay_name = c("counts", "counts"),
                          tax_level = c("Phylum", "Phylum"), pseudo = 0, 
                          prv_cut = 0.5, lib_cut = 1000, corr_cut = 0.5, 
                          wins_quant = c(0.05, 0.95), method = "pearson", 
                          soft = FALSE, thresh_len = 20, n_cv = 10, 
                          thresh_hard = 0.3, max_p = 0.005, n_cl = 2)

# Nonlinear relationships
res_dist = secom_dist(data = list(CE = tse1, NE = tse2),
                      assay_name = c("counts", "counts"),
                      tax_level = c("Phylum", "Phylum"), pseudo = 0, 
                      prv_cut = 0.5, lib_cut = 1000, corr_cut = 0.5, 
                      wins_quant = c(0.05, 0.95), R = 1000, 
                      thresh_hard = 0.3, max_p = 0.005, n_cl = 2)

5.3 Visualizations

Pearson correlation with thresholding

corr_linear = res_linear$corr_th
cooccur_linear = res_linear$mat_cooccur

# Filter by co-occurrence
overlap = 10
corr_linear[cooccur_linear < overlap] = 0

df_linear = data.frame(get_upper_tri(corr_linear)) %>%
  rownames_to_column("var1") %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  filter(!is.na(value)) %>%
  mutate(var2 = gsub("\\...", " - ", var2),
         value = round(value, 2))

tax_name = sort(union(df_linear$var1, df_linear$var2))
df_linear$var1 = factor(df_linear$var1, levels = tax_name)
df_linear$var2 = factor(df_linear$var2, levels = tax_name)
txt_color = ifelse(grepl("CE", tax_name), "#1B9E77", "#D95F02")

heat_linear_th = df_linear %>%
  ggplot(aes(var2, var1, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       na.value = "grey", midpoint = 0, limit = c(-1,1), 
                       space = "Lab", name = NULL) +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  geom_text(aes(var2, var1, label = value), color = "black", size = 4) +
  labs(x = NULL, y = NULL, title = "Pearson (Thresholding)") +
  theme_bw() +
  geom_vline(xintercept = 6.5, color = "blue", linetype = "dashed") +
  geom_hline(yintercept = 6.5, color = "blue", linetype = "dashed") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, 
                                   face = "italic", color = txt_color),
        axis.text.y = element_text(size = 12, face = "italic", 
                                   color = txt_color),
        strip.text.x = element_text(size = 14),
        strip.text.y = element_text(size = 14),
        legend.text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5, size = 15),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none") +
  coord_fixed()

heat_linear_th

Pearson correlation with p-value filtering

corr_linear = res_linear$corr_th
cooccur_linear = res_linear$mat_cooccur

# Filter by co-occurrence
overlap = 10
corr_linear[cooccur_linear < overlap] = 0

df_linear = data.frame(get_upper_tri(corr_linear)) %>%
  rownames_to_column("var1") %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  filter(!is.na(value)) %>%
  mutate(var2 = gsub("\\...", " - ", var2),
         value = round(value, 2))

tax_name = sort(union(df_linear$var1, df_linear$var2))
df_linear$var1 = factor(df_linear$var1, levels = tax_name)
df_linear$var2 = factor(df_linear$var2, levels = tax_name)
txt_color = ifelse(grepl("CE", tax_name), "#1B9E77", "#D95F02")

heat_linear_fl = df_linear %>%
  ggplot(aes(var2, var1, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       na.value = "grey", midpoint = 0, limit = c(-1,1), 
                       space = "Lab", name = NULL) +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  geom_text(aes(var2, var1, label = value), color = "black", size = 4) +
  labs(x = NULL, y = NULL, title = "Pearson (Filtering)") +
  theme_bw() +
  geom_vline(xintercept = 6.5, color = "blue", linetype = "dashed") +
  geom_hline(yintercept = 6.5, color = "blue", linetype = "dashed") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, 
                                   face = "italic", color = txt_color),
        axis.text.y = element_text(size = 12, face = "italic", 
                                   color = txt_color),
        strip.text.x = element_text(size = 14),
        strip.text.y = element_text(size = 14),
        legend.text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5, size = 15),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none") +
  coord_fixed()

heat_linear_fl

Distance correlation with p-value filtering

corr_dist = res_dist$dcorr_fl
cooccur_dist = res_dist$mat_cooccur

# Filter by co-occurrence
overlap = 10
corr_dist[cooccur_dist < overlap] = 0

df_dist = data.frame(get_upper_tri(corr_dist)) %>%
  rownames_to_column("var1") %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  filter(!is.na(value)) %>%
  mutate(var2 = gsub("\\...", " - ", var2),
         value = round(value, 2))

tax_name = sort(union(df_dist$var1, df_dist$var2))
df_dist$var1 = factor(df_dist$var1, levels = tax_name)
df_dist$var2 = factor(df_dist$var2, levels = tax_name)
txt_color = ifelse(grepl("CE", tax_name), "#1B9E77", "#D95F02")

heat_dist_fl = df_dist %>%
  ggplot(aes(var2, var1, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       na.value = "grey", midpoint = 0, limit = c(-1,1), 
                       space = "Lab", name = NULL) +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  geom_text(aes(var2, var1, label = value), color = "black", size = 4) +
  labs(x = NULL, y = NULL, title = "Distance (Filtering)") +
  theme_bw() +
  geom_vline(xintercept = 6.5, color = "blue", linetype = "dashed") +
  geom_hline(yintercept = 6.5, color = "blue", linetype = "dashed") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, 
                                   face = "italic", color = txt_color),
        axis.text.y = element_text(size = 12, face = "italic", 
                                   color = txt_color),
        strip.text.x = element_text(size = 14),
        strip.text.y = element_text(size = 14),
        legend.text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5, size = 15),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "none") +
  coord_fixed()

heat_dist_fl

Session information

sessionInfo()

R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS

Matrix products: default
BLAS:   /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB              LC_COLLATE=C              
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] doRNG_1.8.6     rngtools_1.5.2  foreach_1.5.2   DT_0.29        
 [5] phyloseq_1.44.0 lubridate_1.9.2 forcats_1.0.0   stringr_1.5.0  
 [9] dplyr_1.1.3     purrr_1.0.2     readr_2.1.4     tidyr_1.3.0    
[13] tibble_3.2.1    ggplot2_3.4.3   tidyverse_2.0.0 ANCOMBC_2.2.2  

loaded via a namespace (and not attached):
  [1] splines_4.3.1                  bitops_1.0-7                  
  [3] cellranger_1.1.0               rpart_4.1.19                  
  [5] DirichletMultinomial_1.42.0    lifecycle_1.0.3               
  [7] Rdpack_2.5                     doParallel_1.0.17             
  [9] lattice_0.21-8                 MASS_7.3-60                   
 [11] crosstalk_1.2.0                MultiAssayExperiment_1.26.0   
 [13] backports_1.4.1                magrittr_2.0.3                
 [15] Hmisc_5.1-1                    sass_0.4.7                    
 [17] rmarkdown_2.25                 jquerylib_0.1.4               
 [19] yaml_2.3.7                     gld_2.6.6                     
 [21] DBI_1.1.3                      minqa_1.2.6                   
 [23] ade4_1.7-22                    multcomp_1.4-25               
 [25] abind_1.4-5                    zlibbioc_1.46.0               
 [27] expm_0.999-7                   GenomicRanges_1.52.0          
 [29] BiocGenerics_0.46.0            RCurl_1.98-1.12               
 [31] yulab.utils_0.0.9              nnet_7.3-19                   
 [33] TH.data_1.1-2                  sandwich_3.0-2                
 [35] GenomeInfoDbData_1.2.10        IRanges_2.34.1                
 [37] S4Vectors_0.38.1               ggrepel_0.9.3                 
 [39] irlba_2.3.5.1                  tidytree_0.4.5                
 [41] vegan_2.6-4                    permute_0.9-7                 
 [43] DelayedMatrixStats_1.22.6      codetools_0.2-19              
 [45] DelayedArray_0.26.7            scuttle_1.10.2                
 [47] energy_1.7-11                  tidyselect_1.2.0              
 [49] farver_2.1.1                   lme4_1.1-34                   
 [51] gmp_0.7-2                      ScaledMatrix_1.8.1            
 [53] viridis_0.6.4                  matrixStats_1.0.0             
 [55] stats4_4.3.1                   base64enc_0.1-3               
 [57] jsonlite_1.8.7                 multtest_2.56.0               
 [59] BiocNeighbors_1.18.0           e1071_1.7-13                  
 [61] ellipsis_0.3.2                 decontam_1.20.0               
 [63] mia_1.8.0                      Formula_1.2-5                 
 [65] survival_3.5-7                 scater_1.28.0                 
 [67] iterators_1.0.14               tools_4.3.1                   
 [69] treeio_1.24.3                  DescTools_0.99.50             
 [71] Rcpp_1.0.11                    glue_1.6.2                    
 [73] gridExtra_2.3                  xfun_0.40                     
 [75] mgcv_1.9-0                     MatrixGenerics_1.12.3         
 [77] GenomeInfoDb_1.36.3            TreeSummarizedExperiment_2.8.0
 [79] withr_2.5.0                    numDeriv_2016.8-1.1           
 [81] fastmap_1.1.1                  rhdf5filters_1.12.1           
 [83] boot_1.3-28.1                  fansi_1.0.4                   
 [85] digest_0.6.33                  rsvd_1.0.5                    
 [87] timechange_0.2.0               R6_2.5.1                      
 [89] colorspace_2.1-0               gtools_3.9.4                  
 [91] RSQLite_2.3.1                  utf8_1.2.3                    
 [93] generics_0.1.3                 data.table_1.14.8             
 [95] DECIPHER_2.28.0                class_7.3-22                  
 [97] CVXR_1.0-11                    httr_1.4.7                    
 [99] htmlwidgets_1.6.2              S4Arrays_1.0.6                
[101] pkgconfig_2.0.3                gtable_0.3.4                  
[103] Exact_3.2                      Rmpfr_0.9-3                   
[105] blob_1.2.4                     SingleCellExperiment_1.22.0   
[107] XVector_0.40.0                 htmltools_0.5.6               
[109] biomformat_1.28.0              scales_1.2.1                  
[111] Biobase_2.60.0                 lmom_3.0                      
[113] knitr_1.44                     rstudioapi_0.15.0             
[115] tzdb_0.4.0                     reshape2_1.4.4                
[117] checkmate_2.2.0                nlme_3.1-163                  
[119] nloptr_2.0.3                   rhdf5_2.44.0                  
[121] proxy_0.4-27                   cachem_1.0.8                  
[123] zoo_1.8-12                     rootSolve_1.8.2.3             
[125] parallel_4.3.1                 vipor_0.4.5                   
[127] foreign_0.8-85                 pillar_1.9.0                  
[129] grid_4.3.1                     vctrs_0.6.3                   
[131] BiocSingular_1.16.0            beachmat_2.16.0               
[133] cluster_2.1.4                  beeswarm_0.4.0                
[135] htmlTable_2.4.1                evaluate_0.21                 
[137] mvtnorm_1.2-3                  cli_3.6.1                     
[139] compiler_4.3.1                 rlang_1.1.1                   
[141] crayon_1.5.2                   labeling_0.4.3                
[143] plyr_1.8.8                     ggbeeswarm_0.7.2              
[145] stringi_1.7.12                 viridisLite_0.4.2             
[147] BiocParallel_1.34.2            lmerTest_3.1-3                
[149] munsell_0.5.0                  Biostrings_2.68.1             
[151] gsl_2.1-8                      lazyeval_0.2.2                
[153] Matrix_1.6-1.1                 hms_1.1.3                     
[155] sparseMatrixStats_1.12.2       bit64_4.0.5                   
[157] Rhdf5lib_1.22.1                SummarizedExperiment_1.30.2   
[159] rbibutils_2.2.15               igraph_1.5.1                  
[161] memoise_2.0.1                  bslib_0.5.1                   
[163] bit_4.0.5                      readxl_1.4.3                  
[165] ape_5.7-1

SECOM Tutorial

Huang Lin $^1$

$^1$ NIEHS, Research Triangle Park, NC 27709, USA

September 19, 2023

1. Introduction

2. Installation

3. Example Data

4. Run SECOM on a Single Ecosystem

4.1 Run secom functions

4.2 Visualizations

Pearson correlation with thresholding

Pearson correlation with p-value filtering

Distance correlation with p-value filtering

5. Run SECOM on Multiple Ecosystems

5.1 Data manipulation

5.2 Run secom functions

5.3 Visualizations

Pearson correlation with thresholding

Pearson correlation with p-value filtering

Distance correlation with p-value filtering

Session information

References

SECOM Tutorial

Huang Lin1^1

1^1NIEHS, Research Triangle Park, NC 27709, USA

September 19, 2023

1. Introduction

2. Installation

3. Example Data

4. Run SECOM on a Single Ecosystem

4.1 Run secom functions

4.2 Visualizations

Pearson correlation with thresholding

Pearson correlation with p-value filtering

Distance correlation with p-value filtering

5. Run SECOM on Multiple Ecosystems

5.1 Data manipulation

5.2 Run secom functions

5.3 Visualizations

Pearson correlation with thresholding

Pearson correlation with p-value filtering

Distance correlation with p-value filtering

Session information

References

Huang Lin $^1$

$^1$ NIEHS, Research Triangle Park, NC 27709, USA