Contents

1 Introduction

Sequence-based transcription factor (TF) affinity scoring can be conducted with FIMO, a tool from the MEME suite; see Sonawane et al. (2017). We have serialized an object with references to FIMO outputs for 16 TFs.

# Attach TFutils and GenomicRanges without echoing their startup messages.
suppressPackageStartupMessages({
library(TFutils)
library(GenomicRanges)
})
# Autoprint the serialized object; the recorded output below reports a
# GenomicFiles object with 0 ranges and 16 files.
# NOTE(review): fimo16 is presumably supplied by TFutils -- confirm.
fimo16
## GenomicFiles object with 0 ranges and 16 files: 
## files: M0635_1.02sort.bed.gz, M3433_1.02sort.bed.gz, ..., M6159_1.02sort.bed.gz, M6497_1.02sort.bed.gz 
## detail: use files(), rowRanges(), colData(), ...

Although the token `bed` appears in the filenames, the files are not actually in BED format.

2 Importing with scanTabix

We can use reduceByRange to import selected scans.

# Import the raw FIMO records for two TFs over a 20 kb window on chr17.
# The whole query is skipped when running on Windows.
if (.Platform$OS.type != "windows") {
  si <- TFutils::seqinfo_hg19_chr17
  myg <- GRanges("chr17", IRanges(38.07e6, 38.09e6), seqinfo = si)
  # Label the columns by HGNC symbol so files can be selected by TF name.
  colnames(fimo16) <- fimo16$HGNC
  # fimo16 prints as having 0 ranges, and reduceByRange() iterates over the
  # object's rowRanges, so the query range has to be installed first (the same
  # step the fimo_granges prototype performs). This is done on a subset copy
  # so that fimo16 itself keeps its empty rowRanges.
  fimo2 <- fimo16[, c("POU2F1", "VDR")]
  rowRanges(fimo2) <- myg
  # MAP receives one range (r) and one file (f); scanTabix returns the
  # matching records as unparsed text lines.
  lk2 <- reduceByRange(fimo2,
    MAP = function(r, f) scanTabix(f, param = r))
  str(lk2)
}

This result can be massaged into a GRanges or other desirable structure. fimo_granges takes care of this.

#fimo_granges = function(gf, query) { # prototypical code
# rowRanges(gf) = query
# ans = reduceByRange(gf, MAP=function(r,f) scanTabix(f, param=r))
# ans = unlist(ans, recursive=FALSE)  # drop top list structure
# tabs = lapply(ans, lapply, function(x) {
#     con = textConnection(x)
#     on.exit(close(con))
#     dtf = read.delim(con, h=FALSE, stringsAsFactors=FALSE, sep="\t")
#     colnames(dtf) = c("chr", "start", "end", "rname", "score", "dir", "pval")
#     ans = with(dtf, GRanges(seqnames=chr, IRanges(start, end),
#            rname=rname, score=score, dir=dir, pval=pval))
#     ans
#     })
# GRangesList(unlist(tabs, recursive=FALSE))
#}
# Repeat the two-TF query through fimo_granges(), passing the GenomicFiles
# subset and the query range myg defined earlier. Skipped on Windows, like
# the scanTabix example.
if (!identical(.Platform$OS.type, "windows")) {
  rr <- fimo_granges(fimo16[, c("POU2F1", "VDR")], myg)
  # Last expression of the branch, so the result is autoprinted.
  rr
}
# Record the R version and package versions used for this run.
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows Server x64 (build 17763)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=C                          
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] UpSetR_1.4.0                magrittr_2.0.1             
##  [3] dplyr_1.0.7                 gwascat_2.26.0             
##  [5] GSEABase_1.56.0             graph_1.72.0               
##  [7] annotate_1.72.0             XML_3.99-0.8               
##  [9] png_0.1-7                   ggplot2_3.3.5              
## [11] knitr_1.36                  data.table_1.14.2          
## [13] GO.db_3.14.0                GenomicFiles_1.30.0        
## [15] rtracklayer_1.54.0          Rsamtools_2.10.0           
## [17] Biostrings_2.62.0           XVector_0.34.0             
## [19] BiocParallel_1.28.0         SummarizedExperiment_1.24.0
## [21] GenomicRanges_1.46.0        GenomeInfoDb_1.30.0        
## [23] MatrixGenerics_1.6.0        matrixStats_0.61.0         
## [25] org.Hs.eg.db_3.14.0         AnnotationDbi_1.56.0       
## [27] IRanges_2.28.0              S4Vectors_0.32.0           
## [29] Biobase_2.54.0              BiocGenerics_0.40.0        
## [31] TFutils_1.14.0              BiocStyle_2.22.0           
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-2         rjson_0.2.20             ellipsis_0.3.2          
##  [4] farver_2.1.0             bit64_4.0.5              fansi_0.5.0             
##  [7] xml2_1.3.2               codetools_0.2-18         splines_4.1.1           
## [10] snpStats_1.44.0          cachem_1.0.6             jsonlite_1.7.2          
## [13] dbplyr_2.1.1             shiny_1.7.1              BiocManager_1.30.16     
## [16] readr_2.0.2              compiler_4.1.1           httr_1.4.2              
## [19] assertthat_0.2.1         Matrix_1.3-4             fastmap_1.1.0           
## [22] later_1.3.0              htmltools_0.5.2          prettyunits_1.1.1       
## [25] tools_4.1.1              gtable_0.3.0             glue_1.4.2              
## [28] GenomeInfoDbData_1.2.7   rappdirs_0.3.3           Rcpp_1.0.7              
## [31] cellranger_1.1.0         jquerylib_0.1.4          vctrs_0.3.8             
## [34] xfun_0.27                stringr_1.4.0            mime_0.12               
## [37] miniUI_0.1.1.1           lifecycle_1.0.1          restfulr_0.0.13         
## [40] zlibbioc_1.40.0          scales_1.1.1             BSgenome_1.62.0         
## [43] VariantAnnotation_1.40.0 hms_1.1.1                promises_1.2.0.1        
## [46] parallel_4.1.1           yaml_2.2.1               curl_4.3.2              
## [49] gridExtra_2.3            memoise_2.0.0            sass_0.4.0              
## [52] biomaRt_2.50.0           stringi_1.7.5            RSQLite_2.2.8           
## [55] highr_0.9                BiocIO_1.4.0             GenomicFeatures_1.46.0  
## [58] filelock_1.0.2           rlang_0.4.12             pkgconfig_2.0.3         
## [61] bitops_1.0-7             evaluate_0.14            lattice_0.20-45         
## [64] purrr_0.3.4              labeling_0.4.2           GenomicAlignments_1.30.0
## [67] bit_4.0.4                tidyselect_1.1.1         bookdown_0.24           
## [70] plyr_1.8.6               R6_2.5.1                 generics_0.1.1          
## [73] DelayedArray_0.20.0      DBI_1.1.1                pillar_1.6.4            
## [76] withr_2.4.2              survival_3.2-13          KEGGREST_1.34.0         
## [79] RCurl_1.98-1.5           tibble_3.1.5             crayon_1.4.1            
## [82] utf8_1.2.2               BiocFileCache_2.2.0      tzdb_0.1.2              
## [85] rmarkdown_2.11           progress_1.2.2           readxl_1.3.1            
## [88] blob_1.2.2               digest_0.6.28            xtable_1.8-4            
## [91] httpuv_1.6.3             munsell_0.5.0            bslib_0.3.1