To install this package, run
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("NanoMethViz")
library(NanoMethViz)
To generate a methylation plot we need 3 components:
The methylation information has been modified from the output of nanopolish/f5c. It has then been compressed and indexed using bgzip()
and indexTabix()
from the Rsamtools
package.
# methylation data stored in tabix file
methy <- system.file(package = "NanoMethViz", "methy_subset.tsv.bgz")
# tabix is just a special gzipped tab-separated-values file
read.table(gzfile(methy), col.names = methy_col_names(), nrows = 6)
## sample chr pos strand statistic
## 1 B6Cast_Prom_1_bl6 chr11 101463573 * -0.33
## 2 B6Cast_Prom_1_bl6 chr11 101463573 * -1.87
## 3 B6Cast_Prom_1_bl6 chr11 101463573 * -4.19
## 4 B6Cast_Prom_1_bl6 chr11 101463573 * 0.10
## 5 B6Cast_Prom_1_cast chr11 101463573 * -0.38
## 6 B6Cast_Prom_1_cast chr11 101463573 * -0.84
## read_name
## 1 6cc38b35-6570-4b44-a1e3-2605fcf2ffe8
## 2 787f5f43-d144-4e15-ab7d-6b1474083389
## 3 c7ee7fb4-a915-4da7-9f36-da6ed5e68af2
## 4 bff8b135-0296-4495-9354-098242ea8cc4
## 5 11fe130b-8d48-4399-a9fa-2ca2860fa355
## 6 502fef95-c2f2-46ad-9bc5-fb3fc80b4245
The exon annotation was obtained from the Mus.musculus package, and joined into a single table. It is important that the chromosomes share the same convention as that found in the methylation data.
# helper function extracts exons from Mus.musculus package
exon_tibble <- get_exons_mus_musculus()
## Loading required package: Mus.musculus
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:NanoMethViz':
##
## samples
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: OrganismDbi
## Loading required package: GenomicFeatures
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
##
## Attaching package: 'GenomicFeatures'
## The following object is masked from 'package:NanoMethViz':
##
## exons
## Loading required package: GO.db
##
## Loading required package: org.Mm.eg.db
##
## Loading required package: TxDb.Mmusculus.UCSC.mm10.knownGene
head(exon_tibble)
## # A tibble: 6 x 7
## gene_id chr strand start end transcript_id symbol
## <chr> <chr> <chr> <int> <int> <int> <chr>
## 1 100009600 chr9 - 21062393 21062717 74536 Zglp1
## 2 100009600 chr9 - 21062894 21062987 74536 Zglp1
## 3 100009600 chr9 - 21063314 21063396 74536 Zglp1
## 4 100009600 chr9 - 21066024 21066377 74536 Zglp1
## 5 100009600 chr9 - 21066940 21067093 74536 Zglp1
## 6 100009600 chr9 - 21062400 21062717 74538 Zglp1
We will defined the sample annotation ourselves. It is important that the sample names match those found in the methylation data.
sample <- c(
"B6Cast_Prom_1_bl6",
"B6Cast_Prom_1_cast",
"B6Cast_Prom_2_bl6",
"B6Cast_Prom_2_cast",
"B6Cast_Prom_3_bl6",
"B6Cast_Prom_3_cast"
)
group <- c(
"bl6",
"cast",
"bl6",
"cast",
"bl6",
"cast"
)
sample_anno <- data.frame(sample, group, stringsAsFactors = FALSE)
sample_anno
## sample group
## 1 B6Cast_Prom_1_bl6 bl6
## 2 B6Cast_Prom_1_cast cast
## 3 B6Cast_Prom_2_bl6 bl6
## 4 B6Cast_Prom_2_cast cast
## 5 B6Cast_Prom_3_bl6 bl6
## 6 B6Cast_Prom_3_cast cast
For convenience we assemble these three pieces of data into a single object.
nmeth_results <- NanoMethResult(methy, sample_anno, exon_tibble)
The genes we have available are
For demonstrative purposes we will plot Peg3.
plot_gene(nmeth_results, "Peg3")
We can also load in some DMR results to highlight DMR regions.
# loading saved results from previous bsseq analysis
bsseq_dmr <- read.table(
system.file(package = "NanoMethViz", "dmr_subset.tsv.gz"),
sep = "\t",
header = TRUE,
stringsAsFactors = FALSE
)
plot_gene(nmeth_results, "Peg3", anno_regions = bsseq_dmr)
Individual long reads can be visualised using the spaghetti
argument.
# warnings have been turned off in this vignette, but this will generally
# generate many warnings as the smoothing for many reads will fail
plot_gene(nmeth_results, "Peg3", anno_regions = bsseq_dmr, spaghetti = TRUE)