TCGAbiolinks provides a few functions to download mutation data from GDC. There are two options to download the data:
1. `GDCquery_Maf`, which downloads MAF files aligned against hg38.
2. `GDCquery`, `GDCdownload` and `GDCprepare`, to download MAF files aligned against hg19.

This example downloads MAF (Mutation Annotation Format) files for the variant calling pipeline muse. The pipeline options are: `muse`, `varscan2`, `somaticsniper` and `mutect`. For more information, please see the GDC docs.
# Download the MAF for the "CHOL" tumor produced by the "muse" pipeline.
maf <- GDCquery_Maf("CHOL", pipelines = "muse")
# Show only the first 20 rows to make rendering faster. head() is used
# instead of maf[1:20, ] so that a table with fewer than 20 rows is not
# padded with NA rows.
datatable(head(maf, 20),
          filter = "top",
          options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
          rownames = FALSE)
This example downloads MAF (Mutation Annotation Format) files aligned against hg19 (the old TCGA MAF files).
# Query the legacy archive for open-access simple somatic mutation files
# of the TCGA-CHOL project.
query.maf.hg19 <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple nucleotide variation",
  data.type = "Simple somatic mutation",
  access = "open",
  legacy = TRUE
)
# List the available MAF files, hiding every column whose name
# contains "cases".
getResults(query.maf.hg19) %>%
  select(-contains("cases")) %>%
  datatable(
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 10),
    rownames = FALSE
  )
# Repeat the legacy query, this time passing file.type to select one
# specific MAF file.
query.maf.hg19 <- GDCquery(project = "TCGA-CHOL",
                           data.category = "Simple nucleotide variation",
                           data.type = "Simple somatic mutation",
                           access = "open",
                           file.type = "bcgsc.ca_CHOL.IlluminaHiSeq_DNASeq.1.somatic.maf",
                           legacy = TRUE)
# Download the files matched by the query, then load them into R.
GDCdownload(query.maf.hg19)
maf <- GDCprepare(query.maf.hg19)
# Show only the first 20 rows to make rendering faster. head() is used
# instead of maf[1:20, ] so that a table with fewer than 20 rows is not
# padded with NA rows.
datatable(head(maf, 20),
          filter = "top",
          options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
          rownames = FALSE)
To visualize the data you can use the Bioconductor package maftools. For more information, please check its vignette.
library(maftools)
# Download the "muse" MAF for CHOL and pass it straight to read.maf().
maf <- GDCquery_Maf("CHOL", pipelines = "muse") %>%
  read.maf(removeSilent = TRUE, useAll = FALSE)

# Per-sample summary as an interactive table.
maf %>%
  getSampleSummary() %>%
  datatable(
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
    rownames = FALSE
  )

# Summary dashboard of the MAF.
plotmafSummary(
  maf = maf,
  rmOutlier = TRUE,
  addStat = "median",
  dashboard = TRUE
)

# Oncoplot for the top 10 genes.
oncoplot(maf = maf, top = 10, removeNonMutated = TRUE)

# Compute the titv summary without plotting, then plot it.
# NOTE(review): the result reuses the name of the titv() function; the
# name is kept in case later code refers to it.
titv <- titv(maf = maf, plot = FALSE, useSyn = TRUE)
plotTiTv(res = titv)