createMAE {ELMER}R Documentation

Construct a Multi Assay Experiment for ELMER analysis

Description

This function will receive a gene expression and DNA methylation data objects and create a Multi Assay Experiment.

Usage

createMAE(
  exp,
  met,
  colData,
  sampleMap,
  linearize.exp = FALSE,
  filter.probes = NULL,
  met.na.cut = 0.2,
  filter.genes = NULL,
  met.platform = "450K",
  genome = NULL,
  save = TRUE,
  save.filename,
  TCGA = FALSE
)

Arguments

exp

A Summaerized Experiment, a matrix or path of rda file only containing the data. Rownames should be either Ensembl gene id (ensembl_gene_id) or gene symbol (external_gene_name)

met

A Summaerized Experiment, a matrix or path of rda file only containing the data.

colData

A DataFrame or data.frame of the phenotype data for all participants. Must have column primary (sample ID).

sampleMap

A DataFrame or data.frame of the matching samples and colnames of the gene expression and DNA methylation matrix. This should be used if your matrix have different columns names. This object must have following columns: assay ("DNA methylation" and "Gene expression"), primary (sample ID) and colname (names of the columns of the matrix).

linearize.exp

Take log2(exp + 1) in order to linearize relation between methylation and expression

filter.probes

A GRanges object contains the coordinate of probes which locate within promoter regions or distal feature regions such as union enhancer from REMC and FANTOM5. See get.feature.probe function.

met.na.cut

Define the percentage of NA that the line should have to remove the probes for humanmethylation platforms.

filter.genes

List of genes ensemble ids to filter from object

met.platform

DNA methylation platform "450K" or "EPIC"

genome

Which is the default genome to make gene information. Options hg19 and hg38

save

If TRUE, MAE object will be saved into a file named as the argument save.file if this was set, otherwise as mae_genome_met.platform.rda.

save.filename

Name of the rda file to save the object (must end in .rda)

TCGA

A logical. FALSE indicate data is not from TCGA (FALSE is default). TRUE indicates data is from TCGA and sample section will automatically filled in.

Value

A MultiAssayExperiment object

Examples

# NON TCGA example: matrices has different column names
gene.exp <- S4Vectors::DataFrame(sample1.exp = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4),
                  sample2.exp = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3))
dna.met <- S4Vectors::DataFrame(sample1.met = c("cg14324200"=0.5,"cg23867494"=0.1),
                       sample2.met =  c("cg14324200"=0.3,"cg23867494"=0.9))
sample.info <- S4Vectors::DataFrame(primary =  c("sample1","sample2"), 
                                    sample.type = c("Normal", "Tumor"))
sampleMap <- S4Vectors::DataFrame(
                 assay = c("Gene expression","DNA methylation","Gene expression","DNA methylation"),
                 primary = c("sample1","sample1","sample2","sample2"), 
                 colname = c("sample1.exp","sample1.met","sample2.exp","sample2.met"))
mae <- createMAE(exp = gene.exp, 
                 met = dna.met, 
                 sampleMap = sampleMap, 
                 met.platform ="450K",
                 colData = sample.info, 
                 genome = "hg38") 
# You can also use sample Mapping and Sample information tables from a tsv file
# You can use the createTSVTemplates function to create the tsv files
readr::write_tsv(as.data.frame(sampleMap), path = "sampleMap.tsv")
readr::write_tsv(as.data.frame(sample.info), path = "sample.info.tsv")
mae <- createMAE(exp = gene.exp, 
                 met = dna.met, 
                 sampleMap = "sampleMap.tsv", 
                 met.platform ="450K",
                 colData = "sample.info.tsv", 
                 genome = "hg38") 
                 
# NON TCGA example: matrices has same column names
gene.exp <- S4Vectors::DataFrame(sample1 = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4),
                  sample2 = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3))
dna.met <- S4Vectors::DataFrame(sample1 = c("cg14324200"=0.5,"cg23867494"=0.1),
                       sample2=  c("cg14324200"=0.3,"cg23867494"=0.9))
sample.info <- S4Vectors::DataFrame(primary =  c("sample1","sample2"), 
                                    sample.type = c("Normal", "Tumor"))
sampleMap <- S4Vectors::DataFrame(
                 assay = c("Gene expression","DNA methylation","Gene expression","DNA methylation"),
                 primary = c("sample1","sample1","sample2","sample2"), 
                 colname = c("sample1","sample1","sample2","sample2")
)
mae <- createMAE(exp = gene.exp, 
                 met = dna.met, 
                 sampleMap = sampleMap, 
                 met.platform ="450K",
                 colData = sample.info, 
                 genome = "hg38") 

## Not run: 
   # TCGA example using TCGAbiolinks
   # Testing creating MultyAssayExperiment object
   # Load library
   library(TCGAbiolinks)
   library(SummarizedExperiment)
   
   samples <- c("TCGA-BA-4074", "TCGA-BA-4075", "TCGA-BA-4077", "TCGA-BA-5149",
                "TCGA-UF-A7JK", "TCGA-UF-A7JS", "TCGA-UF-A7JT", "TCGA-UF-A7JV")
   
   #1) Get gene expression matrix
   query.exp <- GDCquery(project = "TCGA-HNSC", 
                         data.category = "Transcriptome Profiling", 
                         data.type = "Gene Expression Quantification", 
                         workflow.type = "HTSeq - FPKM-UQ",
                         barcode = samples)
   
   GDCdownload(query.exp)
   exp.hg38 <- GDCprepare(query = query.exp)
   
   
   # Aligned against Hg19
   query.exp.hg19 <- GDCquery(project = "TCGA-HNSC", 
                              data.category = "Gene expression",
                              data.type = "Gene expression quantification",
                              platform = "Illumina HiSeq", 
                              file.type  = "normalized_results",
                              experimental.strategy = "RNA-Seq",
                              barcode = samples,
                              legacy = TRUE)
   GDCdownload(query.exp.hg19)
   exp.hg19 <- GDCprepare(query.exp.hg19)
   
   # Our object needs to have emsembl gene id as rownames
   rownames(exp.hg19) <- values(exp.hg19)$ensembl_gene_id
   
   # DNA Methylation
   query.met <- GDCquery(project = "TCGA-HNSC",
                         legacy = TRUE,
                         data.category = "DNA methylation",
                         barcode = samples,
                         platform = "Illumina Human Methylation 450")
   
   GDCdownload(query.met)
   met <- GDCprepare(query = query.met)
   
   distal.enhancer <- get.feature.probe(genome = "hg19",met.platform = "450k")
   
   # Consisering it is TCGA and SE
   mae.hg19 <- createMAE(exp = exp.hg19, 
                         met =  met, 
                         TCGA = TRUE, 
                         genome = "hg19",  
                         filter.probes = distal.enhancer)
   values(getExp(mae.hg19))
   
   mae.hg38 <- createMAE(exp = exp.hg38, met = met, 
                        TCGA = TRUE, genome = "hg38",  
                        filter.probes = distal.enhancer)
   values(getExp(mae.hg38))
   
   # Consisering it is TCGA and not SE
   mae.hg19.test <- createMAE(exp = assay(exp.hg19), met =  assay(met), 
                              TCGA = TRUE, genome = "hg19",  
                              filter.probes = distal.enhancer)
   
   mae.hg38 <- createMAE(exp = assay(exp.hg38), met = assay(met), 
                         TCGA = TRUE, genome = "hg38",  
                         filter.probes = distal.enhancer)
   values(getExp(mae.hg38))
   
   # Consisering it is not TCGA and SE
   # DNA methylation and gene expression Objects should have same sample names in columns
   not.tcga.exp <- exp.hg19 
   colnames(not.tcga.exp) <- substr(colnames(not.tcga.exp),1,15)
   not.tcga.met <- met 
   colnames(not.tcga.met) <- substr(colnames(not.tcga.met),1,15)
   
   phenotype.data <- data.frame(row.names = colnames(not.tcga.exp),
                                primary = colnames(not.tcga.exp),
                                samples = colnames(not.tcga.exp), 
                                group = c(rep("group1",4),rep("group2",4)))
   distal.enhancer <- get.feature.probe(genome = "hg19",met.platform = "450k")
   mae.hg19 <- createMAE(exp = not.tcga.exp, 
                         met =  not.tcga.met, 
                         TCGA = FALSE, 
                         filter.probes = distal.enhancer,
                         genome = "hg19", 
                         colData = phenotype.data)

## End(Not run)
createMAE

[Package ELMER version 2.12.0 Index]