pathwayOverrepresent {PhosR} | R Documentation |
This function performes gene set over-representation analysis using Fisher's exact test.
pathwayOverrepresent(geneSet, annotation, universe, alter = "greater")
geneSet |
an array of gene or phosphosite IDs (IDs are gene symbols etc that match to your pathway annotation list). |
annotation |
a list of pathways with each element containing an array of gene or phosphosite IDs. |
universe |
the universe/backgrond of all genes or phosphosites in your profiled dataset. |
alter |
test for enrichment ('greater', default), depletion ('less'), or 'two.sided'. |
A matrix of pathways and their associated substrates and p-values.
library(limma) data('phospho_L6_ratio') data('SPSs') grps = gsub('_.+', '', colnames(phospho.L6.ratio)) # Cleaning phosphosite label phospho.site.names = rownames(phospho.L6.ratio) L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'), function(x){paste(toupper(x[2]), x[3], '', sep=';')})) phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites), colMeans)) phospho.site.names = split(phospho.site.names, L6.sites) # Construct a design matrix by condition design = model.matrix(~ grps - 1) # phosphoproteomics data normalisation using RUV ctl = which(rownames(phospho.L6.ratio) %in% SPSs) phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3, ctl = ctl) # divides the phospho.L6.ratio data into groups by phosphosites L6.sites <- gsub(' ', '', gsub('~[STY]', '~', sapply(strsplit(rownames(phospho.L6.ratio.RUV), '~'), function(x){paste(toupper(x[2]), x[3], sep='~')}))) phospho.L6.ratio.sites <- t(sapply(split(data.frame(phospho.L6.ratio.RUV), L6.sites), colMeans)) # fit linear model for each phosphosite f <- gsub('_exp\\d', '', colnames(phospho.L6.ratio.RUV)) X <- model.matrix(~ f - 1) fit <- lmFit(phospho.L6.ratio.RUV, X) # extract top-ranked phosphosites for each condition compared to basal table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1) table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3) table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2) DE1.RUV <- c(sum(table.AICAR[,'adj.P.Val'] < 0.05), sum(table.Ins[,'adj.P.Val'] < 0.05), sum(table.AICARIns[,'adj.P.Val'] < 0.05)) # extract top-ranked phosphosites for each group comparison contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X) contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X) fit1 <- contrasts.fit(fit, contrast.matrix1) fit2 <- contrasts.fit(fit, contrast.matrix2) table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf) table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf) DE2.RUV <- c(sum(table.AICARInsVSIns[,'adj.P.Val'] < 0.05), sum(table.AICARInsVSAICAR[,'adj.P.Val'] < 0.05)) o <- rownames(table.AICARInsVSIns) Tc <- cbind(table.Ins[o,'logFC'], table.AICAR[o,'logFC'], table.AICARIns[o,'logFC']) rownames(Tc) = gsub('(.*)(;[A-Z])([0-9]+)(;)', '\\1;\\3;', o) colnames(Tc) <- c('Ins', 'AICAR', 'AICAR+Ins') # summary phosphosite-level information to proteins for performing downstream # gene-centric analyses. Tc.gene <- phosCollapse(Tc, id=gsub(';.+', '', rownames(Tc)), stat=apply(abs(Tc), 1, max), by = 'max') geneSet <- names(sort(Tc.gene[,1], decreasing = TRUE))[1:round(nrow(Tc.gene) * 0.1)] #lapply(PhosphoSite.rat, function(x){gsub(';[STY]', ';', x)}) # 1D gene-centric pathway analysis path1 <- pathwayOverrepresent(geneSet, annotation=Pathways.reactome, universe = rownames(Tc.gene), alter = 'greater')