codonUsage-expressivity {coRdon} | R Documentation |
Calculate values of the CU expressivity measure
for every sequence in the given codonTable
object.
The following methods are implemented:
MELP, CU expressivity measure based on Measure Independent of Length and Composition, Supek & Vlahovicek (2005),
E, gene expression measure (E), Karlin and Mrazek (2000),
CAI, Codon Adaptation Index (CAI), Sharp and Li (1987),
Fop, frequency of optimal codons (Fop), Ikemura (1981),
GCB, gene codon bias (GCB), Merkl (2003).
MELP(cTobject, subsets = list(), ribosomal = FALSE,
     id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
     filtering = "none", len.threshold = 80)

## S4 method for signature 'codonTable'
MELP(cTobject, subsets = list(), ribosomal = FALSE,
     id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
     filtering = "none", len.threshold = 80)

E(cTobject, subsets = list(), ribosomal = FALSE,
  id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
  filtering = "none", len.threshold = 80)

## S4 method for signature 'codonTable'
E(cTobject, subsets = list(), ribosomal = FALSE,
  id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
  filtering = "none", len.threshold = 80)

CAI(cTobject, subsets = list(), ribosomal = FALSE,
    id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
    filtering = "none", len.threshold = 80)

## S4 method for signature 'codonTable'
CAI(cTobject, subsets = list(), ribosomal = FALSE,
    id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
    filtering = "none", len.threshold = 80)

Fop(cTobject, subsets = list(), ribosomal = FALSE,
    id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
    filtering = "none", len.threshold = 80)

## S4 method for signature 'codonTable'
Fop(cTobject, subsets = list(), ribosomal = FALSE,
    id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
    filtering = "none", len.threshold = 80)

GCB(cTobject, seed = logical(), ribosomal = FALSE, perc = 0.05,
    id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
    filtering = "none", len.threshold = 80)

## S4 method for signature 'codonTable'
GCB(cTobject, seed = logical(), ribosomal = FALSE, perc = 0.05,
    id_or_name2 = "1", alt.init = TRUE, stop.rm = FALSE,
    filtering = "none", len.threshold = 80)
cTobject |
A codonTable object. |
subsets |
A (named) list of logical vectors, the length of each equal
to the number of sequences in cTobject, or of codonTable objects. |
ribosomal |
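Logical, if TRUE, CU expressivity is also calculated with respect to the CU of the ribosomal genes in the sequence set (for GCB, the ribosomal genes are used as the seed). |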
id_or_name2 |
A single string that uniquely identifies the genetic code to extract.
Should be one of the values in the id or name2 columns of GENETIC_CODE_TABLE (see ?Biostrings::GENETIC_CODE). |
alt.init |
logical, whether to use alternative initiation codons.
Default is TRUE. |
stop.rm |
Logical, whether to remove stop codons. Default is
FALSE. |
filtering |
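Character vector, one of c("none", "soft", "hard"). If "none" (default), sequence length is not taken into account. If "soft", the measure is calculated for all sequences, but a warning is issued if some are shorter than len.threshold. If "hard", sequences shorter than len.threshold are excluded from the calculation. |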
len.threshold |
Optional numeric, specifying sequence length, in codons, used for filtering. |
seed |
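A logical vector, of the length equal to
the number of sequences in cTobject, or a codonTable object, indicating the set of genes whose CU is used as a target in the first iteration of the algorithm. |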
perc |
percent of top ranking genes to be used as a target set for the next iteration of the algorithm that calculates GCB. Default is 0.05. |
A matrix (for GCB a numeric vector) with CU expressivity values
for every specified subset (subsets, self, ribosomal) in columns.
# load example DNA sequences
exampledir <- system.file("extdata", package = "coRdon")
cT <- codonTable(readSet(exampledir))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# In the examples below, MELP values are calculated for all sequences;
# any other CU expressivity measure can be calculated in the same way,
# the only exception being GCB which takes `seed` instead of `subsets`
# parameter. (The examples for GCB calculation are further below).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# calculate MELP with respect to the CU
# of ribosomal genes among the example DNA sequences
melp <- MELP(cT, ribosomal = TRUE)
head(melp)

# calculate MELP distance with respect to the average CU
# of the first 20 example DNA sequences
# (i.e. the first half of the example DNA set)
melp <- MELP(cT, subsets = list(half = c(rep(TRUE, 20), rep(FALSE, 20))))

# alternatively, you can specify codonTable as a subset
halfcT <- codonTable(codonCounts(cT)[1:20,])
melp2 <- MELP(cT, subsets = list(half = halfcT))
all.equal(melp, melp2) # TRUE

# filtering
MELP(cT, ribosomal = TRUE,
     filtering = "hard", len.threshold = 80) # MELP for 9 sequences
                                             # (note that, accidentally,
                                             # all are ribosomal)
sum(getlen(cT) > 80) # 9 sequences are longer than 80 codons
melp1 <- MELP(cT, ribosomal = TRUE, filtering = "none") # no filtering
melp2 <- MELP(cT, ribosomal = TRUE, filtering = "soft") # warning
all.equal(melp1, melp2) # TRUE

# options for genetic code
melp <- MELP(cT, ribosomal = TRUE,
             stop.rm = TRUE) # don't use stop codons in calculation
melp <- MELP(cT, ribosomal = TRUE,
             alt.init = FALSE) # don't use alternative start codons
melp <- MELP(cT, ribosomal = TRUE,
             id_or_name2 = "2") # use different genetic code, for help
                                # see `?Biostrings::GENETIC_CODE`

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# GCB calculation
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# calculate GCB with CU of ribosomal genes among the example DNA sequences
# used as a target (seed) in the first iteration of the algorithm
gcb <- GCB(cT, ribosomal = TRUE)
head(gcb)

# calculate GCB distance with the first 20 example DNA sequences
# (i.e. the first half of the example DNA set) as a seed
gcb <- GCB(cT, seed = c(rep(TRUE, 20), rep(FALSE, 20)))

# alternatively, you can specify codonTable as a seed
halfcT <- codonTable(codonCounts(cT)[1:20,])
gcb2 <- GCB(cT, seed = halfcT)
all.equal(gcb, gcb2) # TRUE

# options for genetic code
gcb <- GCB(cT, ribosomal = TRUE,
           stop.rm = TRUE) # don't use stop codons in calculation
gcb <- GCB(cT, ribosomal = TRUE,
           alt.init = FALSE) # don't use alternative start codons
gcb <- GCB(cT, ribosomal = TRUE,
           id_or_name2 = "2") # use different genetic code, for help
                              # see `?Biostrings::GENETIC_CODE`