seqSetFilter-methods {SeqArray} | R Documentation |
Sets a filter on samples and/or variants.
## S4 method for signature 'SeqVarGDSClass,ANY' seqSetFilter(object, variant.sel, sample.sel=NULL, variant.id=NULL, sample.id=NULL, action=c("set", "intersect", "push", "push+set", "push+intersect", "pop"), ret.idx=FALSE, warn=TRUE, verbose=TRUE) ## S4 method for signature 'SeqVarGDSClass,GRanges' seqSetFilter(object, variant.sel, rm.txt="chr", intersect=FALSE, verbose=TRUE) ## S4 method for signature 'SeqVarGDSClass,GRangesList' seqSetFilter(object, variant.sel, rm.txt="chr", intersect=FALSE, verbose=TRUE) ## S4 method for signature 'SeqVarGDSClass,IRanges' seqSetFilter(object, variant.sel, chr, intersect=FALSE, verbose=TRUE) seqResetFilter(object, sample=TRUE, variant=TRUE, verbose=TRUE) seqSetFilterChrom(object, include=NULL, is.num=NA, from.bp=NULL, to.bp=NULL, intersect=FALSE, verbose=TRUE) seqSetFilterPos(object, chr, pos, ref=NULL, alt=NULL, intersect=FALSE, multi.pos=TRUE, ret.idx=FALSE, verbose=TRUE) seqSetFilterAnnotID(object, id, ret.idx=FALSE, verbose=TRUE)
object |
a SeqVarGDSClass object |
variant.sel |
a logical/raw/index vector indicating the selected
variants; or a GRanges, GRangesList or IRanges object specifying the genomic ranges of the selected variants |
sample.sel |
a logical/raw/index vector indicating the selected samples |
variant.id |
ID of selected variants |
sample.id |
ID of selected samples |
action |
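"set" -- set the current filter via sample.id, variant.id, sample.sel or variant.sel; "intersect" -- set the current filter to the intersection of the current filter and the selected samples and/or variants; "push" -- push the current filter onto the stack without changing it, so that it can be recovered by "pop" later; "push+set" -- push the current filter onto the stack, then set the filter; "push+intersect" -- push the current filter onto the stack, then set the filter to the intersection; "pop" -- restore the last pushed filter |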
ret.idx |
if TRUE, return the index in the output array according to the order of 'sample.id', 'sample.sel', 'variant.id' or 'variant.sel' |
rm.txt |
a character string; these characters will be removed from the sequence names of variant.sel (e.g., the default "chr" turns "chr1" into "1") |
chr |
a character vector of chromosome codes |
pos |
a vector of numeric values for genome coordinate |
sample |
logical, if TRUE, include all samples |
variant |
logical, if TRUE, include all variants |
include |
NULL, or a vector of characters for specified chromosome(s) |
is.num |
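a logical variable: TRUE, select chromosomes whose code is numeric; FALSE, select chromosomes whose code is not numeric; is.num=TRUE is usually used to exclude non-autosomes |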
from.bp |
NULL, no limit; a numeric vector, the lower bound of position |
to.bp |
NULL, no limit; a numeric vector, the upper bound of position |
intersect |
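if FALSE, the candidate samples/variants for selection are all samples/variants (the default); if TRUE, the candidates are restricted to the samples/variants selected by the current filter |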
ref |
the reference alleles |
alt |
the alternative alleles |
multi.pos |
FALSE, use only the first matched position; TRUE, allow multiple variants at the same position |
id |
a character vector of RS IDs (stored in "annotation/id") |
warn |
if TRUE, show warning messages |
verbose |
if TRUE, show information |
seqResetFilter(file) is equivalent to seqSetFilter(file), where all of the selection arguments in seqSetFilter are NULL.
If from.bp and to.bp have values, they should have the same length as include. A trio of include, from.bp and to.bp indicates a region on the human genome. NA in from.bp is treated as 0, and NA in to.bp is treated as the maximum integer value (2^31 - 1).
If ret.idx=TRUE, seqSetFilter() returns a list with two components, sample_idx and variant_idx, which give the indices of the output array according to the order of the input 'sample.id', 'sample.sel', 'variant.id' or 'variant.sel'; if ret.idx=TRUE, seqSetFilterAnnotID() returns an index vector; otherwise nothing is returned.
Xiuwen Zheng
seqSetFilterCond, seqGetFilter, seqGetData, seqApply
# the GDS file (gds.fn <- seqExampleFileName("gds")) # display (f <- seqOpen(gds.fn)) # get 'sample.id (samp.id <- seqGetData(f, "sample.id")) # "NA06984" "NA06985" "NA06986" ... # get 'variant.id' head(variant.id <- seqGetData(f, "variant.id")) # get 'chromosome' table(seqGetData(f, "chromosome")) # get 'allele' head(seqGetData(f, "allele")) # "T,C" "G,A" "G,A" ... # set sample filters seqSetFilter(f, sample.id=samp.id[c(2,4,6,8)]) seqSetFilter(f, sample.id=samp.id[c(2,4,6,8)], ret.idx=TRUE) (v <- seqSetFilter(f, sample.id=samp.id[c(8,2,6,4)], ret.idx=TRUE)) all(seqGetData(f, "sample.id")[v$sample_idx] == samp.id[c(8,2,6,4)]) # set variant filters seqSetFilter(f, variant.id=variant.id[c(2,4,6,8,10,12)], ret.idx=TRUE) (v <- seqSetFilter(f, variant.id=variant.id[c(12,4,6,10,8,12)], ret.idx=TRUE)) all(variant.id[c(12,4,6,10,8,12)] == seqGetData(f, "variant.id")[v$variant_idx]) set.seed(100) seqSetFilter(f, variant.id=sample(variant.id, 5)) # get genotypic data seqGetData(f, "genotype") ## OR # set sample and variant filters seqSetFilter(f, sample.sel=c(2,4,6,8)) set.seed(100) seqSetFilter(f, variant.sel=sample.int(length(variant.id), 5)) # get genotypic data seqGetData(f, "genotype") ## set the intersection seqResetFilter(f) seqSetFilterChrom(f, 10L) seqSummary(f, "genotype", check="none") AF <- seqAlleleFreq(f) table(AF <= 0.9) seqSetFilter(f, variant.sel=(AF<=0.9), action="intersect") seqSummary(f, "genotype", check="none") ## chromosome seqResetFilter(f) seqSetFilterChrom(f, is.num=TRUE) seqSummary(f, "genotype", check="none") seqSetFilterChrom(f, is.num=FALSE) seqSummary(f, "genotype", check="none") seqSetFilterChrom(f, 1:4) seqSummary(f, "genotype", check="none") table(seqGetData(f, "chromosome")) # HLA region seqSetFilterChrom(f, 6, from.bp=29719561, to.bp=32883508) seqSummary(f, "genotype", check="none") # two regions seqSetFilterChrom(f, c(1, 6), from.bp=c(1000000, 29719561), to.bp=c(90000000, 32883508)) seqSummary(f, "genotype", check="none") seqGetData(f, 
"chromosome") ## intersection option seqResetFilter(f) seqSetFilterChrom(f, 6, from.bp=29719561, to.bp=32883508) # MHC seqSetFilterChrom(f, include=6) # chromosome 6 seqResetFilter(f) seqSetFilterChrom(f, 6, from.bp=29719561, to.bp=32883508) # MHC seqSetFilterChrom(f, include=6, intersect=TRUE) # MHC region only # close the GDS file seqClose(f)