canonicalize_cluster {CellaRepertorium} | R Documentation |
Find a canonical contig to represent a cluster
canonicalize_cluster( ccdb, contig_filter_args, tie_break_keys = character(), order = 1, representative = ccdb$cluster_pk[1], contig_fields = c("cdr3", "cdr3_nt", "chain", "v_gene", "d_gene", "j_gene"), overwrite = TRUE )
ccdb |
A ContigCellDB() object. |
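contig_filter_args |
an expression passed to dplyr::filter() to select candidate contigs. Multiple criteria must be combined with & rather than separated by commas. |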
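tie_break_keys |
(optional) character vector naming fields in contig_tbl used to break ties when contig_filter_args does not select a unique contig for each cluster. |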
order |
The rank order of the contig, based on tie_break_keys, to return. |
representative |
an optional field from contig_tbl; defaults to the first cluster primary key, ccdb$cluster_pk[1]. |
contig_fields |
Optional fields from contig_tbl that will be copied into cluster_tbl from the canonical contig. |
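overwrite |
logical; whether existing non-key fields in cluster_tbl should be overwritten with the canonical values (default TRUE). See left_join_warn() for the behaviour when FALSE. |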
ContigCellDB()
with some number of clusters/contigs/cells but with "canonical" values copied into cluster_tbl
canonicalize_cell()
left_join_warn()
# Example: pick a canonical contig for each cluster in a ContigCellDB.
library(dplyr)
data(ccdb_ex)

# Work on a copy restricted to the first 200 rows of the cell table.
ccdb_ex_small = ccdb_ex
ccdb_ex_small$cell_tbl = ccdb_ex_small$cell_tbl[1:200, ]

# Cluster contigs on the CDR3 nucleotide sequence, then refine the clusters.
ccdb_ex_small = cdhit_ccdb(
  ccdb_ex_small,
  sequence_key = 'cdr3_nt',
  type = 'DNA',
  cluster_name = 'DNA97',
  identity = .965,
  min_length = 12,
  G = 1
)
ccdb_ex_small = fine_clustering(
  ccdb_ex_small,
  sequence_key = 'cdr3_nt',
  type = 'DNA'
)

# Canonicalizing with the medoid contig is probably the most common choice.
ccdb_medoid = canonicalize_cluster(ccdb_ex_small)

# But there are other possibilities.
# To pass multiple "AND" filter arguments you must combine them with `&`.
ccdb_umi = canonicalize_cluster(
  ccdb_ex_small,
  contig_filter_args = chain == 'TRA' & length > 500,
  tie_break_keys = 'umis',
  contig_fields = c('chain', 'length')
)

# Inspect the canonical fields that were copied into the cluster table.
ccdb_umi$cluster_tbl %>%
  dplyr::select(chain, length) %>%
  summary()