VariantKey
5.4.1
Numerical Encoding for Human Genetic Variants
|
VariantKey main functions. More...
Go to the source code of this file.
Data Structures | |
struct | variantkey_t |
struct | vkrange_t |
Macros | |
#define | VKMASK_CHROM 0xF800000000000000 |
VariantKey binary mask for CHROM [ 11111000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ]. More... | |
#define | VKMASK_POS 0x07FFFFFF80000000 |
VariantKey binary mask for POS [ 00000111 11111111 11111111 11111111 10000000 00000000 00000000 00000000 ]. More... | |
#define | VKMASK_CHROMPOS 0xFFFFFFFF80000000 |
VariantKey binary mask for CHROM+POS [ 11111111 11111111 11111111 11111111 10000000 00000000 00000000 00000000 ]. More... | |
#define | VKMASK_REFALT 0x000000007FFFFFFF |
VariantKey binary mask for REF+ALT [ 00000000 00000000 00000000 00000000 01111111 11111111 11111111 11111111 ]. More... | |
#define | VKSHIFT_CHROM 59 |
CHROM LSB position from the VariantKey LSB. More... | |
#define | VKSHIFT_POS 31 |
POS LSB position from the VariantKey LSB. More... | |
#define | MAXUINT32 0xFFFFFFFF |
Maximum value for uint32_t. More... | |
Typedefs | |
typedef struct variantkey_t | variantkey_t |
typedef struct vkrange_t | vkrange_t |
Functions | |
static uint8_t | encode_chrom (const char *chrom, size_t size) |
Returns chromosome numerical encoding. More... | |
static size_t | decode_chrom (uint8_t code, char *chrom) |
Decode the chromosome numerical code. More... | |
static uint32_t | encode_base (const uint8_t c) |
static int | encode_allele (uint32_t *h, uint8_t *bitpos, const char *str, size_t size) |
static uint32_t | encode_refalt_rev (const char *ref, size_t sizeref, const char *alt, size_t sizealt) |
static uint32_t | muxhash (uint32_t k, uint32_t h) |
static uint32_t | encode_packchar (int c) |
static uint32_t | pack_chars_tail (const char *str, size_t size) |
static uint32_t | pack_chars (const char *str) |
static uint32_t | hash32 (const char *str, size_t size) |
static uint32_t | encode_refalt_hash (const char *ref, size_t sizeref, const char *alt, size_t sizealt) |
static uint32_t | encode_refalt (const char *ref, size_t sizeref, const char *alt, size_t sizealt) |
Returns reference+alternate numerical encoding. More... | |
static char | decode_base (uint32_t code, int bitpos) |
static size_t | decode_refalt_rev (uint32_t code, char *ref, size_t *sizeref, char *alt, size_t *sizealt) |
static size_t | decode_refalt (uint32_t code, char *ref, size_t *sizeref, char *alt, size_t *sizealt) |
Decode the 32 bit REF+ALT code if reversible (if it has 11 or fewer bases in total and only contains ACGT letters). More... | |
static uint64_t | encode_variantkey (uint8_t chrom, uint32_t pos, uint32_t refalt) |
Returns a 64 bit variant key based on the pre-encoded CHROM, POS (0-based) and REF+ALT. More... | |
static uint8_t | extract_variantkey_chrom (uint64_t vk) |
Extract the CHROM code from VariantKey. More... | |
static uint32_t | extract_variantkey_pos (uint64_t vk) |
Extract the POS code from VariantKey. More... | |
static uint32_t | extract_variantkey_refalt (uint64_t vk) |
Extract the REF+ALT code from VariantKey. More... | |
static void | decode_variantkey (uint64_t code, variantkey_t *vk) |
Decodes a VariantKey code and returns the components as a variantkey_t structure. More... | |
static uint64_t | variantkey (const char *chrom, size_t sizechrom, uint32_t pos, const char *ref, size_t sizeref, const char *alt, size_t sizealt) |
static void | variantkey_range (uint8_t chrom, uint32_t pos_min, uint32_t pos_max, vkrange_t *range) |
Returns minimum and maximum VariantKeys for range searches. More... | |
static int8_t | compare_uint64_t (uint64_t a, uint64_t b) |
static int8_t | compare_variantkey_chrom (uint64_t vka, uint64_t vkb) |
Compares two VariantKeys by chromosome only. More... | |
static int8_t | compare_variantkey_chrom_pos (uint64_t vka, uint64_t vkb) |
Compares two VariantKeys by chromosome and position. More... | |
static size_t | variantkey_hex (uint64_t vk, char *str) |
Returns VariantKey hexadecimal string (16 characters). More... | |
static uint64_t | parse_variantkey_hex (const char *vs) |
Parses a VariantKey hexadecimal string and returns the code. More... | |
The functions provided here allow generating and processing 64 bit unsigned integer keys for Human Genetic Variants. The VariantKey is sortable by chromosome and position, and it is also fully reversible for variants with up to 11 bases in total between the Reference and Alternate alleles. It can be used to sort, search and match variant-based data easily and very quickly.
#define MAXUINT32 0xFFFFFFFF |
#define VKMASK_CHROM 0xF800000000000000 |
#define VKMASK_CHROMPOS 0xFFFFFFFF80000000 |
#define VKMASK_POS 0x07FFFFFF80000000 |
#define VKMASK_REFALT 0x000000007FFFFFFF |
#define VKSHIFT_CHROM 59 |
#define VKSHIFT_POS 31 |
typedef struct variantkey_t variantkey_t |
VariantKey struct. Contains the numerically encoded VariantKey components (CHROM, POS, REF+ALT).
Struct containing the minimum and maximum VariantKey values for range searches.
|
inlinestatic |
|
inlinestatic |
vka | The first VariantKey to be compared. |
vkb | The second VariantKey to be compared. |
|
inlinestatic |
vka | The first VariantKey to be compared. |
vkb | The second VariantKey to be compared. |
|
inlinestatic |
|
inlinestatic |
code | CHROM code. |
chrom | CHROM string buffer to be returned. Its size should be enough to contain the results (max 4 bytes). |
|
inlinestatic |
code | REF+ALT code |
ref | REF string buffer to be returned. |
sizeref | Pointer to the size of the ref buffer, excluding the terminating null byte. This will contain the final ref size. |
alt | ALT string buffer to be returned. |
sizealt | Pointer to the size of the alt buffer, excluding the terminating null byte. This will contain the final alt size. |
|
inlinestatic |
|
inlinestatic |
code | VariantKey code. |
vk | Decoded variantkey structure. |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
chrom | Chromosome. An identifier from the reference genome, no white-space permitted. |
size | Length of the chrom string, excluding the terminating null byte. |
|
inlinestatic |
|
inlinestatic |
ref | Reference allele. String containing a sequence of nucleotide letters. The value in the pos field refers to the position of the first nucleotide in the String. Characters must be A-Z, a-z or * |
sizeref | Length of the ref string, excluding the terminating null byte. |
alt | Alternate non-reference allele string. Characters must be A-Z, a-z or * |
sizealt | Length of the alt string, excluding the terminating null byte. |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
chrom | Encoded Chromosome (see encode_chrom). |
pos | Position. The reference position, with the first base having position 0. |
refalt | Encoded Reference + Alternate (see encode_refalt). |
|
inlinestatic |
vk | VariantKey code. |
|
inlinestatic |
vk | VariantKey code. |
|
inlinestatic |
vk | VariantKey code. |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
vs | VariantKey hexadecimal string (it must contain 16 hexadecimal characters). |
|
inlinestatic |
Returns a 64 bit variant key based on CHROM, POS (0-based), REF, ALT. The variant should already be normalized (see normalize_variant or use normalized_variantkey).
chrom | Chromosome. An identifier from the reference genome, no white-space or leading zeros permitted. |
sizechrom | Length of the chrom string, excluding the terminating null byte. |
pos | Position. The reference position, with the first base having position 0. |
ref | Reference allele. String containing a sequence of nucleotide letters. The value in the pos field refers to the position of the first nucleotide in the String. Characters must be A-Z, a-z or * |
sizeref | Length of the ref string, excluding the terminating null byte. |
alt | Alternate non-reference allele string. Characters must be A-Z, a-z or * |
sizealt | Length of the alt string, excluding the terminating null byte. |
|
inlinestatic |
The string represents a 64 bit number, or equivalently: 5 bits for CHROM, 28 bits for POS and 31 bits for REF+ALT.
vk | VariantKey code. |
str | String buffer to be returned (it must be sized 17 bytes at least). |
|
inlinestatic |
chrom | Chromosome encoded number. |
pos_min | Start reference position, with the first base having position 0. |
pos_max | End reference position, with the first base having position 0. |
range | VariantKey range values. |