VariantKey
5.4.1
Numerical Encoding for Human Genetic Variants
|
RegionKey main functions. More...
Go to the source code of this file.
Data Structures | |
struct | regionkey_t |
struct | regionkey_rev_t |
Macros | |
#define | RK_MAX_POS 0x000000000FFFFFFF |
Maximum position value (2^28 - 1) More... | |
#define | RKMASK_CHROM 0xF800000000000000 |
RegionKey binary mask for CHROM [ 11111000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ]. More... | |
#define | RKMASK_STARTPOS 0x07FFFFFF80000000 |
RegionKey binary mask for START POS [ 00000111 11111111 11111111 11111111 10000000 00000000 00000000 00000000 ]. More... | |
#define | RKMASK_ENDPOS 0x000000007FFFFFF8 |
RegionKey binary mask for END POS [ 00000000 00000000 00000000 00000000 01111111 11111111 11111111 11111000 ]. More... | |
#define | RKMASK_STRAND 0x0000000000000006 |
RegionKey binary mask for STRAND [ 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000110 ]. More... | |
#define | RKMASK_NOPOS 0xF800000000000007 |
RegionKey binary mask WITHOUT POS [ 11111000 00000000 00000000 00000000 00000000 00000000 00000000 00000111 ]. More... | |
#define | RKSHIFT_CHROM 59 |
CHROM LSB position from the RegionKey LSB. More... | |
#define | RKSHIFT_STARTPOS 31 |
START POS LSB position from the RegionKey LSB. More... | |
#define | RKSHIFT_ENDPOS 3 |
END POS LSB position from the RegionKey LSB. More... | |
#define | RKSHIFT_STRAND 1 |
STRAND LSB position from the RegionKey LSB. More... | |
#define | RK_CHROM ((rk & RKMASK_CHROM) >> RKSHIFT_CHROM) |
Extract the CHROM code from RegionKey. More... | |
#define | RK_STARTPOS ((rk & RKMASK_STARTPOS) >> RKSHIFT_STARTPOS) |
Extract the START POS code from RegionKey. More... | |
#define | RK_ENDPOS ((rk & RKMASK_ENDPOS) >> RKSHIFT_ENDPOS) |
Extract the END POS code from RegionKey. More... | |
#define | RK_STRAND ((rk & RKMASK_STRAND) >> RKSHIFT_STRAND) |
Extract the STRAND from RegionKey. More... | |
Typedefs | |
typedef struct regionkey_t | regionkey_t |
typedef struct regionkey_rev_t | regionkey_rev_t |
Functions | |
static uint8_t | encode_region_strand (int8_t strand) |
Encode the strand direction (-1 > 2, 0 > 0, +1 > 1). More... | |
static int8_t | decode_region_strand (uint8_t strand) |
Decode the strand direction code (0 > 0, 1 > +1, 2 > -1). More... | |
static uint64_t | encode_regionkey (uint8_t chrom, uint32_t startpos, uint32_t endpos, uint8_t strand) |
Returns a 64 bit regionkey. More... | |
static uint8_t | extract_regionkey_chrom (uint64_t rk) |
Extract the CHROM code from RegionKey. More... | |
static uint32_t | extract_regionkey_startpos (uint64_t rk) |
Extract the START POS code from RegionKey. More... | |
static uint32_t | extract_regionkey_endpos (uint64_t rk) |
Extract the END POS code from RegionKey. More... | |
static uint8_t | extract_regionkey_strand (uint64_t rk) |
Extract the STRAND from RegionKey. More... | |
static void | decode_regionkey (uint64_t code, regionkey_t *rk) |
Decodes a RegionKey code and returns the components as a regionkey_t structure. More... | |
static void | reverse_regionkey (uint64_t rk, regionkey_rev_t *rev) |
static uint64_t | regionkey (const char *chrom, size_t sizechrom, uint32_t startpos, uint32_t endpos, int8_t strand) |
Returns a 64 bit regionkey based on CHROM, START POS (0-based), END POS and STRAND. More... | |
static uint64_t | extend_regionkey (uint64_t rk, uint32_t size) |
Extend a regionkey region by a fixed amount from the start and end position. More... | |
static size_t | regionkey_hex (uint64_t rk, char *str) |
Returns RegionKey hexadecimal string (16 characters). More... | |
static uint64_t | parse_regionkey_hex (const char *rs) |
Parses a RegionKey hexadecimal string and returns the code. More... | |
static uint64_t | get_regionkey_chrom_startpos (uint64_t rk) |
Get the CHROM + START POS encoding from RegionKey. More... | |
static uint64_t | get_regionkey_chrom_endpos (uint64_t rk) |
Get the CHROM + END POS encoding from RegionKey. More... | |
static uint8_t | are_overlapping_regions (uint8_t a_chrom, uint32_t a_startpos, uint32_t a_endpos, uint8_t b_chrom, uint32_t b_startpos, uint32_t b_endpos) |
Check if two regions are overlapping. More... | |
static uint8_t | are_overlapping_region_regionkey (uint8_t chrom, uint32_t startpos, uint32_t endpos, uint64_t rk) |
Check if a region and a regionkey are overlapping. More... | |
static uint8_t | are_overlapping_regionkeys (uint64_t rka, uint64_t rkb) |
Check if two regionkeys are overlapping. More... | |
static uint8_t | are_overlapping_variantkey_regionkey (nrvk_cols_t nvc, uint64_t vk, uint64_t rk) |
Check if variantkey and regionkey are overlapping. More... | |
static uint64_t | variantkey_to_regionkey (nrvk_cols_t nvc, uint64_t vk) |
Get RegionKey from VariantKey. More... | |
The functions provided here allow generating and processing 64-bit unsigned integer keys for human genomic regions. The RegionKey is sortable by chromosome and start position, and it is also fully reversible.
#define RK_CHROM ((rk & RKMASK_CHROM) >> RKSHIFT_CHROM) |
#define RK_ENDPOS ((rk & RKMASK_ENDPOS) >> RKSHIFT_ENDPOS) |
#define RK_MAX_POS 0x000000000FFFFFFF |
#define RK_STARTPOS ((rk & RKMASK_STARTPOS) >> RKSHIFT_STARTPOS) |
#define RK_STRAND ((rk & RKMASK_STRAND) >> RKSHIFT_STRAND) |
#define RKMASK_CHROM 0xF800000000000000 |
#define RKMASK_ENDPOS 0x000000007FFFFFF8 |
#define RKMASK_NOPOS 0xF800000000000007 |
#define RKMASK_STARTPOS 0x07FFFFFF80000000 |
#define RKMASK_STRAND 0x0000000000000006 |
#define RKSHIFT_CHROM 59 |
#define RKSHIFT_ENDPOS 3 |
#define RKSHIFT_STARTPOS 31 |
#define RKSHIFT_STRAND 1 |
typedef struct regionkey_rev_t regionkey_rev_t |
RegionKey decoded struct
typedef struct regionkey_t regionkey_t |
RegionKey struct. Contains the numerically encoded RegionKey components (CHROM, STARTPOS, ENDPOS, STRAND).
|
inlinestatic |
chrom | Region A chromosome code. |
startpos | Region A start position. |
endpos | Region A end position (startpos + region length). |
rk | RegionKey of region B. |
|
inlinestatic |
rka | RegionKey A. |
rkb | RegionKey B. |
|
inlinestatic |
a_chrom | Region A chromosome code. |
a_startpos | Region A start position. |
a_endpos | Region A end position (startpos + region length). |
b_chrom | Region B chromosome code. |
b_startpos | Region B start position. |
b_endpos | Region B end position (startpos + region length). |
|
inlinestatic |
nvc | Structure containing the pointers to the NRVK memory mapped file columns. |
vk | VariantKey code. |
rk | RegionKey code. |
|
inlinestatic |
strand | Strand code. |
|
inlinestatic |
code | RegionKey code. |
rk | Decoded regionkey structure. |
|
inlinestatic |
strand | Strand direction (-1, 0, +1). |
|
inlinestatic |
chrom | Encoded Chromosome (see encode_chrom). |
startpos | Start position (zero based). |
endpos | End position (startpos + region_length). |
strand | Encoded Strand direction (-1 > 2, 0 > 0, +1 > 1) |
|
inlinestatic |
rk | RegionKey code. |
size | Amount to extend the region. |
|
inlinestatic |
rk | RegionKey code. |
|
inlinestatic |
rk | RegionKey code. |
|
inlinestatic |
rk | RegionKey code. |
|
inlinestatic |
rk | RegionKey code. |
|
inlinestatic |
rk | RegionKey code. |
|
inlinestatic |
rk | RegionKey code. |
|
inlinestatic |
rs | RegionKey hexadecimal string (it must contain 16 hexadecimal characters). |
|
inlinestatic |
chrom | Chromosome. An identifier from the reference genome, no white-space or leading zeros permitted. |
sizechrom | Length of the chrom string, excluding the terminating null byte. |
startpos | Start position (zero based). |
endpos | End position (startpos + region_length). |
strand | Strand direction (-1, 0, +1) |
|
inlinestatic |
rk | RegionKey code. |
str | String buffer to be returned (it must be at least 17 bytes long). |
|
inlinestatic |
Reverses a RegionKey code and returns the normalized components as a regionkey_rev_t structure.
rk | RegionKey code. |
rev | Structure containing the return values. |
|
inlinestatic |
nvc | Structure containing the pointers to the NRVK memory mapped file columns. |
vk | VariantKey code. |