44 #ifndef VARIANTKEY_GENOREF_H 45 #define VARIANTKEY_GENOREF_H 52 #ifndef ALLELE_MAXSIZE 53 #define ALLELE_MAXSIZE 256 57 #define NORM_WRONGPOS (-2) 58 #define NORM_INVALID (-1) 60 #define NORM_VALID (1) 61 #define NORM_SWAP (1 << 1) 62 #define NORM_FLIP (1 << 2) 63 #define NORM_LEXT (1 << 3) 64 #define NORM_RTRIM (1 << 4) 65 #define NORM_LTRIM (1 << 5) 101 return (c ^ (
'a' -
'A'));
115 static inline void prepend_char(
const uint8_t pre,
char *
string,
size_t *size)
117 memmove(
string + 1,
string, (*size + 1));
133 uint64_t offset = (mf.
index[chrom] + pos);
134 if (offset >= mf.
index[(chrom + 1)])
138 return *(mf.
src + offset);
158 uint64_t offset = (mf.
index[chrom] + pos);
159 if ((offset + sizeref - 1) >= mf.
index[(chrom + 1)])
166 for (i = 0; i < sizeref; i++)
169 gref = mf.
src[(offset + i)];
202 || ((uref ==
'B') && (gref !=
'A'))
203 || ((gref ==
'B') && (uref !=
'A'))
204 || ((uref ==
'D') && (gref !=
'C'))
205 || ((gref ==
'D') && (uref !=
'C'))
206 || ((uref ==
'H') && (gref !=
'G'))
207 || ((gref ==
'H') && (uref !=
'G'))
208 || ((uref ==
'V') && (gref !=
'T'))
209 || ((gref ==
'V') && (uref !=
'T'))
210 || ((uref ==
'W') && ((gref ==
'A') || (gref ==
'T')))
211 || ((gref ==
'W') && ((uref ==
'A') || (uref ==
'T')))
212 || ((uref ==
'S') && ((gref ==
'C') || (gref ==
'G')))
213 || ((gref ==
'S') && ((uref ==
'C') || (uref ==
'G')))
214 || ((uref ==
'M') && ((gref ==
'A') || (gref ==
'C')))
215 || ((gref ==
'M') && ((uref ==
'A') || (uref ==
'C')))
216 || ((uref ==
'K') && ((gref ==
'G') || (gref ==
'T')))
217 || ((gref ==
'K') && ((uref ==
'G') || (uref ==
'T')))
218 || ((uref ==
'R') && ((gref ==
'A') || (gref ==
'G')))
219 || ((gref ==
'R') && ((uref ==
'A') || (uref ==
'G')))
220 || ((uref ==
'Y') && ((gref ==
'C') || (gref ==
'T')))
221 || ((gref ==
'Y') && ((uref ==
'C') || (uref ==
'T'))))
257 static const char map[] =
"00000000000000000000000000000000" 258 "00000000000000000123456789000000" 260 "0TVGHEFCDIJMLKNOPQYSAUBWXRZ00000" 262 "0TVGHEFCDIJMLKNOPQYSAUBWXRZ00000" 263 "00000000000000000000000000000000" 264 "00000000000000000000000000000000" 265 "00000000000000000000000000000000" 266 "00000000000000000000000000000000";
268 for (i = 0; i < size; i++)
270 allele[i] = map[((uint8_t)allele[i])];
282 static inline void swap_alleles(
char *first,
size_t *sizefirst,
char *second,
size_t *sizesecond)
285 strncpy(tmp, first, *sizefirst);
286 strncpy(first, second, *sizesecond);
287 strncpy(second, tmp, *sizefirst);
289 first[*sizefirst] = 0;
290 second[*sizesecond] = 0;
336 strncpy(fref, ref, *sizeref);
341 strncpy(ref, fref, *sizealt);
347 strncpy(falt, alt, *sizealt);
352 strncpy(ref, falt, *sizealt);
353 strncpy(alt, fref, *sizeref);
364 if ((*sizealt == 1) && (*sizeref == 1))
371 if (((*sizealt == 0) || (*sizeref == 0)) && (*pos > 0))
374 left = (char)mf.
src[(mf.
index[chrom] + *pos)];
382 if ((*sizealt > 1) && (*sizeref > 1) && (
aztoupper(alt[(*sizealt - 1)]) ==
aztoupper(ref[(*sizeref - 1)])))
396 while ((offset < (*sizealt - 1)) && (offset < (*sizeref - 1)) && (
aztoupper(alt[offset]) ==
aztoupper(ref[offset])))
405 memmove(ref, ref + offset, *sizeref);
406 memmove(alt, alt + offset, *sizealt);
432 static inline uint64_t
normalized_variantkey(
mmfile_t mf,
const char *chrom,
size_t sizechrom, uint32_t *pos, uint8_t posindex,
char *ref,
size_t *sizeref,
char *alt,
size_t *sizealt,
int *ret)
440 #endif // VARIANTKEY_GENOREF_H #define ALLELE_MAXSIZE
Maximum allele length.
Definition: genoref.h:53
static int aztoupper(int c)
Definition: genoref.h:97
#define NORM_FLIP
Normalization: The allele nucleotides have been flipped (each nucleotide has been replaced with its complement).
Definition: genoref.h:62
static void prepend_char(const uint8_t pre, char *string, size_t *size)
Definition: genoref.h:115
#define NORM_INVALID
Normalization: Invalid reference.
Definition: genoref.h:58
Definition: binsearch.h:229
static uint64_t encode_variantkey(uint8_t chrom, uint32_t pos, uint32_t refalt)
Returns a 64 bit variant key based on the pre-encoded CHROM, POS (0-based) and REF+ALT.
Definition: variantkey.h:440
static void mmap_genoref_file(const char *file, mmfile_t *mf)
Definition: genoref.h:75
uint8_t * src
Pointer to the memory map.
Definition: binsearch.h:231
static void mmap_binfile(const char *file, mmfile_t *mf)
Definition: binsearch.h:995
static int check_reference(mmfile_t mf, uint8_t chrom, uint32_t pos, const char *ref, size_t sizeref)
Definition: genoref.h:156
#define NORM_LTRIM
Normalization: Alleles have been left trimmed.
Definition: genoref.h:65
static void flip_allele(char *allele, size_t size)
Definition: genoref.h:239
#define NORM_SWAP
Normalization: The alleles have been swapped.
Definition: genoref.h:61
static void swap_sizes(size_t *first, size_t *second)
Definition: genoref.h:275
VariantKey main functions.
uint64_t size
File size in bytes.
Definition: binsearch.h:233
#define NORM_RTRIM
Normalization: Alleles have been right trimmed.
Definition: genoref.h:64
Functions to search values in binary files made of constant-length items.
static uint8_t encode_chrom(const char *chrom, size_t size)
Returns chromosome numerical encoding.
Definition: variantkey.h:86
uint8_t ncols
Number of columns - THIS MUST BE MANUALLY SET EXCEPT FOR THE "BINSRC1" FORMAT.
Definition: binsearch.h:237
#define NORM_LEXT
Normalization: Alleles have been left extended.
Definition: genoref.h:63
uint64_t index[256]
Index of the offsets to the beginning of each column.
Definition: binsearch.h:239
static void swap_alleles(char *first, size_t *sizefirst, char *second, size_t *sizesecond)
Definition: genoref.h:282
#define NORM_VALID
Normalization: The reference allele is inconsistent with the genome reference (i.e. when it contains nucleotide letters other than A, C, G and T).
Definition: genoref.h:60
static char get_genoref_seq(mmfile_t mf, uint8_t chrom, uint32_t pos)
Definition: genoref.h:131
static uint64_t normalized_variantkey(mmfile_t mf, const char *chrom, size_t sizechrom, uint32_t *pos, uint8_t posindex, char *ref, size_t *sizeref, char *alt, size_t *sizealt, int *ret)
Returns a normalized 64 bit variant key based on CHROM, POS, REF, ALT.
Definition: genoref.h:432
static uint32_t encode_refalt(const char *ref, size_t sizeref, const char *alt, size_t sizealt)
Returns reference+alternate numerical encoding.
Definition: variantkey.h:317
#define NORM_WRONGPOS
Normalization: Invalid position.
Definition: genoref.h:57
static int normalize_variant(mmfile_t mf, uint8_t chrom, uint32_t *pos, char *ref, size_t *sizeref, char *alt, size_t *sizealt)
Definition: genoref.h:315