func AreOverlappingRegionKeys(rka, rkb uint64) bool
AreOverlappingRegionKeys checks if two RegionKeys are overlapping.
func AreOverlappingRegionRegionKey(chrom uint8, startpos, endpos uint32, rk uint64) bool
AreOverlappingRegionRegionKey checks if a region and a RegionKey are overlapping.
func AreOverlappingRegions(chromA uint8, startposA, endposA uint32, chromB uint8, startposB, endposB uint32) bool
AreOverlappingRegions checks if two regions are overlapping.
func CompareVariantKeyChrom(va, vb uint64) int
CompareVariantKeyChrom compares two VariantKeys by chromosome only.
func CompareVariantKeyChromPos(va, vb uint64) int
CompareVariantKeyChromPos compares two VariantKeys by chromosome and position.
func DecodeChrom(c uint8) string
DecodeChrom decodes the chromosome code to a string.
func DecodeRefAlt(c uint32) (string, string, uint8, uint8, uint8)
DecodeRefAlt decodes the REF+ALT code, if it is reversible.
func DecodeRegionStrand(strand uint8) int8
DecodeRegionStrand decodes the strand direction code (0 -> 0, 1 -> +1, 2 -> -1).
func DecodeStringID(esid uint64) string
DecodeStringID decodes the encoded string ID.
func EncodeChrom(chrom string) uint8
EncodeChrom returns chromosome encoding.
func EncodeRefAlt(ref string, alt string) uint32
EncodeRefAlt returns reference+alternate code.
func EncodeRegionKey(chrom uint8, startpos, endpos uint32, strand uint8) uint64
EncodeRegionKey returns a 64-bit RegionKey based on the pre-encoded CHROM, START POS, END POS and STRAND codes.
func EncodeRegionStrand(strand int8) uint8
EncodeRegionStrand encodes the strand direction (-1 -> 2, 0 -> 0, +1 -> 1).
func EncodeStringID(s string, start uint32) uint64
EncodeStringID encodes a maximum of 10 characters of a string into a 64-bit unsigned integer. The argument "start" indicates the first character to encode.
func EncodeStringNumID(s string, sep byte) uint64
EncodeStringNumID encodes a string composed of a character section followed by a separator character and a numerical section into a 64-bit unsigned integer. For example: "ABCDE:0001234". It encodes up to 5 characters in uppercase, a number up to 2^27, and up to 7 zero-padding digits. If the string is 10 characters or fewer, the EncodeStringID encoding is used instead.
func EncodeVariantKey(chrom uint8, pos, refalt uint32) uint64
EncodeVariantKey returns a Genetic Variant Key based on pre-encoded CHROM, POS (0-based), REF+ALT.
func ExtendRegionKey(rk uint64, size uint32) uint64
ExtendRegionKey extends a RegionKey region by a fixed amount from the start and end positions.
func ExtractRegionKeyChrom(rk uint64) uint8
ExtractRegionKeyChrom extracts the CHROM code from RegionKey.
func ExtractRegionKeyEndPos(rk uint64) uint32
ExtractRegionKeyEndPos extracts the END POS code from RegionKey.
func ExtractRegionKeyStartPos(rk uint64) uint32
ExtractRegionKeyStartPos extracts the START POS code from RegionKey.
func ExtractRegionKeyStrand(rk uint64) uint8
ExtractRegionKeyStrand extracts the STRAND code from RegionKey.
func ExtractVariantKeyChrom(v uint64) uint8
ExtractVariantKeyChrom extracts the CHROM code from VariantKey.
func ExtractVariantKeyPos(v uint64) uint32
ExtractVariantKeyPos extracts the POS code from VariantKey.
func ExtractVariantKeyRefAlt(v uint64) uint32
ExtractVariantKeyRefAlt extracts the REF+ALT code from VariantKey.
func FlipAllele(allele string) string
FlipAllele flips allele nucleotides.
func GetRegionKeyChromEndPos(rk uint64) uint64
GetRegionKeyChromEndPos gets the CHROM + END POS encoding from RegionKey.
func GetRegionKeyChromStartPos(rk uint64) uint64
GetRegionKeyChromStartPos gets the CHROM + START POS encoding from RegionKey.
func GetVariantKeyChromStartPos(vk uint64) uint64
GetVariantKeyChromStartPos gets the CHROM + START POS encoding from VariantKey.
func HashStringID(s string) uint64
HashStringID hashes the input string into a 64-bit unsigned integer.
func Hex(v uint64) string
Hex provides a 16-digit hexadecimal string representation of a 64-bit unsigned number.
func MmapNRVKFile(file string) (TMMFile, NRVKCols, error)
MmapNRVKFile memory-maps the NRVK binary file.
func MmapRSVKFile(file string, ctbytes []uint8) (TMMFile, RSIDVARCols, error)
MmapRSVKFile memory-maps the RSVK binary file.
func MmapVKRSFile(file string, ctbytes []uint8) (TMMFile, RSIDVARCols, error)
MmapVKRSFile memory-maps the VKRS binary file.
func ParseHex(s string) uint64
ParseHex parses a 16 digit HEX string and returns the 64 bit unsigned number.
func RegionKey(chrom string, startpos, endpos uint32, strand int8) uint64
RegionKey returns a 64 bit regionkey based on CHROM, START POS (0-based), END POS and STRAND.
func ReverseVariantKey(v uint64) (chrom string, pos uint32, ref string, alt string, sizeref uint8, sizealt uint8)
ReverseVariantKey decodes a VariantKey (64-bit unsigned integer) and returns its components.
func StringToNTBytes(s string) []byte
StringToNTBytes safely converts a string to a byte array with an extra null terminator. This is to ensure a correct CGO conversion to char*.
func StringToNTBytesN(s string, size uint32) []byte
StringToNTBytesN converts a string to a byte array, allocating "size" bytes.
func VariantKey(chrom string, pos uint32, ref, alt string) uint64
VariantKey returns a Genetic Variant Key based on CHROM, POS (0-based), REF, ALT. The variant should be already normalized (see NormalizeVariant or use NormalizedVariantKey).
NRVKCols contains the NRVK memory mapped file column info.
// NRVKCols contains the NRVK memory mapped file column info.
// A value of this type is returned by MmapNRVKFile together with the TMMFile
// describing the mapping.
// NOTE(review): the pointers presumably remain valid only while that TMMFile
// is still mapped (i.e. before Close is called) — confirm.
type NRVKCols struct {
	Vk     unsafe.Pointer // Pointer to the VariantKey column.
	Offset unsafe.Pointer // Pointer to the Offset column.
	Data   unsafe.Pointer // Pointer to the Data column.
	NRows  uint64         // Number of rows.
}
func (nr NRVKCols) AreOverlappingVariantKeyRegionKey(vk, rk uint64) bool
AreOverlappingVariantKeyRegionKey checks if a VariantKey and a RegionKey are overlapping.
func (nr NRVKCols) FindRefAltByVariantKey(vk uint64) (string, string, uint8, uint8, uint32)
FindRefAltByVariantKey retrieves the REF and ALT strings for the specified VariantKey.
func (nr NRVKCols) GetVariantKeyChromEndPos(vk uint64) uint64
GetVariantKeyChromEndPos gets the CHROM + END POS encoding from VariantKey.
func (nr NRVKCols) GetVariantKeyEndPos(vk uint64) uint32
GetVariantKeyEndPos gets the VariantKey end position (POS + REF length).
func (nr NRVKCols) GetVariantKeyRefLength(vk uint64) uint8
GetVariantKeyRefLength retrieves the REF length for the specified VariantKey.
func (nr NRVKCols) ReverseVariantKey(vk uint64) (TVariantKeyRev, uint32)
ReverseVariantKey reverses a VariantKey code and returns the normalized components.
func (nr NRVKCols) VariantToRegionkey(vk uint64) uint64
VariantToRegionkey gets the RegionKey from a VariantKey.
func (nr NRVKCols) VknrBinToTSV(tsvfile string) uint64
VknrBinToTSV converts an NRVK binary file (nrvk.bin) to a simple TSV. For the reverse operation see the resources/tools/nrvk.sh script.
RSIDVARCols contains the RSVK or VKRS memory mapped file column info.
// RSIDVARCols contains the RSVK or VKRS memory mapped file column info.
// A value of this type is returned by MmapRSVKFile and MmapVKRSFile together
// with the TMMFile describing the mapping.
// NOTE(review): the pointers presumably remain valid only while that TMMFile
// is still mapped (i.e. before Close is called) — confirm.
type RSIDVARCols struct {
	Vk    unsafe.Pointer // Pointer to the VariantKey column.
	Rs    unsafe.Pointer // Pointer to the rsID column.
	NRows uint64         // Number of rows.
}
func (crv RSIDVARCols) FindAllRVVariantKeyByRsid(first, last uint64, rsid uint32) (vks []uint64)
FindAllRVVariantKeyByRsid gets all VariantKeys for the specified rsID in the RV file. Returns a list of VariantKeys.
func (cvr RSIDVARCols) FindAllVRRsidByVariantKey(first, last uint64, vk uint64) (rsids []uint32)
FindAllVRRsidByVariantKey gets all rsIDs for the specified VariantKey in the VR file. Returns a list of rsIDs.
func (crv RSIDVARCols) FindRVVariantKeyByRsid(first, last uint64, rsid uint32) (uint64, uint64)
FindRVVariantKeyByRsid searches for the specified rsID and returns the first occurrence of VariantKey in the RV file.
func (crv RSIDVARCols) FindVRChromPosRange(first, last uint64, chrom uint8, posMin, posMax uint32) (uint32, uint64, uint64)
FindVRChromPosRange searches for the specified CHROM-POS range and returns the first occurrence of rsID in the VR file.
func (crv RSIDVARCols) FindVRRsidByVariantKey(first uint64, last uint64, vk uint64) (uint32, uint64)
FindVRRsidByVariantKey searches for the specified VariantKey and returns the first occurrence of rsID in the VR file.
func (crv RSIDVARCols) GetNextRVVariantKeyByRsid(pos, last uint64, rsid uint32) (uint64, uint64)
GetNextRVVariantKeyByRsid gets the next VariantKey for the specified rsID in the RV file. Returns the VariantKey or 0, and the position.
func (cvr RSIDVARCols) GetNextVRRsidByVariantKey(pos, last uint64, vk uint64) (uint32, uint64)
GetNextVRRsidByVariantKey gets the next rsID for the specified VariantKey in the VR file. Returns the rsID or 0, and the position.
TMMFile contains the memory-mapped file info.
// TMMFile contains the memory mapped file info.
// Values of this type are returned by MmapGenorefFile, MmapNRVKFile,
// MmapRSVKFile and MmapVKRSFile; Close unmaps and closes the file.
type TMMFile struct {
	Src     unsafe.Pointer // Pointer to the memory map.
	Fd      int            // File descriptor.
	Size    uint64         // File size in bytes.
	DOffset uint64         // Offset to the beginning of the data block (address of the first byte of the first item in the first column).
	DLength uint64         // Length in bytes of the data block.
	NRows   uint64         // Number of rows.
	NCols   uint8          // Number of columns.
	CTBytes []uint8        // Number of bytes per column type (i.e. 1 for uint8_t, 2 for uint16_t, 4 for uint32_t, 8 for uint64_t).
	Index   []uint64       // Index of the offsets to the beginning of each column.
}
func MmapGenorefFile(file string) (TMMFile, error)
MmapGenorefFile maps the specified fasta file in memory.
func (mf TMMFile) CheckReference(chrom uint8, pos uint32, ref string) int
CheckReference checks if the reference allele matches the reference genome data.
func (mf TMMFile) Close() error
Close unmaps and closes the memory-mapped file.
func (mf TMMFile) GetGenorefSeq(chrom uint8, pos uint32) byte
GetGenorefSeq returns the nucleotide at the specified chromosome and position.
func (mf TMMFile) NormalizeVariant(chrom uint8, pos uint32, ref string, alt string) (code int, npos uint32, nref, nalt string, nsizeref, nsizealt uint8)
NormalizeVariant flips alleles if required and applies the normalization algorithm described at: https://genome.sph.umich.edu/wiki/Variant_Normalization
func (mf TMMFile) NormalizedVariantKey(chrom string, pos uint32, posindex uint8, ref string, alt string) (vk uint64, code int)
NormalizedVariantKey returns a normalized Genetic Variant Key based on CHROM, POS, REF, ALT.
TRegionKey contains a representation of a genomic region key.
// TRegionKey contains a representation of a genomic region key.
// It is the structure returned by DecodeRegionKey; CHROM and STRAND are kept
// in their encoded (numeric) form.
type TRegionKey struct {
	Chrom    uint8  `json:"chrom"`    // CHROM code (presumably as produced by EncodeChrom — confirm).
	StartPos uint32 `json:"startpos"` // START POS of the region.
	EndPos   uint32 `json:"endpos"`   // END POS of the region.
	Strand   uint8  `json:"strand"`   // STRAND code (presumably as produced by EncodeRegionStrand: 0, 1 or 2 — confirm).
}
func DecodeRegionKey(rk uint64) TRegionKey
DecodeRegionKey decodes a RegionKey (64-bit unsigned integer) and returns the components as a TRegionKey structure.
TRegionKeyRev contains the components of a genomic region.
// TRegionKeyRev contains the components of a genomic region.
// It is the structure returned by ReverseRegionKey; unlike TRegionKey, CHROM is
// a string and STRAND is a signed direction.
type TRegionKeyRev struct {
	Chrom    string `json:"chrom"`    // Chromosome as a string (presumably as produced by DecodeChrom — confirm).
	StartPos uint32 `json:"startpos"` // START POS of the region.
	EndPos   uint32 `json:"endpos"`   // END POS of the region.
	Strand   int8   `json:"strand"`   // Strand direction (presumably -1, 0 or +1, as produced by DecodeRegionStrand — confirm).
}
func ReverseRegionKey(rk uint64) TRegionKeyRev
ReverseRegionKey decodes a RegionKey (64-bit unsigned integer) and returns its components.
TVKRange contains the minimum and maximum VariantKey values for range searches.
// TVKRange contains min and max VariantKey values for range searches.
// It is the structure returned by Range.
type TVKRange struct {
	Min uint64 `json:"min"` // Minimum VariantKey value of the range.
	Max uint64 `json:"max"` // Maximum VariantKey value of the range.
}
func Range(chrom uint8, posMin, posMax uint32) TVKRange
Range returns the minimum and maximum VariantKeys for range searches.
TVariantKey contains a representation of a genetic variant key.
// TVariantKey contains a representation of a genetic variant key.
// It is the structure returned by DecodeVariantKey; all components are kept in
// their encoded (numeric) form.
type TVariantKey struct {
	Chrom  uint8  `json:"chrom"`  // CHROM code.
	Pos    uint32 `json:"pos"`    // POS code.
	RefAlt uint32 `json:"refalt"` // REF+ALT code.
}
func DecodeVariantKey(v uint64) TVariantKey
DecodeVariantKey decodes a VariantKey (64-bit unsigned integer) and returns the components as a TVariantKey structure.
TVariantKeyRev contains the components of a genetic variant.
// TVariantKeyRev contains the components of a genetic variant.
// It is the structure returned by NRVKCols.ReverseVariantKey.
type TVariantKeyRev struct {
	Chrom   string `json:"chrom"`    // Chromosome as a string.
	Pos     uint32 `json:"pos"`      // Variant position.
	Ref     string `json:"ref"`      // REF allele string.
	Alt     string `json:"alt"`      // ALT allele string.
	SizeRef uint8  `json:"size_ref"` // Presumably the length of Ref — confirm.
	SizeAlt uint8  `json:"size_alt"` // Presumably the length of Alt — confirm.
}