51 #ifndef VARIANTKEY_BINSEARCH_H 52 #define VARIANTKEY_BINSEARCH_H 65 #ifdef WORDS_BIGENDIAN 66 #undef BINSEARCH_BIG_ENDIAN 67 #define BINSEARCH_BIG_ENDIAN 1 70 #if defined(BINSEARCH_LITTLE_ENDIAN) && defined(BINSEARCH_BIG_ENDIAN) 74 #if !defined(BINSEARCH_LITTLE_ENDIAN) && !defined(BINSEARCH_BIG_ENDIAN) 75 #define BINSEARCH_UNKNOWN_ENDIAN 1 78 #if !defined(bswap_16) || !defined(bswap_32) || !defined(bswap_64) 83 #if defined(HAVE_BUILTIN_BSWAP) || defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) 84 #define bswap_16(x) __builtin_bswap16(x) 85 #define bswap_32(x) __builtin_bswap32(x) 86 #define bswap_64(x) __builtin_bswap64(x) 91 #if defined(BINSEARCH_UNKNOWN_ENDIAN) || !defined(bswap_64) 98 #define bswap_16(x) _byteswap_ushort(x) 99 #define bswap_32(x) _byteswap_ulong(x) 100 #define bswap_64(x) _byteswap_uint64(x) 102 #elif defined(__APPLE__) 105 #include <libkern/OSByteOrder.h> 109 #define bswap_16(x) OSSwapInt16(x) 110 #define bswap_32(x) OSSwapInt32(x) 111 #define bswap_64(x) OSSwapInt64(x) 113 #elif defined(__sun) || defined(sun) 115 #include <sys/byteorder.h> 119 #define bswap_16(x) BSWAP_16(x) 120 #define bswap_32(x) BSWAP_32(x) 121 #define bswap_64(x) BSWAP_64(x) 123 #elif defined(__FreeBSD__) 125 #include <sys/endian.h> 129 #define bswap_16(x) bswap16(x) 130 #define bswap_32(x) bswap32(x) 131 #define bswap_64(x) bswap64(x) 133 #elif defined(__OpenBSD__) 135 #include <sys/types.h> 139 #define bswap_16(x) swap16(x) 140 #define bswap_32(x) swap32(x) 141 #define bswap_64(x) swap64(x) 143 #elif defined(__NetBSD__) 145 #include <sys/types.h> 146 #include <machine/bswap.h> 147 #if defined(__BSWAP_RENAME) && !defined(__bswap_32) 151 #define bswap_16(x) bswap16(x) 152 #define bswap_32(x) bswap32(x) 153 #define bswap_64(x) bswap64(x) 161 #include <byteswap.h> 165 #ifdef WORDS_BIGENDIAN 166 #define BINSEARCH_BIG_ENDIAN 1 173 #define order_be_uint8_t(x) (x) 174 #define order_le_uint8_t(x) (x) 176 #ifdef BINSEARCH_BIG_ENDIAN 177 #define order_be_uint16_t(x) (x) 178 #define order_be_uint32_t(x) (x) 179 #define order_be_uint64_t(x) (x) 180 #define order_le_uint16_t(x) (bswap_16(x)) 181 #define order_le_uint32_t(x) (bswap_32(x)) 182 #define order_le_uint64_t(x) (bswap_64(x)) 184 #define order_be_uint16_t(x) (bswap_16(x)) 185 #define order_be_uint32_t(x) (bswap_32(x)) 186 #define order_be_uint64_t(x) (bswap_64(x)) 187 #define order_le_uint16_t(x) (x) 188 #define order_le_uint32_t(x) (x) 189 #define order_le_uint64_t(x) (x) 203 #define get_address(blklen, blkpos, item) (((blklen) * (item)) + (blkpos)) 213 #define get_middle_point(first, last) ((first) + (((last) - (first)) >> 1)) 224 #define get_src_offset(T, src, offset) ((const T *)((src) + (offset))) 248 #define define_bytes_to(O, T) \ 254 static inline T bytes_##O##_to_##T(const uint8_t *src, uint64_t i) \ 256 return order_##O##_##T(*((const T *)(src + i))); \ 273 #define define_get_src_offset(T) \ 279 static inline const T *get_src_offset_##T(const uint8_t *src, uint64_t offset) \ 281 return get_src_offset(T, src, offset); \ 289 #define GET_MIDDLE_BLOCK(O, T) order_##O##_##T(*(get_src_offset(T, src, get_address(blklen, blkpos, middle)))) 291 #define FIND_START_LOOP_BLOCK(T) \ 292 uint64_t middle, notfound = *last; \ 294 while (*first < *last) \ 296 middle = get_middle_point(*first, *last); \ 298 #define FIND_END_LOOP_BLOCK \ 309 #define SUB_ITEM_VARS(T) \ 310 T bitmask = ((T)1 << (bitend - bitstart)); \ 311 bitmask ^= (bitmask - 1); \ 312 const uint8_t rshift = (((uint8_t)(sizeof(T) * 8) - 1) - bitend); 314 #define GET_ITEM_TASK(O, T) \ 315 x = GET_MIDDLE_BLOCK(O, T); 317 #define COL_GET_ITEM_TASK \ 320 #define GET_SUB_ITEM_TASK(O, T) \ 321 x = ((GET_MIDDLE_BLOCK(O, T) >> rshift) & bitmask); 323 #define COL_GET_SUB_ITEM_TASK \ 324 x = ((*(src + middle) >> rshift) & bitmask); 326 #define FIND_FIRST_INNER_CHECK \ 338 #define FIND_LAST_INNER_CHECK \ 351 #define HAS_NEXT_START_BLOCK \ 352 if (*pos >= (last - 1)) \ 358 #define HAS_PREV_START_BLOCK \ 359 if (*pos <= first) { \ 364 #define GET_POS_BLOCK(O, T) order_##O##_##T(*(get_src_offset(T, src, get_address(blklen, blkpos, *pos)))) 366 #define HAS_END_BLOCK(O, T) \ 367 return (GET_POS_BLOCK(O, T) == search); 369 #define COL_HAS_END_BLOCK(T) \ 370 return (*(src + *pos) == search); 372 #define HAS_SUB_END_BLOCK(O, T) \ 373 return (((GET_POS_BLOCK(O, T) >> rshift) & bitmask) == search); 375 #define COL_HAS_SUB_END_BLOCK(T) \ 376 return (((*(src + *pos) >> rshift) & bitmask) == search); 385 #define define_find_first(O, T) \ 397 static inline uint64_t find_first_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint64_t *first, uint64_t *last, T search) \ 399 FIND_START_LOOP_BLOCK(T) \ 400 GET_ITEM_TASK(O, T) \ 401 FIND_FIRST_INNER_CHECK \ 402 GET_ITEM_TASK(O, T) \ 403 FIND_END_LOOP_BLOCK \ 422 #define define_find_first_sub(O, T) \ 436 static inline uint64_t find_first_sub_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint8_t bitstart, uint8_t bitend, uint64_t *first, uint64_t *last, T search) \ 439 FIND_START_LOOP_BLOCK(T) \ 440 GET_SUB_ITEM_TASK(O, T) \ 441 FIND_FIRST_INNER_CHECK \ 442 GET_SUB_ITEM_TASK(O, T) \ 443 FIND_END_LOOP_BLOCK \ 462 #define define_find_last(O, T) \ 474 static inline uint64_t find_last_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint64_t *first, uint64_t *last, T search) \ 476 FIND_START_LOOP_BLOCK(T) \ 477 GET_ITEM_TASK(O, T) \ 478 FIND_LAST_INNER_CHECK \ 479 GET_ITEM_TASK(O, T) \ 480 FIND_END_LOOP_BLOCK \ 499 #define define_find_last_sub(O, T) \ 513 static inline uint64_t find_last_sub_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint8_t bitstart, uint8_t bitend, uint64_t *first, uint64_t *last, T search) \ 516 FIND_START_LOOP_BLOCK(T) \ 517 GET_SUB_ITEM_TASK(O, T) \ 518 FIND_LAST_INNER_CHECK \ 519 GET_SUB_ITEM_TASK(O, T) \ 520 FIND_END_LOOP_BLOCK \ 538 #define define_has_next(O, T) \ 552 static inline bool has_next_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint64_t *pos, uint64_t last, T search) \ 554 HAS_NEXT_START_BLOCK \ 555 HAS_END_BLOCK(O, T) \ 573 #define define_has_next_sub(O, T) \ 589 static inline bool has_next_sub_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint8_t bitstart, uint8_t bitend, uint64_t *pos, uint64_t last, T search) \ 591 HAS_NEXT_START_BLOCK \ 593 HAS_SUB_END_BLOCK(O, T) \ 611 #define define_has_prev(O, T) \ 625 static inline bool has_prev_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint64_t first, uint64_t *pos, T search) \ 627 HAS_PREV_START_BLOCK \ 628 HAS_END_BLOCK(O, T) \ 646 #define define_has_prev_sub(O, T) \ 662 static inline bool has_prev_sub_##O##_##T(const uint8_t *src, uint64_t blklen, uint64_t blkpos, uint8_t bitstart, uint8_t bitend, uint64_t first, uint64_t *pos, T search) \ 664 HAS_PREV_START_BLOCK \ 666 HAS_SUB_END_BLOCK(O, T) \ 686 #define define_col_find_first(T) \ 696 static inline uint64_t col_find_first_##T(const T *src, uint64_t *first, uint64_t *last, T search) \ 698 FIND_START_LOOP_BLOCK(T) \ 700 FIND_FIRST_INNER_CHECK \ 702 FIND_END_LOOP_BLOCK \ 716 #define define_col_find_first_sub(T) \ 728 static inline uint64_t col_find_first_sub_##T(const T *src, uint8_t bitstart, uint8_t bitend, uint64_t *first, uint64_t *last, T search) \ 731 FIND_START_LOOP_BLOCK(T) \ 732 COL_GET_SUB_ITEM_TASK \ 733 FIND_FIRST_INNER_CHECK \ 734 COL_GET_SUB_ITEM_TASK \ 735 FIND_END_LOOP_BLOCK \ 749 #define define_col_find_last(T) \ 759 static inline uint64_t col_find_last_##T(const T *src, uint64_t *first, uint64_t *last, T search) \ 761 FIND_START_LOOP_BLOCK(T) \ 763 FIND_LAST_INNER_CHECK \ 765 FIND_END_LOOP_BLOCK \ 779 #define define_col_find_last_sub(T) \ 791 static inline uint64_t col_find_last_sub_##T(const T *src, uint8_t bitstart, uint8_t bitend, uint64_t *first, uint64_t *last, T search) \ 794 FIND_START_LOOP_BLOCK(T) \ 795 COL_GET_SUB_ITEM_TASK \ 796 FIND_LAST_INNER_CHECK \ 797 COL_GET_SUB_ITEM_TASK \ 798 FIND_END_LOOP_BLOCK \ 811 #define define_col_has_next(T) \ 823 static inline bool col_has_next_##T(const T *src, uint64_t *pos, uint64_t last, T search) \ 825 HAS_NEXT_START_BLOCK \ 826 COL_HAS_END_BLOCK(T) \ 839 #define define_col_has_next_sub(T) \ 853 static inline bool col_has_next_sub_##T(const T *src, uint8_t bitstart, uint8_t bitend, uint64_t *pos, uint64_t last, T search) \ 855 HAS_NEXT_START_BLOCK \ 857 COL_HAS_SUB_END_BLOCK(T) \ 870 #define define_col_has_prev(T) \ 882 static inline bool col_has_prev_##T(const T *src, uint64_t first, uint64_t *pos, T search) \ 884 HAS_PREV_START_BLOCK \ 885 COL_HAS_END_BLOCK(T) \ 898 #define define_col_has_prev_sub(T) \ 912 static inline bool col_has_prev_sub_##T(const T *src, uint8_t bitstart, uint8_t bitend, uint64_t first, uint64_t *pos, T search) \ 914 HAS_PREV_START_BLOCK \ 916 COL_HAS_SUB_END_BLOCK(T) \ 930 mf->index[0] = mf->doffset;
931 for (i = 0; i < mf->ncols; i++)
939 mf->nrows = (mf->dlength / b);
940 for (i = 1; i < mf->ncols; i++)
942 b = (mf->nrows * mf->ctbytes[(i - 1)]);
943 mf->index[i] = mf->index[(i - 1)] + b + ((8 - (b & 7)) & 7);
949 const uint8_t *tp = (
const uint8_t *)(mf->
src + 8);
952 const uint64_t *op = (
const uint64_t *)(mf->
src + mf->
doffset);
955 for (i = 0; i < mf->
ncols; i++)
958 mf->
index[i] = *op++;
966 mf->
doffset = (uint64_t)(*((
const uint32_t *)(mf->
src + 9))) + 13;
969 uint64_t type = (*((
const uint64_t *)(mf->
src + mf->
size - 8)));
970 if ((type & 0xffffffffffff0000) == 0x31574f5252410000)
972 mf->
dlength -= (uint64_t)(*((
const uint32_t *)(mf->
src + mf->
size - 10))) + 10;
980 uint32_t type = (*((
const uint32_t *)(mf->
src + mf->
size - 4)));
981 if (type == 0x31414546)
983 mf->
dlength -= (uint64_t)(*((
const uint32_t *)(mf->
src + mf->
size - 8))) + 8;
997 mf->
src = (uint8_t*)MAP_FAILED;
1003 struct stat statbuf;
1004 if (((mf->
fd = open(file, O_RDONLY)) < 0) || (fstat(mf->
fd, &statbuf) < 0))
1008 mf->
size = (uint64_t)statbuf.st_size;
1009 mf->
src = (uint8_t*)mmap(0, mf->
size, PROT_READ, MAP_PRIVATE, mf->
fd, 0);
1015 uint64_t type = (*((
const uint64_t *)(mf->
src)));
1019 case 0x00314352534e4942:
1023 case 0x000031574f525241:
1027 case 0x0000000031414546:
1044 int err = munmap(mf.
src, mf.
size);
1049 return close(mf.
fd);
1052 #endif // VARIANTKEY_BINSEARCH_H uint64_t nrows
Number of rows.
Definition: binsearch.h:236
Definition: binsearch.h:229
#define define_has_prev_sub(O, T)
Definition: binsearch.h:646
int fd
File descriptor.
Definition: binsearch.h:232
#define define_col_has_next(T)
Definition: binsearch.h:811
uint8_t * src
Pointer to the memory map.
Definition: binsearch.h:231
#define define_col_find_first(T)
Definition: binsearch.h:686
#define define_col_has_next_sub(T)
Definition: binsearch.h:839
#define define_has_next_sub(O, T)
Definition: binsearch.h:573
static void mmap_binfile(const char *file, mmfile_t *mf)
Definition: binsearch.h:995
#define define_find_first_sub(O, T)
Definition: binsearch.h:422
#define define_find_first(O, T)
Definition: binsearch.h:385
uint64_t doffset
Offset to the beginning of the data block (address of the first byte of the first item in the first c...
Definition: binsearch.h:234
uint64_t dlength
Length in bytes of the data block.
Definition: binsearch.h:235
uint64_t size
File size in bytes.
Definition: binsearch.h:233
#define define_get_src_offset(T)
Definition: binsearch.h:273
#define define_col_find_last_sub(T)
Definition: binsearch.h:779
uint8_t ctbytes[256]
Number of bytes per column type (i.e. 1 for uint8_t, 2 for uint16_t, 4 for uint32_t, 8 for uint64_t). - THIS MUST BE MANUALLY SET EXCEPT FOR THE "BINSRC1" FORMAT.
Definition: binsearch.h:238
#define MAXCOLS
Maximum number of columns indexable.
Definition: binsearch.h:192
uint8_t ncols
Number of columns - THIS MUST BE MANUALLY SET EXCEPT FOR THE "BINSRC1" FORMAT.
Definition: binsearch.h:237
#define define_col_find_first_sub(T)
Definition: binsearch.h:716
uint64_t index[256]
Index of the offsets to the beginning of each column.
Definition: binsearch.h:239
#define define_col_has_prev_sub(T)
Definition: binsearch.h:898
#define define_has_next(O, T)
Definition: binsearch.h:538
#define define_col_find_last(T)
Definition: binsearch.h:749
static int munmap_binfile(mmfile_t mf)
Definition: binsearch.h:1042
#define define_bytes_to(O, T)
Definition: binsearch.h:248
#define define_col_has_prev(T)
Definition: binsearch.h:870
static void parse_info_arrow(mmfile_t *mf)
Definition: binsearch.h:964
#define define_has_prev(O, T)
Definition: binsearch.h:611
static void parse_col_offset(mmfile_t *mf)
Definition: binsearch.h:926
#define define_find_last(O, T)
Definition: binsearch.h:462
static void parse_info_binsrc(mmfile_t *mf)
Definition: binsearch.h:947
static void parse_info_feather(mmfile_t *mf)
Definition: binsearch.h:976
#define define_find_last_sub(O, T)
Definition: binsearch.h:499