Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .clangd
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,11 @@ Diagnostics:
Suppress:
- pragma_attribute_no_pop_eof
- pragma_attribute_stack_mismatch
---
# clang 18: silence -Wunqualified-std-cast-call, but only for the json11 dependency source matched below.
If:
PathMatch:
- dependencies/.cache/json11/json11.cpp
CompileFlags:
Add:
- -Wno-unqualified-std-cast-call
82 changes: 65 additions & 17 deletions include/simdjson/arm64/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,66 @@ namespace {
}
};

// Per-lane bit selectors shared by the simd_bitmask64_builder steps: within each
// 8-byte half, byte i holds (1 << i). The builders AND every chunk with this
// constant before pairwise-adding neighbouring bytes.
#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO
static const uint8x16_t BITMASK64_BUILDER_MASK = {
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
};
#else
// Regular Visual Studio builds construct the vector through the
// simdjson_make_uint8x16_t helper rather than a brace initializer.
static const uint8x16_t BITMASK64_BUILDER_MASK = simdjson_make_uint8x16_t(
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
);
#endif

// Step-indexed builder that folds simd8<bool> chunks into one 64-bit mask.
// N is the number of chunks consumed so far; only the explicit specializations
// are ever defined.
template <int N = 0> struct simd_bitmask64_builder;

// Final state: every chunk has been consumed and the finished mask is stored.
template<>
struct simd_bitmask64_builder<4> {
  const uint64_t mask;
  // Rvalue-only conversion: the finished builder is meant to be read straight
  // off the end of a .next() chain.
  operator uint64_t() && { return this->mask; }
}; // struct simd_bitmask64_builder<4>

// State after three chunks: chunks 0 and 1 are already masked and pairwise-summed
// (sum01); chunk 2 is held back so it can be paired with chunk 3.
template<>
struct simd_bitmask64_builder<3> {
  const uint8x16_t sum01;
  const simd8<bool> val2;

  // Consumes the fourth chunk and produces the finished 64-bit mask.
  simdjson_inline simd_bitmask64_builder<4> next(simd8<bool> val3) {
    // Keep one selector bit per lane, then add neighbouring bytes so that each
    // group of 8 lanes collapses towards a single byte.
    const auto masked2 = val2 & BITMASK64_BUILDER_MASK;
    const auto masked3 = val3 & BITMASK64_BUILDER_MASK;
    const uint8x16_t sum23 = vpaddq_u8(masked2, masked3);
    const uint8x16_t sum0123 = vpaddq_u8(sum01, sum23);

    // The reduction is shaped for eight 16-byte inputs (a 128-bit result). Only
    // 64 bits are needed here, so the vector is added to itself, which yields
    // the final mask twice; lane 0 is extracted.
    const uint8x16_t folded = vpaddq_u8(sum0123, sum0123);
    return simd_bitmask64_builder<4>{ vgetq_lane_u64(vreinterpretq_u64_u8(folded), 0) };
  }
}; // struct simd_bitmask64_builder<3>

// State after two chunks: holds the pairwise sum of the masked chunks 0 and 1.
template<>
struct simd_bitmask64_builder<2> {
  const uint8x16_t sum01;

  // Stores the third chunk untouched; it is combined once the fourth arrives.
  simdjson_inline simd_bitmask64_builder<3> next(simd8<bool> val2) {
    return simd_bitmask64_builder<3>{ this->sum01, val2 };
  }
}; // struct simd_bitmask64_builder<2>

// State after one chunk: the first chunk is kept as-is until its partner arrives.
template<>
struct simd_bitmask64_builder<1> {
  const simd8<bool> val0;

  // Masks chunks 0 and 1 with the per-lane bit selectors and pairwise-adds them.
  simdjson_inline simd_bitmask64_builder<2> next(simd8<bool> val1) {
    const auto masked0 = val0 & BITMASK64_BUILDER_MASK;
    const auto masked1 = val1 & BITMASK64_BUILDER_MASK;
    return simd_bitmask64_builder<2>{ vpaddq_u8(masked0, masked1) };
  }
}; // struct simd_bitmask64_builder<1>

// Initial, empty state of the builder chain.
template<>
struct simd_bitmask64_builder<0> {
  // Records the first chunk; no arithmetic happens until the second one is supplied.
  simdjson_inline simd_bitmask64_builder<1> next(simd8<bool> val) {
    return simd_bitmask64_builder<1>{ val };
  }
}; // struct simd_bitmask64_builder<0>

template<typename T>
struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
Expand Down Expand Up @@ -449,23 +509,11 @@ namespace {
}

// Reduces the four chunks of this simd8x64 to a single uint64_t mask.
// NOTE(review): as written this body contains two return statements in sequence.
// The hand-written vpaddq_u8 reduction returns first, so the trailing
// simd_bitmask64_builder chain is unreachable. This looks like the removed and
// added sides of a change flattened together — confirm which body is intended.
simdjson_inline uint64_t to_bitmask() const {
// Per-lane bit selectors: byte i of each 8-byte half is (1 << i).
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
const uint8x16_t bit_mask = simdjson_make_uint8x16_t(
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
);
#else
const uint8x16_t bit_mask = {
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
#endif
// Add each of the elements next to each other, successively, to stuff each 8 byte mask into one.
uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask);
uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask);
sum0 = vpaddq_u8(sum0, sum1);
// Adding the vector to itself leaves the finished mask in 64-bit lane 0.
sum0 = vpaddq_u8(sum0, sum0);
return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
// Builder-based form of the same reduction (unreachable as written, see note above).
return simd_bitmask64_builder<0>()
.next(this->chunks[0])
.next(this->chunks[1])
.next(this->chunks[2])
.next(this->chunks[3]);
}

simdjson_inline uint64_t eq(const T m) const {
Expand Down
30 changes: 27 additions & 3 deletions include/simdjson/haswell/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,30 @@ namespace simd {
simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); }
};

// Step-indexed builder that folds simd8<bool> chunks into one 64-bit mask.
// N is the number of chunks consumed so far; only the explicit specializations
// are ever defined.
template <int N = 0> struct simd_bitmask64_builder;

// Final state: both chunks have been consumed and the finished mask is stored.
template<>
struct simd_bitmask64_builder<2> {
  const uint64_t bitmask;
  // Rvalue-only conversion: the finished builder is meant to be read straight
  // off the end of a .next() chain.
  operator uint64_t() && { return this->bitmask; }
}; // struct simd_bitmask64_builder<2>

// State after one chunk: holds the first chunk's bitmask as an int.
template<>
struct simd_bitmask64_builder<1> {
  const int bitmask;

  // Places the stored mask in bits 0-31 and the new chunk's mask in bits 32-63.
  simdjson_inline simd_bitmask64_builder<2> next(simd8<bool> val) {
    // Go through uint32_t so a negative int is zero-extended, not sign-extended.
    const uint64_t low_half = uint32_t(this->bitmask);
    const uint64_t high_half = val.to_bitmask();
    return simd_bitmask64_builder<2>{ low_half | (high_half << 32) };
  }
}; // struct simd_bitmask64_builder<1>

// Initial, empty state of the builder chain.
template<>
struct simd_bitmask64_builder<0> {
  // Captures the bitmask of the first chunk.
  simdjson_inline simd_bitmask64_builder<1> next(simd8<bool> val) {
    return simd_bitmask64_builder<1>{ val.to_bitmask() };
  }
}; // struct simd_bitmask64_builder<0>

template<typename T>
struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
Expand Down Expand Up @@ -322,9 +346,9 @@ namespace simd {
}

// Combines the bitmasks of the two chunks into one uint64_t:
// chunk 0 in bits 0-31, chunk 1 in bits 32-63.
// NOTE(review): as written this body contains two return statements in sequence.
// The explicit r_lo/r_hi computation returns first, so the trailing
// simd_bitmask64_builder chain is unreachable. This looks like the removed and
// added sides of a change flattened together — confirm which body is intended.
simdjson_inline uint64_t to_bitmask() const {
// The uint32_t cast zero-extends the low half before it is widened.
uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
uint64_t r_hi = this->chunks[1].to_bitmask();
return r_lo | (r_hi << 32);
// Builder-based form of the same combination (unreachable as written, see note above).
return simd_bitmask64_builder<0>()
.next(this->chunks[0])
.next(this->chunks[1]);
}

simdjson_inline simd8<T> reduce_or() const {
Expand Down
14 changes: 14 additions & 0 deletions include/simdjson/icelake/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,20 @@ namespace simd {
simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); }
};

// Step-indexed builder that turns simd8<bool> chunks into one 64-bit mask.
// N is the number of chunks consumed so far; only the explicit specializations
// are ever defined.
template <int N = 0> struct simd_bitmask64_builder;

// Final state: the single chunk has been consumed and its mask is stored.
template<>
struct simd_bitmask64_builder<1> {
  const __mmask64 bitmask;
  // Rvalue-only conversion: the finished builder is meant to be read straight
  // off the end of a .next() chain.
  operator __mmask64() && { return this->bitmask; }
}; // struct simd_bitmask64_builder<1>

// Initial, empty state of the builder chain.
template<>
struct simd_bitmask64_builder<0> {
  // One chunk is enough here: _mm512_movepi8_mask's result initializes the
  // finished __mmask64 directly.
  simdjson_inline simd_bitmask64_builder<1> next(simd8<bool> val) {
    return simd_bitmask64_builder<1>{ _mm512_movepi8_mask(val) };
  }
}; // struct simd_bitmask64_builder<0>

template<typename T>
struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
Expand Down
39 changes: 32 additions & 7 deletions include/simdjson/lasx/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,35 @@ namespace simd {
simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(__lasx_xvslli_b(*this, N)); }
};

// Step-indexed builder that folds simd8<bool> chunks into one 64-bit mask.
// N is the number of chunks consumed so far; only the explicit specializations
// are ever defined.
template <int N = 0> struct simd_bitmask64_builder;

// Final state: both chunks have been consumed and the finished mask is stored.
template<>
struct simd_bitmask64_builder<2> {
  const unsigned long int mask01;
  // Converts the finished builder to the plain 64-bit mask.
  operator uint64_t() { return this->mask01; }
}; // struct simd_bitmask64_builder<2>

// State after one chunk: holds the __lasx_xvmskltz_b result for chunk 0.
template<>
struct simd_bitmask64_builder<1> {
  const __m256i mask0;

  // Consumes the second chunk and merges both per-chunk masks into one 64-bit value.
  // NOTE(review): the lane bookkeeping below follows the intrinsic names; presumably
  // each xvmskltz_b result carries one partial mask per 128-bit half — confirm
  // against the LASX intrinsics reference.
  simdjson_inline simd_bitmask64_builder<2> next(simd8<bool> val1) {
    const __m256i mask1 = __lasx_xvmskltz_b(val1);
    // Word element 4 of each per-chunk mask.
    const __m256i upper0 = __lasx_xvpickve_w(mask0, 4);
    const __m256i upper1 = __lasx_xvpickve_w(mask1, 4);
    // Put the chunk-1 piece next to the chunk-0 piece (word slot 1).
    const __m256i lower_pair = __lasx_xvinsve0_w(mask0, mask1, 1);
    const __m256i upper_pair = __lasx_xvinsve0_w(upper0, upper1, 1);
    // Interleave the halfwords of both pairs and read out the low 64 bits.
    const __m256i packed = __lasx_xvpackev_h(upper_pair, lower_pair);
    return simd_bitmask64_builder<2>{ __lasx_xvpickve2gr_du(packed, 0) };
  }
}; // struct simd_bitmask64_builder<1>

// Initial, empty state of the builder chain.
template<>
struct simd_bitmask64_builder<0> {
  // Captures the __lasx_xvmskltz_b result for the first chunk.
  simdjson_inline simd_bitmask64_builder<1> next(simd8<bool> val) {
    // The parameter is `val`; the previous body referenced an undeclared `val0`.
    return { __lasx_xvmskltz_b(val) };
  }
}; // struct simd_bitmask64_builder<0>

template<typename T>
struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
Expand Down Expand Up @@ -331,13 +360,9 @@ namespace simd {
}

// Reduces the two chunks of this simd8x64 to a single uint64_t mask.
// NOTE(review): as written this body contains two return statements in sequence.
// The hand-written intrinsic sequence returns first, so the trailing
// simd_bitmask64_builder chain is unreachable. This looks like the removed and
// added sides of a change flattened together — confirm which body is intended.
simdjson_inline uint64_t to_bitmask() const {
__m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]);
__m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]);
// Word element 4 of each per-chunk mask.
__m256i mask_tmp = __lasx_xvpickve_w(mask0, 4);
__m256i tmp = __lasx_xvpickve_w(mask1, 4);
// Place the chunk-1 pieces into word slot 1, next to the chunk-0 pieces.
mask0 = __lasx_xvinsve0_w(mask0, mask1, 1);
mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1);
return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0);
// Builder-based form of the same reduction (unreachable as written, see note above).
return simd_bitmask64_builder<0>()
.next(this->chunks[0])
.next(this->chunks[1]);
}

simdjson_inline simd8<T> reduce_or() const {
Expand Down
56 changes: 49 additions & 7 deletions include/simdjson/lsx/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,50 @@ namespace simd {
simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(__lsx_vslli_b(*this, N)); }
};

// Step-indexed builder that folds simd8<bool> chunks into one 64-bit mask.
// N is the number of chunks consumed so far; only the explicit specializations
// are ever defined.
template <int N = 0> struct simd_bitmask64_builder;

// Final state: all four chunks have been consumed and the finished mask is stored.
template<>
struct simd_bitmask64_builder<4> {
  const unsigned long int result;
  // Converts the finished builder to the plain 64-bit mask.
  operator uint64_t() { return this->result; }
}; // struct simd_bitmask64_builder<4>

// State after three chunks: mask01 is the interleaved mask of chunks 0 and 1,
// mask2 is the __lsx_vmskltz_b result for chunk 2.
template<>
struct simd_bitmask64_builder<3> {
  const __m128i mask01;
  const __m128i mask2;

  // Consumes the fourth chunk and produces the finished 64-bit mask.
  simdjson_inline simd_bitmask64_builder<4> next(simd8<bool> val3) {
    const __m128i mask3 = __lsx_vmskltz_b(val3);
    // Pair chunks 2 and 3 the same way chunks 0 and 1 were paired.
    const __m128i mask23 = __lsx_vilvl_h(mask3, mask2);
    // Interleave the two pairs at word granularity and read out the low 64 bits.
    const __m128i combined = __lsx_vilvl_w(mask23, mask01);
    return simd_bitmask64_builder<4>{ __lsx_vpickve2gr_du(combined, 0) };
  }
}; // struct simd_bitmask64_builder<3>

// State after two chunks: holds the interleaved mask of chunks 0 and 1.
template<>
struct simd_bitmask64_builder<2> {
  const __m128i mask01;

  // Records the __lsx_vmskltz_b result for chunk 2 alongside the existing pair.
  simdjson_inline simd_bitmask64_builder<3> next(simd8<bool> val2) {
    return simd_bitmask64_builder<3>{ this->mask01, __lsx_vmskltz_b(val2) };
  }
}; // struct simd_bitmask64_builder<2>

// State after one chunk: holds the __lsx_vmskltz_b result for chunk 0.
template<>
struct simd_bitmask64_builder<1> {
  const __m128i mask0;

  // Interleaves the halfwords of the chunk-1 mask with those of the chunk-0 mask.
  simdjson_inline simd_bitmask64_builder<2> next(simd8<bool> val1) {
    const __m128i mask1 = __lsx_vmskltz_b(val1);
    const __m128i mask01 = __lsx_vilvl_h(mask1, mask0);
    return simd_bitmask64_builder<2>{ mask01 };
  }
}; // struct simd_bitmask64_builder<1>

// Initial, empty state of the builder chain.
template<>
struct simd_bitmask64_builder<0> {
  // Captures the __lsx_vmskltz_b result for the first chunk.
  simdjson_inline simd_bitmask64_builder<1> next(simd8<bool> val0) {
    return simd_bitmask64_builder<1>{ __lsx_vmskltz_b(val0) };
  }
}; // struct simd_bitmask64_builder<0>

template<typename T>
struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
Expand Down Expand Up @@ -303,13 +347,11 @@ namespace simd {
}

// Reduces the four chunks of this simd8x64 to a single uint64_t mask.
// NOTE(review): as written this body contains two return statements in sequence.
// The hand-written intrinsic sequence returns first, so the trailing
// simd_bitmask64_builder chain is unreachable. This looks like the removed and
// added sides of a change flattened together — confirm which body is intended.
simdjson_inline uint64_t to_bitmask() const {
// Note: these locals are numbered from 1, i.e. mask1 belongs to chunks[0].
__m128i mask1 = __lsx_vmskltz_b(this->chunks[0]);
__m128i mask2 = __lsx_vmskltz_b(this->chunks[1]);
__m128i mask3 = __lsx_vmskltz_b(this->chunks[2]);
__m128i mask4 = __lsx_vmskltz_b(this->chunks[3]);
// Interleave halfwords pairwise (chunks 0/1, then 2/3), reusing mask1 and mask2.
mask1 = __lsx_vilvl_h(mask2, mask1);
mask2 = __lsx_vilvl_h(mask4, mask3);
// Interleave the two pairs at word granularity and read out the low 64 bits.
return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0);
// Builder-based form of the same reduction (unreachable as written, see note above).
return simd_bitmask64_builder<0>()
.next(this->chunks[0])
.next(this->chunks[1])
.next(this->chunks[2])
.next(this->chunks[3]);
}

simdjson_inline simd8<T> reduce_or() const {
Expand Down
55 changes: 50 additions & 5 deletions include/simdjson/ppc64/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,51 @@ template <> struct simd8<uint8_t> : base8_numeric<uint8_t> {
}
};

// Step-indexed builder that folds simd8<bool> chunks into one 64-bit mask.
// N is the number of chunks consumed so far; only the explicit specializations
// are ever defined.
template <int N = 0> struct simd_bitmask64_builder;

// Final state: all four chunks have been consumed and the finished mask is stored.
template<>
struct simd_bitmask64_builder<4> {
  const uint64_t bitmask;
  // Rvalue-only conversion: the finished builder is meant to be read straight
  // off the end of a .next() chain.
  simdjson_inline operator uint64_t() && { return this->bitmask; }
}; // struct simd_bitmask64_builder<4>

// State after three chunks: bitmask01 carries chunks 0 and 1 (bits 0-31),
// bitmask2 carries chunk 2.
template<>
struct simd_bitmask64_builder<3> {
  const int bitmask01;
  const int bitmask2;

  // Consumes the fourth chunk and assembles the finished 64-bit mask:
  // bitmask01 in bits 0-31, bitmask2 from bit 32, the new chunk from bit 48.
  simdjson_inline simd_bitmask64_builder<4> next(simd8<bool> val) {
    // bitmask01 is a signed int whose bit 31 is set whenever the top lane of
    // chunk 1 is set, which makes it negative. Widening it straight to uint64_t
    // would sign-extend and set all of bits 32-63, so go through uint32_t to
    // zero-extend instead. bitmask2 gets the same treatment for symmetry.
    const uint64_t bits_0_31 = uint32_t(this->bitmask01);
    const uint64_t bits_from_32 = uint32_t(this->bitmask2);
    const uint64_t bits_from_48 = uint64_t(val.to_bitmask());
    return { bits_0_31 | (bits_from_32 << 32) | (bits_from_48 << 48) };
  }
}; // struct simd_bitmask64_builder<3>

// State after two chunks: holds the combined bitmask of chunks 0 and 1.
template<>
struct simd_bitmask64_builder<2> {
  const int bitmask01;

  // Records the bitmask of chunk 2 alongside the existing pair; nothing is
  // shifted or merged at this step.
  simdjson_inline simd_bitmask64_builder<3> next(simd8<bool> val) {
    return simd_bitmask64_builder<3>{ this->bitmask01, val.to_bitmask() };
  }
}; // struct simd_bitmask64_builder<2>

// State after one chunk: holds the bitmask of chunk 0.
template<>
struct simd_bitmask64_builder<1> {
  const int bitmask0;

  // Merges the second chunk's bitmask in at bit 16.
  simdjson_inline simd_bitmask64_builder<2> next(simd8<bool> val) {
    // Shift in unsigned arithmetic: shifting a signed int so that a 1 lands in
    // bit 31 is not portable across the C++ standards this header may be built
    // with. The result is converted back to int only for storage, so it can be
    // negative when bit 31 is set and must be zero-extended when widened later.
    const uint32_t low = uint32_t(this->bitmask0);
    const uint32_t high = uint32_t(val.to_bitmask()) << 16;
    return { int(low | high) };
  }
}; // struct simd_bitmask64_builder<1>

// Initial, empty state of the builder chain.
template<>
struct simd_bitmask64_builder<0> {
  // Captures the bitmask of the first chunk.
  simdjson_inline simd_bitmask64_builder<1> next(simd8<bool> val) {
    return simd_bitmask64_builder<1>{ val.to_bitmask() };
  }
}; // struct simd_bitmask64_builder<0>

template <typename T> struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
static_assert(NUM_CHUNKS == 4,
Expand Down Expand Up @@ -434,11 +479,11 @@ template <typename T> struct simd8x64 {
}

// Combines the bitmasks of the four chunks into one uint64_t, 16 bits apart:
// chunk 0 at bit 0, chunk 1 at bit 16, chunk 2 at bit 32, chunk 3 at bit 48.
// NOTE(review): as written this body contains two return statements in sequence.
// The explicit r0..r3 computation returns first, so the trailing
// simd_bitmask64_builder chain is unreachable. This looks like the removed and
// added sides of a change flattened together — confirm which body is intended.
simdjson_inline uint64_t to_bitmask() const {
// Every partial mask is widened to uint64_t before it is shifted.
uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
uint64_t r1 = this->chunks[1].to_bitmask();
uint64_t r2 = this->chunks[2].to_bitmask();
uint64_t r3 = this->chunks[3].to_bitmask();
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
// Builder-based form of the same combination (unreachable as written, see note above).
return simd_bitmask64_builder<0>()
.next(this->chunks[0])
.next(this->chunks[1])
.next(this->chunks[2])
.next(this->chunks[3]);
}

simdjson_inline uint64_t eq(const T m) const {
Expand Down
Loading