Make big endians first class citizens again
No longer does the big iron of yore, which lacks SIMD-optimized loads, need
to search strings a byte at a time like the primitive machines of the VAX
era. This guard existed mostly because the string comparison located the
first mismatch with "count trailing zeros", which assumes little-endian byte
order. On big endian we can simply count leading zeros instead, and stop
falling back to the extremely naive C implementation. This makes things a
tad faster.
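
To make the trick concrete, here is a minimal standalone sketch of the technique, not zlib-ng's actual code: the helper name first_diff_byte and the demo buffers are invented for illustration, and the GCC/Clang builtins and byte-order macros are assumed to be available.

/* Find the index of the first differing byte between two 4-byte buffers
 * by XOR-ing word loads and counting zero bits from the end that
 * corresponds to the lowest memory address on this host. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t first_diff_byte(const uint8_t *a, const uint8_t *b) {
    uint32_t sv, mv;
    memcpy(&sv, a, sizeof(sv));   /* memcpy makes the unaligned loads legal */
    memcpy(&mv, b, sizeof(mv));
    uint32_t diff = sv ^ mv;
    if (diff == 0)
        return 4;                 /* whole word matches; ctz/clz of 0 is undefined */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    /* Lowest-addressed byte sits in the least significant bits. */
    return (uint32_t)__builtin_ctz(diff) / 8;
#else
    /* Lowest-addressed byte sits in the most significant bits. */
    return (uint32_t)__builtin_clz(diff) / 8;
#endif
}

int main(void) {
    const uint8_t x[4] = { 'a', 'b', 'c', 'd' };
    const uint8_t y[4] = { 'a', 'b', 'X', 'd' };
    printf("first differing byte: %u\n", first_diff_byte(x, y)); /* 2 on either endianness */
    return 0;
}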
KungFuJesus authored and Dead2 committed Dec 21, 2024
1 parent dbccbd1 commit 04d1b75
Showing 3 changed files with 14 additions and 7 deletions.
11 changes: 10 additions & 1 deletion arch/generic/compare256_c.c
@@ -57,7 +57,8 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
 
 #include "match_tpl.h"
 
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
+
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -111,7 +112,11 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const uint8_t *src1) {
 
         diff = sv ^ mv;
         if (diff) {
+#if BYTE_ORDER == LITTLE_ENDIAN
             uint32_t match_byte = __builtin_ctz(diff) / 8;
+#else
+            uint32_t match_byte = __builtin_clz(diff) / 8;
+#endif
             return len + match_byte;
         }
 
@@ -151,7 +156,11 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
 
         diff = sv ^ mv;
         if (diff) {
+#if BYTE_ORDER == LITTLE_ENDIAN
             uint64_t match_byte = __builtin_ctzll(diff) / 8;
+#else
+            uint64_t match_byte = __builtin_clzll(diff) / 8;
+#endif
             return len + (uint32_t)match_byte;
         }
 
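
(Why leading zeros work: a big-endian load places the byte at the lowest address in the most significant bits of the word, so counting leading zeros of the XOR scans the bytes in memory order, exactly as counting trailing zeros does on little endian.)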
6 changes: 3 additions & 3 deletions arch/generic/generic_functions.h
@@ -28,7 +28,7 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
 
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
 # ifdef HAVE_BUILTIN_CTZ
 uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
@@ -44,7 +44,7 @@ void slide_hash_c(deflate_state *s);
 
 uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
 # ifdef HAVE_BUILTIN_CTZ
@@ -59,7 +59,7 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
 
 
 // Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
-#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#if OPTIMAL_CMP >= 32
 # if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #   define longest_match_generic longest_match_unaligned_64
 #   define longest_match_slow_generic longest_match_slow_unaligned_64
4 changes: 1 addition & 3 deletions zbuild.h
@@ -257,9 +257,7 @@
 # define OPTIMAL_CMP 32
 # endif
 #elif defined(__powerpc64__) || defined(__ppc64__)
-# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#   define OPTIMAL_CMP 64
-# endif
+# define OPTIMAL_CMP 64
 #endif
 #if defined(NO_UNALIGNED)
 # undef OPTIMAL_CMP
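
Taken together, the guards now compose so that a big-endian ppc64 build reaches the word-sized compare paths. A rough sketch of the macro flow, with names taken from the diffs above (assuming HAVE_BUILTIN_CTZLL is detected at configure time):

/* zbuild.h: OPTIMAL_CMP is now set for ppc64 regardless of byte order. */
#if defined(__powerpc64__) || defined(__ppc64__)
#  define OPTIMAL_CMP 64   /* previously guarded by __ORDER_LITTLE_ENDIAN__ */
#endif

/* generic_functions.h: with the endianness test dropped, the selection
 * below now also fires on big-endian machines. */
#if OPTIMAL_CMP >= 32
#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
#    define longest_match_generic longest_match_unaligned_64
#  endif
#endif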
