Skip to content

Commit bf05e88

Browse files
committed
Continued cleanup of old UNALIGNED_OK checks
- Remove obsolete checks - Fix checks that are inconsistent - Stop compiling compare256/longest_match variants that never get called - Improve how the generic compare256 functions are handled - Allow overriding OPTIMAL_CMP. This simplifies the code and avoids having a lot of code in the compiled library that can never be executed.
1 parent 1aeb291 commit bf05e88

15 files changed

+218
-345
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,9 @@ set(ZLIB_PUBLIC_HDRS
10741074
${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h
10751075
)
10761076
set(ZLIB_PRIVATE_HDRS
1077+
arch/generic/chunk_permute_table.h
1078+
arch/generic/compare256_p.h
1079+
arch/generic/generic_functions.h
10771080
adler32_p.h
10781081
chunkset_tpl.h
10791082
compare256_rle.h

arch/generic/Makefile.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.
4040
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
4141
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
4242

43-
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
43+
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCDIR)/compare256_p.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
4444
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
4545

46-
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
46+
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCDIR)/compare256_p.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
4747
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
4848

4949
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h

arch/generic/compare256_c.c

Lines changed: 15 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -4,187 +4,28 @@
44
*/
55

66
#include "zbuild.h"
7-
#include "zmemory.h"
8-
#include "deflate.h"
9-
#include "fallback_builtins.h"
10-
11-
/* ALIGNED, byte comparison */
12-
static inline uint32_t compare256_c_static(const uint8_t *src0, const uint8_t *src1) {
13-
uint32_t len = 0;
14-
15-
do {
16-
if (*src0 != *src1)
17-
return len;
18-
src0 += 1, src1 += 1, len += 1;
19-
if (*src0 != *src1)
20-
return len;
21-
src0 += 1, src1 += 1, len += 1;
22-
if (*src0 != *src1)
23-
return len;
24-
src0 += 1, src1 += 1, len += 1;
25-
if (*src0 != *src1)
26-
return len;
27-
src0 += 1, src1 += 1, len += 1;
28-
if (*src0 != *src1)
29-
return len;
30-
src0 += 1, src1 += 1, len += 1;
31-
if (*src0 != *src1)
32-
return len;
33-
src0 += 1, src1 += 1, len += 1;
34-
if (*src0 != *src1)
35-
return len;
36-
src0 += 1, src1 += 1, len += 1;
37-
if (*src0 != *src1)
38-
return len;
39-
src0 += 1, src1 += 1, len += 1;
40-
} while (len < 256);
41-
42-
return 256;
43-
}
7+
#include "compare256_p.h"
8+
9+
// Set optimal COMPARE256 function variant
10+
#if OPTIMAL_CMP == 8
11+
# define COMPARE256 compare256_8
12+
#elif defined(HAVE_BUILTIN_CTZLL)
13+
# define COMPARE256 compare256_64
14+
#elif defined(HAVE_BUILTIN_CTZ)
15+
# define COMPARE256 compare256_32
16+
#else
17+
# define COMPARE256 compare256_16
18+
#endif
4419

4520
Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
46-
return compare256_c_static(src0, src1);
21+
return COMPARE256(src0, src1); /* COMPARE256 expands to compare256_8/16/32/64, as picked by the OPTIMAL_CMP / HAVE_BUILTIN_CTZ(LL) checks above */
4722
}
4823

24+
// Generate longest_match_c
4925
#define LONGEST_MATCH longest_match_c
50-
#define COMPARE256 compare256_c_static
51-
5226
#include "match_tpl.h"
5327

28+
// Generate longest_match_slow_c
5429
#define LONGEST_MATCH_SLOW
5530
#define LONGEST_MATCH longest_match_slow_c
56-
#define COMPARE256 compare256_c_static
57-
58-
#include "match_tpl.h"
59-
60-
#if OPTIMAL_CMP >= 32
61-
62-
/* 16-bit unaligned integer comparison */
63-
static inline uint32_t compare256_16_static(const uint8_t *src0, const uint8_t *src1) {
64-
uint32_t len = 0;
65-
66-
do {
67-
if (zng_memcmp_2(src0, src1) != 0)
68-
return len + (*src0 == *src1);
69-
src0 += 2, src1 += 2, len += 2;
70-
71-
if (zng_memcmp_2(src0, src1) != 0)
72-
return len + (*src0 == *src1);
73-
src0 += 2, src1 += 2, len += 2;
74-
75-
if (zng_memcmp_2(src0, src1) != 0)
76-
return len + (*src0 == *src1);
77-
src0 += 2, src1 += 2, len += 2;
78-
79-
if (zng_memcmp_2(src0, src1) != 0)
80-
return len + (*src0 == *src1);
81-
src0 += 2, src1 += 2, len += 2;
82-
} while (len < 256);
83-
84-
return 256;
85-
}
86-
87-
Z_INTERNAL uint32_t compare256_16(const uint8_t *src0, const uint8_t *src1) {
88-
return compare256_16_static(src0, src1);
89-
}
90-
91-
#define LONGEST_MATCH longest_match_16
92-
#define COMPARE256 compare256_16_static
93-
9431
#include "match_tpl.h"
95-
96-
#define LONGEST_MATCH_SLOW
97-
#define LONGEST_MATCH longest_match_slow_16
98-
#define COMPARE256 compare256_16_static
99-
100-
#include "match_tpl.h"
101-
102-
#ifdef HAVE_BUILTIN_CTZ
103-
/* 32-bit unaligned integer comparison */
104-
static inline uint32_t compare256_32_static(const uint8_t *src0, const uint8_t *src1) {
105-
uint32_t len = 0;
106-
107-
do {
108-
uint32_t sv, mv, diff;
109-
110-
sv = zng_memread_4(src0);
111-
mv = zng_memread_4(src1);
112-
113-
diff = sv ^ mv;
114-
if (diff) {
115-
#if BYTE_ORDER == LITTLE_ENDIAN
116-
uint32_t match_byte = __builtin_ctz(diff) / 8;
117-
#else
118-
uint32_t match_byte = __builtin_clz(diff) / 8;
119-
#endif
120-
return len + match_byte;
121-
}
122-
123-
src0 += 4, src1 += 4, len += 4;
124-
} while (len < 256);
125-
126-
return 256;
127-
}
128-
129-
Z_INTERNAL uint32_t compare256_32(const uint8_t *src0, const uint8_t *src1) {
130-
return compare256_32_static(src0, src1);
131-
}
132-
133-
#define LONGEST_MATCH longest_match_32
134-
#define COMPARE256 compare256_32_static
135-
136-
#include "match_tpl.h"
137-
138-
#define LONGEST_MATCH_SLOW
139-
#define LONGEST_MATCH longest_match_slow_32
140-
#define COMPARE256 compare256_32_static
141-
142-
#include "match_tpl.h"
143-
144-
#endif
145-
146-
#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
147-
/* 64-bit integer comparison */
148-
static inline uint32_t compare256_64_static(const uint8_t *src0, const uint8_t *src1) {
149-
uint32_t len = 0;
150-
151-
do {
152-
uint64_t sv, mv, diff;
153-
154-
sv = zng_memread_8(src0);
155-
mv = zng_memread_8(src1);
156-
157-
diff = sv ^ mv;
158-
if (diff) {
159-
#if BYTE_ORDER == LITTLE_ENDIAN
160-
uint64_t match_byte = __builtin_ctzll(diff) / 8;
161-
#else
162-
uint64_t match_byte = __builtin_clzll(diff) / 8;
163-
#endif
164-
return len + (uint32_t)match_byte;
165-
}
166-
167-
src0 += 8, src1 += 8, len += 8;
168-
} while (len < 256);
169-
170-
return 256;
171-
}
172-
173-
Z_INTERNAL uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
174-
return compare256_64_static(src0, src1);
175-
}
176-
177-
#define LONGEST_MATCH longest_match_64
178-
#define COMPARE256 compare256_64_static
179-
180-
#include "match_tpl.h"
181-
182-
#define LONGEST_MATCH_SLOW
183-
#define LONGEST_MATCH longest_match_slow_64
184-
#define COMPARE256 compare256_64_static
185-
186-
#include "match_tpl.h"
187-
188-
#endif
189-
190-
#endif

arch/generic/compare256_p.h

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/* compare256_p.h -- 256 byte memory comparison with match length return
2+
* Copyright (C) 2020 Nathan Moinvaziri
3+
* For conditions of distribution and use, see copyright notice in zlib.h
4+
*/
5+
6+
#include "zmemory.h"
7+
#include "deflate.h"
8+
#include "fallback_builtins.h"
9+
10+
/* 8-bit integer comparison: walks src0 and src1 one byte at a time and returns the number of leading bytes that are equal (0..256) */
11+
static inline uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1) {
12+
uint32_t len = 0;
13+
14+
do { /* body is the same compare/advance step repeated 8 times, so len reaches 256 exactly at a loop check */
15+
if (*src0 != *src1)
16+
return len; /* first mismatch: len bytes matched before it */
17+
src0 += 1, src1 += 1, len += 1;
18+
if (*src0 != *src1)
19+
return len;
20+
src0 += 1, src1 += 1, len += 1;
21+
if (*src0 != *src1)
22+
return len;
23+
src0 += 1, src1 += 1, len += 1;
24+
if (*src0 != *src1)
25+
return len;
26+
src0 += 1, src1 += 1, len += 1;
27+
if (*src0 != *src1)
28+
return len;
29+
src0 += 1, src1 += 1, len += 1;
30+
if (*src0 != *src1)
31+
return len;
32+
src0 += 1, src1 += 1, len += 1;
33+
if (*src0 != *src1)
34+
return len;
35+
src0 += 1, src1 += 1, len += 1;
36+
if (*src0 != *src1)
37+
return len;
38+
src0 += 1, src1 += 1, len += 1;
39+
} while (len < 256);
40+
41+
return 256; /* no mismatch in any of the 256 byte positions */
42+
}
43+
44+
/* 16-bit integer comparison: compares src0 and src1 two bytes per step and returns the number of leading bytes that are equal (0..256) */
45+
static inline uint32_t compare256_16(const uint8_t *src0, const uint8_t *src1) {
46+
uint32_t len = 0;
47+
48+
do { /* four two-byte steps per iteration, so len advances by 8 between loop checks */
49+
if (zng_memcmp_2(src0, src1) != 0) /* NOTE(review): zng_memcmp_2 is defined elsewhere; nonzero presumably means the two-byte pairs differ */
50+
return len + (*src0 == *src1); /* add 1 when the first byte of the pair still matches, i.e. only the second byte differs */
51+
src0 += 2, src1 += 2, len += 2;
52+
53+
if (zng_memcmp_2(src0, src1) != 0)
54+
return len + (*src0 == *src1);
55+
src0 += 2, src1 += 2, len += 2;
56+
57+
if (zng_memcmp_2(src0, src1) != 0)
58+
return len + (*src0 == *src1);
59+
src0 += 2, src1 += 2, len += 2;
60+
61+
if (zng_memcmp_2(src0, src1) != 0)
62+
return len + (*src0 == *src1);
63+
src0 += 2, src1 += 2, len += 2;
64+
} while (len < 256);
65+
66+
return 256; /* every pair compared equal */
67+
}
68+
69+
#ifdef HAVE_BUILTIN_CTZ
70+
/* 32-bit integer comparison: compares src0 and src1 four bytes per step and returns the number of leading bytes that are equal (0..256) */
71+
static inline uint32_t compare256_32(const uint8_t *src0, const uint8_t *src1) {
72+
uint32_t len = 0;
73+
74+
do {
75+
uint32_t sv, mv, diff;
76+
77+
sv = zng_memread_4(src0); /* NOTE(review): zng_memread_4 is defined elsewhere; presumably loads 4 bytes in native byte order - confirm */
78+
mv = zng_memread_4(src1);
79+
80+
diff = sv ^ mv; /* XOR leaves bits set only where the two words differ */
81+
if (diff) {
82+
# if BYTE_ORDER == LITTLE_ENDIAN
83+
uint32_t match_byte = __builtin_ctz(diff) / 8; /* trailing zero bits / 8 = whole matching bytes at the low end of the word */
84+
# else
85+
uint32_t match_byte = __builtin_clz(diff) / 8; /* leading zero bits / 8 = whole matching bytes at the high end of the word */
86+
# endif
87+
return len + match_byte; /* bytes matched in earlier words plus those matched inside this one */
88+
}
89+
90+
src0 += 4, src1 += 4, len += 4;
91+
} while (len < 256);
92+
93+
return 256; /* all 64 words compared equal */
94+
}
95+
#endif
96+
97+
#ifdef HAVE_BUILTIN_CTZLL
98+
/* 64-bit integer comparison: compares src0 and src1 eight bytes per step and returns the number of leading bytes that are equal (0..256) */
99+
static inline uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
100+
uint32_t len = 0;
101+
102+
do {
103+
uint64_t sv, mv, diff;
104+
105+
sv = zng_memread_8(src0); /* NOTE(review): zng_memread_8 is defined elsewhere; presumably loads 8 bytes in native byte order - confirm */
106+
mv = zng_memread_8(src1);
107+
108+
diff = sv ^ mv; /* XOR leaves bits set only where the two words differ */
109+
if (diff) {
110+
# if BYTE_ORDER == LITTLE_ENDIAN
111+
uint64_t match_byte = __builtin_ctzll(diff) / 8; /* trailing zero bits / 8 = whole matching bytes at the low end of the word */
112+
# else
113+
uint64_t match_byte = __builtin_clzll(diff) / 8; /* leading zero bits / 8 = whole matching bytes at the high end of the word */
114+
# endif
115+
return len + (uint32_t)match_byte; /* match_byte is at most 7 here, so the narrowing cast loses nothing */
116+
}
117+
118+
src0 += 8, src1 += 8, len += 8;
119+
} while (len < 256);
120+
121+
return 256; /* all 32 words compared equal */
122+
}
123+
#endif

0 commit comments

Comments
 (0)