Skip to content

Commit

Permalink
Fix unaligned access in ACLE based crc32
Browse files Browse the repository at this point in the history
This fixes a rightful complaint from the alignment sanitizer that we
alias memory in an unaligned fashion. A nice added bonus is that this
improves performance a tiny bit on the larger buffers, perhaps due to
loops that idiomatically decrement a count and increment a single buffer
pointer rather than the maze of conditional pointer reassignments.

While here, let's write a unit test just for this. Since this is the only
variant that accesses memory in a potentially unaligned fashion that doesn't
explicitly go byte by byte or use intrinsics that don't require alignment,
we'll enable it only for this function for now. Adding more tests later if
need be should be possible. For everything else not crc, we're relying on
ubsan to hopefully catch things by chance.
  • Loading branch information
KungFuJesus authored and Dead2 committed Dec 23, 2024
1 parent 87d8e95 commit 06bba67
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 22 deletions.
42 changes: 20 additions & 22 deletions arch/arm/crc32_acle.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
Z_REGISTER const uint32_t *buf4;
Z_REGISTER const uint64_t *buf8;
Z_REGISTER uint16_t buf2;
Z_REGISTER uint32_t buf4;
Z_REGISTER uint64_t buf8;

c = ~crc;

Expand All @@ -29,45 +29,43 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, si
len--;
}

if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) {
buf2 = (const uint16_t *) buf;
c = __crc32h(c, *buf2++);
if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & (sizeof(uint32_t) - 1))) {
buf2 = *((uint16_t*)buf);
c = __crc32h(c, buf2);
buf += sizeof(uint16_t);
len -= sizeof(uint16_t);
buf4 = (const uint32_t *) buf2;
} else {
buf4 = (const uint32_t *) buf;
}

if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
c = __crc32w(c, *buf4++);
if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & (sizeof(uint64_t) - 1))) {
buf4 = *((uint32_t*)buf);
c = __crc32w(c, buf4);
len -= sizeof(uint32_t);
buf += sizeof(uint32_t);
}

buf8 = (const uint64_t *) buf4;
} else {
buf8 = (const uint64_t *) buf;
}

while (len >= sizeof(uint64_t)) {
c = __crc32d(c, *buf8++);
buf8 = *((uint64_t*)buf);
c = __crc32d(c, buf8);
len -= sizeof(uint64_t);
buf += sizeof(uint64_t);
}

if (len >= sizeof(uint32_t)) {
buf4 = (const uint32_t *) buf8;
c = __crc32w(c, *buf4++);
buf4 = *((uint32_t*)buf);
c = __crc32w(c, buf4);
len -= sizeof(uint32_t);
buf2 = (const uint16_t *) buf4;
} else {
buf2 = (const uint16_t *) buf8;
buf += sizeof(uint32_t);
}

if (len >= sizeof(uint16_t)) {
c = __crc32h(c, *buf2++);
buf2 = *((uint16_t*)buf);
c = __crc32h(c, buf2);
len -= sizeof(uint16_t);
buf += sizeof(uint16_t);
}

buf = (const unsigned char *) buf2;
if (len) {
c = __crc32b(c, *buf);
}
Expand Down
34 changes: 34 additions & 0 deletions test/test_crc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "zutil.h"
#include "zutil_p.h"

extern "C" {
# include "zbuild.h"
Expand Down Expand Up @@ -195,6 +197,22 @@ class crc32_variant : public ::testing::TestWithParam<crc32_test> {
}
};

/* Specifically to test where we had dodgy alignment in the acle CRC32
* function. All others are either byte level access or use intrinsics
* that work with unaligned access */
class crc32_align : public ::testing::TestWithParam<int> {
public:
void hash(int param, crc32_func crc32) {
uint8_t *buf = (uint8_t*)zng_alloc(sizeof(uint8_t) * (128 + param));
if (buf != NULL) {
(void)crc32(0, buf + param, 128);
} else {
FAIL();
}
zng_free(buf);
}
};

INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests));

#define TEST_CRC32(name, func, support_flag) \
Expand All @@ -210,10 +228,26 @@ TEST_CRC32(braid, PREFIX(crc32_braid), 1)

#ifdef DISABLE_RUNTIME_CPU_DETECTION
TEST_CRC32(native, native_crc32, 1)

#else

#ifdef ARM_ACLE
static const int align_offsets[] = {
1, 2, 3, 4, 5, 6, 7
};

#define TEST_CRC32_ALIGN(name, func, support_flag) \
TEST_P(crc32_align, name) { \
if (!(support_flag)) { \
GTEST_SKIP(); \
return; \
} \
hash(GetParam(), func); \
}

INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets));
TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
TEST_CRC32_ALIGN(acle_align, crc32_acle, test_cpu_features.arm.has_crc32)
#endif
#ifdef POWER8_VSX_CRC32
TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
Expand Down

0 comments on commit 06bba67

Please sign in to comment.