Skip to content

Commit

Permalink
vertexfilter: Implement a floating-point exponent filter
Browse files Browse the repository at this point in the history
In some cases we can't quantize the floating point data because the
range of the data is unknown. While it's possible to use
meshopt_quantizeFloat to reduce the precision and gain some compression
back, this is often insufficient and suboptimal.

For inputs that represent a vector in 3D space, such as a position or
scale, a good alternative is to use a shared-exponent encoding - it's a
reasonable assumption that we are content with the same (absolute)
precision in all three components.

To be able to encode in shared exp, we use a modified floating-point-like
format, where we store a 24-bit signed integer mantissa (without
implicit 1) and an 8-bit exponent. This is less precise than a floating
point number - we lose 1 bit - but we gain the ability to individually
select the exponent and mantissa at any level of desired mantissa
precision. Additionally this moves the exponent into a single byte, and
stores the mantissa as a two's-complement integer - both of these are much
friendlier for the vertex codec than a basic float encoding.

While ideally the shared exponent would be stored just once, this
complicates the SIMD decoding and is actually redundant if the output of
the filter is compressed with vertex encoder *and* a general purpose LZ,
because the stream of exponent bytes will be exactly the same between
all three components.

The resulting decoder runs at ~13 GB/s using WASM SIMD and ~2.5 GB/s
using scalar WASM.
  • Loading branch information
zeux committed Mar 31, 2020
1 parent adb3b47 commit 8c9bb5a
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ LDFLAGS=
WASM_SOURCES=src/vertexcodec.cpp src/indexcodec.cpp src/vertexfilter.cpp
WASM_EXPORTS="__start","_sbrk"
WASM_EXPORTS+=,"_meshopt_decodeVertexBuffer","_meshopt_decodeIndexBuffer"
WASM_EXPORTS+=,"_meshopt_decodeFilterOct","_meshopt_decodeFilterQuat"
WASM_EXPORTS+=,"_meshopt_decodeFilterOct","_meshopt_decodeFilterQuat","_meshopt_decodeFilterExp"
WASM_FLAGS=-O3 -DNDEBUG -s EXPORTED_FUNCTIONS='[$(WASM_EXPORTS)]' -s ALLOW_MEMORY_GROWTH=1 -s TOTAL_STACK=24576 -s TOTAL_MEMORY=65536

ifeq ($(config),iphone)
Expand Down
22 changes: 22 additions & 0 deletions demo/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,27 @@ static void decodeFilterQuat12()
assert(memcmp(data, expected, sizeof(data)) == 0);
}

// Checks that meshopt_decodeFilterExp turns packed exponent/mantissa words into the expected float bit patterns.
static void decodeFilterExp()
{
	// reference float bit patterns, in order: 0, 1.5 (2^-1 * 3), -36 (2^2 * -9), 2^-2 * 8388607
	const unsigned int reference[4] = {
	    0,
	    0x3fc00000,
	    0xc2100000,
	    0x49fffffe, // clang-format :-/
	};

	// encoded words: top byte is the exponent, low three bytes are the mantissa
	unsigned int encoded[4] = {
	    0,
	    0xff000003,
	    0x02fffff7,
	    0xfe7fffff, // clang-format :-/
	};

	// decode in place: 4 vertices, 4 bytes each
	meshopt_decodeFilterExp(encoded, 4, 4);

	assert(memcmp(encoded, reference, sizeof(encoded)) == 0);
}

static void clusterBoundsDegenerate()
{
const float vbd[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
Expand Down Expand Up @@ -584,6 +605,7 @@ static void runTestsOnce()
decodeFilterOct8();
decodeFilterOct12();
decodeFilterQuat12();
decodeFilterExp();

clusterBoundsDegenerate();

Expand Down
9 changes: 6 additions & 3 deletions js/meshopt_decoder.js

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions js/meshopt_decoder.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,27 @@ var tests = {

assert.deepStrictEqual(result, expected);
},

decodeFilterExp: function() {
var encoded = new Uint8Array([
0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff, 0x02, 0xff, 0xff, 0x7f,
0xfe,
]);

var expected = new Uint32Array([
0,
0x3fc00000,
0xc2100000,
0x49fffffe,
]);

var result = new Uint32Array(expected.length);
decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 1, 16, encoded, /* filter= */ 3);

assert.deepStrictEqual(result, expected);
},
};

decoder.ready.then(() => {
Expand Down
4 changes: 4 additions & 0 deletions src/meshoptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,13 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte
*
* meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct.
* Each component is stored as a 16-bit integer; stride must be equal to 8.
*
* meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
* Each 32-bit component is decoded in isolation; stride must be divisible by 4.
*/
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size);

/**
* Experimental: Mesh simplifier
Expand Down
57 changes: 57 additions & 0 deletions src/vertexfilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,29 @@ static void decodeFilterQuat(short* data, size_t count)
data[i * 4 + order[qc][3]] = short(wf);
}
}

// Decodes `count` 32-bit words in place from exponential encoding into float bit patterns.
// Each input word holds a signed 8-bit exponent E in its top byte and a signed 24-bit
// mantissa M in its low three bytes; the decoded value is 2^E * M.
// The bit manipulation below assumes `float` is IEEE-754 binary32 (bias 127, 23 mantissa bits).
static void decodeFilterExp(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; ++i)
	{
		unsigned int v = data[i];

		// decode mantissa and exponent; both are sign-extended using arithmetic shifts
		// note: the exponent must not be extracted via plain `char` - its signedness is
		// implementation-defined (unsigned on common ARM ABIs), which would turn negative
		// exponents into large positive ones and diverge from the SIMD path
		int m = int(v << 8) >> 8;
		int e = int(v) >> 24;

		union {
			float f;
			unsigned int ui;
		} u;

		// optimized version of ldexp(float(m), e): build 2^e from its bit pattern, then scale the mantissa
		u.ui = unsigned(e + 127) << 23;
		u.f = u.f * float(m);

		data[i] = u.ui;
	}
}
#endif

#ifdef SIMD_WASM
Expand Down Expand Up @@ -277,6 +300,26 @@ static void decodeFilterQuatSimd(short* data, size_t count)
out[3] = __builtin_rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
}
}

static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
for (size_t i = 0; i < count; i += 4)
{
v128_t v = wasm_v128_load(&data[i]);

// decode exponent into 2^x directly
v128_t ef = wasm_i32x4_shr(v, 24);
v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23);

// decode 24-bit mantissa into floating-point value
v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8);
v128_t m = wasm_f32x4_convert_i32x4(mf);

v128_t r = wasm_f32x4_mul(es, m);

wasm_v128_store(&data[i], r);
}
}
#endif

} // namespace meshopt
Expand Down Expand Up @@ -316,4 +359,18 @@ void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_s
#endif
}

// Decodes exponent-encoded data in place, treating the buffer as a flat array of 32-bit words.
// vertex_count must be a multiple of 4 and vertex_size must be divisible by 4 (both asserted).
// Dispatches to the WASM SIMD implementation when SIMD_WASM is defined, otherwise to the scalar one.
void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size % 4 == 0);

	// every 4 bytes of a vertex is one independently decoded component
	unsigned int* words = static_cast<unsigned int*>(buffer);
	size_t word_count = vertex_count * (vertex_size / 4);

#ifdef SIMD_WASM
	decodeFilterExpSimd(words, word_count);
#else
	decodeFilterExp(words, word_count);
#endif
}

#undef SIMD_WASM
9 changes: 7 additions & 2 deletions tools/codecbench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,17 @@ void benchFilters(size_t count)

double t3 = timestamp();

meshopt_decodeFilterExp(&d8[0], count4, 8);

double t4 = timestamp();

double GB = 1024 * 1024 * 1024;

printf("filter: oct8 %.2f ms (%.2f GB/sec), oct12 %.2f ms (%.2f GB/sec), quat12 %.2f ms (%.2f GB/sec)\n",
printf("filter: oct8 %.2f ms (%.2f GB/sec), oct12 %.2f ms (%.2f GB/sec), quat12 %.2f ms (%.2f GB/sec), exp %.2f ms (%.2f GB/sec)\n",
(t1 - t0) * 1000, double(d4.size()) / GB / (t1 - t0),
(t2 - t1) * 1000, double(d8.size()) / GB / (t2 - t1),
(t3 - t2) * 1000, double(d8.size()) / GB / (t3 - t2));
(t3 - t2) * 1000, double(d8.size()) / GB / (t3 - t2),
(t4 - t3) * 1000, double(d8.size()) / GB / (t4 - t3));
}
}

Expand Down

0 comments on commit 8c9bb5a

Please sign in to comment.