Skip to content

Commit

Permalink
vertexfilter: Consolidate Oct8 and Oct12 into a single entrypoint
Browse files Browse the repository at this point in the history
Since both filters can now handle variable bitrate, it no longer makes as
much sense to split them. This change consolidates the two
implementations and switches to a single templated scalar implementation.
This also makes the gltfpack code a bit simpler.
  • Loading branch information
zeux committed Feb 17, 2020
1 parent c3c6ec7 commit 9047ac1
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 115 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ LDFLAGS=
WASM_SOURCES=src/vertexcodec.cpp src/indexcodec.cpp src/vertexfilter.cpp
WASM_EXPORTS="__start","_sbrk"
WASM_EXPORTS+=,"_meshopt_decodeVertexBuffer","_meshopt_decodeIndexBuffer"
WASM_EXPORTS+=,"_meshopt_decodeFilterOct8","_meshopt_decodeFilterOct12","_meshopt_decodeFilterQuat12"
WASM_EXPORTS+=,"_meshopt_decodeFilterOct","_meshopt_decodeFilterQuat"
WASM_FLAGS=-O3 -DNDEBUG -s EXPORTED_FUNCTIONS='[$(WASM_EXPORTS)]' -s ALLOW_MEMORY_GROWTH=1 -s TOTAL_STACK=24576 -s TOTAL_MEMORY=65536

ifeq ($(config),iphone)
Expand Down
6 changes: 3 additions & 3 deletions demo/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ static void decodeFilterOct8()
14, 130, 127, 1, // clang-format :-/
};

meshopt_decodeFilterOct8(data, 4, 4);
meshopt_decodeFilterOct(data, 4, 4);

const unsigned char expected[4 * 4] = {
0, 1, 127, 0,
Expand All @@ -380,7 +380,7 @@ static void decodeFilterOct12()
14, 1300, 2047, 1, // clang-format :-/
};

meshopt_decodeFilterOct12(data, 4, 8);
meshopt_decodeFilterOct(data, 4, 8);

const unsigned short expected[4 * 4] = {
0, 16, 32767, 0,
Expand All @@ -401,7 +401,7 @@ static void decodeFilterQuat12()
14, 1300, 0, 3, // clang-format :-/
};

meshopt_decodeFilterQuat12(data, 4, 8);
meshopt_decodeFilterQuat(data, 4, 8);

const unsigned short expected[4 * 4] = {
32767, 0, 11, 0,
Expand Down
5 changes: 2 additions & 3 deletions gltf/gltfpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,8 @@ struct StreamFormat
enum Filter
{
Filter_None = 0,
Filter_Oct8 = 1,
Filter_Oct12 = 2,
Filter_Quat12 = 3,
Filter_Oct = 1,
Filter_Quat = 2,
};

cgltf_type type;
Expand Down
8 changes: 4 additions & 4 deletions gltf/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ StreamFormat writeVertexStream(std::string& bin, const Stream& stream, const Qua
bool oct = settings.compressmore && stream.target == 0;
int bits = settings.nrm_bits;

StreamFormat::Filter filter = oct ? (bits > 8 ? StreamFormat::Filter_Oct12 : StreamFormat::Filter_Oct8) : StreamFormat::Filter_None;
StreamFormat::Filter filter = oct ? StreamFormat::Filter_Oct : StreamFormat::Filter_None;

for (size_t i = 0; i < stream.data.size(); ++i)
{
Expand Down Expand Up @@ -383,7 +383,7 @@ StreamFormat writeVertexStream(std::string& bin, const Stream& stream, const Qua
bool oct = settings.compressmore && stream.target == 0;
int bits = (settings.nrm_bits > 8) ? 8 : settings.nrm_bits;

StreamFormat::Filter filter = oct ? StreamFormat::Filter_Oct8 : StreamFormat::Filter_None;
StreamFormat::Filter filter = oct ? StreamFormat::Filter_Oct : StreamFormat::Filter_None;

for (size_t i = 0; i < stream.data.size(); ++i)
{
Expand Down Expand Up @@ -586,15 +586,15 @@ StreamFormat writeKeyframeStream(std::string& bin, cgltf_animation_path_type typ
{
if (type == cgltf_animation_path_type_rotation)
{
StreamFormat::Filter filter = settings.compressmore ? StreamFormat::Filter_Quat12 : StreamFormat::Filter_None;
StreamFormat::Filter filter = settings.compressmore ? StreamFormat::Filter_Quat : StreamFormat::Filter_None;

for (size_t i = 0; i < data.size(); ++i)
{
const Attr& a = data[i];

int16_t v[4];

if (filter == StreamFormat::Filter_Quat12)
if (filter == StreamFormat::Filter_Quat)
{
encodeQuat(v, a, 12);
}
Expand Down
11 changes: 4 additions & 7 deletions js/meshopt_decoder.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions js/meshopt_decoder.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ var tests = {
]);

var result = new Uint16Array(expected.length);
decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 2);
decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 1);

assert.deepStrictEqual(result, expected);
},
Expand All @@ -138,7 +138,7 @@ var tests = {
]);

var result = new Uint16Array(expected.length);
decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 3);
decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 2);

assert.deepStrictEqual(result, expected);
},
Expand Down
14 changes: 5 additions & 9 deletions src/meshoptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,18 +208,14 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte
* These functions can be used to filter output of meshopt_decodeVertexBuffer in-place.
* vertex_count must be a multiple of 4 and vertex_size is restricted to specific values for each function to facilitate SIMD implementation.
*
* meshopt_decodeFilterOct8 decodes octahedral encoding of a unit vector with 8-bit signed X/Y as an input.
* Each component is stored as an 8-bit integer; stride must be equal to 4. W is preserved as is.
* meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f.
* Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
*
* meshopt_decodeFilterOct12 decodes octahedral encoding of a unit vector with 12-bit signed X/Y as an input.
* Each component is stored as an 16-bit integer; stride must be equal to 8. W is preserved as is.
*
* meshopt_decodeFilterQuat12 decodes 3-component quaternion encoding with 12-bit component encoding and a 2-bit component index indicating which component to reconstruct.
* meshopt_decodeFilterQuat decodes 3-component quaternion encoding with 12-bit component encoding and a 2-bit component index indicating which component to reconstruct.
* Each component is stored as a 16-bit integer; stride must be equal to 8.
*/
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct8(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct12(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);

/**
* Experimental: Mesh simplifier
Expand Down
170 changes: 87 additions & 83 deletions src/vertexfilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,15 @@
#define wasmx_unpackhi_v16x8(a, b) wasm_v8x16_shuffle(a, b, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31)
#endif

void meshopt_decodeFilterOct8(void* buffer, size_t vertex_count, size_t vertex_size)
namespace meshopt
{
assert(vertex_count % 4 == 0);
assert(vertex_size == 4);
(void)vertex_size;

signed char* data = static_cast<signed char*>(buffer);

#ifdef SIMD_WASM
static void decodeFilterOctSimd(signed char* data, size_t count)
{
const v128_t sign = wasm_f32x4_splat(-0.f);

for (size_t i = 0; i < vertex_count; i += 4)
for (size_t i = 0; i < count; i += 4)
{
v128_t n4 = wasm_v128_load(&data[i * 4]);

Expand Down Expand Up @@ -71,49 +68,14 @@ void meshopt_decodeFilterOct8(void* buffer, size_t vertex_count, size_t vertex_s

wasm_v128_store(&data[i * 4], res);
}
#else
for (size_t i = 0; i < vertex_count; ++i)
{
// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
float x = float(data[i * 4 + 0]);
float y = float(data[i * 4 + 1]);
float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);

// fixup octahedral coordinates for z<0
float t = (z >= 0.f) ? 0.f : z;

x += (x >= 0.f) ? t : -t;
y += (y >= 0.f) ? t : -t;

// compute normal length & scale
float l = sqrtf(x * x + y * y + z * z);
float s = 127.f / l;

// rounded signed float->int
int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));

data[i * 4 + 0] = (signed char)(xf);
data[i * 4 + 1] = (signed char)(yf);
data[i * 4 + 2] = (signed char)(zf);
}
#endif
}

void meshopt_decodeFilterOct12(void* buffer, size_t vertex_count, size_t vertex_size)
static void decodeFilterOctSimd(short* data, size_t count)
{
assert(vertex_count % 4 == 0);
assert(vertex_size == 8);
(void)vertex_size;

short* data = static_cast<short*>(buffer);

#ifdef SIMD_WASM
const v128_t sign = wasm_f32x4_splat(-0.f);
volatile v128_t zmask = wasm_i32x4_splat(0x7fff); // volatile works around LLVM shuffle "optimizations"

for (size_t i = 0; i < vertex_count; i += 4)
for (size_t i = 0; i < count; i += 4)
{
v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]);
v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]);
Expand Down Expand Up @@ -168,48 +130,13 @@ void meshopt_decodeFilterOct12(void* buffer, size_t vertex_count, size_t vertex_
wasm_v128_store(&data[(i + 0) * 4], res_0);
wasm_v128_store(&data[(i + 2) * 4], res_1);
}
#else
for (size_t i = 0; i < vertex_count; ++i)
{
// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
float x = float(data[i * 4 + 0]);
float y = float(data[i * 4 + 1]);
float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);

// fixup octahedral coordinates for z<0
float t = z >= 0.f ? 0.f : z;

x += (x >= 0.f) ? t : -t;
y += (y >= 0.f) ? t : -t;

// compute normal length & scale
float l = sqrtf(x * x + y * y + z * z);
float s = 32767.f / l;

// rounded signed float->int
int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));

data[i * 4 + 0] = short(xf);
data[i * 4 + 1] = short(yf);
data[i * 4 + 2] = short(zf);
}
#endif
}

void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex_size)
static void decodeFilterQuatSimd(short* data, size_t count)
{
assert(vertex_count % 4 == 0);
assert(vertex_size == 8);
(void)vertex_size;

const float scale = 1.f / (2047.f * sqrtf(2.f));

short* data = static_cast<short*>(buffer);

#ifdef SIMD_WASM
for (size_t i = 0; i < vertex_count; i += 4)
for (size_t i = 0; i < count; i += 4)
{
v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]);
v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]);
Expand Down Expand Up @@ -263,15 +190,55 @@ void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex
out[2] = __builtin_rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2));
out[3] = __builtin_rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
}
#else
}
#endif

#if !defined(SIMD_WASM)
// Scalar octahedral decoder: rewrites `count` 4-component vertices in place.
// For every vertex, components 0/1 are read as the encoded X/Y and component 2 as the
// encoded value of 1.0 at the same bit count; components 0..2 are overwritten with the
// renormalized vector scaled to the largest positive value of T. Component 3 is untouched.
template <typename T>
static void decodeFilterOct(T* data, size_t count)
{
	// largest positive value representable by the signed integer type T
	const float limit = float((1 << (sizeof(T) * 8 - 1)) - 1);

	T* v = data;

	for (size_t left = count; left > 0; --left, v += 4)
	{
		// load x/y as floats and rebuild z from the stored "one" value
		float nx = float(v[0]);
		float ny = float(v[1]);
		float nz = float(v[2]) - fabsf(nx) - fabsf(ny);

		// octahedral fixup is only needed when z<0; for z>=0 x/y stay as they are
		if (nz < 0.f)
		{
			nx += (nx >= 0.f) ? nz : -nz;
			ny += (ny >= 0.f) ? nz : -nz;
		}

		// scale factor that normalizes the vector and maps it to the output range
		float len = sqrtf(nx * nx + ny * ny + nz * nz);
		float k = limit / len;

		// round half away from zero before the truncating float->int conversion
		float rx = (nx >= 0.f) ? 0.5f : -0.5f;
		float ry = (ny >= 0.f) ? 0.5f : -0.5f;
		float rz = (nz >= 0.f) ? 0.5f : -0.5f;

		int qx = int(nx * k + rx);
		int qy = int(ny * k + ry);
		int qz = int(nz * k + rz);

		v[0] = T(qx);
		v[1] = T(qy);
		v[2] = T(qz);
	}
}

static void decodeFilterQuat(short* data, size_t count)
{
const float scale = 1.f / (2047.f * sqrtf(2.f));

static const int order[4][4] = {
{1, 2, 3, 0},
{2, 3, 0, 1},
{3, 0, 1, 2},
{0, 1, 2, 3},
};

for (size_t i = 0; i < vertex_count; ++i)
for (size_t i = 0; i < count; ++i)
{
// convert x/y/z to [-1..1] (scaled...)
float x = float(data[i * 4 + 0]) * scale;
Expand All @@ -296,6 +263,43 @@ void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex
data[i * 4 + order[qc][2]] = short(zf);
data[i * 4 + order[qc][3]] = short(wf);
}
}
#endif

}

// Public entry point for the octahedral decode filter; decodes `buffer` in place.
// vertex_size selects the component width: 4 -> signed 8-bit components, otherwise
// (asserted to be 8) -> signed 16-bit components. The SIMD or scalar implementation
// is chosen at compile time via SIMD_WASM.
void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size == 4 || vertex_size == 8);

	if (vertex_size == 4)
	{
		signed char* bytes = static_cast<signed char*>(buffer);

#if defined(SIMD_WASM)
		decodeFilterOctSimd(bytes, vertex_count);
#else
		decodeFilterOct(bytes, vertex_count);
#endif
	}
	else
	{
		short* words = static_cast<short*>(buffer);

#if defined(SIMD_WASM)
		decodeFilterOctSimd(words, vertex_count);
#else
		decodeFilterOct(words, vertex_count);
#endif
	}
}

// Public entry point for the quaternion decode filter; decodes `buffer` in place.
// The buffer is treated as 16-bit components (vertex_size is asserted to be 8).
// The SIMD or scalar implementation is chosen at compile time via SIMD_WASM.
void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size == 8);

	// vertex_size is only inspected by the assert above; silence unused warnings when asserts are compiled out
	(void)vertex_size;

	short* quats = static_cast<short*>(buffer);

#if defined(SIMD_WASM)
	decodeFilterQuatSimd(quats, vertex_count);
#else
	decodeFilterQuat(quats, vertex_count);
#endif
}

Expand Down
6 changes: 3 additions & 3 deletions tools/codecbench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,15 @@ void benchFilters(size_t count)
{
double t0 = timestamp();

meshopt_decodeFilterOct8(&d4[0], count4, 4);
meshopt_decodeFilterOct(&d4[0], count4, 4);

double t1 = timestamp();

meshopt_decodeFilterOct12(&d8[0], count4, 8);
meshopt_decodeFilterOct(&d8[0], count4, 8);

double t2 = timestamp();

meshopt_decodeFilterQuat12(&d8[0], count4, 8);
meshopt_decodeFilterQuat(&d8[0], count4, 8);

double t3 = timestamp();

Expand Down

0 comments on commit 9047ac1

Please sign in to comment.