vertexfilter: Consolidate Oct8 and Oct12 into a single entrypoint

Since both filters can now handle variable bitrate, it no longer makes as much sense to split them. This change consolidates the two implementations behind a single entrypoint and uses a single templated scalar implementation. This also makes the gltfpack code a bit simpler.
zeux · Feb 17, 2020 · 9047ac1 · 9047ac1
1 parent c3c6ec7
commit 9047ac1
Show file tree

Hide file tree

Showing 9 changed files with 111 additions and 115 deletions.
diff --git a/Makefile b/Makefile
@@ -26,7 +26,7 @@ LDFLAGS=
 WASM_SOURCES=src/vertexcodec.cpp src/indexcodec.cpp src/vertexfilter.cpp
 WASM_EXPORTS="__start","_sbrk"
 WASM_EXPORTS+=,"_meshopt_decodeVertexBuffer","_meshopt_decodeIndexBuffer"
-WASM_EXPORTS+=,"_meshopt_decodeFilterOct8","_meshopt_decodeFilterOct12","_meshopt_decodeFilterQuat12"
+WASM_EXPORTS+=,"_meshopt_decodeFilterOct","_meshopt_decodeFilterQuat"
 WASM_FLAGS=-O3 -DNDEBUG -s EXPORTED_FUNCTIONS='[$(WASM_EXPORTS)]' -s ALLOW_MEMORY_GROWTH=1 -s TOTAL_STACK=24576 -s TOTAL_MEMORY=65536
 
 ifeq ($(config),iphone)

diff --git a/demo/tests.cpp b/demo/tests.cpp
@@ -359,7 +359,7 @@ static void decodeFilterOct8()
 	    14, 130, 127, 1, // clang-format :-/
 	};
 
-	meshopt_decodeFilterOct8(data, 4, 4);
+	meshopt_decodeFilterOct(data, 4, 4);
 
 	const unsigned char expected[4 * 4] = {
 	    0, 1, 127, 0,
@@ -380,7 +380,7 @@ static void decodeFilterOct12()
 	    14, 1300, 2047, 1, // clang-format :-/
 	};
 
-	meshopt_decodeFilterOct12(data, 4, 8);
+	meshopt_decodeFilterOct(data, 4, 8);
 
 	const unsigned short expected[4 * 4] = {
 	    0, 16, 32767, 0,
@@ -401,7 +401,7 @@ static void decodeFilterQuat12()
 	    14, 1300, 0, 3, // clang-format :-/
 	};
 
-	meshopt_decodeFilterQuat12(data, 4, 8);
+	meshopt_decodeFilterQuat(data, 4, 8);
 
 	const unsigned short expected[4 * 4] = {
 	    32767, 0, 11, 0,

diff --git a/gltf/gltfpack.h b/gltf/gltfpack.h
@@ -120,9 +120,8 @@ struct StreamFormat
 	enum Filter
 	{
 		Filter_None = 0,
-		Filter_Oct8 = 1,
-		Filter_Oct12 = 2,
-		Filter_Quat12 = 3,
+		Filter_Oct = 1,
+		Filter_Quat = 2,
 	};
 
 	cgltf_type type;

diff --git a/gltf/stream.cpp b/gltf/stream.cpp
@@ -309,7 +309,7 @@ StreamFormat writeVertexStream(std::string& bin, const Stream& stream, const Qua
 		bool oct = settings.compressmore && stream.target == 0;
 		int bits = settings.nrm_bits;
 
-		StreamFormat::Filter filter = oct ? (bits > 8 ? StreamFormat::Filter_Oct12 : StreamFormat::Filter_Oct8) : StreamFormat::Filter_None;
+		StreamFormat::Filter filter = oct ? StreamFormat::Filter_Oct : StreamFormat::Filter_None;
 
 		for (size_t i = 0; i < stream.data.size(); ++i)
 		{
@@ -383,7 +383,7 @@ StreamFormat writeVertexStream(std::string& bin, const Stream& stream, const Qua
 		bool oct = settings.compressmore && stream.target == 0;
 		int bits = (settings.nrm_bits > 8) ? 8 : settings.nrm_bits;
 
-		StreamFormat::Filter filter = oct ? StreamFormat::Filter_Oct8 : StreamFormat::Filter_None;
+		StreamFormat::Filter filter = oct ? StreamFormat::Filter_Oct : StreamFormat::Filter_None;
 
 		for (size_t i = 0; i < stream.data.size(); ++i)
 		{
@@ -586,15 +586,15 @@ StreamFormat writeKeyframeStream(std::string& bin, cgltf_animation_path_type typ
 {
 	if (type == cgltf_animation_path_type_rotation)
 	{
-		StreamFormat::Filter filter = settings.compressmore ? StreamFormat::Filter_Quat12 : StreamFormat::Filter_None;
+		StreamFormat::Filter filter = settings.compressmore ? StreamFormat::Filter_Quat : StreamFormat::Filter_None;
 
 		for (size_t i = 0; i < data.size(); ++i)
 		{
 			const Attr& a = data[i];
 
 			int16_t v[4];
 
-			if (filter == StreamFormat::Filter_Quat12)
+			if (filter == StreamFormat::Filter_Quat)
 			{
 				encodeQuat(v, a, 12);
 			}

diff --git a/js/meshopt_decoder.js b/js/meshopt_decoder.js
diff --git a/js/meshopt_decoder.test.js b/js/meshopt_decoder.test.js
@@ -116,7 +116,7 @@ var tests = {
 		]);
 
 		var result = new Uint16Array(expected.length);
-		decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 2);
+		decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 1);
 
 		assert.deepStrictEqual(result, expected);
 	},
@@ -138,7 +138,7 @@ var tests = {
 		]);
 
 		var result = new Uint16Array(expected.length);
-		decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 3);
+		decoder.decodeVertexBuffer(new Uint8Array(result.buffer), 4, 8, encoded, /* filter= */ 2);
 
 		assert.deepStrictEqual(result, expected);
 	},

diff --git a/src/meshoptimizer.h b/src/meshoptimizer.h
@@ -208,18 +208,14 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte
  * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place.
  * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation.
  *
- * meshopt_decodeFilterOct8 decodes octahedral encoding of a unit vector with 8-bit signed X/Y as an input.
- * Each component is stored as an 8-bit integer; stride must be equal to 4. W is preserved as is.
+ * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f.
+ * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
  *
- * meshopt_decodeFilterOct12 decodes octahedral encoding of a unit vector with 12-bit signed X/Y as an input.
- * Each component is stored as an 16-bit integer; stride must be equal to 8. W is preserved as is.
- *
- * meshopt_decodeFilterQuat12 decodes 3-component quaternion encoding with 12-bit component encoding and a 2-bit component index indicating which component to reconstruct.
+ * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with 12-bit component encoding and a 2-bit component index indicating which component to reconstruct.
  * Each component is stored as a 16-bit integer; stride must be equal to 8.
  */
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct8(void* buffer, size_t vertex_count, size_t vertex_size);
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct12(void* buffer, size_t vertex_count, size_t vertex_size);
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);
 
 /**
  * Experimental: Mesh simplifier

diff --git a/src/vertexfilter.cpp b/src/vertexfilter.cpp
@@ -17,18 +17,15 @@
 #define wasmx_unpackhi_v16x8(a, b) wasm_v8x16_shuffle(a, b, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31)
 #endif
 
-void meshopt_decodeFilterOct8(void* buffer, size_t vertex_count, size_t vertex_size)
+namespace meshopt
 {
-	assert(vertex_count % 4 == 0);
-	assert(vertex_size == 4);
-	(void)vertex_size;
-
-	signed char* data = static_cast<signed char*>(buffer);
 
 #ifdef SIMD_WASM
+static void decodeFilterOctSimd(signed char* data, size_t count)
+{
 	const v128_t sign = wasm_f32x4_splat(-0.f);
 
-	for (size_t i = 0; i < vertex_count; i += 4)
+	for (size_t i = 0; i < count; i += 4)
 	{
 		v128_t n4 = wasm_v128_load(&data[i * 4]);
 
@@ -71,49 +68,14 @@ void meshopt_decodeFilterOct8(void* buffer, size_t vertex_count, size_t vertex_s
 
 		wasm_v128_store(&data[i * 4], res);
 	}
-#else
-	for (size_t i = 0; i < vertex_count; ++i)
-	{
-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
-		float x = float(data[i * 4 + 0]);
-		float y = float(data[i * 4 + 1]);
-		float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);
-
-		// fixup octahedral coordinates for z<0
-		float t = (z >= 0.f) ? 0.f : z;
-
-		x += (x >= 0.f) ? t : -t;
-		y += (y >= 0.f) ? t : -t;
-
-		// compute normal length & scale
-		float l = sqrtf(x * x + y * y + z * z);
-		float s = 127.f / l;
-
-		// rounded signed float->int
-		int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
-		int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
-		int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));
-
-		data[i * 4 + 0] = (signed char)(xf);
-		data[i * 4 + 1] = (signed char)(yf);
-		data[i * 4 + 2] = (signed char)(zf);
-	}
-#endif
 }
 
-void meshopt_decodeFilterOct12(void* buffer, size_t vertex_count, size_t vertex_size)
+static void decodeFilterOctSimd(short* data, size_t count)
 {
-	assert(vertex_count % 4 == 0);
-	assert(vertex_size == 8);
-	(void)vertex_size;
-
-	short* data = static_cast<short*>(buffer);
-
-#ifdef SIMD_WASM
 	const v128_t sign = wasm_f32x4_splat(-0.f);
 	volatile v128_t zmask = wasm_i32x4_splat(0x7fff); // volatile works around LLVM shuffle "optimizations"
 
-	for (size_t i = 0; i < vertex_count; i += 4)
+	for (size_t i = 0; i < count; i += 4)
 	{
 		v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]);
 		v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]);
@@ -168,48 +130,13 @@ void meshopt_decodeFilterOct12(void* buffer, size_t vertex_count, size_t vertex_
 		wasm_v128_store(&data[(i + 0) * 4], res_0);
 		wasm_v128_store(&data[(i + 2) * 4], res_1);
 	}
-#else
-	for (size_t i = 0; i < vertex_count; ++i)
-	{
-		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
-		float x = float(data[i * 4 + 0]);
-		float y = float(data[i * 4 + 1]);
-		float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);
-
-		// fixup octahedral coordinates for z<0
-		float t = z >= 0.f ? 0.f : z;
-
-		x += (x >= 0.f) ? t : -t;
-		y += (y >= 0.f) ? t : -t;
-
-		// compute normal length & scale
-		float l = sqrtf(x * x + y * y + z * z);
-		float s = 32767.f / l;
-
-		// rounded signed float->int
-		int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
-		int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
-		int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));
-
-		data[i * 4 + 0] = short(xf);
-		data[i * 4 + 1] = short(yf);
-		data[i * 4 + 2] = short(zf);
-	}
-#endif
 }
 
-void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex_size)
+static void decodeFilterQuatSimd(short* data, size_t count)
 {
-	assert(vertex_count % 4 == 0);
-	assert(vertex_size == 8);
-	(void)vertex_size;
-
 	const float scale = 1.f / (2047.f * sqrtf(2.f));
 
-	short* data = static_cast<short*>(buffer);
-
-#ifdef SIMD_WASM
-	for (size_t i = 0; i < vertex_count; i += 4)
+	for (size_t i = 0; i < count; i += 4)
 	{
 		v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]);
 		v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]);
@@ -263,15 +190,55 @@ void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex
 		out[2] = __builtin_rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2));
 		out[3] = __builtin_rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
 	}
-#else
+}
+#endif
+
+#if !defined(SIMD_WASM)
+template <typename T>
+static void decodeFilterOct(T* data, size_t count)
+{
+	const float max = float((1 << (sizeof(T) * 8 - 1)) - 1); // largest positive value of a signed T: 127 for signed char, 32767 for a 16-bit short
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		// convert x and y to floats and reconstruct z; this assumes the stored z component encodes 1.f at the same bit count as x/y
+		float x = float(data[i * 4 + 0]);
+		float y = float(data[i * 4 + 1]);
+		float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);
+
+		// fixup octahedral coordinates for z<0 (t is z when z is negative, 0 otherwise)
+		float t = (z >= 0.f) ? 0.f : z;
+
+		x += (x >= 0.f) ? t : -t;
+		y += (y >= 0.f) ? t : -t;
+
+		// compute normal length & the scale that maps the normalized vector to [-max, max]
+		float l = sqrtf(x * x + y * y + z * z);
+		float s = max / l; // NOTE(review): l is 0 for an all-zero input vertex; there is no guard for that case here
+
+		// round to nearest: add +/-0.5 before the truncating float->int conversion
+		int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
+		int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
+		int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));
+
+		data[i * 4 + 0] = T(xf);
+		data[i * 4 + 1] = T(yf);
+		data[i * 4 + 2] = T(zf); // component 3 (w) is never written, so it keeps its input value
+	}
+}
+
+static void decodeFilterQuat(short* data, size_t count)
+{
+	const float scale = 1.f / (2047.f * sqrtf(2.f));
+
 	static const int order[4][4] = {
 	    {1, 2, 3, 0},
 	    {2, 3, 0, 1},
 	    {3, 0, 1, 2},
 	    {0, 1, 2, 3},
 	};
 
-	for (size_t i = 0; i < vertex_count; ++i)
+	for (size_t i = 0; i < count; ++i)
 	{
 		// convert x/y/z to [-1..1] (scaled...)
 		float x = float(data[i * 4 + 0]) * scale;
@@ -296,6 +263,43 @@ void meshopt_decodeFilterQuat12(void* buffer, size_t vertex_count, size_t vertex
 		data[i * 4 + order[qc][2]] = short(zf);
 		data[i * 4 + order[qc][3]] = short(wf);
 	}
+}
+#endif
+
+}
+
+void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size) // decodes octahedral-encoded vectors in place; picks the 8-bit or 16-bit path from vertex_size
+{
+	using namespace meshopt;
+
+	assert(vertex_count % 4 == 0); // the SIMD loops consume 4 vertices per iteration
+	assert(vertex_size == 4 || vertex_size == 8); // 4 bytes => 4 x signed char components, 8 bytes => 4 x short components
+
+#if defined(SIMD_WASM)
+	if (vertex_size == 4)
+		decodeFilterOctSimd(static_cast<signed char*>(buffer), vertex_count);
+	else
+		decodeFilterOctSimd(static_cast<short*>(buffer), vertex_count);
+#else
+	if (vertex_size == 4)
+		decodeFilterOct(static_cast<signed char*>(buffer), vertex_count); // templated scalar path, T = signed char
+	else
+		decodeFilterOct(static_cast<short*>(buffer), vertex_count); // templated scalar path, T = short
+#endif
+}
+
+void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size) // decodes 3-component quaternion encoding in place; buffer is treated as 4 shorts per element
+{
+	using namespace meshopt;
+
+	assert(vertex_count % 4 == 0); // the SIMD loop consumes 4 elements per iteration
+	assert(vertex_size == 8); // only the 4 x short (8-byte) layout is supported
+	(void)vertex_size; // vertex_size is only read by the assert above
+
+#if defined(SIMD_WASM)
+	decodeFilterQuatSimd(static_cast<short*>(buffer), vertex_count);
+#else
+	decodeFilterQuat(static_cast<short*>(buffer), vertex_count); // scalar fallback when SIMD_WASM is not defined
 #endif
 }
 

diff --git a/tools/codecbench.cpp b/tools/codecbench.cpp
@@ -103,15 +103,15 @@ void benchFilters(size_t count)
 	{
 		double t0 = timestamp();
 
-		meshopt_decodeFilterOct8(&d4[0], count4, 4);
+		meshopt_decodeFilterOct(&d4[0], count4, 4);
 
 		double t1 = timestamp();
 
-		meshopt_decodeFilterOct12(&d8[0], count4, 8);
+		meshopt_decodeFilterOct(&d8[0], count4, 8);
 
 		double t2 = timestamp();
 
-		meshopt_decodeFilterQuat12(&d8[0], count4, 8);
+		meshopt_decodeFilterQuat(&d8[0], count4, 8);
 
 		double t3 = timestamp();