-
Notifications
You must be signed in to change notification settings - Fork 24
Closed
Labels
good first issueGood for newcomersGood for newcomershelp wantedExtra attention is neededExtra attention is neededperfPerformance relatedPerformance related
Description
I was experimenting with making Func#call faster and noticed there is a decent performance win from avoiding rb_ary_push. It requires a bit of unsafe code, but I believe the implementation is sound:
diff --git i/bench/host_call.rb w/bench/host_call.rb
index ca4d3fc..b75fe40 100644
--- i/bench/host_call.rb
+++ w/bench/host_call.rb
@@ -1,14 +1,18 @@
require_relative "bench"
-# Call host func (4 args) 337.181k (± 3.7%) i/s - 1.692M in 5.024584s
-# Call host func (16 args) 296.615k (± 5.2%) i/s - 1.498M in 5.064241s
-# Call host func (64 args) 217.487k (± 3.2%) i/s - 1.106M in 5.090547s
-# Call host func (128 args) 119.689k (± 3.8%) i/s - 605.136k in 5.063428s
+# New:
+# Call host func (4 results): 348836.6 i/s
+# Call host func (16 results): 320159.3 i/s - 1.09x slower
+# Call host func (64 results): 224814.2 i/s - 1.55x slower
+# Call host func (128 results): 126469.2 i/s - 2.76x slower
+# Call host func (256 results): 50566.7 i/s - 6.90x slower
-# Call host func (4 args): 333800.6 i/s
-# Call host func (16 args): 291889.7 i/s - 1.14x slower
-# Call host func (64 args): 185375.6 i/s - 1.80x slower
-# Call host func (128 args): 97043.2 i/s - 3.44x slower
+# Old:
+# Call host func (4 args): 331524.8 i/s
+# Call host func (16 args): 280723.9 i/s - 1.18x slower
+# Call host func (64 args): 182873.7 i/s - 1.81x slower
+# Call host func (128 args): 96891.1 i/s - 3.42x slower
+# Call host func (256 args): 40983.9 i/s - 8.09x slower
Bench.ips do |x|
engine = Wasmtime::Engine.new
diff --git i/ext/src/ruby_api/func.rs w/ext/src/ruby_api/func.rs
index 1ef4ba8..e42a494 100644
--- i/ext/src/ruby_api/func.rs
+++ w/ext/src/ruby_api/func.rs
@@ -1,3 +1,5 @@
+use std::mem::transmute;
+
use super::{
convert::{ToRubyValue, ToSym, ToValTypeVec, ToWasmVal},
errors::result_error,
@@ -178,16 +180,57 @@ impl<'a> Func<'a> {
[] => Ok(QNIL.into()),
[result] => result.to_ruby_value(store),
_ => {
- let array = RArray::with_capacity(results.len());
- for result in results {
- array.push(result.to_ruby_value(store)?)?;
+ // We want to initialize a sized Ruby array so we can write to
+ // it without using `rb_ary_push`, which is slow. So we just
+ // pass in the results as a slice for the initial values. They
+ // will be overridden by the loop below. But be careful not to
+ // let Ruby try to use the values in the array before we've
+ // written to them.
+ //
+ // For result sizes of 128, this is about 20% faster than using
+ // rb_ary_push.
+ //
+ // # Safety
+ //
+ // Safety is guaranteed by the assertions below:
+ // - `rb_ary_new_from_values` will allocate `results.len()`
+ // elements of usize (not wasmtime::Val)
+ // - Ruby never accesses the values in the array before we've
+ // written to them, since doing so would point to the Rust vec
+ // until we write to it
+ let fake_values = unsafe { transmute(results.as_slice()) };
+ let array = RArray::from_slice::<Value>(fake_values);
+ let array_slice = unsafe { rarray_as_mut_slice(array, results.len()) };
+ let results_iter = results.iter().enumerate();
+
+ assert!(array_slice.len() == results_iter.len()); // optimize out bounds check
+
+ for (i, result) in results.iter().enumerate() {
+ array_slice[i] = result.to_ruby_value(store).map_err(|e| {
+ // If we fail along the way, zero out the array just to be safe
+ let _ = array.clear();
+ e
+ })?;
}
+
Ok(array.into())
}
}
}
}
+/// Converts an `RArray` into a mutable slice.
+///
+/// # Safety
+/// The capacity of the array must be known in advance for this to be safe, since we provide mutable access to the array's contents.
+unsafe fn rarray_as_mut_slice<'a>(array: RArray, capacity: usize) -> &'a mut [Value] {
+ let array_slice = unsafe { array.as_slice() };
+ let ptr = array_slice.as_ptr();
+ let array_slice = unsafe { std::slice::from_raw_parts_mut(ptr as *mut Value, capacity) };
+
+ array_slice
+}
+
impl From<&Func<'_>> for wasmtime::Extern {
fn from(func: &Func) -> Self {
Self::Func(func.get())
jbourassa
Metadata
Metadata
Assignees
Labels
good first issueGood for newcomersGood for newcomershelp wantedExtra attention is neededExtra attention is neededperfPerformance relatedPerformance related