From 44c3f8ad2d67495ca5b8c154a28ca034c9ac5f77 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald
Date: Mon, 9 Sep 2019 14:00:04 -0700
Subject: [PATCH] Introduce the `multi-value-xform` crate

This crate provides a transformation to turn exported functions that use a
return pointer into exported functions that use multi-value.

Consider the following function:

```rust
pub extern "C" fn pair(a: u32, b: u32) -> [u32; 2] {
    [a, b]
}
```

LLVM will by default compile this down into the following Wasm:

```wasm
(func $pair (param i32 i32 i32)
  local.get 0
  local.get 2
  i32.store offset=4
  local.get 0
  local.get 1
  i32.store)
```

What's happening here is that the function is not directly returning the
pair at all, but instead the first `i32` parameter is a pointer to some
scratch space, and the return value is written into the scratch space. LLVM
does this because it doesn't yet have support for multi-value Wasm, and so
it only knows how to return a single value at a time.

Ideally, with multi-value, what we would like instead is this:

```wasm
(func $pair (param i32 i32) (result i32 i32)
  local.get 0
  local.get 1)
```

However, that's not what this transformation does at the moment. This
transformation is a little simpler than mutating existing functions to
produce a multi-value result; instead, it introduces new functions that wrap
the original function and translate the return pointer to multi-value
results in this wrapper function.

With our running example, we end up with this:

```wasm
;; The original function.
(func $pair (param i32 i32 i32)
  local.get 0
  local.get 2
  i32.store offset=4
  local.get 0
  local.get 1
  i32.store)

(func $pairWrapper (param i32 i32) (result i32 i32)
  ;; Our return pointer that points to the scratch space we are allocating
  ;; on the shadow stack for calling `$pair`.
  (local i32)

  ;; Allocate space on the shadow stack for the result.
  global.get $shadowStackPointer
  i32.const 8
  i32.sub
  local.tee 2
  global.set $shadowStackPointer

  ;; Call `$pair` with our allocated shadow stack space for its results.
  local.get 2
  local.get 0
  local.get 1
  call $pair

  ;; Copy the return values from the shadow stack to the wasm stack.
  local.get 2
  i32.load
  local.get 2
  i32.load offset=4

  ;; Finally, restore the shadow stack pointer.
  local.get 2
  i32.const 8
  i32.add
  global.set $shadowStackPointer)
```

This `$pairWrapper` function is what we actually end up exporting instead of
`$pair`.
---
 crates/cli-support/Cargo.toml       |   1 +
 crates/multi-value-xform/Cargo.toml |  16 ++
 crates/multi-value-xform/src/lib.rs | 338 ++++++++++++++++++++++++++++
 publish.rs                          |   1 +
 4 files changed, 356 insertions(+)
 create mode 100644 crates/multi-value-xform/Cargo.toml
 create mode 100644 crates/multi-value-xform/src/lib.rs

diff --git a/crates/cli-support/Cargo.toml b/crates/cli-support/Cargo.toml
index d510a25ce..3e6498d8a 100644
--- a/crates/cli-support/Cargo.toml
+++ b/crates/cli-support/Cargo.toml
@@ -21,6 +21,7 @@ tempfile = "3.0"
 walrus = "0.12.0"
 wasm-bindgen-anyref-xform = { path = '../anyref-xform', version = '=0.2.50' }
 wasm-bindgen-shared = { path = "../shared", version = '=0.2.50' }
+wasm-bindgen-multi-value-xform = { path = '../multi-value-xform', version = '=0.2.50' }
 wasm-bindgen-threads-xform = { path = '../threads-xform', version = '=0.2.50' }
 wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2.50' }
 wasm-webidl-bindings = "0.5.0"
diff --git a/crates/multi-value-xform/Cargo.toml b/crates/multi-value-xform/Cargo.toml
new file mode 100644
index 000000000..1e1c2599b
--- /dev/null
+++ b/crates/multi-value-xform/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "wasm-bindgen-multi-value-xform"
+version = "0.2.50"
+authors = ["The wasm-bindgen Developers"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/multi-value-xform"
+homepage = "https://rustwasm.github.io/wasm-bindgen/"
+documentation = "https://docs.rs/wasm-bindgen-multi-value-xform"
+description = """
+Internal multi-value transformations for wasm-bindgen
+"""
+edition = "2018"
+
+[dependencies]
+failure = "0.1"
+walrus = "0.12.0"
diff --git a/crates/multi-value-xform/src/lib.rs b/crates/multi-value-xform/src/lib.rs
new file mode 100644
index 000000000..e805fd3a9
--- /dev/null
+++ b/crates/multi-value-xform/src/lib.rs
@@ -0,0 +1,338 @@
+//! The `wasm-bindgen` multi-value transformation.
+//!
+//! This crate provides a transformation to turn exported functions that use a
+//! return pointer into exported functions that use multi-value.
+//!
+//! Consider the following function:
+//!
+//! ```
+//! #[no_mangle]
+//! pub extern "C" fn pair(a: u32, b: u32) -> [u32; 2] {
+//!     [a, b]
+//! }
+//! ```
+//!
+//! LLVM will by default compile this down into the following Wasm:
+//!
+//! ```wasm
+//! (func $pair (param i32 i32 i32)
+//!   local.get 0
+//!   local.get 2
+//!   i32.store offset=4
+//!   local.get 0
+//!   local.get 1
+//!   i32.store)
+//! ```
+//!
+//! What's happening here is that the function is not directly returning the
+//! pair at all, but instead the first `i32` parameter is a pointer to some
+//! scratch space, and the return value is written into the scratch space. LLVM
+//! does this because it doesn't yet have support for multi-value Wasm, and so
+//! it only knows how to return a single value at a time.
+//!
+//! Ideally, with multi-value, what we would like instead is this:
+//!
+//! ```wasm
+//! (func $pair (param i32 i32) (result i32 i32)
+//!   local.get 0
+//!   local.get 1)
+//! ```
+//!
+//! However, that's not what this transformation does at the moment. This
+//! transformation is a little simpler than mutating existing functions to
+//! produce a multi-value result; instead, it introduces new functions that wrap
+//! the original function and translate the return pointer to multi-value
+//! results in this wrapper function.
+//!
+//! With our running example, we end up with this:
+//!
+//! ```wasm
+//! ;; The original function.
+//! (func $pair (param i32 i32 i32)
+//!   local.get 0
+//!   local.get 2
+//!   i32.store offset=4
+//!   local.get 0
+//!   local.get 1
+//!   i32.store)
+//!
+//! (func $pairWrapper (param i32 i32) (result i32 i32)
+//!   ;; Our return pointer that points to the scratch space we are allocating
+//!   ;; on the shadow stack for calling `$pair`.
+//!   (local i32)
+//!
+//!   ;; Allocate space on the shadow stack for the result.
+//!   global.get $shadowStackPointer
+//!   i32.const 8
+//!   i32.sub
+//!   local.tee 2
+//!   global.set $shadowStackPointer
+//!
+//!   ;; Call `$pair` with our allocated shadow stack space for its results.
+//!   local.get 2
+//!   local.get 0
+//!   local.get 1
+//!   call $pair
+//!
+//!   ;; Copy the return values from the shadow stack to the wasm stack.
+//!   local.get 2
+//!   i32.load
+//!   local.get 2
+//!   i32.load offset=4
+//!
+//!   ;; Finally, restore the shadow stack pointer.
+//!   local.get 2
+//!   i32.const 8
+//!   i32.add
+//!   global.set $shadowStackPointer)
+//! ```
+//!
+//! This `$pairWrapper` function is what we actually end up exporting instead of
+//! `$pair`.
+
+#![deny(missing_docs, missing_debug_implementations)]
+
+/// Run the transformation.
+///
+/// See the module-level docs for details on the transformation.
+///
+/// * `memory` is the module's memory that has the shadow stack where return
+///   pointers are allocated within.
+///
+/// * `shadow_stack_pointer` is the global that is being used as the stack
+///   pointer for the shadow stack. With LLVM, this is typically the first
+///   global.
+///
+/// * `to_xform` is the set of exported functions we want to transform and
+///   information required to transform them. The `usize` is the index of the
+///   return pointer parameter that will be removed. The `&[walrus::ValType]`
+///   lists the new result types that will be returned directly instead of via the
+///   return pointer.
+pub fn run(
+    module: &mut walrus::Module,
+    memory: walrus::MemoryId,
+    shadow_stack_pointer: walrus::GlobalId,
+    to_xform: &[(walrus::ExportId, usize, &[walrus::ValType])],
+) -> Result<(), failure::Error> {
+    for &(export, return_pointer_index, results) in to_xform {
+        xform_one(
+            module,
+            memory,
+            shadow_stack_pointer,
+            export,
+            return_pointer_index,
+            results,
+        )?; // stop at the first export that fails; exports handled earlier stay transformed
+    }
+    Ok(())
+}
+
+// Ensure that `n` is aligned to `align`, rounding up as necessary.
+fn round_up_to_alignment(n: u32, align: u32) -> u32 {
+    debug_assert!(align.is_power_of_two()); // the mask below is only valid for powers of two
+    (n + align - 1) & !(align - 1) // step past the boundary, then clear the low bits
+}
+// Replace the function behind `export` with a wrapper that returns `results` directly.
+fn xform_one(
+    module: &mut walrus::Module,
+    memory: walrus::MemoryId, // memory the wrapper loads the results from
+    shadow_stack_pointer: walrus::GlobalId, // must be an `i32` global (checked below)
+    export: walrus::ExportId, // export that gets re-pointed at the wrapper
+    return_pointer_index: usize, // position of the return pointer in the wrapped function
+    results: &[walrus::ValType], // result types of the wrapper
+) -> Result<(), failure::Error> {
+    if module.globals.get(shadow_stack_pointer).ty != walrus::ValType::I32 {
+        failure::bail!("shadow stack pointer global does not have type `i32`");
+    }
+
+    let func = match module.exports.get(export).item { // the function the wrapper will call
+        walrus::ExportItem::Function(f) => f,
+        _ => {
+            failure::bail!("can only multi-value transform exported functions, found non-function")
+        }
+    };
+
+    // Compute the total size of all results, potentially with padding to ensure
+    // that each result is aligned.
+    let mut results_size = 0;
+    for ty in results {
+        results_size = match ty {
+            walrus::ValType::I32 | walrus::ValType::F32 => {
+                debug_assert_eq!(results_size % 4, 0); // every earlier step added a multiple of 4
+                results_size + 4
+            }
+            walrus::ValType::I64 | walrus::ValType::F64 => {
+                round_up_to_alignment(results_size, 8) + 8
+            }
+            walrus::ValType::V128 => round_up_to_alignment(results_size, 16) + 16,
+            walrus::ValType::Anyref => failure::bail!(
+                "cannot multi-value transform functions that return \
+                 anyref, since they can't go into linear memory"
+            ),
+        };
+    }
+    // Round up to 16-byte alignment, since that's what LLVM's emitted Wasm code
+    // seems to expect.
+    let results_size = round_up_to_alignment(results_size, 16);
+
+    let ty = module.funcs.get(func).ty(); // signature of the function being wrapped
+    let (ty_params, ty_results) = module.types.params_results(ty);
+
+    if !ty_results.is_empty() {
+        failure::bail!(
+            "can only multi-value transform functions that don't return any \
+             results (since they should be returned on the stack via a pointer)"
+        );
+    }
+
+    match ty_params.get(return_pointer_index) { // the slot being removed must be an `i32`
+        Some(walrus::ValType::I32) => {}
+        None => failure::bail!("the return pointer parameter doesn't exist"),
+        Some(_) => failure::bail!("the return pointer parameter is not `i32`"),
+    }
+
+    let new_params: Vec<_> = ty_params // the original parameters minus the return pointer
+        .iter()
+        .cloned()
+        .enumerate()
+        .filter_map(|(i, ty)| {
+            if i == return_pointer_index {
+                None
+            } else {
+                Some(ty)
+            }
+        })
+        .collect();
+
+    // The locals for the wrapper's parameters, one per entry of `new_params`.
+    let params: Vec<_> = new_params.iter().map(|ty| module.locals.add(*ty)).collect();
+
+    // A local to hold our shadow stack-allocated return pointer.
+    let return_pointer = module.locals.add(walrus::ValType::I32);
+
+    let mut wrapper = walrus::FunctionBuilder::new(&mut module.types, &new_params, results);
+    let mut body = wrapper.func_body();
+
+    // Allocate space in the shadow stack for the call; `return_pointer` keeps the lowered pointer.
+    body.global_get(shadow_stack_pointer)
+        .i32_const(results_size as i32)
+        .binop(walrus::ir::BinaryOp::I32Sub)
+        .local_tee(return_pointer)
+        .global_set(shadow_stack_pointer);
+
+    // Push the parameters for calling our wrapped function -- including the
+    // return pointer! -- on to the stack.
+    for (i, local) in params.iter().enumerate() {
+        if i == return_pointer_index { // re-insert the return pointer at its original position
+            body.local_get(return_pointer);
+        }
+        body.local_get(*local);
+    }
+    if return_pointer_index == params.len() { // the return pointer was the last parameter
+        body.local_get(return_pointer);
+    }
+
+    // Call our wrapped function.
+    body.call(func);
+
+    // Copy the return values from our shadow stack-allocated space and onto the
+    // Wasm stack.
+    let mut offset = 0;
+    for ty in results {
+        debug_assert!(offset < results_size);
+        body.local_get(return_pointer); // base address for the load emitted below
+        match ty {
+            walrus::ValType::I32 => {
+                debug_assert_eq!(offset % 4, 0);
+                body.load(
+                    memory,
+                    walrus::ir::LoadKind::I32 { atomic: false },
+                    walrus::ir::MemArg { align: 4, offset },
+                );
+                offset += 4;
+            }
+            walrus::ValType::I64 => {
+                offset = round_up_to_alignment(offset, 8); // same padding as the size computation
+                body.load(
+                    memory,
+                    walrus::ir::LoadKind::I64 { atomic: false },
+                    walrus::ir::MemArg { align: 8, offset },
+                );
+                offset += 8;
+            }
+            walrus::ValType::F32 => {
+                debug_assert_eq!(offset % 4, 0);
+                body.load(
+                    memory,
+                    walrus::ir::LoadKind::F32,
+                    walrus::ir::MemArg { align: 4, offset },
+                );
+                offset += 4;
+            }
+            walrus::ValType::F64 => {
+                offset = round_up_to_alignment(offset, 8); // same padding as the size computation
+                body.load(
+                    memory,
+                    walrus::ir::LoadKind::F64,
+                    walrus::ir::MemArg { align: 8, offset },
+                );
+                offset += 8;
+            }
+            walrus::ValType::V128 => {
+                offset = round_up_to_alignment(offset, 16); // same padding as the size computation
+                body.load(
+                    memory,
+                    walrus::ir::LoadKind::V128,
+                    walrus::ir::MemArg { align: 16, offset },
+                );
+                offset += 16;
+            }
+            walrus::ValType::Anyref => unreachable!(), // rejected while computing `results_size`
+        }
+    }
+
+    // Finally, restore the shadow stack pointer.
+    body.local_get(return_pointer)
+        .i32_const(results_size as i32)
+        .binop(walrus::ir::BinaryOp::I32Add) // undoes the subtraction made when allocating
+        .global_set(shadow_stack_pointer);
+
+    let wrapper = wrapper.finish(params, &mut module.funcs);
+
+    // Replace the old export with our new multi-value wrapper for it!
+    match module.exports.get_mut(export).item {
+        walrus::ExportItem::Function(ref mut f) => *f = wrapper,
+        _ => unreachable!(), // `export` was already matched as a function above
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn round_up_to_alignment_works() {
+        for (n, align, expected) in vec![ // (input, alignment, expected rounded value)
+            (0, 1, 0),
+            (1, 1, 1),
+            (2, 1, 2),
+            (0, 2, 0),
+            (1, 2, 2),
+            (2, 2, 2),
+            (3, 2, 4),
+            (0, 4, 0),
+            (1, 4, 4),
+            (2, 4, 4),
+            (3, 4, 4),
+            (4, 4, 4),
+            (5, 4, 8),
+        ] {
+            let actual = super::round_up_to_alignment(n, align);
+            println!(
+                "round_up_to_alignment(n = {}, align = {}) = {} (expected {})",
+                n, align, actual, expected
+            );
+            assert_eq!(actual, expected);
+        }
+    }
+}
diff --git a/publish.rs b/publish.rs
index 2d198e469..29bcf7136 100644
--- a/publish.rs
+++ b/publish.rs
@@ -27,6 +27,7 @@ const CRATES_TO_PUBLISH: &[&str] = &[
     "wasm-bindgen-wasm-interpreter",
     "wasm-bindgen-webidl",
     "wasm-bindgen-threads-xform",
+    "wasm-bindgen-multi-value-xform",
     "wasm-bindgen-anyref-xform",
     "wasm-bindgen-cli-support",
     "wasm-bindgen-cli",