From 6343f2659a5b81a26afd7d8910811c7c3ce6230a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sun, 19 Aug 2018 17:07:30 -0700 Subject: [PATCH] Remove dependency on `wasmi` This is a pretty heavyweight dependency which accounts for a surprising amount of runtime for larger modules in `wasm-bindgen`. We don't need 90% of the crate and so this commit bundles a small interpreter for instructions we know are only going to appear in describe-related functions. --- crates/cli-support/Cargo.toml | 2 +- crates/cli-support/src/descriptor.rs | 2 +- crates/cli-support/src/js/mod.rs | 7 +- crates/cli-support/src/lib.rs | 126 +------------- crates/wasm-interpreter/Cargo.toml | 17 ++ crates/wasm-interpreter/src/lib.rs | 231 ++++++++++++++++++++++++ crates/wasm-interpreter/tests/smoke.rs | 232 +++++++++++++++++++++++++ src/describe.rs | 20 ++- 8 files changed, 503 insertions(+), 134 deletions(-) create mode 100644 crates/wasm-interpreter/Cargo.toml create mode 100644 crates/wasm-interpreter/src/lib.rs create mode 100644 crates/wasm-interpreter/tests/smoke.rs diff --git a/crates/cli-support/Cargo.toml b/crates/cli-support/Cargo.toml index e47dfeb51..40102d7dd 100644 --- a/crates/cli-support/Cargo.toml +++ b/crates/cli-support/Cargo.toml @@ -18,5 +18,5 @@ serde = "1.0" serde_json = "1.0" tempfile = "3.0" wasm-bindgen-shared = { path = "../shared", version = '=0.2.17' } +wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2.17' } wasm-gc-api = "0.1.9" -wasmi = "0.3" diff --git a/crates/cli-support/src/descriptor.rs b/crates/cli-support/src/descriptor.rs index 122edad96..593e75a23 100644 --- a/crates/cli-support/src/descriptor.rs +++ b/crates/cli-support/src/descriptor.rs @@ -94,7 +94,7 @@ pub enum VectorKind { impl Descriptor { pub fn decode(mut data: &[u32]) -> Descriptor { let descriptor = Descriptor::_decode(&mut data); - assert!(data.is_empty()); + assert!(data.is_empty(), "remaining data {:?}", data); descriptor } diff --git 
a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index 357c3837a..a1986c35b 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -10,6 +10,7 @@ use wasm_gc; use super::Bindgen; use descriptor::{Descriptor, VectorKind}; +use wasm_interpreter::Interpreter; mod js2rust; use self::js2rust::Js2Rust; @@ -41,7 +42,7 @@ pub struct Context<'a> { pub exported_classes: HashMap, pub function_table_needed: bool, - pub run_descriptor: &'a Fn(&str) -> Option>, + pub interpreter: &'a mut Interpreter, } #[derive(Default)] @@ -1668,9 +1669,9 @@ impl<'a> Context<'a> { Ok(()) } - fn describe(&self, name: &str) -> Option { + fn describe(&mut self, name: &str) -> Option { let name = format!("__wbindgen_describe_{}", name); - (self.run_descriptor)(&name).map(|d| Descriptor::decode(&d)) + Some(Descriptor::decode(self.interpreter.interpret(&name, self.module)?)) } fn global(&mut self, s: &str) { diff --git a/crates/cli-support/src/lib.rs b/crates/cli-support/src/lib.rs index 59f4eb85f..fdb09dc68 100644 --- a/crates/cli-support/src/lib.rs +++ b/crates/cli-support/src/lib.rs @@ -4,14 +4,13 @@ extern crate parity_wasm; extern crate wasm_bindgen_shared as shared; extern crate serde_json; extern crate wasm_gc; -extern crate wasmi; #[macro_use] extern crate failure; +extern crate wasm_bindgen_wasm_interpreter as wasm_interpreter; use std::any::Any; use std::collections::BTreeSet; use std::env; -use std::fmt; use std::fs; use std::mem; use std::path::{Path, PathBuf}; @@ -184,13 +183,7 @@ impl Bindgen { // This means that whenever we encounter an import or export we'll // execute a shim function which informs us about its type so we can // then generate the appropriate bindings. 
- // - // TODO: avoid a `clone` here of the module if we can - let instance = wasmi::Module::from_parity_wasm_module(module.clone()) - .with_context(|_| "failed to create wasmi module")?; - let instance = wasmi::ModuleInstance::new(&instance, &MyResolver) - .with_context(|_| "failed to instantiate wasm module")?; - let instance = instance.not_started_instance(); + let mut instance = wasm_interpreter::Interpreter::new(&module); let (js, ts) = { let mut cx = js::Context { @@ -206,20 +199,7 @@ impl Bindgen { config: &self, module: &mut module, function_table_needed: false, - run_descriptor: &|name| { - let mut v = MyExternals(Vec::new()); - match instance.invoke_export(name, &[], &mut v) { - Ok(None) => Some(v.0), - Ok(Some(_)) => unreachable!( - "there is only one export, and we only return None from it" - ), - // Allow missing exported describe functions. This can - // happen when a nested dependency crate exports things - // but the root crate doesn't use them. - Err(wasmi::Error::Function(_)) => None, - Err(e) => panic!("unexpected error running descriptor: {}", e), - } - }, + interpreter: &mut instance, }; for program in programs.iter() { js::SubContext { @@ -409,106 +389,6 @@ to open an issue at https://github.com/rustwasm/wasm-bindgen/issues! Ok(ret) } -struct MyResolver; - -impl wasmi::ImportResolver for MyResolver { - fn resolve_func( - &self, - module_name: &str, - field_name: &str, - signature: &wasmi::Signature, - ) -> Result { - // Route our special "describe" export to 1 and everything else to 0. - // That way whenever the function 1 is invoked we know what to do and - // when 0 is invoked (by accident) we'll trap and produce an error. 
- let idx = (module_name == "__wbindgen_placeholder__" && field_name == "__wbindgen_describe") - as usize; - Ok(wasmi::FuncInstance::alloc_host(signature.clone(), idx)) - } - - fn resolve_global( - &self, - _module_name: &str, - _field_name: &str, - descriptor: &wasmi::GlobalDescriptor, - ) -> Result { - // dummy implementation to ensure instantiation succeeds - let val = match descriptor.value_type() { - wasmi::ValueType::I32 => wasmi::RuntimeValue::I32(0), - wasmi::ValueType::I64 => wasmi::RuntimeValue::I64(0), - wasmi::ValueType::F32 => wasmi::RuntimeValue::F32(0.0.into()), - wasmi::ValueType::F64 => wasmi::RuntimeValue::F64(0.0.into()), - }; - Ok(wasmi::GlobalInstance::alloc(val, descriptor.is_mutable())) - } - - fn resolve_memory( - &self, - _module_name: &str, - _field_name: &str, - descriptor: &wasmi::MemoryDescriptor, - ) -> Result { - // dummy implementation to ensure instantiation succeeds - use wasmi::memory_units::Pages; - let initial = Pages(descriptor.initial() as usize); - let maximum = descriptor.maximum().map(|i| Pages(i as usize)); - wasmi::MemoryInstance::alloc(initial, maximum) - } - - fn resolve_table( - &self, - _module_name: &str, - _field_name: &str, - descriptor: &wasmi::TableDescriptor, - ) -> Result { - // dummy implementation to ensure instantiation succeeds - let initial = descriptor.initial(); - let maximum = descriptor.maximum(); - wasmi::TableInstance::alloc(initial, maximum) - } -} - -struct MyExternals(Vec); - -#[derive(Debug)] -struct MyError(String); - -impl wasmi::Externals for MyExternals { - fn invoke_index( - &mut self, - index: usize, - args: wasmi::RuntimeArgs, - ) -> Result, wasmi::Trap> { - macro_rules! bail { - ($($t:tt)*) => ({ - let s = MyError(format!($($t)*)); - return Err(wasmi::Trap::new(wasmi::TrapKind::Host(Box::new(s)))) - }) - } - // We only recognize one function here which was mapped to the index 1 - // by the resolver above. 
- if index != 1 { - bail!("only __wbindgen_describe can be run at this time") - } - if args.len() != 1 { - bail!("must have exactly one argument"); - } - match args.nth_value_checked(0)? { - wasmi::RuntimeValue::I32(i) => self.0.push(i as u32), - _ => bail!("expected one argument of i32 type"), - } - Ok(None) - } -} - -impl wasmi::HostError for MyError {} - -impl fmt::Display for MyError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.0.fmt(f) - } -} - fn reset_indentation(s: &str) -> String { let mut indent: u32 = 0; let mut dst = String::new(); diff --git a/crates/wasm-interpreter/Cargo.toml b/crates/wasm-interpreter/Cargo.toml new file mode 100644 index 000000000..eb837fedb --- /dev/null +++ b/crates/wasm-interpreter/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "wasm-bindgen-wasm-interpreter" +version = "0.2.17" +authors = ["The wasm-bindgen Developers"] +license = "MIT/Apache-2.0" +repository = "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/wasm-interpreter" +homepage = "https://rustwasm.github.io/wasm-bindgen/" +documentation = "https://docs.rs/wasm-bindgen-wasm-interpreter" +description = """ +Micro-interpreter optimized for wasm-bindgen's use case +""" + +[dependencies] +parity-wasm = "0.31" + +[dev-dependencies] +tempfile = "3" diff --git a/crates/wasm-interpreter/src/lib.rs b/crates/wasm-interpreter/src/lib.rs new file mode 100644 index 000000000..bac963908 --- /dev/null +++ b/crates/wasm-interpreter/src/lib.rs @@ -0,0 +1,231 @@ +//! A tiny and incomplete wasm interpreter +//! +//! This module contains a tiny and incomplete wasm interpreter built on top of +//! `parity-wasm`'s module structure. Each `Interpreter` contains some state +//! about the execution of a wasm instance. The "incomplete" part here is +//! related to the fact that this is *only* used to execute the various +//! descriptor functions for wasm-bindgen. +//! +//! As a recap, the wasm-bindgen macro generate "descriptor functions" which +//! 
basically act as a mapping of rustc's trait resolution in executable code. This
+//! allows us to detect, after the macro is invoked, what trait selection did and
+//! what types of functions look like. By executing descriptor functions they'll
+//! each invoke a known import (with only one argument) some number of times,
+//! which gives us a list of `u32` values to then decode.
+//!
+//! The interpreter here is only geared towards this one exact use case, so it's
+//! quite small and likely not extra-efficient.
+
+#![deny(missing_docs)]
+
+extern crate parity_wasm;
+
+use std::collections::HashMap;
+
+use parity_wasm::elements::*;
+
+/// A ready-to-go interpreter of a wasm module.
+///
+/// An interpreter currently represents effectively cached state. It is reused
+/// between calls to `interpret` and is precomputed from a `Module`. It houses
+/// state like the wasm stack, wasm memory, etc.
+#[derive(Default)]
+pub struct Interpreter {
+    // Number of imported functions in the wasm module (used in index
+    // calculations)
+    imports: usize,
+
+    // Function index of the `__wbindgen_describe` imported function. We special
+    // case this to know when the environment's imported function is called.
+    describe_idx: Option<u32>,
+
+    // A mapping of string names to the function index, filled with all exported
+    // functions.
+    name_map: HashMap<String, u32>,
+
+    // The numerical index of the code section in the wasm module, indexed into
+    // the module's list of sections.
+    code_idx: Option<usize>,
+
+    // The current stack pointer (global 0) and wasm memory (the stack). Only
+    // used in a limited capacity. Note that `sp` is a byte address while `mem`
+    // is indexed in 4-byte words (loads and stores divide the address by 4).
+    sp: i32,
+    mem: Vec<i32>,
+
+    // The wasm stack. Note how it's just `i32` which is intentional, we don't
+    // support other types.
+    stack: Vec<i32>,
+
+    // The descriptor which we're assembling, a list of `u32` entries. This is
+    // very specific to wasm-bindgen and is the purpose for the existence of
+    // this module.
+    descriptor: Vec<u32>,
+}
+
+impl Interpreter {
+    /// Creates a new interpreter from a provided `Module`, precomputing all
+    /// information necessary to interpret further.
+    ///
+    /// Note that the `module` passed in to this function must be the same as
+    /// the `module` passed to `interpret` below.
+    pub fn new(module: &Module) -> Interpreter {
+        let mut ret = Interpreter::default();
+
+        // The descriptor functions shouldn't really use all that much memory
+        // (the LLVM call stack, now the wasm stack). To handle that let's give
+        // ourselves a little bit of memory and set the stack pointer (global
+        // 0) to the top.
+        ret.mem = vec![0; 0x100];
+        ret.sp = ret.mem.len() as i32;
+
+        // Figure out where our code section, if any, is.
+        for (i, s) in module.sections().iter().enumerate() {
+            if let Section::Code(_) = s {
+                ret.code_idx = Some(i);
+            }
+        }
+
+        // Figure out where the `__wbindgen_describe` imported function is, if
+        // it exists. We'll special case calls to this function as our
+        // interpretation should only invoke this function as an imported
+        // function.
+        //
+        // `call` instructions refer to the *function* index space, which only
+        // counts imported functions. Imported memories, tables and globals are
+        // therefore skipped while counting, otherwise any such import listed
+        // before `__wbindgen_describe` would shift the index we record and
+        // calls to it would no longer be recognized.
+        if let Some(imports) = module.import_section() {
+            ret.imports = imports.functions();
+            let functions = imports.entries().iter().filter(|entry| match entry.external() {
+                External::Function(_) => true,
+                _ => false,
+            });
+            for (func_idx, entry) in functions.enumerate() {
+                if entry.module() == "__wbindgen_placeholder__"
+                    && entry.field() == "__wbindgen_describe"
+                {
+                    ret.describe_idx = Some(func_idx as u32);
+                }
+            }
+        }
+
+        // Build up the mapping of exported functions to function indices.
+        // Exports of anything other than a function are ignored.
+        if let Some(exports) = module.export_section() {
+            for export in exports.entries() {
+                if let Internal::Function(i) = export.internal() {
+                    ret.name_map.insert(export.field().to_string(), *i);
+                }
+            }
+        }
+
+        ret
+    }
+
+    /// Interprets the execution of the descriptor function `func`.
+    ///
+    /// This function will execute `func` in the `module` provided. Note that
+    /// the `module` provided here must be the same as the one passed to `new`
+    /// when this `Interpreter` was constructed.
+    ///
+    /// The `func` must be a wasm-bindgen descriptor function meaning that it
+    /// doesn't do anything like use floats or i64. Instead all it should do is
+    /// call other functions, sometimes some stack pointer manipulation, and
+    /// then call the one imported `__wbindgen_describe` function. Anything else
+    /// will cause this interpreter to panic.
+    ///
+    /// When the descriptor has finished running the assembled descriptor list
+    /// is returned. The descriptor returned can then be re-parsed into an
+    /// actual `Descriptor` in the cli-support crate.
+    ///
+    /// # Return value
+    ///
+    /// Returns `Some` if `func` was found in the `module` and `None` if it was
+    /// not found in the `module`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `func` executes an instruction this interpreter does not
+    /// support, or if it leaves the wasm stack or the stack pointer
+    /// unbalanced.
+    pub fn interpret(&mut self, func: &str, module: &Module) -> Option<&[u32]> {
+        self.descriptor.clear();
+        let idx = *self.name_map.get(func)?;
+        let code_idx = self
+            .code_idx
+            .expect("an exported function was found but the module has no code section");
+        let code = match &module.sections()[code_idx] {
+            Section::Code(s) => s,
+            _ => panic!("section {} is not a code section; is this the module given to `new`?", code_idx),
+        };
+
+        // We should have a blank wasm and LLVM stack at both the start and end
+        // of the call.
+        assert_eq!(self.sp, self.mem.len() as i32);
+        assert_eq!(self.stack.len(), 0);
+        self.call(idx, code);
+        assert_eq!(self.stack.len(), 0);
+        assert_eq!(self.sp, self.mem.len() as i32);
+        Some(&self.descriptor)
+    }
+
+    // Executes the function at index `idx` of the wasm function index space,
+    // looking its body up in `code` and recursing for nested `call`s.
+    fn call(&mut self, idx: u32, code: &CodeSection) {
+        use parity_wasm::elements::Instruction::*;
+
+        // Function bodies are numbered after all imported functions.
+        let idx = idx as usize;
+        assert!(idx >= self.imports); // can't call imported functions
+        let body = &code.bodies()[idx - self.imports];
+
+        // Allocate space for our call frame's local variables. All local
+        // variables should be of the `i32` type.
+        assert!(body.locals().len() <= 1, "too many local types");
+        let locals = body.locals()
+            .get(0)
+            .map(|i| {
+                assert_eq!(i.value_type(), ValueType::I32);
+                i.count()
+            })
+            .unwrap_or(0);
+        let mut locals = vec![0; locals as usize];
+
+        // Actual interpretation loop! We keep track of our stack's length to
+        // recover it as part of the `Return` instruction, and otherwise this is
+        // a pretty straightforward interpretation loop.
+        let before = self.stack.len();
+        for instr in body.code().elements() {
+            match instr {
+                I32Const(x) => self.stack.push(*x),
+                SetLocal(i) => locals[*i as usize] = self.stack.pop().unwrap(),
+                GetLocal(i) => self.stack.push(locals[*i as usize]),
+                Call(idx) => {
+                    if Some(*idx) == self.describe_idx {
+                        self.descriptor.push(self.stack.pop().unwrap() as u32);
+                    } else {
+                        self.call(*idx, code);
+                    }
+                }
+                GetGlobal(0) => self.stack.push(self.sp),
+                SetGlobal(0) => self.sp = self.stack.pop().unwrap(),
+
+                // wasm integer arithmetic is defined modulo 2^32, so use the
+                // wrapping operations rather than panicking on overflow in
+                // debug builds.
+                I32Sub => {
+                    let b = self.stack.pop().unwrap();
+                    let a = self.stack.pop().unwrap();
+                    self.stack.push(a.wrapping_sub(b));
+                }
+                I32Add => {
+                    let b = self.stack.pop().unwrap();
+                    let a = self.stack.pop().unwrap();
+                    self.stack.push(a.wrapping_add(b));
+                }
+
+                // The effective address is computed in 64 bits so that
+                // `addr + offset` can never wrap around into valid memory; an
+                // out-of-range address panics on the index below instead.
+                I32Store(/* align = */ 2, offset) => {
+                    let val = self.stack.pop().unwrap();
+                    let addr = self.stack.pop().unwrap() as u32;
+                    let slot = (u64::from(addr) + u64::from(*offset)) / 4;
+                    self.mem[slot as usize] = val;
+                }
+                I32Load(/* align = */ 2, offset) => {
+                    let addr = self.stack.pop().unwrap() as u32;
+                    let slot = (u64::from(addr) + u64::from(*offset)) / 4;
+                    self.stack.push(self.mem[slot as usize]);
+                }
+
+                // `return` discards whatever this frame left on the stack and
+                // then leaves the function; instructions following it must
+                // not be executed.
+                Return => {
+                    self.stack.truncate(before);
+                    break;
+                }
+                End => break,
+
+                // All other instructions shouldn't be used by our various
+                // descriptor functions. LLVM optimizations may mean that some
+                // of the above instructions aren't actually needed either, but
+                // the above instructions have empirically been required when
+                // executing our own test suite in wasm-bindgen.
+                //
+                // Note that LLVM may change over time to generate new
+                // instructions in debug mode, and we'll have to react to those
+                // sorts of changes as they arise.
+                s => panic!("unknown instruction {:?}", s),
+            }
+        }
+        // Each call must leave the wasm stack exactly as tall as it found it.
+        assert_eq!(self.stack.len(), before);
+    }
+}
diff --git a/crates/wasm-interpreter/tests/smoke.rs b/crates/wasm-interpreter/tests/smoke.rs
new file mode 100644
index 000000000..f2fe894d5
--- /dev/null
+++ b/crates/wasm-interpreter/tests/smoke.rs
@@ -0,0 +1,232 @@
+extern crate parity_wasm;
+extern crate tempfile;
+extern crate wasm_bindgen_wasm_interpreter;
+
+use std::fs;
+use std::process::Command;
+
+use wasm_bindgen_wasm_interpreter::Interpreter;
+
+// Assembles `wat` with the external `wat2wasm` tool (spawned by name, so it
+// has to be discoverable by `Command`), loads the resulting binary with
+// `parity_wasm`, and asserts that interpreting the export `name` yields
+// `result`.
+fn interpret(wat: &str, name: &str, result: Option<&[u32]>) {
+    let input = tempfile::NamedTempFile::new().unwrap();
+    let output = tempfile::NamedTempFile::new().unwrap();
+    fs::write(input.path(), wat).unwrap();
+    let status = Command::new("wat2wasm")
+        .arg(input.path())
+        .arg("-o").arg(output.path())
+        .status()
+        .unwrap();
+    println!("status: {}", status);
+    assert!(status.success());
+    let module = parity_wasm::deserialize_file(output.path()).unwrap();
+    let mut i = Interpreter::new(&module);
+    assert_eq!(i.interpret(name, &module), result);
+}
+
+// An exported function with an empty body produces an empty descriptor, an
+// unknown export produces `None`, and a single `__wbindgen_describe` call
+// records its argument.
+#[test]
+fn smoke() {
+    let wat = r#"
+        (module
+            (export "foo" (func $foo))
+
+            (func $foo)
+        )
+    "#;
+    interpret(wat, "foo", Some(&[]));
+    interpret(wat, "bar", None);
+
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (func $foo
+                i32.const 1
+                call $__wbindgen_describe
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[1]));
+}
+
+// A value written to an `i32` local can be read back.
+#[test]
+fn locals() {
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (func $foo
+                (local i32)
+                i32.const 2
+                set_local 0
+                get_local 0
+                call $__wbindgen_describe
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[2]));
+}
+
+// Global 0 is served from the interpreter's own stack pointer, which starts
+// at 256 (the `0x100` set up in `Interpreter::new`), not from the initializer
+// declared in the module.
+#[test]
+fn globals() {
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (global i32 (i32.const 0))
+
+            (func $foo
+                (local i32)
+                get_global 0
+                set_local 0
+                get_local 0
+                call $__wbindgen_describe
+                get_local 0
+                set_global 0
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[256]));
+}
+
+// `i32.add` and `i32.sub` consume the top two stack values; subtraction is
+// "first pushed minus last pushed".
+#[test]
+fn arithmetic() {
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (func $foo
+                i32.const 1
+                i32.const 2
+                i32.add
+                call $__wbindgen_describe
+                i32.const 2
+                i32.const 1
+                i32.sub
+                call $__wbindgen_describe
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[3, 1]));
+}
+
+// `return` drops values still sitting on the wasm stack (the unused `1`
+// here), so the interpreter's balanced-stack assertions still hold.
+#[test]
+fn return_early() {
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (func $foo
+                i32.const 1
+                i32.const 2
+                call $__wbindgen_describe
+                return
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[2]));
+}
+
+// Mimics a stack frame: move the stack pointer down by 16 bytes, store three
+// words, load them back, then restore the stack pointer.
+#[test]
+fn loads_and_stores() {
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (global i32 (i32.const 0))
+            (memory 1)
+
+            (func $foo
+                (local i32)
+
+                ;; decrement the stack pointer, setting our local to the
+                ;; lowest address of our stack
+                get_global 0
+                i32.const 16
+                i32.sub
+                set_local 0
+                get_local 0
+                set_global 0
+
+                ;; store 1 at fp+0
+                get_local 0
+                i32.const 1
+                i32.store offset=0
+
+                ;; store 2 at fp+4
+                get_local 0
+                i32.const 2
+                i32.store offset=4
+
+                ;; store 3 at fp+8
+                get_local 0
+                i32.const 3
+                i32.store offset=8
+
+                ;; load fp+0 and call
+                get_local 0
+                i32.load offset=0
+                call $__wbindgen_describe
+
+                ;; load fp+4 and call
+                get_local 0
+                i32.load offset=4
+                call $__wbindgen_describe
+
+                ;; load fp+8 and call
+                get_local 0
+                i32.load offset=8
+                call $__wbindgen_describe
+
+                ;; increment our stack pointer
+                get_local 0
+                i32.const 16
+                i32.add
+                set_global 0
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[1, 2, 3]));
+}
+
+// A call to a function defined in the module itself is interpreted
+// recursively; the callee's `__wbindgen_describe` call is still recorded.
+#[test]
+fn calling_functions() {
+    let wat = r#"
+        (module
+            (import "__wbindgen_placeholder__" "__wbindgen_describe"
+              (func $__wbindgen_describe (param i32)))
+
+            (global i32 (i32.const 0))
+            (memory 1)
+
+            (func $foo
+                call $bar
+            )
+
+            (func $bar
+                i32.const 0
+                call $__wbindgen_describe
+            )
+
+            (export "foo" (func $foo))
+        )
+    "#;
+    interpret(wat, "foo", Some(&[0]));
+}
diff --git a/src/describe.rs b/src/describe.rs
index 34c90e2ba..cbc0f7c9f 100644
--- a/src/describe.rs
+++ b/src/describe.rs
@@ -41,6 +41,7 @@ tys! {
     OPTIONAL
 }
 
+#[inline(always)] // see `interpret.rs` in the cli-support crate
 pub fn inform(a: u32) {
     unsafe { super::__wbindgen_describe(a) }
 }
@@ -130,8 +131,15 @@ if_std! {
     }
 }
 
-fn _cnt() -> u32 {
-    1
+macro_rules! cnt {
+    () => (0);
+    (A) => (1);
+    (A B) => (2);
+    (A B C) => (3);
+    (A B C D) => (4);
+    (A B C D E) => (5);
+    (A B C D E F) => (6);
+    (A B C D E F G) => (7);
 }
 
 macro_rules! doit {
@@ -142,7 +150,7 @@ macro_rules! doit {
     {
         fn describe() {
             inform(FUNCTION);
-            inform(0 $(+ _cnt::<$var>())*);
+            inform(cnt!($($var)*));
             $(<$var as WasmDescribe>::describe();)*
             inform(1);
             ::describe();
@@ -154,7 +162,7 @@ macro_rules! doit {
     {
         fn describe() {
             inform(FUNCTION);
-            inform(0 $(+ _cnt::<$var>())*);
+            inform(cnt!($($var)*));
             $(<$var as WasmDescribe>::describe();)*
             inform(0);
         }
@@ -166,7 +174,7 @@ macro_rules! doit {
     {
         fn describe() {
             inform(FUNCTION);
-            inform(0 $(+ _cnt::<$var>())*);
+            inform(cnt!($($var)*));
             $(<$var as WasmDescribe>::describe();)*
             inform(1);
             ::describe();
@@ -178,7 +186,7 @@ macro_rules! doit {
     {
         fn describe() {
             inform(FUNCTION);
-            inform(0 $(+ _cnt::<$var>())*);
+            inform(cnt!($($var)*));
             $(<$var as WasmDescribe>::describe();)*
             inform(0);
         }