Remove dependency on wasmi

This is a pretty heavyweight dependency which accounts for a surprising amount
of runtime for larger modules in `wasm-bindgen`. We don't need 90% of the crate
and so this commit bundles a small interpreter for instructions we know are only
going to appear in describe-related functions.
This commit is contained in:
Alex Crichton 2018-08-19 17:07:30 -07:00
parent 7486fa5104
commit 6343f2659a
8 changed files with 503 additions and 134 deletions

View File

@ -18,5 +18,5 @@ serde = "1.0"
serde_json = "1.0" serde_json = "1.0"
tempfile = "3.0" tempfile = "3.0"
wasm-bindgen-shared = { path = "../shared", version = '=0.2.17' } wasm-bindgen-shared = { path = "../shared", version = '=0.2.17' }
wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2.17' }
wasm-gc-api = "0.1.9" wasm-gc-api = "0.1.9"
wasmi = "0.3"

View File

@ -94,7 +94,7 @@ pub enum VectorKind {
impl Descriptor { impl Descriptor {
pub fn decode(mut data: &[u32]) -> Descriptor { pub fn decode(mut data: &[u32]) -> Descriptor {
let descriptor = Descriptor::_decode(&mut data); let descriptor = Descriptor::_decode(&mut data);
assert!(data.is_empty()); assert!(data.is_empty(), "remaining data {:?}", data);
descriptor descriptor
} }

View File

@ -10,6 +10,7 @@ use wasm_gc;
use super::Bindgen; use super::Bindgen;
use descriptor::{Descriptor, VectorKind}; use descriptor::{Descriptor, VectorKind};
use wasm_interpreter::Interpreter;
mod js2rust; mod js2rust;
use self::js2rust::Js2Rust; use self::js2rust::Js2Rust;
@ -41,7 +42,7 @@ pub struct Context<'a> {
pub exported_classes: HashMap<String, ExportedClass>, pub exported_classes: HashMap<String, ExportedClass>,
pub function_table_needed: bool, pub function_table_needed: bool,
pub run_descriptor: &'a Fn(&str) -> Option<Vec<u32>>, pub interpreter: &'a mut Interpreter,
} }
#[derive(Default)] #[derive(Default)]
@ -1668,9 +1669,9 @@ impl<'a> Context<'a> {
Ok(()) Ok(())
} }
fn describe(&self, name: &str) -> Option<Descriptor> { fn describe(&mut self, name: &str) -> Option<Descriptor> {
let name = format!("__wbindgen_describe_{}", name); let name = format!("__wbindgen_describe_{}", name);
(self.run_descriptor)(&name).map(|d| Descriptor::decode(&d)) Some(Descriptor::decode(self.interpreter.interpret(&name, self.module)?))
} }
fn global(&mut self, s: &str) { fn global(&mut self, s: &str) {

View File

@ -4,14 +4,13 @@ extern crate parity_wasm;
extern crate wasm_bindgen_shared as shared; extern crate wasm_bindgen_shared as shared;
extern crate serde_json; extern crate serde_json;
extern crate wasm_gc; extern crate wasm_gc;
extern crate wasmi;
#[macro_use] #[macro_use]
extern crate failure; extern crate failure;
extern crate wasm_bindgen_wasm_interpreter as wasm_interpreter;
use std::any::Any; use std::any::Any;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::env; use std::env;
use std::fmt;
use std::fs; use std::fs;
use std::mem; use std::mem;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -184,13 +183,7 @@ impl Bindgen {
// This means that whenever we encounter an import or export we'll // This means that whenever we encounter an import or export we'll
// execute a shim function which informs us about its type so we can // execute a shim function which informs us about its type so we can
// then generate the appropriate bindings. // then generate the appropriate bindings.
// let mut instance = wasm_interpreter::Interpreter::new(&module);
// TODO: avoid a `clone` here of the module if we can
let instance = wasmi::Module::from_parity_wasm_module(module.clone())
.with_context(|_| "failed to create wasmi module")?;
let instance = wasmi::ModuleInstance::new(&instance, &MyResolver)
.with_context(|_| "failed to instantiate wasm module")?;
let instance = instance.not_started_instance();
let (js, ts) = { let (js, ts) = {
let mut cx = js::Context { let mut cx = js::Context {
@ -206,20 +199,7 @@ impl Bindgen {
config: &self, config: &self,
module: &mut module, module: &mut module,
function_table_needed: false, function_table_needed: false,
run_descriptor: &|name| { interpreter: &mut instance,
let mut v = MyExternals(Vec::new());
match instance.invoke_export(name, &[], &mut v) {
Ok(None) => Some(v.0),
Ok(Some(_)) => unreachable!(
"there is only one export, and we only return None from it"
),
// Allow missing exported describe functions. This can
// happen when a nested dependency crate exports things
// but the root crate doesn't use them.
Err(wasmi::Error::Function(_)) => None,
Err(e) => panic!("unexpected error running descriptor: {}", e),
}
},
}; };
for program in programs.iter() { for program in programs.iter() {
js::SubContext { js::SubContext {
@ -409,106 +389,6 @@ to open an issue at https://github.com/rustwasm/wasm-bindgen/issues!
Ok(ret) Ok(ret)
} }
struct MyResolver;
impl wasmi::ImportResolver for MyResolver {
fn resolve_func(
&self,
module_name: &str,
field_name: &str,
signature: &wasmi::Signature,
) -> Result<wasmi::FuncRef, wasmi::Error> {
// Route our special "describe" export to 1 and everything else to 0.
// That way whenever the function 1 is invoked we know what to do and
// when 0 is invoked (by accident) we'll trap and produce an error.
let idx = (module_name == "__wbindgen_placeholder__" && field_name == "__wbindgen_describe")
as usize;
Ok(wasmi::FuncInstance::alloc_host(signature.clone(), idx))
}
fn resolve_global(
&self,
_module_name: &str,
_field_name: &str,
descriptor: &wasmi::GlobalDescriptor,
) -> Result<wasmi::GlobalRef, wasmi::Error> {
// dummy implementation to ensure instantiation succeeds
let val = match descriptor.value_type() {
wasmi::ValueType::I32 => wasmi::RuntimeValue::I32(0),
wasmi::ValueType::I64 => wasmi::RuntimeValue::I64(0),
wasmi::ValueType::F32 => wasmi::RuntimeValue::F32(0.0.into()),
wasmi::ValueType::F64 => wasmi::RuntimeValue::F64(0.0.into()),
};
Ok(wasmi::GlobalInstance::alloc(val, descriptor.is_mutable()))
}
fn resolve_memory(
&self,
_module_name: &str,
_field_name: &str,
descriptor: &wasmi::MemoryDescriptor,
) -> Result<wasmi::MemoryRef, wasmi::Error> {
// dummy implementation to ensure instantiation succeeds
use wasmi::memory_units::Pages;
let initial = Pages(descriptor.initial() as usize);
let maximum = descriptor.maximum().map(|i| Pages(i as usize));
wasmi::MemoryInstance::alloc(initial, maximum)
}
fn resolve_table(
&self,
_module_name: &str,
_field_name: &str,
descriptor: &wasmi::TableDescriptor,
) -> Result<wasmi::TableRef, wasmi::Error> {
// dummy implementation to ensure instantiation succeeds
let initial = descriptor.initial();
let maximum = descriptor.maximum();
wasmi::TableInstance::alloc(initial, maximum)
}
}
struct MyExternals(Vec<u32>);
#[derive(Debug)]
struct MyError(String);
impl wasmi::Externals for MyExternals {
fn invoke_index(
&mut self,
index: usize,
args: wasmi::RuntimeArgs,
) -> Result<Option<wasmi::RuntimeValue>, wasmi::Trap> {
macro_rules! bail {
($($t:tt)*) => ({
let s = MyError(format!($($t)*));
return Err(wasmi::Trap::new(wasmi::TrapKind::Host(Box::new(s))))
})
}
// We only recognize one function here which was mapped to the index 1
// by the resolver above.
if index != 1 {
bail!("only __wbindgen_describe can be run at this time")
}
if args.len() != 1 {
bail!("must have exactly one argument");
}
match args.nth_value_checked(0)? {
wasmi::RuntimeValue::I32(i) => self.0.push(i as u32),
_ => bail!("expected one argument of i32 type"),
}
Ok(None)
}
}
impl wasmi::HostError for MyError {}
impl fmt::Display for MyError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
fn reset_indentation(s: &str) -> String { fn reset_indentation(s: &str) -> String {
let mut indent: u32 = 0; let mut indent: u32 = 0;
let mut dst = String::new(); let mut dst = String::new();

View File

@ -0,0 +1,17 @@
[package]
name = "wasm-bindgen-wasm-interpreter"
version = "0.2.17"
authors = ["The wasm-bindgen Developers"]
license = "MIT/Apache-2.0"
repository = "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/wasm-interpreter"
homepage = "https://rustwasm.github.io/wasm-bindgen/"
documentation = "https://docs.rs/wasm-bindgen-wasm-interpreter"
description = """
Micro-interpreter optimized for wasm-bindgen's use case
"""
[dependencies]
parity-wasm = "0.31"
[dev-dependencies]
tempfile = "3"

View File

@ -0,0 +1,231 @@
//! A tiny and incomplete wasm interpreter
//!
//! This module contains a tiny and incomplete wasm interpreter built on top of
//! `parity-wasm`'s module structure. Each `Interpreter` contains some state
//! about the execution of a wasm instance. The "incomplete" part here is
//! related to the fact that this is *only* used to execute the various
//! descriptor functions for wasm-bindgen.
//!
//! As a recap, the wasm-bindgen macro generates "descriptor functions" which
//! basically act as a mapping of rustc's trait resolution in executable code.
//! This allows us to detect, after the macro is invoked, what trait selection
//! did and what types of functions look like. By executing descriptor functions
//! they'll each invoke a known import (with only one argument) some number of
//! times, which gives us a list of `u32` values to then decode.
//!
//! The interpreter here is only geared towards this one exact use case, so it's
//! quite small and likely not extra-efficient.
#![deny(missing_docs)]
extern crate parity_wasm;
use std::collections::HashMap;
use parity_wasm::elements::*;
/// Reusable execution state for running descriptor functions of a wasm module.
///
/// The interpreter is built once from a `Module` and then reused for every
/// call to `interpret`. Besides the lookup tables computed up front it also
/// owns the mutable machine state: the value stack, a small linear memory and
/// the stack pointer.
#[derive(Default)]
pub struct Interpreter {
    // How many functions the module imports. Function bodies in the code
    // section are found by subtracting this from a function index.
    imports: usize,

    // Function index of the imported `__wbindgen_describe`, if the module
    // imports it. A call to this index is intercepted rather than interpreted.
    describe_idx: Option<u32>,

    // Exported function name -> function index, for every exported function.
    name_map: HashMap<String, u32>,

    // Position of the code section within the module's section list, if the
    // module has one.
    code_idx: Option<usize>,

    // Value of global 0, which is treated as the stack pointer, plus the
    // memory it points into. The memory is stored as `i32` words.
    sp: i32,
    mem: Vec<i32>,

    // The wasm operand stack. Only `i32` values are supported, on purpose.
    stack: Vec<i32>,

    // The `u32` values handed to `__wbindgen_describe` so far during the
    // current interpretation. Producing this list is the whole point of the
    // interpreter.
    descriptor: Vec<u32>,
}
impl Interpreter {
/// Creates a new interpreter from a provided `Module`, precomputing all
/// information necessary to interpret further.
///
/// Note that the `module` passed in to this function must be the same as
/// the `module` passed to `interpret` below.
pub fn new(module: &Module) -> Interpreter {
let mut ret = Interpreter::default();
// The descriptor functions shouldn't really use all that much memory
// (the LLVM call stack, now the wasm stack). To handle that let's give
// our selves a little bit of memory and set the stack pointer (global
// 0) to the top.
ret.mem = vec![0; 0x100];
ret.sp = ret.mem.len() as i32;
// Figure out where our code section, if any, is.
for (i, s) in module.sections().iter().enumerate() {
match s {
Section::Code(_) => ret.code_idx = Some(i),
_ => {}
}
}
// Figure out where the `__wbindgen_describe` imported function is, if
// it exists. We'll special case calls to this function as our
// interpretation should only invoke this function as an imported
// function.
if let Some(i) = module.import_section() {
ret.imports = i.functions();
for (i, entry) in i.entries().iter().enumerate() {
if entry.module() != "__wbindgen_placeholder__" {
continue
}
if entry.field() != "__wbindgen_describe" {
continue
}
ret.describe_idx = Some(i as u32);
}
}
// Build up the mapping of exported functions to function indices.
if let Some(e) = module.export_section() {
for e in e.entries() {
let i = match e.internal() {
Internal::Function(i) => i,
_ => continue,
};
ret.name_map.insert(e.field().to_string(), *i);
}
}
return ret
}
/// Interprets the execution of the descriptor function `func`.
///
/// This function will execute `func` in the `module` provided. Note that
/// the `module` provided here must be the same as the one passed to `new`
/// when this `Interpreter` was constructed.
///
/// The `func` must be a wasm-bindgen descriptor function meaning that it
/// doesn't do anything like use floats or i64. Instead all it should do is
/// call other functions, sometimes some stack pointer manipulation, and
/// then call the one imported `__wbindgen_describe` function. Anything else
/// will cause this interpreter to panic.
///
/// When the descriptor has finished running the assembled descriptor list
/// is returned. The descriptor returned can then be re-parsed into an
/// actual `Descriptor` in the cli-support crate.
///
/// # Return value
///
/// Returns `Some` if `func` was found in the `module` and `None` if it was
/// not found in the `module`.
pub fn interpret(&mut self, func: &str, module: &Module) -> Option<&[u32]> {
self.descriptor.truncate(0);
let idx = *self.name_map.get(func)?;
let code = match &module.sections()[self.code_idx.unwrap()] {
Section::Code(s) => s,
_ => panic!(),
};
// We should have a blank wasm and LLVM stack at both the start and end
// of the call.
assert_eq!(self.sp, self.mem.len() as i32);
assert_eq!(self.stack.len(), 0);
self.call(idx, code);
assert_eq!(self.stack.len(), 0);
assert_eq!(self.sp, self.mem.len() as i32);
Some(&self.descriptor)
}
fn call(&mut self, idx: u32, code: &CodeSection) {
use parity_wasm::elements::Instruction::*;
let idx = idx as usize;
assert!(idx >= self.imports); // can't call imported functions
let body = &code.bodies()[idx - self.imports];
// Allocate space for our call frame's local variables. All local
// variables should be of the `i32` type.
assert!(body.locals().len() <= 1, "too many local types");
let locals = body.locals()
.get(0)
.map(|i| {
assert_eq!(i.value_type(), ValueType::I32);
i.count()
})
.unwrap_or(0);
let mut locals = vec![0; locals as usize];
// Actual interpretation loop! We keep track of our stack's length to
// recover it as part of the `Return` instruction, and otherwise this is
// a pretty straightforward interpretation loop.
let before = self.stack.len();
for instr in body.code().elements() {
match instr {
I32Const(x) => self.stack.push(*x),
SetLocal(i) => locals[*i as usize] = self.stack.pop().unwrap(),
GetLocal(i) => self.stack.push(locals[*i as usize]),
Call(idx) => {
if Some(*idx) == self.describe_idx {
self.descriptor.push(self.stack.pop().unwrap() as u32);
} else {
self.call(*idx, code);
}
}
GetGlobal(0) => self.stack.push(self.sp),
SetGlobal(0) => self.sp = self.stack.pop().unwrap(),
I32Sub => {
let b = self.stack.pop().unwrap();
let a = self.stack.pop().unwrap();
self.stack.push(a - b);
}
I32Add => {
let a = self.stack.pop().unwrap();
let b = self.stack.pop().unwrap();
self.stack.push(a + b);
}
I32Store(/* align = */ 2, offset) => {
let val = self.stack.pop().unwrap();
let addr = self.stack.pop().unwrap() as u32;
self.mem[((addr + *offset) as usize) / 4] = val;
}
I32Load(/* align = */ 2, offset) => {
let addr = self.stack.pop().unwrap() as u32;
self.stack.push(self.mem[((addr + *offset) as usize) / 4]);
}
Return => self.stack.truncate(before),
End => break,
// All other instructions shouldn't be used by our various
// descriptor functions. LLVM optimizations may mean that some
// of the above instructions aren't actually needed either, but
// the above instructions have empirically been required when
// executing our own test suite in wasm-bindgen.
//
// Note that LLVM may change over time to generate new
// instructions in debug mode, and we'll have to react to those
// sorts of changes as they arise.
s => panic!("unknown instruction {:?}", s),
}
}
assert_eq!(self.stack.len(), before);
}
}

View File

@ -0,0 +1,232 @@
extern crate parity_wasm;
extern crate tempfile;
extern crate wasm_bindgen_wasm_interpreter;
use std::fs;
use std::process::Command;
use wasm_bindgen_wasm_interpreter::Interpreter;
/// Assembles `wat` into a wasm binary with the external `wat2wasm` tool, runs
/// the export called `name` through a fresh `Interpreter`, and asserts that
/// the outcome equals `result`.
fn interpret(wat: &str, name: &str, result: Option<&[u32]>) {
    // Both files are temporary and are removed when they go out of scope.
    let wat_file = tempfile::NamedTempFile::new().unwrap();
    let wasm_file = tempfile::NamedTempFile::new().unwrap();
    fs::write(wat_file.path(), wat).unwrap();

    let mut assembler = Command::new("wat2wasm");
    assembler.arg(wat_file.path());
    assembler.arg("-o");
    assembler.arg(wasm_file.path());
    let status = assembler.status().unwrap();
    println!("status: {}", status);
    assert!(status.success());

    let module = parity_wasm::deserialize_file(wasm_file.path()).unwrap();
    let mut interpreter = Interpreter::new(&module);
    let actual = interpreter.interpret(name, &module);
    assert_eq!(actual, result);
}
/// Basic sanity checks: an exported function with an empty body yields an
/// empty descriptor, an unknown export name yields `None`, and a single
/// constant passed to `__wbindgen_describe` is recorded.
#[test]
fn smoke() {
// A module with one exported, empty function and no imports at all.
let wat = r#"
(module
(export "foo" (func $foo))
(func $foo)
)
"#;
interpret(wat, "foo", Some(&[]));
// "bar" is not exported by the module above.
interpret(wat, "bar", None);
// Same shape, but now `foo` calls the describe import with the constant 1.
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(func $foo
i32.const 1
call $__wbindgen_describe
)
(export "foo" (func $foo))
)
"#;
interpret(wat, "foo", Some(&[1]));
}
/// Checks `set_local`/`get_local`: a constant is stored into local 0, read
/// back, and passed to `__wbindgen_describe`.
#[test]
fn locals() {
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(func $foo
(local i32)
i32.const 2
set_local 0
get_local 0
call $__wbindgen_describe
)
(export "foo" (func $foo))
)
"#;
interpret(wat, "foo", Some(&[2]));
}
/// Checks `get_global 0`/`set_global 0`: the value read from global 0 is
/// described and then written back unchanged.
#[test]
fn globals() {
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(global i32 (i32.const 0))
(func $foo
(local i32)
get_global 0
set_local 0
get_local 0
call $__wbindgen_describe
get_local 0
set_global 0
)
(export "foo" (func $foo))
)
"#;
// Note the expected value is 256 even though the module initializes the
// global to 0: the interpreter supplies its own value for global 0.
interpret(wat, "foo", Some(&[256]));
}
/// Checks `i32.add` and `i32.sub`, including operand order for subtraction
/// (`2 - 1` must describe 1).
#[test]
fn arithmetic() {
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(func $foo
i32.const 1
i32.const 2
i32.add
call $__wbindgen_describe
i32.const 2
i32.const 1
i32.sub
call $__wbindgen_describe
)
(export "foo" (func $foo))
)
"#;
interpret(wat, "foo", Some(&[3, 1]));
}
/// Exercises an explicit `return` while a value is still on the stack: two
/// constants are pushed, only the top one (2) is consumed by the describe
/// call, and the function then returns.
#[test]
fn return_early() {
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(func $foo
i32.const 1
i32.const 2
call $__wbindgen_describe
return
)
(export "foo" (func $foo))
)
"#;
interpret(wat, "foo", Some(&[2]));
}
/// Checks `i32.store`/`i32.load` with static offsets: the function reserves 16
/// bytes below the stack pointer (global 0), stores 1, 2 and 3 at offsets 0, 4
/// and 8, loads each back and describes it, then restores the stack pointer.
#[test]
fn loads_and_stores() {
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(global i32 (i32.const 0))
(memory 1)
(func $foo
(local i32)
;; decrement the stack pointer, setting our local to the
;; lowest address of our stack
get_global 0
i32.const 16
i32.sub
set_local 0
get_local 0
set_global 0
;; store 1 at fp+0
get_local 0
i32.const 1
i32.store offset=0
;; store 2 at fp+4
get_local 0
i32.const 2
i32.store offset=4
;; store 3 at fp+8
get_local 0
i32.const 3
i32.store offset=8
;; load fp+0 and call
get_local 0
i32.load offset=0
call $__wbindgen_describe
;; load fp+4 and call
get_local 0
i32.load offset=4
call $__wbindgen_describe
;; load fp+8 and call
get_local 0
i32.load offset=8
call $__wbindgen_describe
;; increment our stack pointer
get_local 0
i32.const 16
i32.add
set_global 0
)
(export "foo" (func $foo))
)
"#;
interpret(wat, "foo", Some(&[1, 2, 3]));
}
/// Checks that a `call` to another function defined in the module is followed:
/// `foo` calls `bar`, and `bar` is the one that describes the value 0.
#[test]
fn calling_functions() {
let wat = r#"
(module
(import "__wbindgen_placeholder__" "__wbindgen_describe"
(func $__wbindgen_describe (param i32)))
(global i32 (i32.const 0))
(memory 1)
(func $foo
call $bar
)
(func $bar
i32.const 0
call $__wbindgen_describe
)
(export "foo" (func $foo))
)
"#;
interpret(wat, "foo", Some(&[0]));
}

View File

@ -41,6 +41,7 @@ tys! {
OPTIONAL OPTIONAL
} }
#[inline(always)] // see `interpret.rs` in the the cli-support crate
pub fn inform(a: u32) { pub fn inform(a: u32) {
unsafe { super::__wbindgen_describe(a) } unsafe { super::__wbindgen_describe(a) }
} }
@ -130,8 +131,15 @@ if_std! {
} }
} }
fn _cnt<T: WasmDescribe>() -> u32 { macro_rules! cnt {
1 () => (0);
(A) => (1);
(A B) => (2);
(A B C) => (3);
(A B C D) => (4);
(A B C D E) => (5);
(A B C D E F) => (6);
(A B C D E F G) => (7);
} }
macro_rules! doit { macro_rules! doit {
@ -142,7 +150,7 @@ macro_rules! doit {
{ {
fn describe() { fn describe() {
inform(FUNCTION); inform(FUNCTION);
inform(0 $(+ _cnt::<$var>())*); inform(cnt!($($var)*));
$(<$var as WasmDescribe>::describe();)* $(<$var as WasmDescribe>::describe();)*
inform(1); inform(1);
<R as WasmDescribe>::describe(); <R as WasmDescribe>::describe();
@ -154,7 +162,7 @@ macro_rules! doit {
{ {
fn describe() { fn describe() {
inform(FUNCTION); inform(FUNCTION);
inform(0 $(+ _cnt::<$var>())*); inform(cnt!($($var)*));
$(<$var as WasmDescribe>::describe();)* $(<$var as WasmDescribe>::describe();)*
inform(0); inform(0);
} }
@ -166,7 +174,7 @@ macro_rules! doit {
{ {
fn describe() { fn describe() {
inform(FUNCTION); inform(FUNCTION);
inform(0 $(+ _cnt::<$var>())*); inform(cnt!($($var)*));
$(<$var as WasmDescribe>::describe();)* $(<$var as WasmDescribe>::describe();)*
inform(1); inform(1);
<R as WasmDescribe>::describe(); <R as WasmDescribe>::describe();
@ -178,7 +186,7 @@ macro_rules! doit {
{ {
fn describe() { fn describe() {
inform(FUNCTION); inform(FUNCTION);
inform(0 $(+ _cnt::<$var>())*); inform(cnt!($($var)*));
$(<$var as WasmDescribe>::describe();)* $(<$var as WasmDescribe>::describe();)*
inform(0); inform(0);
} }