Create the wasm-bindgen-wasm-conventions crate

This tiny crate provides utilities for working with Wasm codegen conventions (typically established by LLVM or lld) such as getting the shadow stack pointer. It also de-duplicates all the places in the codebase where we were implementing these conventions in one-off ways.
2024-12-14 20:11:37 +03:00 · 2019-09-16 11:18:06 -07:00 · 2019-09-16 11:18:06 -07:00 · 2fbb8359e0
commit 2fbb8359e0
parent 68af85d001
7 changed files with 173 additions and 121 deletions
--- a/crates/cli-support/Cargo.toml
+++ b/crates/cli-support/Cargo.toml
@ -23,5 +23,6 @@ wasm-bindgen-anyref-xform = { path = '../anyref-xform', version = '=0.2.50' }
 wasm-bindgen-shared = { path = "../shared", version = '=0.2.50' }
 wasm-bindgen-multi-value-xform = { path = '../multi-value-xform', version = '=0.2.50' }
 wasm-bindgen-threads-xform = { path = '../threads-xform', version = '=0.2.50' }
+wasm-bindgen-wasm-conventions = { path = '../wasm-conventions', version = '=0.2.50' }
 wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2.50' }
 wasm-webidl-bindings = "0.5.0"
--- a/crates/cli-support/src/lib.rs
+++ b/crates/cli-support/src/lib.rs
@ -8,6 +8,7 @@ use std::mem;
 use std::path::{Path, PathBuf};
 use std::str;
 use walrus::Module;
+use wasm_bindgen_wasm_conventions as wasm_conventions;

 mod anyref;
 mod decode;
@ -278,18 +279,13 @@ impl Bindgen {
            }
        };

-        // Our multi-value xform relies on the presence of the stack pointer, so
-        // temporarily export it so that our many GC's don't remove it before
-        // the xform runs.
-        if self.multi_value {
-            // Assume that the first global is the shadow stack pointer, since that is
-            // what LLVM codegens.
-            match module.globals.iter().next() {
-                Some(g) if g.ty == walrus::ValType::I32 => {
-                    module.exports.add("__shadow_stack_pointer", g.id());
-                }
-                _ => {}
-            }
+        // Our threads and multi-value xforms rely on the presence of the stack
+        // pointer, so temporarily export it so that our many GC's don't remove
+        // it before the xform runs.
+        let mut exported_shadow_stack_pointer = false;
+        if self.multi_value || self.threads.is_enabled() {
+            wasm_conventions::export_shadow_stack_pointer(&mut module)?;
+            exported_shadow_stack_pointer = true;
        }

         // This isn't the hardest thing in the world to support but we
@ -387,6 +383,17 @@ impl Bindgen {
            }
        }

+        // If we exported the shadow stack pointer earlier, remove it from the
+        // export set now.
+        if exported_shadow_stack_pointer {
+            wasm_conventions::unexport_shadow_stack_pointer(&mut module)?;
+            // The shadow stack pointer is potentially unused now, but since it
+            // most likely _is_ in use, we don't pay the cost of a full GC here
+            // just to remove one potentially unnecessary global.
+            //
+            // walrus::passes::gc::run(&mut module);
+        }
+
        Ok(Output {
            module,
            stem: stem.to_string(),
--- a/crates/cli-support/src/webidl/standard.rs
+++ b/crates/cli-support/src/webidl/standard.rs
@ -53,9 +53,10 @@ use crate::descriptor::VectorKind;
 use crate::webidl::{AuxExportKind, AuxImport, AuxValue, JsImport, JsImportName};
 use crate::webidl::{NonstandardIncoming, NonstandardOutgoing};
 use crate::webidl::{NonstandardWebidlSection, WasmBindgenAux};
-use failure::{bail, format_err, Error, ResultExt};
-use walrus::{GlobalId, MemoryId, Module};
+use failure::{bail, Error, ResultExt};
+use walrus::Module;
 use wasm_bindgen_multi_value_xform as multi_value_xform;
+use wasm_bindgen_wasm_conventions as wasm_conventions;
 use wasm_webidl_bindings::ast;

 pub fn add_multi_value(
@ -79,8 +80,8 @@ pub fn add_multi_value(
        return Ok(());
    }

-    let memory = get_memory(module)?;
-    let shadow_stack_pointer = get_shadow_stack_pointer(module)?;
+    let shadow_stack_pointer = wasm_conventions::get_shadow_stack_pointer(module)?;
+    let memory = wasm_conventions::get_memory(module)?;
    multi_value_xform::run(module, memory, shadow_stack_pointer, &to_xform)?;

    // Finally, unset `return_via_outptr`, fix up its incoming bindings'
@ -163,43 +164,6 @@ fn fixup_binding_argument_gets(incoming: &mut [NonstandardIncoming]) -> Result<(
    }
 }

-fn get_memory(module: &Module) -> Result<MemoryId, Error> {
-    let mut memories = module.memories.iter().map(|m| m.id());
-    let memory = memories.next();
-    if memories.next().is_some() {
-        bail!(
-            "expected a single memory, found multiple; multiple memories \
-             currently not supported"
-        );
-    }
-    memory.ok_or_else(|| {
-        format_err!(
-            "module does not have a memory; must have a memory \
-             to transform return pointers into Wasm multi-value"
-        )
-    })
-}
-
-// Get the `__shadow_stack_pointer` global that we stashed in an export early on
-// in the pipeline.
-fn get_shadow_stack_pointer(module: &mut Module) -> Result<GlobalId, Error> {
-    let (g, e) = module
-        .exports
-        .iter()
-        .find(|e| e.name == "__shadow_stack_pointer")
-        .map(|e| {
-            let g = match e.item {
-                walrus::ExportItem::Global(g) => g,
-                _ => unreachable!(),
-            };
-            (g, e.id())
-        })
-        .ok_or_else(|| format_err!("module does not have a shadow stack pointer"))?;
-
-    module.exports.delete(e);
-    Ok(g)
-}
-
 pub fn add_section(
    module: &mut Module,
    aux: &WasmBindgenAux,
--- a/crates/threads-xform/Cargo.toml
+++ b/crates/threads-xform/Cargo.toml
@ -14,3 +14,4 @@ edition = "2018"
 [dependencies]
 failure = "0.1"
 walrus = "0.12.0"
+wasm-bindgen-wasm-conventions = { path = "../wasm-conventions", version = "=0.2.50" }
--- a/crates/threads-xform/src/lib.rs
+++ b/crates/threads-xform/src/lib.rs
@ -7,6 +7,7 @@ use failure::{bail, format_err, Error};
 use walrus::ir::Value;
 use walrus::{DataId, FunctionId, InitExpr, ValType};
 use walrus::{ExportItem, GlobalId, GlobalKind, ImportKind, MemoryId, Module};
+use wasm_bindgen_wasm_conventions as wasm_conventions;

 const PAGE_SIZE: u32 = 1 << 16;

@ -16,6 +17,7 @@ const PAGE_SIZE: u32 = 1 << 16;
 pub struct Config {
    maximum_memory: u32,
    thread_stack_size: u32,
+    enabled: bool,
 }

 impl Config {
@ -24,9 +26,15 @@ impl Config {
        Config {
            maximum_memory: 1 << 30,    // 1GB
            thread_stack_size: 1 << 20, // 1MB
+            enabled: env::var("WASM_BINDGEN_THREADS").is_ok(),
        }
    }

+    /// Is threaded Wasm enabled?
+    ///
+    /// Returns the value of this configuration's `enabled` flag. In this
+    /// change that flag is initialized from whether the
+    /// `WASM_BINDGEN_THREADS` environment variable is set.
+    pub fn is_enabled(&self) -> bool {
+        self.enabled
+    }
+
    /// Specify the maximum amount of memory the wasm module can ever have.
    ///
    /// We'll be specifying that the memory for this wasm module is shared, and
@ -79,18 +87,22 @@ impl Config {
    ///
    /// More and/or less may happen here over time, stay tuned!
    pub fn run(&self, module: &mut Module) -> Result<(), Error> {
+        if !self.enabled {
+            return Ok(());
+        }
+
        // Compatibility with older LLVM outputs. Newer LLVM outputs, when
        // atomics are enabled, emit a shared memory. That's a good indicator
        // that we have work to do. If shared memory isn't enabled, though then
        // this isn't an atomic module so there's nothing to do. We still allow,
        // though, an environment variable to force us to go down this path to
         // remain compatible with older LLVM outputs.
-        let memory = find_memory(module)?;
-        if !module.memories.get(memory).shared && env::var("WASM_BINDGEN_THREADS").is_err() {
+        let memory = wasm_conventions::get_memory(module)?;
+        if !module.memories.get(memory).shared {
            return Ok(());
        }

-        let stack_pointer = find_stack_pointer(module)?;
+        let stack_pointer = wasm_conventions::get_shadow_stack_pointer(module)?;
        let addr = allocate_static_data(module, memory, 4, 4)?;
        let zero = InitExpr::Value(Value::I32(0));
        let globals = Globals {
@ -207,17 +219,6 @@ fn switch_data_segments_to_passive(
    Ok(ret)
 }

-fn find_memory(module: &mut Module) -> Result<MemoryId, Error> {
-    let mut memories = module.memories.iter();
-    let memory = memories
-        .next()
-        .ok_or_else(|| format_err!("currently incompatible with no memory modules"))?;
-    if memories.next().is_some() {
-        bail!("only one memory is currently supported");
-    }
-    Ok(memory.id())
-}
-
 fn update_memory(module: &mut Module, memory: MemoryId, max: u32) -> Result<MemoryId, Error> {
    assert!(max % PAGE_SIZE == 0);
    let memory = module.memories.get_mut(memory);
@ -313,37 +314,6 @@ fn allocate_static_data(
    Ok(address)
 }

-fn find_stack_pointer(module: &mut Module) -> Result<Option<GlobalId>, Error> {
-    let candidates = module
-        .globals
-        .iter()
-        .filter(|g| g.ty == ValType::I32)
-        .filter(|g| g.mutable)
-        .filter(|g| match g.kind {
-            GlobalKind::Local(_) => true,
-            GlobalKind::Import(_) => false,
-        })
-        .collect::<Vec<_>>();
-
-    if candidates.len() == 0 {
-        return Ok(None);
-    }
-    if candidates.len() > 2 {
-        bail!("too many mutable globals to infer the stack pointer");
-    }
-    if candidates.len() == 1 {
-        return Ok(Some(candidates[0].id()));
-    }
-
-    // If we've got two mutable globals then we're in a pretty standard
-    // situation for threaded code where one is the stack pointer and one is the
-    // TLS base offset. We need to figure out which is which, and we basically
-    // assume LLVM's current codegen where the first is the stack pointer.
-    //
-    // TODO: have an actual check here.
-    Ok(Some(candidates[0].id()))
-}
-
 enum InitMemory {
    Segments(Vec<PassiveSegment>),
    Call {
@ -358,7 +328,7 @@ fn inject_start(
    memory_init: InitMemory,
    globals: &Globals,
    addr: u32,
-    stack_pointer: Option<GlobalId>,
+    stack_pointer: GlobalId,
    stack_size: u32,
    memory: MemoryId,
 ) -> Result<(), Error> {
@ -393,30 +363,28 @@ fn inject_start(
        // we give ourselves a stack via memory.grow and we update our stack
        // pointer as the default stack pointer is surely wrong for us.
        |body| {
-            if let Some(stack_pointer) = stack_pointer {
-                // local0 = grow_memory(stack_size);
-                body.i32_const((stack_size / PAGE_SIZE) as i32)
-                    .memory_grow(memory)
-                    .local_set(local);
+            // local0 = grow_memory(stack_size);
+            body.i32_const((stack_size / PAGE_SIZE) as i32)
+                .memory_grow(memory)
+                .local_set(local);

-                // if local0 == -1 then trap
-                body.block(None, |body| {
-                    let target = body.id();
-                    body.local_get(local)
-                        .i32_const(-1)
-                        .binop(BinaryOp::I32Ne)
-                        .br_if(target)
-                        .unreachable();
-                });
-
-                // stack_pointer = local0 + stack_size
+            // if local0 == -1 then trap
+            body.block(None, |body| {
+                let target = body.id();
                body.local_get(local)
-                    .i32_const(PAGE_SIZE as i32)
-                    .binop(BinaryOp::I32Mul)
-                    .i32_const(stack_size as i32)
-                    .binop(BinaryOp::I32Add)
-                    .global_set(stack_pointer);
-            }
+                    .i32_const(-1)
+                    .binop(BinaryOp::I32Ne)
+                    .br_if(target)
+                    .unreachable();
+            });
+
+            // stack_pointer = local0 + stack_size
+            body.local_get(local)
+                .i32_const(PAGE_SIZE as i32)
+                .binop(BinaryOp::I32Mul)
+                .i32_const(stack_size as i32)
+                .binop(BinaryOp::I32Add)
+                .global_set(stack_pointer);
        },
        // If the thread ID is zero then we can skip the update of the stack
        // pointer as we know our stack pointer is valid. We need to initialize
--- a/crates/wasm-conventions/Cargo.toml
+++ b/crates/wasm-conventions/Cargo.toml
@ -0,0 +1,16 @@
+[package]
+name = "wasm-bindgen-wasm-conventions"
+version = "0.2.50"
+authors = ["The wasm-bindgen developers"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/wasm-conventions"
+homepage = "https://rustwasm.github.io/wasm-bindgen/"
+documentation = "https://docs.rs/wasm-bindgen-wasm-conventions"
+description = "Utilities for working with Wasm codegen conventions (usually established by LLVM/lld)"
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+walrus = "0.12.0"
+failure = "0.1.5"
--- a/crates/wasm-conventions/src/lib.rs
+++ b/crates/wasm-conventions/src/lib.rs
@ -0,0 +1,95 @@
+//! A tiny crate of utilities for working with implicit Wasm codegen conventions
+//! (often established by LLVM and lld).
+//!
+//! Example conventions include:
+//!
+//! * The shadow stack pointer
+//! * The canonical linear memory that contains the shadow stack
+
+#![deny(missing_docs, missing_debug_implementations)]
+
+use failure::{bail, format_err, Error};
+use walrus::{GlobalId, GlobalKind, MemoryId, Module, ValType};
+
+/// Get a Wasm module's canonical linear memory.
+///
+/// Returns the id of the module's one and only memory.
+///
+/// # Errors
+///
+/// Returns an error if the module declares more than one memory, or if it
+/// declares no memory at all.
+pub fn get_memory(module: &Module) -> Result<MemoryId, Error> {
+    let mut memories = module.memories.iter().map(|m| m.id());
+    let memory = memories.next();
+    if memories.next().is_some() {
+        bail!(
+            "expected a single memory, found multiple; multiple memories \
+             currently not supported"
+        );
+    }
+    memory.ok_or_else(|| {
+        // This helper is shared by more than one transform, so the message
+        // must not single out any particular caller.
+        format_err!(
+            "module does not have a memory; a memory is required by the \
+             threads and multi-value transforms"
+        )
+    })
+}
+
+/// Locate the module's shadow stack pointer global and export it under the
+/// name `__shadow_stack_pointer`.
+///
+/// Keeping the global in the export set makes sure it doesn't get GC'd.
+///
+/// # Errors
+///
+/// Fails when the module has no locally-defined mutable `i32` global, or
+/// when it has more than two of them.
+pub fn export_shadow_stack_pointer(module: &mut Module) -> Result<(), Error> {
+    // Only locally-defined (non-imported), mutable `i32` globals qualify.
+    let candidate_ids: Vec<GlobalId> = module
+        .globals
+        .iter()
+        .filter(|global| {
+            let defined_locally = match global.kind {
+                GlobalKind::Local(_) => true,
+                GlobalKind::Import(_) => false,
+            };
+            global.ty == ValType::I32 && global.mutable && defined_locally
+        })
+        .map(|global| global.id())
+        .collect();
+
+    if candidate_ids.is_empty() {
+        bail!("could not find the shadow stack pointer for the module");
+    }
+    if candidate_ids.len() > 2 {
+        bail!("too many mutable globals to infer which is the shadow stack pointer");
+    }
+
+    // With two candidates we are in the usual situation for threaded code:
+    // one global is the stack pointer and the other is the TLS base offset.
+    // We assume LLVM's current codegen, where the stack pointer comes first.
+    //
+    // TODO: have an actual check here.
+    let stack_pointer = candidate_ids[0];
+    module.exports.add("__shadow_stack_pointer", stack_pointer);
+    Ok(())
+}
+
+/// Remove the `__shadow_stack_pointer` export that was previously added to
+/// the module's export set.
+///
+/// # Errors
+///
+/// Fails if the module has no export named `__shadow_stack_pointer`.
+pub fn unexport_shadow_stack_pointer(module: &mut Module) -> Result<(), Error> {
+    let lookup = module
+        .exports
+        .iter()
+        .find(|export| export.name == "__shadow_stack_pointer");
+
+    // Copy the id out so the borrow of the export set ends before deletion.
+    let export_id = match lookup {
+        Some(export) => export.id(),
+        None => {
+            return Err(format_err!(
+                "did not find the `__shadow_stack_pointer` export in the module"
+            ))
+        }
+    };
+
+    module.exports.delete(export_id);
+    Ok(())
+}
+
+/// Look up the global exported as `__shadow_stack_pointer`.
+///
+/// The export must have been added beforehand via
+/// `export_shadow_stack_pointer`.
+///
+/// # Errors
+///
+/// Fails if no export with that name exists, or if the export is not a
+/// global.
+pub fn get_shadow_stack_pointer(module: &Module) -> Result<GlobalId, Error> {
+    let export = module
+        .exports
+        .iter()
+        .find(|export| export.name == "__shadow_stack_pointer")
+        .ok_or_else(|| {
+            format_err!("did not find the `__shadow_stack_pointer` export in the module")
+        })?;
+
+    match export.item {
+        walrus::ExportItem::Global(global) => Ok(global),
+        _ => bail!("`__shadow_stack_pointer` export is wrong kind"),
+    }
+}