Switch from heap/stack to just a heap

This commit switches strategies for storing `JsValue` from a heap/stack
to just one heap. This mirrors the new strategy for `JsValue` storage
in #1002 and should make multiplexing those strategies at
`wasm-bindgen`-time much easier.

Instead of having one array which acts as a stack for borrowed values
and one array which acts as a heap for owned values, only one JS array is used
for storage of JS values now. This makes `getObject` far simpler by
simply being an array access, but it means that cloning an object now
reserves a new slot instead of reference counting it. If the old
reference counting behavior is needed it's thought that `Rc<JsValue>`
can be used in Rust.

The new "heap" has an initial stack pointer which grows downwards, and a
heap which grows upwards. The heap is a singly-linked-list which is
allocated/deallocated from. The stack grows downwards to zero and
presumably starts generating errors once it underflows. An initial stack
size of 32 is chosen as that should encompass all use cases today, but
we can eventually probably add configuration for this!

Note that the stack portion of the heap is initialized as empty slots (which
read as `undefined`) and then the initial JS values (`undefined`, `null`,
`true`, `false`) are pushed onto the heap in reserved locations.
This commit is contained in:
Alex Crichton 2018-11-29 18:15:36 -08:00
parent e746ad5a0a
commit 49d835a7bc
4 changed files with 137 additions and 250 deletions

View File

@ -392,7 +392,11 @@ impl<'a, 'b> Js2Rust<'a, 'b> {
if arg.is_ref_anyref() {
self.js_arguments.push((name.clone(), "any".to_string()));
self.cx.expose_borrowed_objects();
self.finally("stack.pop();");
self.cx.expose_global_stack_pointer();
// the "stack-ful" nature means that we're always popping from the
// stack, and make sure that we actually clear our reference to
// allow stale values to get GC'd
self.finally("heap[stack_pointer++] = undefined;");
self.rust_arguments
.push(format!("addBorrowedObject({})", name));
return Ok(self);

View File

@ -114,7 +114,9 @@ enum Import<'a> {
},
}
const INITIAL_SLAB_VALUES: &[&str] = &["undefined", "null", "true", "false"];
const INITIAL_HEAP_VALUES: &[&str] = &["undefined", "null", "true", "false"];
// Must be kept in sync with `src/lib.rs` of the `wasm-bindgen` crate
const INITIAL_HEAP_OFFSET: usize = 32;
impl<'a> Context<'a> {
fn export(&mut self, name: &str, contents: &str, comments: Option<String>) {
@ -168,44 +170,20 @@ impl<'a> Context<'a> {
self.write_classes()?;
self.bind("__wbindgen_object_clone_ref", &|me| {
me.expose_add_heap_object();
me.expose_get_object();
let bump_cnt = if me.config.debug {
String::from(
"
if (typeof(val) === 'number') throw new Error('corrupt slab');
val.cnt += 1;
",
)
} else {
String::from("val.cnt += 1;")
};
Ok(format!(
me.expose_add_heap_object();
Ok(String::from(
"
function(idx) {
return addHeapObject(getObject(idx));
}
"
function(idx) {{
// If this object is on the stack promote it to the heap.
if ((idx & 1) === 1) return addHeapObject(getObject(idx));
// Otherwise if the object is on the heap just bump the
// refcount and move on
const val = slab[idx >> 1];
{}
return idx;
}}
",
bump_cnt
))
})?;
self.bind("__wbindgen_object_drop_ref", &|me| {
me.expose_drop_ref();
Ok(String::from(
"
function(i) {
dropRef(i);
}
",
))
Ok(String::from("function(i) { dropObject(i); }"))
})?;
self.bind("__wbindgen_string_new", &|me| {
@ -222,13 +200,7 @@ impl<'a> Context<'a> {
self.bind("__wbindgen_number_new", &|me| {
me.expose_add_heap_object();
Ok(String::from(
"
function(i) {
return addHeapObject(i);
}
",
))
Ok(String::from("function(i) { return addHeapObject(i); }"))
})?;
self.bind("__wbindgen_number_get", &|me| {
@ -370,7 +342,7 @@ impl<'a> Context<'a> {
"
function(i) {
const obj = getObject(i).original;
dropRef(i);
dropObject(i);
if (obj.cnt-- == 1) {
obj.a = 0;
return 1;
@ -383,7 +355,7 @@ impl<'a> Context<'a> {
self.bind("__wbindgen_cb_forget", &|me| {
me.expose_drop_ref();
Ok("dropRef".to_string())
Ok("dropObject".to_string())
})?;
self.bind("__wbindgen_json_parse", &|me| {
@ -427,14 +399,7 @@ impl<'a> Context<'a> {
self.bind("__wbindgen_memory", &|me| {
me.expose_add_heap_object();
let mem = me.memory();
Ok(format!(
"
function() {{
return addHeapObject({});
}}
",
mem
))
Ok(format!("function() {{ return addHeapObject({}); }}", mem))
})?;
self.bind("__wbindgen_module", &|me| {
@ -916,149 +881,54 @@ impl<'a> Context<'a> {
if !self.exposed_globals.insert("drop_ref") {
return;
}
self.expose_global_slab();
self.expose_global_slab_next();
let validate_owned = if self.config.debug {
String::from(
"
if ((idx & 1) === 1) throw new Error('cannot drop ref of stack objects');
",
)
} else {
String::new()
};
let dec_ref = if self.config.debug {
String::from(
"
if (typeof(obj) === 'number') throw new Error('corrupt slab');
obj.cnt -= 1;
if (obj.cnt > 0) return;
",
)
} else {
String::from(
"
obj.cnt -= 1;
if (obj.cnt > 0) return;
",
)
};
self.expose_global_heap();
self.expose_global_heap_next();
// Note that here we check if `idx` shouldn't actually be dropped. This
// is due to the fact that `JsValue::null()` and friends can be passed
// by value to JS where we'll automatically call this method. Those
// constants, however, cannot be dropped. See #1054 for removing this
// branch.
//
// Otherwise the free operation here is pretty simple, just appending to
// the linked list of heap slots that are free.
self.global(&format!(
"
function dropRef(idx) {{
{}
idx = idx >> 1;
function dropObject(idx) {{
if (idx < {}) return;
let obj = slab[idx];
{}
// If we hit 0 then free up our space in the slab
slab[idx] = slab_next;
slab_next = idx;
heap[idx] = heap_next;
heap_next = idx;
}}
",
validate_owned,
INITIAL_SLAB_VALUES.len(),
dec_ref
INITIAL_HEAP_OFFSET + INITIAL_HEAP_VALUES.len(),
));
}
fn expose_global_stack(&mut self) {
if !self.exposed_globals.insert("stack") {
fn expose_global_heap(&mut self) {
if !self.exposed_globals.insert("heap") {
return;
}
self.global(&format!(
"
const stack = [];
"
));
if self.config.debug {
self.export(
"assertStackEmpty",
"
function() {
if (stack.length === 0) return;
throw new Error('stack is not currently empty');
}
",
None,
);
}
self.global(&format!("const heap = new Array({});", INITIAL_HEAP_OFFSET));
self.global(&format!("heap.push({});", INITIAL_HEAP_VALUES.join(", ")));
}
fn expose_global_slab(&mut self) {
if !self.exposed_globals.insert("slab") {
fn expose_global_heap_next(&mut self) {
if !self.exposed_globals.insert("heap_next") {
return;
}
let initial_values = INITIAL_SLAB_VALUES
.iter()
.map(|s| format!("{{ obj: {} }}", s))
.collect::<Vec<_>>();
self.global(&format!("const slab = [{}];", initial_values.join(", ")));
if self.config.debug {
self.export(
"assertSlabEmpty",
&format!(
"
function() {{
for (let i = {}; i < slab.length; i++) {{
if (typeof(slab[i]) === 'number') continue;
throw new Error('slab is not currently empty');
}}
}}
",
initial_values.len()
),
None,
);
}
}
fn expose_global_slab_next(&mut self) {
if !self.exposed_globals.insert("slab_next") {
return;
}
self.expose_global_slab();
self.global(
"
let slab_next = slab.length;
",
);
self.expose_global_heap();
self.global("let heap_next = heap.length;");
}
fn expose_get_object(&mut self) {
if !self.exposed_globals.insert("get_object") {
return;
}
self.expose_global_stack();
self.expose_global_slab();
self.expose_global_heap();
let get_obj = if self.config.debug {
String::from(
"
if (typeof(val) === 'number') throw new Error('corrupt slab');
return val.obj;
",
)
} else {
String::from(
"
return val.obj;
",
)
};
self.global(&format!(
"
function getObject(idx) {{
if ((idx & 1) === 1) {{
return stack[idx >> 1];
}} else {{
const val = slab[idx >> 1];
{}
}}
}}
",
get_obj
));
// Accessing a heap object is just a simple index operation due to how
// the stack/heap are laid out.
self.global("function getObject(idx) { return heap[idx]; }");
}
fn expose_assert_num(&mut self) {
@ -1510,18 +1380,32 @@ impl<'a> Context<'a> {
);
}
fn expose_global_stack_pointer(&mut self) {
if !self.exposed_globals.insert("stack_pointer") {
return;
}
self.global(&format!("let stack_pointer = {};", INITIAL_HEAP_OFFSET));
}
fn expose_borrowed_objects(&mut self) {
if !self.exposed_globals.insert("borrowed_objects") {
return;
}
self.expose_global_stack();
self.expose_global_heap();
self.expose_global_stack_pointer();
// Our `stack_pointer` points to where we should start writing stack
// objects, and the `stack_pointer` is incremented in a `finally` block
// after executing this. Once we've reserved stack space we write the
// value. Eventually underflow will throw an exception, but JS sort of
// just handles it today...
self.global(
"
function addBorrowedObject(obj) {
stack.push(obj);
return ((stack.length - 1) << 1) | 1;
if (stack_pointer == 1) throw new Error('out of js stack');
heap[--stack_pointer] = obj;
return stack_pointer;
}
",
"
);
}
@ -1535,7 +1419,7 @@ impl<'a> Context<'a> {
"
function takeObject(idx) {
const ret = getObject(idx);
dropRef(idx);
dropObject(idx);
return ret;
}
",
@ -1546,34 +1430,34 @@ impl<'a> Context<'a> {
if !self.exposed_globals.insert("add_heap_object") {
return;
}
self.expose_global_slab();
self.expose_global_slab_next();
let set_slab_next = if self.config.debug {
self.expose_global_heap();
self.expose_global_heap_next();
let set_heap_next = if self.config.debug {
String::from(
"
if (typeof(next) !== 'number') throw new Error('corrupt slab');
slab_next = next;
if (typeof(heap_next) !== 'number') throw new Error('corrupt heap');
",
)
} else {
String::from(
"
slab_next = next;
",
)
String::new()
};
// Allocating a slot on the heap first goes through the linked list
// (starting at `heap_next`). Once that linked list is exhausted we'll
// be pointing beyond the end of the array, at which point we'll reserve
// one more slot and use that.
self.global(&format!(
"
function addHeapObject(obj) {{
if (slab_next === slab.length) slab.push(slab.length + 1);
const idx = slab_next;
const next = slab[idx];
if (heap_next === heap.length) heap.push(heap.length + 1);
const idx = heap_next;
heap_next = heap[idx];
{}
slab[idx] = {{ obj, cnt: 1 }};
return idx << 1;
heap[idx] = obj;
return idx;
}}
",
set_slab_next
set_heap_next
));
}

View File

@ -5,18 +5,21 @@ around JS objects in wasm, but that's not allowed today! While indeed true,
that's where the polyfill comes in.
The question here is how we shoehorn JS objects into a `u32` for wasm to use.
The current strategy for this approach is to maintain two module-local variables
in the generated `foo.js` file: a stack and a heap.
The current strategy for this approach is to maintain a module-local variable
in the generated `foo.js` file: a `heap`.
### Temporary JS objects on the stack
### Temporary JS objects on the "stack"
The stack in `foo.js` is, well, a stack. JS objects are pushed on the top of the
stack, and their index in the stack is the identifier that's passed to wasm. JS
objects are then only removed from the top of the stack as well. This data
structure is mainly useful for efficiently passing a JS object into wasm without
a sort of "heap allocation". The downside of this, however, is that it only
works for when wasm doesn't hold onto a JS object (aka it only gets a
"reference" in Rust parlance).
The first slots in the `heap` in `foo.js` are considered a stack. This stack,
like typical program execution stacks, grows down. JS objects are pushed on the
bottom of the stack, and their index in the stack is the identifier that's passed
to wasm. A stack pointer is maintained to figure out where the next item is
pushed.
JS objects are then only removed from the bottom of the stack as well. Removal
is simply storing `undefined` then incrementing a counter. Because of the
"stack-y" nature of this scheme it only works for when wasm doesn't hold onto a
JS object (aka it only gets a "reference" in Rust parlance).
Let's take a look at an example.
@ -47,11 +50,14 @@ and what we actually generate looks something like:
// foo.js
import * as wasm from './foo_bg';
const stack = [];
const heap = new Array(32);
heap.push(undefined, null, true, false);
let stack_pointer = 32;
function addBorrowedObject(obj) {
stack.push(obj);
return stack.length - 1;
stack_pointer -= 1;
heap[stack_pointer] = obj;
return stack_pointer;
}
export function foo(arg0) {
@ -59,7 +65,7 @@ export function foo(arg0) {
try {
wasm.foo(idx0);
} finally {
stack.pop();
heap[stack_pointer++] = undefined;
}
}
```
@ -68,13 +74,13 @@ Here we can see a few notable points of action:
* The wasm file was renamed to `foo_bg.wasm`, and we can see how the JS module
generated here is importing from the wasm file.
* Next we can see our `stack` module variable which is used to push/pop items
from the stack.
* Next we can see our `heap` module variable which is used to store all JS
values reference-able from wasm.
* Our exported function `foo` takes an arbitrary argument, `arg0`, which is
converted to an index with the `addBorrowedObject` function. The index
is then passed to wasm so wasm can operate with it.
* Finally, we have a `finally` which frees the stack slot as it's no longer
used, issuing a `pop` for what was pushed at the start of the function.
used, popping the value that was pushed at the start of the function.
It's also helpful to dig into the Rust side of things to see what's going on
there! Let's take a look at the code that `#[wasm_bindgen]` generates in Rust:
@ -104,12 +110,13 @@ And as with the JS, the notable points here are:
in a `JsValue`. There's some trickery here that's not worth going into just
yet, but we'll see in a bit what's happening under the hood.
### Long-lived JS objects in a slab
### Long-lived JS objects
The above strategy is useful when JS objects are only temporarily used in Rust,
for example only during one function call. Sometimes, though, objects may have a
dynamic lifetime or otherwise need to be stored on Rust's heap. To cope with
this there's a second half of management of JS objects, a slab.
this there's a second half of management of JS objects, naturally corresponding
to the other side of the JS `heap` array.
JS Objects passed to wasm that are not references are assumed to have a dynamic
lifetime inside of the wasm module. As a result the strict push/pop of the stack
@ -135,16 +142,16 @@ different. Let's see the generated JS's slab in action:
```js
import * as wasm from './foo_bg'; // imports from wasm file
const slab = [];
let slab_next = 0;
const heap = new Array(32);
heap.push(undefined, null, true, false);
let heap_next = 36;
function addHeapObject(obj) {
if (slab_next === slab.length)
slab.push(slab.length + 1);
const idx = slab_next;
const next = slab[idx];
slab_next = next;
slab[idx] = { obj, cnt: 1 };
if (heap_next === heap.length)
heap.push(heap.length + 1);
const idx = heap_next;
heap_next = heap[idx];
heap[idx] = obj;
return idx;
}
@ -154,24 +161,17 @@ export function foo(arg0) {
}
export function __wbindgen_object_drop_ref(idx) {
let obj = slab[idx];
obj.cnt -= 1;
if (obj.cnt > 0)
return;
// If we hit 0 then free up our space in the slab
slab[idx] = slab_next;
slab_next = idx;
heap[idx ] = heap_next;
heap_next = idx;
}
```
Unlike before we're now calling `addHeapObject` on the argument to `foo` rather
than `addBorrowedObject`. This function will use `slab` and `slab_next` as a
than `addBorrowedObject`. This function will use `heap` and `heap_next` as a
slab allocator to acquire a slot to store the object, placing a structure there
once it's found.
Note here that a reference count is used in addition to storing the object.
That's so we can create multiple references to the JS object in Rust without
using `Rc`, but it's overall not too important to worry about here.
once it's found. Note that this is going on the right-half of the array, unlike
the stack which resides on the left half. This discipline mirrors the stack/heap
in normal programs, roughly.
Another curious aspect of this generated module is the
`__wbindgen_object_drop_ref` function. This is one that's actually imported from
@ -229,10 +229,9 @@ If you'll recall as well, when we took `&JsValue` above we generated a wrapper
of `ManuallyDrop` around the local binding, and that's because we wanted to
avoid invoking this destructor when the object comes from the stack.
### Indexing both a slab and the stack
### Working with `heap` in reality
You might be thinking at this point that this system may not work! There's
indexes into both the slab and the stack mixed up, but how do we differentiate?
It turns out that the examples above have been simplified a bit, but otherwise
the lowest bit is currently used as an indicator of whether you're a slab or a
stack index.
The above explanations are pretty close to what happens today, but in reality
there are a few differences, especially around handling constant values like
`undefined`, `null`, etc. Be sure to check out the actual generated JS and the
generation code for the full details!

View File

@ -67,11 +67,12 @@ pub struct JsValue {
_marker: marker::PhantomData<*mut u8>, // not at all threadsafe
}
const JSIDX_UNDEFINED: u32 = 0;
const JSIDX_NULL: u32 = 2;
const JSIDX_TRUE: u32 = 4;
const JSIDX_FALSE: u32 = 6;
const JSIDX_RESERVED: u32 = 8;
const JSIDX_OFFSET: u32 = 32; // keep in sync with js/mod.rs
const JSIDX_UNDEFINED: u32 = JSIDX_OFFSET + 0;
const JSIDX_NULL: u32 = JSIDX_OFFSET + 1;
const JSIDX_TRUE: u32 = JSIDX_OFFSET + 2;
const JSIDX_FALSE: u32 = JSIDX_OFFSET + 3;
const JSIDX_RESERVED: u32 = JSIDX_OFFSET + 4;
impl JsValue {
/// The `null` JS value constant.
@ -533,13 +534,12 @@ impl Drop for JsValue {
#[inline]
fn drop(&mut self) {
unsafe {
// The first bit indicates whether this is a stack value or not.
// Stack values should never be dropped (they're always in
// `ManuallyDrop`)
debug_assert!(self.idx & 1 == 0);
// We definitely should never drop anything in the stack area
debug_assert!(self.idx >= JSIDX_OFFSET);
// We don't want to drop the first few elements as they're all
// reserved, but everything else is safe to drop.
// Otherwise if we're not dropping one of our reserved values,
// actually call the intrinsic. See #1054 for eventually removing
// this branch.
if self.idx >= JSIDX_RESERVED {
__wbindgen_object_drop_ref(self.idx);
}