Split up Wasm module and function

This commit is contained in:
Brian Carroll 2021-08-30 23:07:12 +01:00
parent e5d5bb27fa
commit e67efaba4d
3 changed files with 349 additions and 242 deletions

View File

@ -0,0 +1,240 @@
use parity_wasm::elements::{Instruction, Instruction::*, Local, ValueType};
use roc_collections::all::MutMap;
use roc_module::low_level::LowLevel;
use roc_module::symbol::Symbol;
use roc_mono::ir::{CallType, Expr, Literal, Proc, Stmt};
use roc_mono::layout::{Builtin, Layout};
use crate::module::ModuleState;
/// Index of a Wasm local variable.
/// The wrapped `u32` is what gets passed to `GetLocal` / `SetLocal`.
#[derive(Clone, Copy)]
struct LocalId(u32);
/// Identifier for a Wasm label.
/// NOTE(review): not used by any code in this file yet; presumably intended for the
/// commented-out `joinpoint_label_map` field of `FunctionGenerator` — confirm.
#[derive(Clone, Copy)]
struct LabelId(u32);
/// How a Roc layout is represented in Wasm.
struct WasmLayout {
/// Wasm value type used for locals, parameters and results of this layout.
value_type: ValueType,
/// Amount of stack memory the layout needs; 0 for the only layout supported so far.
/// NOTE(review): unit is presumably bytes — confirm.
stack_memory: u32,
}
/// Where a Roc symbol lives inside the current function: its Wasm local and its layout.
struct SymbolStorage(LocalId, WasmLayout);
impl WasmLayout {
fn new(layout: &Layout) -> Result<Self, String> {
match layout {
Layout::Builtin(Builtin::Int64) => Ok(Self {
value_type: ValueType::I64,
stack_memory: 0,
}),
x => Err(format!("layout, {:?}, not implemented yet", x)),
}
}
}
/// State for generating the Wasm code of a single function.
pub struct FunctionGenerator<'a> {
/// Instructions emitted so far for the function body.
pub instructions: std::vec::Vec<Instruction>,
/// Wasm type of the function result; set by `build` from the proc's return layout.
pub ret_type: ValueType,
/// Wasm types of the function parameters, in declaration order; filled in by `build`.
pub arg_types: std::vec::Vec<ValueType>,
/// Wasm local declarations registered while generating the function.
pub locals: std::vec::Vec<Local>,
/// Module-wide state; consulted to find the code location of called procs.
module_state: &'a mut ModuleState,
// joinpoint_label_map: MutMap<JoinPointId, LabelId>,
/// Maps each Roc symbol in scope to the local that holds it.
symbol_storage_map: MutMap<Symbol, SymbolStorage>,
/// Running total of the `stack_memory` of every layout registered so far.
stack_memory: u32,
}
impl<'a> FunctionGenerator<'a> {
pub fn new(module_state: &'a mut ModuleState) -> Self {
FunctionGenerator {
instructions: std::vec::Vec::new(),
ret_type: ValueType::I32,
arg_types: std::vec::Vec::new(),
locals: std::vec::Vec::new(),
module_state: module_state,
// joinpoint_label_map: MutMap::default(),
symbol_storage_map: MutMap::default(),
stack_memory: 0,
}
}
pub fn build(&mut self, proc: Proc<'a>) -> Result<(), String> {
let ret_layout = WasmLayout::new(&proc.ret_layout)?;
if ret_layout.stack_memory > 0 {
// TODO: if returning a struct by value, add an extra argument for a pointer to callee's stack memory
return Err(format!(
"Not yet implemented: Return in stack memory for non-primtitive layouts like {:?}",
proc.ret_layout
));
}
self.ret_type = ret_layout.value_type;
self.arg_types = std::vec::Vec::with_capacity(proc.args.len());
for (layout, symbol) in proc.args {
let wasm_layout = WasmLayout::new(layout)?;
self.arg_types.push(wasm_layout.value_type);
self.insert_local(wasm_layout, *symbol);
}
self.build_stmt(&proc.body, &proc.ret_layout)?;
Ok(())
}
fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId {
let local_id = LocalId(self.locals.len() as u32);
self.locals.push(Local::new(1, layout.value_type));
self.stack_memory += layout.stack_memory;
let storage = SymbolStorage(local_id, layout);
self.symbol_storage_map.insert(symbol, storage);
local_id
}
fn get_symbol_storage(&self, sym: &Symbol) -> Result<&SymbolStorage, String> {
self.symbol_storage_map
.get(sym)
.ok_or(format!("Symbol not found in function scope {:?}", sym))
}
fn load_from_symbol(&mut self, sym: &Symbol) -> Result<(), String> {
let SymbolStorage(LocalId(local_id), _) = self.get_symbol_storage(sym)?;
let id: u32 = *local_id;
self.instructions.push(GetLocal(id));
Ok(())
}
// Store whatever value is on top of the VM's stack
fn store_to_symbol(&mut self, sym: &Symbol) -> Result<(), String> {
let SymbolStorage(LocalId(local_id), _) = self.get_symbol_storage(sym)?;
let id: u32 = *local_id;
self.instructions.push(SetLocal(id));
Ok(())
}
fn build_stmt(&mut self, stmt: &Stmt<'a>, ret_layout: &Layout<'a>) -> Result<(), String> {
match stmt {
Stmt::Let(sym, expr, layout, following) => {
self.build_expr(sym, expr, layout)?;
let wasm_layout = WasmLayout::new(layout)?;
let local_id = self.insert_local(wasm_layout, *sym);
self.instructions.push(SetLocal(local_id.0));
self.build_stmt(following, ret_layout)?;
Ok(())
}
Stmt::Ret(sym) => {
if let Some(SymbolStorage(local_id, _)) = self.symbol_storage_map.get(sym) {
self.instructions.push(GetLocal(local_id.0));
self.instructions.push(Return);
Ok(())
} else {
Err(format!(
"Not yet implemented: returning values with layout {:?}",
ret_layout
))
}
}
x => Err(format!("statement not yet implemented: {:?}", x)),
}
}
fn build_expr(
&mut self,
sym: &Symbol,
expr: &Expr<'a>,
layout: &Layout<'a>,
) -> Result<(), String> {
match expr {
Expr::Literal(lit) => self.load_literal(lit),
Expr::Call(roc_mono::ir::Call {
call_type,
arguments,
}) => match call_type {
CallType::ByName { name: func_sym, .. } => {
for arg in *arguments {
self.load_from_symbol(arg)?;
}
let function_location =
self.module_state
.proc_symbol_map
.get(func_sym)
.ok_or(format!(
"Cannot find function {:?} called from {:?}",
func_sym, sym
))?;
self.instructions.push(Call(function_location.body));
self.store_to_symbol(sym)?;
Ok(())
}
CallType::LowLevel { op: lowlevel, .. } => {
self.build_call_low_level(sym, lowlevel, arguments, layout)
}
x => Err(format!("the call type, {:?}, is not yet implemented", x)),
},
x => Err(format!("Expression is not yet implemented {:?}", x)),
}
}
fn load_literal(&mut self, lit: &Literal<'a>) -> Result<(), String> {
match lit {
Literal::Int(x) => {
self.instructions.push(I64Const(*x as i64));
Ok(())
}
Literal::Float(x) => {
let val: f64 = *x;
self.instructions.push(F64Const(val.to_bits()));
Ok(())
}
x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
}
}
fn build_call_low_level(
&mut self,
sym: &Symbol,
lowlevel: &LowLevel,
args: &'a [Symbol],
layout: &Layout<'a>,
) -> Result<(), String> {
for arg in args {
self.load_from_symbol(arg)?;
}
let wasm_layout = WasmLayout::new(layout)?;
self.build_instructions_lowlevel(lowlevel, wasm_layout.value_type)?;
self.store_to_symbol(sym)?;
Ok(())
}
fn build_instructions_lowlevel(
&mut self,
lowlevel: &LowLevel,
value_type: ValueType,
) -> Result<(), String> {
// TODO: Find a way to organise all the lowlevel ops and layouts! There's lots!
//
// Some Roc low-level ops care about wrapping, clipping, sign-extending...
// For those, we'll need to pre-process each argument before the main op,
// so simple arrays of instructions won't work. But there are common patterns.
let instructions: &[Instruction] = match lowlevel {
// Matching on Wasm type might not be enough, maybe need Roc layout for sign-extension
LowLevel::NumAdd => match value_type {
ValueType::I32 => &[I32Add],
ValueType::I64 => &[I64Add],
ValueType::F32 => &[F32Add],
ValueType::F64 => &[F64Add],
},
_ => {
return Err(format!("unsupported low-level op {:?}", lowlevel));
}
};
self.instructions.extend_from_slice(instructions);
Ok(())
}
}

View File

@ -1,242 +1,2 @@
use bumpalo::Bump;
use parity_wasm::builder::{CodeLocation, ModuleBuilder};
use parity_wasm::elements::{
Instruction, Instruction::*, Instructions, Internal, Local, ValueType,
};
use parity_wasm::{builder, elements};
// use roc_builtins::bitcode;
use roc_collections::all::{MutMap, MutSet};
// use roc_module::ident::{ModuleName, TagName};
use roc_module::low_level::LowLevel;
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, ProcLayout, Stmt};
use roc_mono::layout::{Builtin, Layout, LayoutIds};
/// Inputs shared by the whole Wasm code generation run.
pub struct Env<'a> {
pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot
/// Interned names; used to produce the export name of exposed functions.
pub interns: Interns,
/// Symbols whose procs `build_module` exports from the generated module.
pub exposed_to_host: MutSet<Symbol>,
}
/// Index of a Wasm local variable.
/// The wrapped `u32` is what gets passed to `GetLocal` / `SetLocal`.
#[derive(Clone, Copy)]
struct LocalId(u32);
/// Identifier for a Wasm label; the value type of `FunctionGenerator::joinpoint_label_map`.
/// NOTE(review): nothing in this file inserts into that map yet — confirm intended use.
#[derive(Clone, Copy)]
struct LabelId(u32);
/// How a Roc layout is represented in Wasm.
struct WasmLayout {
/// Wasm value type used for locals, parameters and results of this layout.
value_type: ValueType,
/// Amount of stack memory the layout needs; 0 for the only layout supported so far.
/// NOTE(review): unit is presumably bytes — confirm.
stack_memory: u32,
}
impl WasmLayout {
fn new(layout: &Layout) -> Result<Self, String> {
match layout {
Layout::Builtin(Builtin::Int64) => Ok(Self {
value_type: ValueType::I64,
stack_memory: 0,
}),
x => Err(format!("layout, {:?}, not implemented yet", x)),
}
}
}
/// Where a Roc symbol lives inside the current function: its Wasm local and its layout.
struct SymbolStorage(LocalId, WasmLayout);
// Don't allocate any constant data at the address zero or anywhere near it.
// These addresses are not special in Wasm, but putting something there seems bug-prone.
// Used as the initial value of `BackendWasm::data_offset_next`.
const UNUSED_DATA_SECTION_BYTES: u32 = 1024;
// State for generating a single function
struct FunctionGenerator {
// Instructions emitted so far for the function body.
instructions: std::vec::Vec<Instruction>,
// Labels assigned to join points (nothing in this file inserts into it yet).
joinpoint_label_map: MutMap<JoinPointId, LabelId>,
// Wasm local declarations, one per call to `insert_local`.
locals: std::vec::Vec<Local>,
// Maps each Roc symbol in scope to the local that holds it.
symbol_storage_map: MutMap<Symbol, SymbolStorage>,
// Running total of the `stack_memory` of every layout registered so far.
stack_memory: u32,
}
impl FunctionGenerator {
    /// Creates a generator with no instructions, locals or known symbols.
    fn new() -> Self {
        FunctionGenerator {
            instructions: std::vec::Vec::new(),
            joinpoint_label_map: MutMap::default(),
            symbol_storage_map: MutMap::default(),
            stack_memory: 0,
            locals: std::vec::Vec::new(),
        }
    }

    /// Returns the generator to its freshly-constructed state so it can be
    /// reused for another function.
    fn reset(&mut self) {
        // The instruction buffer must be emptied too; otherwise the next function
        // would start with the previous function's body.
        self.instructions.clear();
        self.joinpoint_label_map.clear();
        self.locals.clear();
        self.symbol_storage_map.clear();
        self.stack_memory = 0;
    }

    /// Declares a new local for `symbol`, records where the symbol is stored and
    /// returns the id of the local.
    ///
    /// NOTE(review): ids are plain indices into `locals`, and callers register
    /// function parameters through this method as well — confirm this matches
    /// the Wasm local index space (parameters first, then declared locals).
    fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId {
        let local_id = LocalId(self.locals.len() as u32);
        self.locals.push(Local::new(1, layout.value_type));
        self.stack_memory += layout.stack_memory;

        let storage = SymbolStorage(local_id, layout);
        self.symbol_storage_map.insert(symbol, storage);

        local_id
    }
}
/// Generates a Wasm module containing one function per entry of `procedures`.
///
/// Procs whose symbol is listed in `env.exposed_to_host` are additionally
/// exported under their top-level symbol string.
///
/// # Errors
/// Propagates the first error reported while building a proc.
pub fn build_module<'a>(
    env: &'a Env,
    procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<elements::Module, String> {
    let mut wasm_backend = BackendWasm::new(env);
    let mut layout_ids = LayoutIds::default();

    for ((proc_symbol, proc_layout), proc) in procedures {
        let code_location = wasm_backend.build_proc(proc)?;

        // Only host-exposed procs get an export entry.
        if !env.exposed_to_host.contains(&proc_symbol) {
            continue;
        }

        let export_name = layout_ids
            .get_toplevel(proc_symbol, &proc_layout)
            .to_symbol_string(proc_symbol, &env.interns);
        wasm_backend.module_builder.push_export(
            builder::export()
                .field(export_name.as_str())
                .with_internal(Internal::Function(code_location.body))
                .build(),
        );
    }

    Ok(wasm_backend.module_builder.build())
}
/// Module-level code generation state.
struct BackendWasm<'a> {
// Shared inputs of the code generation run.
env: &'a Env<'a>,
// Builder that accumulates the functions and exports of the module.
module_builder: ModuleBuilder,
// NOTE(review): presumably the data-section offset assigned to each literal;
// nothing in this file reads or writes it yet — confirm.
data_offset_map: MutMap<Literal<'a>, u32>,
// Starts at `UNUSED_DATA_SECTION_BYTES`; presumably the next free data offset — confirm.
data_offset_next: u32,
}
impl<'a> BackendWasm<'a> {
fn new(env: &'a Env) -> Self {
BackendWasm {
env,
module_builder: builder::module(),
data_offset_map: MutMap::default(),
data_offset_next: UNUSED_DATA_SECTION_BYTES,
}
}
fn build_proc(&mut self, proc: Proc<'a>) -> Result<CodeLocation, String> {
let mut func_gen = FunctionGenerator::new(); // yeah probably don't need to allocate a new one every time, but tell that to the borrow checker! ;-)
let ret_layout = WasmLayout::new(&proc.ret_layout)?;
let ret_value_type = ret_layout.value_type;
if ret_layout.stack_memory > 0 {
// TODO: insert an extra param for a pointer to space allocated in callee's stack... or does Roc do something else?
return Err(format!(
"Not yet implemented: Return in stack memory for non-primtitive layouts like {:?}",
proc.ret_layout
));
}
let mut arg_types = std::vec::Vec::with_capacity(proc.args.len());
for (layout, symbol) in proc.args {
let wasm_layout = WasmLayout::new(layout)?;
arg_types.push(wasm_layout.value_type);
func_gen.insert_local(wasm_layout, *symbol);
}
let signature = builder::signature()
.with_params(arg_types) // requires std::Vec, not Bumpalo
.with_result(ret_value_type)
.build_sig();
self.build_stmt(&mut func_gen, &proc.body, &proc.ret_layout)?;
let function_def = builder::function()
.with_signature(signature)
.body()
.with_locals(func_gen.locals)
.with_instructions(Instructions::new(func_gen.instructions))
.build() // body
.build(); // function
let location = self.module_builder.push_function(function_def);
Ok(location)
}
fn build_stmt(
&mut self,
func_gen: &mut FunctionGenerator,
stmt: &Stmt<'a>,
ret_layout: &Layout<'a>,
) -> Result<(), String> {
match stmt {
Stmt::Let(sym, expr, layout, following) => {
self.build_expr(func_gen, sym, expr, layout)?;
let wasm_layout = WasmLayout::new(layout)?;
let local_id = func_gen.insert_local(wasm_layout, *sym);
func_gen.instructions.push(SetLocal(local_id.0));
self.build_stmt(func_gen, following, ret_layout)?;
Ok(())
}
Stmt::Ret(sym) => {
if let Some(SymbolStorage(local_id, wasm_layout)) =
func_gen.symbol_storage_map.get(sym)
{
func_gen.instructions.push(GetLocal(local_id.0));
func_gen.instructions.push(Return);
Ok(())
} else {
Err(format!(
"Not yet implemented: returning values with layout {:?}",
ret_layout
))
}
}
x => Err(format!("statement not yet implemented: {:?}", x)),
}
}
fn build_expr(
&mut self,
func_gen: &mut FunctionGenerator,
sym: &Symbol,
expr: &Expr<'a>,
layout: &Layout<'a>,
) -> Result<(), String> {
match expr {
Expr::Literal(lit) => self.load_literal(func_gen, lit),
x => Err(format!("Expression is not yet implemented {:?}", x)),
}
}
fn load_literal(
&mut self,
func_gen: &mut FunctionGenerator,
lit: &Literal<'a>,
) -> Result<(), String> {
match lit {
Literal::Int(x) => {
func_gen.instructions.push(I64Const(*x as i64));
Ok(())
}
Literal::Float(x) => {
// F64Const takes a u64??
// I've raised an issue in the library to check https://github.com/paritytech/parity-wasm/issues/314
let val: u64 = unsafe { std::mem::transmute(*x) };
func_gen.instructions.push(F64Const(val));
Ok(())
}
x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
}
}
}
pub mod module;
mod function;

View File

@ -0,0 +1,107 @@
use bumpalo::Bump;
use parity_wasm::builder::{CodeLocation, ModuleBuilder};
use parity_wasm::elements::{Instructions, Internal};
use parity_wasm::{builder, elements};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{CallType, Expr, Literal, Proc, ProcLayout, Stmt};
use roc_mono::layout::LayoutIds;
use crate::function::FunctionGenerator;
/// Inputs shared by the whole Wasm code generation run.
pub struct Env<'a> {
pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot
/// Interned names; used to produce the export name of exposed functions.
pub interns: Interns,
/// Symbols whose procs `build_module` exports from the generated module.
pub exposed_to_host: MutSet<Symbol>,
}
// Don't allocate any constant data at the address zero or anywhere near it.
// These addresses are not special in Wasm, but putting something there seems bug-prone.
// Used as the initial value of `ModuleState::_data_offset_next`.
const UNUSED_DATA_SECTION_BYTES: u32 = 1024;
/// Generates a Wasm module containing one function per entry of `procedures`.
///
/// Procs whose symbol is listed in `env.exposed_to_host` are additionally
/// exported under their top-level symbol string. Every proc is recorded in
/// `proc_symbol_map` once it has been built.
///
/// NOTE(review): a proc only becomes visible in `proc_symbol_map` after it was
/// built, so a call to a proc that comes later in the (map) iteration order, or
/// to itself, cannot be resolved — confirm whether that is acceptable.
///
/// # Errors
/// Propagates the first error reported while building a proc.
pub fn build_module<'a>(
    env: &'a Env,
    procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<elements::Module, String> {
    let mut state = ModuleState::new(env);
    let mut layout_ids = LayoutIds::default();

    for ((proc_symbol, proc_layout), proc) in procedures {
        let code_location = build_proc(&mut state, proc)?;

        let is_exposed = env.exposed_to_host.contains(&proc_symbol);
        if is_exposed {
            let export_name = layout_ids
                .get_toplevel(proc_symbol, &proc_layout)
                .to_symbol_string(proc_symbol, &env.interns);
            state.module_builder.push_export(
                builder::export()
                    .field(export_name.as_str())
                    .with_internal(Internal::Function(code_location.body))
                    .build(),
            );
        }

        state.proc_symbol_map.insert(proc_symbol, code_location);
    }

    Ok(state.module_builder.build())
}
/// Generates the Wasm function for `proc`, adds it to the module being built
/// and returns its location.
///
/// # Errors
/// Propagates any error reported by `FunctionGenerator::build`.
fn build_proc<'a>(module_state: &mut ModuleState, proc: Proc<'a>) -> Result<CodeLocation, String> {
    // TODO: see if we can reuse the same memory each time and reset it?
    // Can't convince the borrow-checker to let me do that, as things get moved into the function builder.
    let mut generator = FunctionGenerator::new(module_state);
    generator.build(proc)?;

    let signature = builder::signature()
        .with_params(generator.arg_types) // requires std::Vec, not Bumpalo
        .with_result(generator.ret_type)
        .build_sig();

    let body_instructions = Instructions::new(generator.instructions);
    let function_def = builder::function()
        .with_signature(signature)
        .body()
        .with_locals(generator.locals)
        .with_instructions(body_instructions)
        .build() // body
        .build(); // function

    Ok(module_state.module_builder.push_function(function_def))
}
/// Module-level code generation state shared by all function generators.
pub struct ModuleState<'a> {
// Shared inputs of the code generation run; stored but not read in this file.
_env: &'a Env<'a>,
// Builder that accumulates the functions and exports of the module.
module_builder: ModuleBuilder,
/// Code location of every proc built so far, keyed by the proc's symbol.
pub proc_symbol_map: MutMap<Symbol, CodeLocation>,
/// NOTE(review): presumably the data-section offset assigned to each literal;
/// nothing in this file reads or writes it yet — confirm.
pub _data_offset_map: MutMap<Literal<'a>, u32>,
/// Starts at `UNUSED_DATA_SECTION_BYTES`; presumably the next free data offset — confirm.
pub _data_offset_next: u32,
}
impl<'a> ModuleState<'a> {
    /// Creates the state for a new, empty module: no procs registered yet and
    /// the data offset counter positioned at `UNUSED_DATA_SECTION_BYTES`.
    fn new(_env: &'a Env) -> Self {
        let module_builder = builder::module();

        Self {
            _env,
            module_builder,
            proc_symbol_map: MutMap::default(),
            _data_offset_map: MutMap::default(),
            _data_offset_next: UNUSED_DATA_SECTION_BYTES,
        }
    }
}
// TODO: use something like this for very simple inlining
// Create a HashMap of inlined Procs, generate each call with different Symbol arguments
/// Returns `true` when the body of `proc` is exactly a `Let` that binds the
/// result of a low-level call, immediately followed by a `Ret`.
fn _is_lowlevel_wrapper<'a>(proc: Proc<'a>) -> bool {
    matches!(
        proc.body,
        Stmt::Let(
            _,
            Expr::Call(roc_mono::ir::Call {
                call_type: CallType::LowLevel { .. },
                ..
            }),
            _,
            Stmt::Ret(..)
        )
    )
}