From e67efaba4dc8d39639154af8c1f93b0877c8f624 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 30 Aug 2021 23:07:12 +0100 Subject: [PATCH] Split up Wasm module and function --- compiler/gen_wasm/src/function.rs | 240 +++++++++++++++++++++++++++++ compiler/gen_wasm/src/lib.rs | 244 +----------------------------- compiler/gen_wasm/src/module.rs | 107 +++++++++++++ 3 files changed, 349 insertions(+), 242 deletions(-) create mode 100644 compiler/gen_wasm/src/function.rs create mode 100644 compiler/gen_wasm/src/module.rs diff --git a/compiler/gen_wasm/src/function.rs b/compiler/gen_wasm/src/function.rs new file mode 100644 index 0000000000..07de4b12d1 --- /dev/null +++ b/compiler/gen_wasm/src/function.rs @@ -0,0 +1,240 @@ +use parity_wasm::elements::{Instruction, Instruction::*, Local, ValueType}; + +use roc_collections::all::MutMap; +use roc_module::low_level::LowLevel; +use roc_module::symbol::Symbol; +use roc_mono::ir::{CallType, Expr, Literal, Proc, Stmt}; +use roc_mono::layout::{Builtin, Layout}; + +use crate::module::ModuleState; + +#[derive(Clone, Copy)] +struct LocalId(u32); + +#[derive(Clone, Copy)] +struct LabelId(u32); + +struct WasmLayout { + value_type: ValueType, + stack_memory: u32, +} + +struct SymbolStorage(LocalId, WasmLayout); + +impl WasmLayout { + fn new(layout: &Layout) -> Result { + match layout { + Layout::Builtin(Builtin::Int64) => Ok(Self { + value_type: ValueType::I64, + stack_memory: 0, + }), + x => Err(format!("layout, {:?}, not implemented yet", x)), + } + } +} + +pub struct FunctionGenerator<'a> { + pub instructions: std::vec::Vec, + pub ret_type: ValueType, + pub arg_types: std::vec::Vec, + pub locals: std::vec::Vec, + module_state: &'a mut ModuleState, + // joinpoint_label_map: MutMap, + symbol_storage_map: MutMap, + stack_memory: u32, +} + +impl<'a> FunctionGenerator<'a> { + pub fn new(module_state: &'a mut ModuleState) -> Self { + FunctionGenerator { + instructions: std::vec::Vec::new(), + ret_type: ValueType::I32, + arg_types: std::vec::Vec::new(), + locals: std::vec::Vec::new(), + module_state: module_state, + // joinpoint_label_map: MutMap::default(), + symbol_storage_map: MutMap::default(), + stack_memory: 0, + } + } + + pub fn build(&mut self, proc: Proc<'a>) -> Result<(), String> { + let ret_layout = WasmLayout::new(&proc.ret_layout)?; + if ret_layout.stack_memory > 0 { + // TODO: if returning a struct by value, add an extra argument for a pointer to callee's stack memory + return Err(format!( + "Not yet implemented: Return in stack memory for non-primtitive layouts like {:?}", + proc.ret_layout + )); + } + + self.ret_type = ret_layout.value_type; + self.arg_types = std::vec::Vec::with_capacity(proc.args.len()); + + for (layout, symbol) in proc.args { + let wasm_layout = WasmLayout::new(layout)?; + self.arg_types.push(wasm_layout.value_type); + self.insert_local(wasm_layout, *symbol); + } + + self.build_stmt(&proc.body, &proc.ret_layout)?; + Ok(()) + } + + fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId { + let local_id = LocalId(self.locals.len() as u32); + self.locals.push(Local::new(1, layout.value_type)); + self.stack_memory += layout.stack_memory; + + let storage = SymbolStorage(local_id, layout); + self.symbol_storage_map.insert(symbol, storage); + + local_id + } + + fn get_symbol_storage(&self, sym: &Symbol) -> Result<&SymbolStorage, String> { + self.symbol_storage_map + .get(sym) + .ok_or(format!("Symbol not found in function scope {:?}", sym)) + } + + fn load_from_symbol(&mut self, sym: &Symbol) -> Result<(), String> { + let SymbolStorage(LocalId(local_id), _) = self.get_symbol_storage(sym)?; + let id: u32 = *local_id; + self.instructions.push(GetLocal(id)); + Ok(()) + } + + // Store whatever value is on top of the VM's stack + fn store_to_symbol(&mut self, sym: &Symbol) -> Result<(), String> { + let SymbolStorage(LocalId(local_id), _) = self.get_symbol_storage(sym)?; + let id: u32 = *local_id; + self.instructions.push(SetLocal(id)); + Ok(()) + } + + fn build_stmt(&mut self, stmt: &Stmt<'a>, ret_layout: &Layout<'a>) -> Result<(), String> { + match stmt { + Stmt::Let(sym, expr, layout, following) => { + self.build_expr(sym, expr, layout)?; + + let wasm_layout = WasmLayout::new(layout)?; + let local_id = self.insert_local(wasm_layout, *sym); + self.instructions.push(SetLocal(local_id.0)); + + self.build_stmt(following, ret_layout)?; + Ok(()) + } + Stmt::Ret(sym) => { + if let Some(SymbolStorage(local_id, _)) = self.symbol_storage_map.get(sym) { + self.instructions.push(GetLocal(local_id.0)); + self.instructions.push(Return); + Ok(()) + } else { + Err(format!( + "Not yet implemented: returning values with layout {:?}", + ret_layout + )) + } + } + x => Err(format!("statement not yet implemented: {:?}", x)), + } + } + + fn build_expr( + &mut self, + sym: &Symbol, + expr: &Expr<'a>, + layout: &Layout<'a>, + ) -> Result<(), String> { + match expr { + Expr::Literal(lit) => self.load_literal(lit), + + Expr::Call(roc_mono::ir::Call { + call_type, + arguments, + }) => match call_type { + CallType::ByName { name: func_sym, .. } => { + for arg in *arguments { + self.load_from_symbol(arg)?; + } + let function_location = + self.module_state + .proc_symbol_map + .get(func_sym) + .ok_or(format!( + "Cannot find function {:?} called from {:?}", + func_sym, sym + ))?; + self.instructions.push(Call(function_location.body)); + self.store_to_symbol(sym)?; + Ok(()) + } + + CallType::LowLevel { op: lowlevel, .. } => { + self.build_call_low_level(sym, lowlevel, arguments, layout) + } + x => Err(format!("the call type, {:?}, is not yet implemented", x)), + }, + + x => Err(format!("Expression is not yet implemented {:?}", x)), + } + } + + fn load_literal(&mut self, lit: &Literal<'a>) -> Result<(), String> { + match lit { + Literal::Int(x) => { + self.instructions.push(I64Const(*x as i64)); + Ok(()) + } + Literal::Float(x) => { + let val: f64 = *x; + self.instructions.push(F64Const(val.to_bits())); + Ok(()) + } + x => Err(format!("loading literal, {:?}, is not yet implemented", x)), + } + } + + fn build_call_low_level( + &mut self, + sym: &Symbol, + lowlevel: &LowLevel, + args: &'a [Symbol], + layout: &Layout<'a>, + ) -> Result<(), String> { + for arg in args { + self.load_from_symbol(arg)?; + } + let wasm_layout = WasmLayout::new(layout)?; + self.build_instructions_lowlevel(lowlevel, wasm_layout.value_type)?; + self.store_to_symbol(sym)?; + Ok(()) + } + + fn build_instructions_lowlevel( + &mut self, + lowlevel: &LowLevel, + value_type: ValueType, + ) -> Result<(), String> { + // TODO: Find a way to organise all the lowlevel ops and layouts! There's lots! + // + // Some Roc low-level ops care about wrapping, clipping, sign-extending... + // For those, we'll need to pre-process each argument before the main op, + // so simple arrays of instructions won't work. But there are common patterns. + let instructions: &[Instruction] = match lowlevel { + // Matching on Wasm type might not be enough, maybe need Roc layout for sign-extension + LowLevel::NumAdd => match value_type { + ValueType::I32 => &[I32Add], + ValueType::I64 => &[I64Add], + ValueType::F32 => &[F32Add], + ValueType::F64 => &[F64Add], + }, + _ => { + return Err(format!("unsupported low-level op {:?}", lowlevel)); + } + }; + self.instructions.extend_from_slice(instructions); + Ok(()) + } +} diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 63c9a66670..d020707d3f 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -1,242 +1,2 @@ -use bumpalo::Bump; -use parity_wasm::builder::{CodeLocation, ModuleBuilder}; -use parity_wasm::elements::{ - Instruction, Instruction::*, Instructions, Internal, Local, ValueType, -}; -use parity_wasm::{builder, elements}; - -// use roc_builtins::bitcode; -use roc_collections::all::{MutMap, MutSet}; -// use roc_module::ident::{ModuleName, TagName}; -use roc_module::low_level::LowLevel; -use roc_module::symbol::{Interns, Symbol}; -use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, ProcLayout, Stmt}; -use roc_mono::layout::{Builtin, Layout, LayoutIds}; - -pub struct Env<'a> { - pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot - pub interns: Interns, - pub exposed_to_host: MutSet, -} - -#[derive(Clone, Copy)] -struct LocalId(u32); - -#[derive(Clone, Copy)] -struct LabelId(u32); - -struct WasmLayout { - value_type: ValueType, - stack_memory: u32, -} - -impl WasmLayout { - fn new(layout: &Layout) -> Result { - match layout { - Layout::Builtin(Builtin::Int64) => Ok(Self { - value_type: ValueType::I64, - stack_memory: 0, - }), - x => Err(format!("layout, {:?}, not implemented yet", x)), - } - } -} - -struct SymbolStorage(LocalId, WasmLayout); - -// Don't allocate any constant data at the address zero or anywhere near it. -// These addresses are not special in Wasm, but putting something there seems bug-prone. -const UNUSED_DATA_SECTION_BYTES: u32 = 1024; - -// State for generating a single function -struct FunctionGenerator { - instructions: std::vec::Vec, - joinpoint_label_map: MutMap, - locals: std::vec::Vec, - symbol_storage_map: MutMap, - stack_memory: u32, -} - -impl FunctionGenerator { - fn new() -> Self { - FunctionGenerator { - instructions: std::vec::Vec::new(), - joinpoint_label_map: MutMap::default(), - symbol_storage_map: MutMap::default(), - stack_memory: 0, - locals: std::vec::Vec::new(), - } - } - - fn reset(&mut self) { - self.joinpoint_label_map.clear(); - self.locals.clear(); - self.symbol_storage_map.clear(); - self.stack_memory = 0; - } - - fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId { - let local_id = LocalId(self.locals.len() as u32); - self.locals.push(Local::new(1, layout.value_type)); - self.stack_memory += layout.stack_memory; - - let storage = SymbolStorage(local_id, layout); - self.symbol_storage_map.insert(symbol, storage); - - local_id - } -} - -pub fn build_module<'a>( - env: &'a Env, - procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, -) -> Result { - let mut backend = BackendWasm::new(env); - let mut layout_ids = LayoutIds::default(); - - for ((sym, layout), proc) in procedures { - let location = backend.build_proc(proc)?; - - if env.exposed_to_host.contains(&sym) { - let fn_name = layout_ids - .get_toplevel(sym, &layout) - .to_symbol_string(sym, &env.interns); - - let export = builder::export() - .field(fn_name.as_str()) - .with_internal(Internal::Function(location.body)) - .build(); - - backend.module_builder.push_export(export); - } - } - - Ok(backend.module_builder.build()) -} - -struct BackendWasm<'a> { - env: &'a Env<'a>, - module_builder: ModuleBuilder, - data_offset_map: MutMap, u32>, - data_offset_next: u32, -} - -impl<'a> BackendWasm<'a> { - fn new(env: &'a Env) -> Self { - BackendWasm { - env, - module_builder: builder::module(), - data_offset_map: MutMap::default(), - data_offset_next: UNUSED_DATA_SECTION_BYTES, - } - } - - fn build_proc(&mut self, proc: Proc<'a>) -> Result { - let mut func_gen = FunctionGenerator::new(); // yeah probably don't need to allocate a new one every time, but tell that to the borrow checker! ;-) - - let ret_layout = WasmLayout::new(&proc.ret_layout)?; - let ret_value_type = ret_layout.value_type; - if ret_layout.stack_memory > 0 { - // TODO: insert an extra param for a pointer to space allocated in callee's stack... or does Roc do something else? - return Err(format!( - "Not yet implemented: Return in stack memory for non-primtitive layouts like {:?}", - proc.ret_layout - )); - } - - let mut arg_types = std::vec::Vec::with_capacity(proc.args.len()); - - for (layout, symbol) in proc.args { - let wasm_layout = WasmLayout::new(layout)?; - arg_types.push(wasm_layout.value_type); - func_gen.insert_local(wasm_layout, *symbol); - } - - let signature = builder::signature() - .with_params(arg_types) // requires std::Vec, not Bumpalo - .with_result(ret_value_type) - .build_sig(); - - self.build_stmt(&mut func_gen, &proc.body, &proc.ret_layout)?; - - let function_def = builder::function() - .with_signature(signature) - .body() - .with_locals(func_gen.locals) - .with_instructions(Instructions::new(func_gen.instructions)) - .build() // body - .build(); // function - - let location = self.module_builder.push_function(function_def); - Ok(location) - } - - fn build_stmt( - &mut self, - func_gen: &mut FunctionGenerator, - stmt: &Stmt<'a>, - ret_layout: &Layout<'a>, - ) -> Result<(), String> { - match stmt { - Stmt::Let(sym, expr, layout, following) => { - self.build_expr(func_gen, sym, expr, layout)?; - - let wasm_layout = WasmLayout::new(layout)?; - let local_id = func_gen.insert_local(wasm_layout, *sym); - func_gen.instructions.push(SetLocal(local_id.0)); - - self.build_stmt(func_gen, following, ret_layout)?; - Ok(()) - } - Stmt::Ret(sym) => { - if let Some(SymbolStorage(local_id, wasm_layout)) = - func_gen.symbol_storage_map.get(sym) - { - func_gen.instructions.push(GetLocal(local_id.0)); - func_gen.instructions.push(Return); - Ok(()) - } else { - Err(format!( - "Not yet implemented: returning values with layout {:?}", - ret_layout - )) - } - } - x => Err(format!("statement not yet implemented: {:?}", x)), - } - } - - fn build_expr( - &mut self, - func_gen: &mut FunctionGenerator, - sym: &Symbol, - expr: &Expr<'a>, - layout: &Layout<'a>, - ) -> Result<(), String> { - match expr { - Expr::Literal(lit) => self.load_literal(func_gen, lit), - x => Err(format!("Expression is not yet implemented {:?}", x)), - } - } - - fn load_literal( - &mut self, - func_gen: &mut FunctionGenerator, - lit: &Literal<'a>, - ) -> Result<(), String> { - match lit { - Literal::Int(x) => { - func_gen.instructions.push(I64Const(*x as i64)); - Ok(()) - } - Literal::Float(x) => { - // F64Const takes a u64?? - // I've raised an issue in the library to check https://github.com/paritytech/parity-wasm/issues/314 - let val: u64 = unsafe { std::mem::transmute(*x) }; - func_gen.instructions.push(F64Const(val)); - Ok(()) - } - x => Err(format!("loading literal, {:?}, is not yet implemented", x)), - } - } -} +pub mod module; +mod function; diff --git a/compiler/gen_wasm/src/module.rs b/compiler/gen_wasm/src/module.rs new file mode 100644 index 0000000000..408ea3b4fd --- /dev/null +++ b/compiler/gen_wasm/src/module.rs @@ -0,0 +1,107 @@ +use bumpalo::Bump; +use parity_wasm::builder::{CodeLocation, ModuleBuilder}; +use parity_wasm::elements::{Instructions, Internal}; +use parity_wasm::{builder, elements}; + +use roc_collections::all::{MutMap, MutSet}; +use roc_module::symbol::{Interns, Symbol}; +use roc_mono::ir::{CallType, Expr, Literal, Proc, ProcLayout, Stmt}; +use roc_mono::layout::LayoutIds; + +use crate::function::FunctionGenerator; + +pub struct Env<'a> { + pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot + pub interns: Interns, + pub exposed_to_host: MutSet, +} + +// Don't allocate any constant data at the address zero or anywhere near it. +// These addresses are not special in Wasm, but putting something there seems bug-prone. +const UNUSED_DATA_SECTION_BYTES: u32 = 1024; + +pub fn build_module<'a>( + env: &'a Env, + procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, +) -> Result { + let mut module_state = ModuleState::new(env); + let mut layout_ids = LayoutIds::default(); + + for ((sym, layout), proc) in procedures { + let location = build_proc(&mut module_state, proc)?; + + if env.exposed_to_host.contains(&sym) { + let fn_name = layout_ids + .get_toplevel(sym, &layout) + .to_symbol_string(sym, &env.interns); + + let export = builder::export() + .field(fn_name.as_str()) + .with_internal(Internal::Function(location.body)) + .build(); + + module_state.module_builder.push_export(export); + } + module_state.proc_symbol_map.insert(sym, location); + } + + Ok(module_state.module_builder.build()) +} + +fn build_proc<'a>(module_state: &mut ModuleState, proc: Proc<'a>) -> Result { + // TODO: see if we can reuse the same memory each time and reset it? + // Can't convince the borrow-checker to let me do that, as things get moved into the function builder. + let mut func_gen = FunctionGenerator::new(module_state); + func_gen.build(proc)?; + + let signature = builder::signature() + .with_params(func_gen.arg_types) // requires std::Vec, not Bumpalo + .with_result(func_gen.ret_type) + .build_sig(); + + let function_def = builder::function() + .with_signature(signature) + .body() + .with_locals(func_gen.locals) + .with_instructions(Instructions::new(func_gen.instructions)) + .build() // body + .build(); // function + + let location = module_state.module_builder.push_function(function_def); + Ok(location) +} + +pub struct ModuleState<'a> { + _env: &'a Env<'a>, + module_builder: ModuleBuilder, + pub proc_symbol_map: MutMap, + pub _data_offset_map: MutMap, u32>, + pub _data_offset_next: u32, +} + +impl<'a> ModuleState<'a> { + fn new(_env: &'a Env) -> Self { + ModuleState { + _env, + module_builder: builder::module(), + proc_symbol_map: MutMap::default(), + _data_offset_map: MutMap::default(), + _data_offset_next: UNUSED_DATA_SECTION_BYTES, + } + } +} + +// TODO: use something like this for very simple inlining +// Create a HashMap of inlined Procs, generate each call with different Symbol arguments +fn _is_lowlevel_wrapper<'a>(proc: Proc<'a>) -> bool { + match proc.body { + Stmt::Let(_, expr, _, Stmt::Ret(..)) => match expr { + Expr::Call(roc_mono::ir::Call { call_type, .. }) => match call_type { + CallType::LowLevel { .. } => true, + _ => false, + }, + _ => false, + }, + _ => false, + } +}