Refactor after creating constant linking data

This commit is contained in:
Brian Carroll 2021-11-06 17:30:23 +00:00
parent 38d9fc5bbd
commit 34b57cf315
4 changed files with 204 additions and 172 deletions

View File

@ -1,4 +1,4 @@
use bumpalo::collections::Vec;
use bumpalo::{self, collections::Vec};
use code_builder::Align;
use roc_collections::all::MutMap;
@ -16,23 +16,27 @@ use crate::wasm_module::sections::{
};
use crate::wasm_module::{
code_builder, BlockType, CodeBuilder, ConstExpr, Export, ExportType, Global, GlobalType,
LocalId, Signature, ValueType,
LocalId, Signature, SymInfo, ValueType,
};
use crate::{copy_memory, CopyMemoryConfig, Env, PTR_TYPE};
// Don't store any constants at address zero or near it. Would be valid, but bug-prone.
// Follow Emscripten's example by leaving 1kB unused (4 bytes would probably do)
const CONSTANTS_MIN_ADDR: u32 = 1024;
/// The memory address where the constants data will be loaded during module instantiation.
/// We avoid address zero and anywhere near it. They're valid addresses but maybe bug-prone.
/// Follow Emscripten's example by leaving 1kB unused (though 4 bytes would probably do!)
const CONST_SEGMENT_BASE_ADDR: u32 = 1024;
/// Index of the data segment where we store constants
const CONST_SEGMENT_INDEX: usize = 0;
pub struct WasmBackend<'a> {
env: &'a Env<'a>,
// Module-level data
pub module: WasmModule<'a>,
pub layout_ids: LayoutIds<'a>,
pub strings: MutMap<&'a str, (u32, DataSymbol)>,
next_string_addr: u32,
layout_ids: LayoutIds<'a>,
constant_sym_index_map: MutMap<&'a str, usize>,
proc_symbols: Vec<'a, Symbol>,
pub linker_symbols: Vec<'a, SymInfo>,
// Function-level data
code_builder: CodeBuilder<'a>,
@ -44,63 +48,74 @@ pub struct WasmBackend<'a> {
}
impl<'a> WasmBackend<'a> {
pub fn new(env: &'a Env<'a>, proc_symbols: Vec<'a, Symbol>) -> Self {
pub fn new(
env: &'a Env<'a>,
layout_ids: LayoutIds<'a>,
proc_symbols: Vec<'a, Symbol>,
linker_symbols: Vec<'a, SymInfo>,
mut exports: Vec<'a, Export>,
) -> Self {
const MEMORY_INIT_SIZE: u32 = 1024 * 1024;
let arena = env.arena;
let num_procs = proc_symbols.len();
let mut module = WasmModule {
types: TypeSection::new(env.arena),
import: ImportSection::new(env.arena),
function: FunctionSection::new(env.arena),
table: (), // Unused in Roc (mainly for function pointers)
memory: MemorySection::new(MEMORY_INIT_SIZE),
global: GlobalSection::new(env.arena),
export: ExportSection::new(env.arena),
start: (), // Entry function. In Roc this would be part of the platform.
element: (), // Unused in Roc (related to table section)
code: CodeSection::new(env.arena),
data: DataSection::new(env.arena),
linking: LinkingSection::new(env.arena),
relocations: RelocationSection::new(env.arena, "reloc.CODE"),
};
module.export.entries.push(Export {
exports.push(Export {
name: "memory".to_string(),
ty: ExportType::Mem,
index: 0,
});
let stack_pointer_global = Global {
let stack_pointer = Global {
ty: GlobalType {
value_type: ValueType::I32,
is_mutable: true,
},
init: ConstExpr::I32(MEMORY_INIT_SIZE as i32),
};
module.global.entries.push(stack_pointer_global);
let literal_segment = DataSegment {
let const_segment = DataSegment {
mode: DataMode::Active {
offset: ConstExpr::I32(CONSTANTS_MIN_ADDR as i32),
offset: ConstExpr::I32(CONST_SEGMENT_BASE_ADDR as i32),
},
init: Vec::with_capacity_in(64, env.arena),
init: Vec::with_capacity_in(64, arena),
};
let module = WasmModule {
types: TypeSection::new(arena, num_procs),
import: ImportSection::new(arena),
function: FunctionSection::new(arena, num_procs),
table: (),
memory: MemorySection::new(MEMORY_INIT_SIZE),
global: GlobalSection {
entries: bumpalo::vec![in arena; stack_pointer],
},
export: ExportSection { entries: exports },
start: (),
element: (),
code: CodeSection::new(arena),
data: DataSection {
segments: bumpalo::vec![in arena; const_segment],
},
linking: LinkingSection::new(arena),
relocations: RelocationSection::new(arena, "reloc.CODE"),
};
module.data.segments.push(literal_segment);
WasmBackend {
env,
// Module-level data
module,
next_string_addr: CONSTANTS_MIN_ADDR,
layout_ids,
constant_sym_index_map: MutMap::default(),
proc_symbols,
layout_ids: LayoutIds::default(),
strings: MutMap::default(),
linker_symbols,
// Function-level data
block_depth: 0,
joinpoint_label_map: MutMap::default(),
code_builder: CodeBuilder::new(env.arena),
storage: Storage::new(env.arena),
code_builder: CodeBuilder::new(arena),
storage: Storage::new(arena),
}
}
@ -421,8 +436,8 @@ impl<'a> WasmBackend<'a> {
self.storage.load_symbols(&mut self.code_builder, wasm_args);
// Index of the called function in the code section
// TODO: account for inlined functions when we start doing that (remember we emit procs out of order)
// Index of the called function in the code section. Assumes all functions end up in the binary.
// (We may decide to keep all procs even if calls are inlined, in case platform calls them)
let func_index = match self.proc_symbols.iter().position(|s| s == func_sym) {
Some(i) => i as u32,
None => {
@ -435,7 +450,8 @@ impl<'a> WasmBackend<'a> {
};
// Index of the function's name in the symbol table
let symbol_index = func_index; // TODO: update this when we add other things to the symbol table
// Same as the function index since those are the first symbols we add
let symbol_index = func_index;
self.code_builder.call(
func_index,
@ -484,14 +500,14 @@ impl<'a> WasmBackend<'a> {
}
StoredValue::StackMemory { location, .. } => match lit {
Literal::Str(s) => {
Literal::Str(string) => {
let (local_id, offset) =
location.local_and_offset(self.storage.stack_frame_pointer);
let len = s.len();
let len = string.len();
if len < 8 {
let mut stack_mem_bytes = [0; 8];
stack_mem_bytes[0..len].clone_from_slice(s.as_bytes());
stack_mem_bytes[0..len].clone_from_slice(string.as_bytes());
stack_mem_bytes[7] = 0x80 | (len as u8);
let str_as_int = i64::from_le_bytes(stack_mem_bytes);
@ -499,42 +515,8 @@ impl<'a> WasmBackend<'a> {
self.code_builder.i64_const(str_as_int);
self.code_builder.i64_store(Align::Bytes4, offset);
} else {
let (linker_sym_index, elements_addr) = match self.strings.get(s) {
// We've seen this string before. Retrieve its address from linker data.
Some((sym_index, DataSymbol::Defined { segment_offset, .. })) => {
let addr = segment_offset + CONSTANTS_MIN_ADDR;
(*sym_index, addr)
}
_ => {
// Store the string in the data section, to be initialised on module load.
// `elements` field points to that constant data, not the heap
self.module.data.segments[0]
.init
.extend_from_slice(s.as_bytes());
let addr = self.next_string_addr;
self.next_string_addr += len as u32;
// Generate linker info
let sym_index =
(self.proc_symbols.len() + self.strings.len()) as u32;
let name = self
.layout_ids
.get(sym, layout)
.to_symbol_string(sym, &self.env.interns);
let linker_symbol = DataSymbol::Defined {
name,
segment_index: 0,
segment_offset: addr - CONSTANTS_MIN_ADDR,
size: len as u32,
};
self.strings.insert(*s, (sym_index, linker_symbol));
(sym_index, addr)
}
};
let (linker_sym_index, elements_addr) =
self.lookup_string_constant(string, sym, layout);
self.code_builder.get_local(local_id);
self.code_builder.insert_memory_relocation(linker_sym_index);
@ -542,7 +524,7 @@ impl<'a> WasmBackend<'a> {
self.code_builder.i32_store(Align::Bytes4, offset);
self.code_builder.get_local(local_id);
self.code_builder.i32_const(len as i32);
self.code_builder.i32_const(string.len() as i32);
self.code_builder.i32_store(Align::Bytes4, offset + 4);
};
}
@ -558,6 +540,63 @@ impl<'a> WasmBackend<'a> {
Ok(())
}
/// Find the constant-data entry for `string`, creating it on first use.
///
/// Returns `(linker symbol index, memory address)`: the position of the string's
/// entry in `linker_symbols`, and the address of its bytes, computed as the offset
/// inside the constants segment plus `CONST_SEGMENT_BASE_ADDR`.
fn lookup_string_constant(
    &mut self,
    string: &'a str,
    sym: Symbol,
    layout: &Layout<'a>,
) -> (u32, u32) {
    if let Some(&known_index) = self.constant_sym_index_map.get(string) {
        // Already emitted: the linker symbol records where the bytes sit
        // inside the constants data segment.
        let existing = &self.linker_symbols[known_index];
        return match existing {
            SymInfo::Data(DataSymbol::Defined { segment_offset, .. }) => {
                (known_index as u32, *segment_offset + CONST_SEGMENT_BASE_ADDR)
            }
            _ => unreachable!(
                "Compiler bug: Invalid linker symbol info for string {:?}:\n{:?}",
                string, existing
            ),
        };
    }

    // First occurrence: append the bytes to the constants segment, which is
    // loaded on module instantiation. The RocStr `elements` field will point
    // at this constant data rather than at the heap.
    let segment_bytes = &mut self.module.data.segments[CONST_SEGMENT_INDEX].init;
    let segment_offset = segment_bytes.len() as u32;
    let elements_addr = segment_offset + CONST_SEGMENT_BASE_ADDR;
    segment_bytes.extend_from_slice(string.as_bytes());

    // The linker symbol is named after whichever usage we happen to see first.
    let name = self
        .layout_ids
        .get(sym, layout)
        .to_symbol_string(sym, &self.env.interns);

    // The new symbol goes at the end of the table, so its index is the current length.
    let new_index = self.linker_symbols.len();
    self.linker_symbols.push(SymInfo::Data(DataSymbol::Defined {
        flags: 0,
        name,
        segment_index: CONST_SEGMENT_INDEX as u32,
        segment_offset,
        size: string.len() as u32,
    }));
    self.constant_sym_index_map.insert(string, new_index);

    (new_index as u32, elements_addr)
}
fn create_struct(
&mut self,
sym: &Symbol,

View File

@ -9,6 +9,7 @@ use bumpalo::{self, collections::Vec, Bump};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{Proc, ProcLayout};
use roc_mono::layout::LayoutIds;
use crate::backend::WasmBackend;
use crate::wasm_module::{
@ -42,45 +43,44 @@ pub fn build_module_help<'a>(
env: &'a Env,
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<WasmModule<'a>, String> {
let proc_symbols = Vec::from_iter_in(procedures.keys().map(|(sym, _)| *sym), env.arena);
let mut backend = WasmBackend::new(env, proc_symbols);
let mut layout_ids = LayoutIds::default();
let mut proc_symbols = Vec::with_capacity_in(procedures.len(), env.arena);
let mut linker_symbols = Vec::with_capacity_in(procedures.len() * 2, env.arena);
let mut exports = Vec::with_capacity_in(procedures.len(), env.arena);
let mut symbol_table_entries = Vec::with_capacity_in(procedures.len(), env.arena);
// Collect the symbols & names for the procedures
for (i, (sym, layout)) in procedures.keys().enumerate() {
proc_symbols.push(*sym);
for (i, ((sym, layout), proc)) in procedures.into_iter().enumerate() {
let proc_name = backend
.layout_ids
.get(proc.name, &proc.ret_layout)
.to_symbol_string(proc.name, &env.interns);
symbol_table_entries.push(SymInfo::for_function(i as u32, proc_name));
let fn_name = layout_ids
.get_toplevel(*sym, layout)
.to_symbol_string(*sym, &env.interns);
backend.build_proc(proc, sym)?;
if env.exposed_to_host.contains(&sym) {
let fn_name = backend
.layout_ids
.get_toplevel(sym, &layout)
.to_symbol_string(sym, &env.interns);
backend.module.export.entries.push(Export {
name: fn_name,
if env.exposed_to_host.contains(sym) {
exports.push(Export {
name: fn_name.clone(),
ty: ExportType::Func,
index: i as u32,
});
}
let linker_sym = SymInfo::for_function(i as u32, fn_name);
linker_symbols.push(linker_sym);
}
let mut data_symbols_and_indices = Vec::from_iter_in(backend.strings.values(), env.arena);
data_symbols_and_indices.sort_by_key(|(idx, _)| *idx);
let data_syminfos = data_symbols_and_indices
.iter()
.map(|(_, data_symbol)| SymInfo::for_data(data_symbol.clone()));
symbol_table_entries.extend(data_syminfos);
// Main loop: Build the Wasm module
let (mut module, linker_symbols) = {
let mut backend = WasmBackend::new(env, layout_ids, proc_symbols, linker_symbols, exports);
for ((sym, _), proc) in procedures.into_iter() {
backend.build_proc(proc, sym)?;
}
(backend.module, backend.linker_symbols)
};
let symbol_table = LinkingSubSection::SymbolTable(symbol_table_entries);
backend.module.linking.subsections.push(symbol_table);
let symbol_table = LinkingSubSection::SymbolTable(linker_symbols);
module.linking.subsections.push(symbol_table);
Ok(backend.module)
Ok(module)
}
pub struct CopyMemoryConfig {

View File

@ -283,35 +283,47 @@ pub const WASM_SYM_EXPLICIT_NAME: u32 = 0x40; // use the name from the symbol ta
/// linker output, regardless of whether it is used by the program.
pub const WASM_SYM_NO_STRIP: u32 = 0x80;
#[derive(Clone, Debug)]
pub enum WasmObjectSymbol {
Defined { index: u32, name: String },
Imported { index: u32 },
Defined {
flags: u32,
index: u32,
name: String,
},
Imported {
flags: u32,
index: u32,
},
}
impl Serialize for WasmObjectSymbol {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
match self {
Self::Defined { index, name } => {
Self::Defined { flags, index, name } => {
buffer.encode_u32(*flags);
buffer.encode_u32(*index);
buffer.encode_u32(name.len() as u32);
buffer.append_slice(name.as_bytes());
}
Self::Imported { index } => {
Self::Imported { flags, index } => {
buffer.encode_u32(*flags);
buffer.encode_u32(*index);
}
}
}
}
#[derive(Clone)]
/// Linker symbol-table payload for a data symbol.
///
/// Within each variant the fields are declared in the same order that
/// `Serialize for DataSymbol` writes them to the buffer.
#[derive(Clone, Debug)]
pub enum DataSymbol {
    /// A data symbol whose bytes live in one of this module's data segments.
    Defined {
        /// Symbol flags (presumably a combination of the `WASM_SYM_*` constants — confirm).
        flags: u32,
        /// Symbol name; serialized as its byte length followed by its bytes.
        name: String,
        /// Index of the data segment that holds the symbol's bytes.
        segment_index: u32,
        /// Offset of the symbol's bytes from the start of that segment.
        segment_offset: u32,
        /// Size of the symbol's data in bytes.
        size: u32,
    },
    /// A data symbol that carries only flags and a name, with no segment location.
    /// NOTE(review): presumably one defined outside this module — confirm.
    Imported {
        /// Symbol flags (presumably a combination of the `WASM_SYM_*` constants — confirm).
        flags: u32,
        /// Symbol name; serialized as its byte length followed by its bytes.
        name: String,
    },
}
@ -320,18 +332,21 @@ impl Serialize for DataSymbol {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
match self {
Self::Defined {
flags,
name,
segment_index,
segment_offset,
size,
} => {
buffer.encode_u32(*flags);
buffer.encode_u32(name.len() as u32);
buffer.append_slice(name.as_bytes());
buffer.encode_u32(*segment_index);
buffer.encode_u32(*segment_offset);
buffer.encode_u32(*size);
}
Self::Imported { name } => {
Self::Imported { flags, name } => {
buffer.encode_u32(*flags);
buffer.encode_u32(name.len() as u32);
buffer.append_slice(name.as_bytes());
}
@ -340,63 +355,56 @@ impl Serialize for DataSymbol {
}
/// section index (not section id!)
#[derive(Clone, Copy, Debug)]
pub struct SectionIndex(u32);
/// Linker symbol-table payload for a section symbol (carried by `SymInfo::Section`).
#[derive(Clone, Debug)]
pub struct SectionSymbol {
    /// Symbol flags; written first when the symbol is serialized.
    flags: u32,
    /// Written second when the symbol is serialized.
    /// NOTE(review): presumably the section *index* (not the section id) — confirm.
    index: u32,
}
pub enum SymInfoFields {
impl Serialize for SectionSymbol {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
self.flags.serialize(buffer);
self.index.serialize(buffer);
}
}
#[derive(Clone, Debug)]
pub enum SymInfo {
Function(WasmObjectSymbol),
Data(DataSymbol),
Global(WasmObjectSymbol),
Section(SectionIndex),
Section(SectionSymbol),
Event(WasmObjectSymbol),
Table(WasmObjectSymbol),
}
pub struct SymInfo {
flags: u32,
info: SymInfoFields,
}
impl SymInfo {
pub fn for_function(wasm_function_index: u32, name: String) -> Self {
let linking_symbol = WasmObjectSymbol::Defined {
SymInfo::Function(WasmObjectSymbol::Defined {
flags: 0,
index: wasm_function_index,
name,
};
SymInfo {
flags: 0,
info: SymInfoFields::Function(linking_symbol),
}
}
pub fn for_data(data_symbol: DataSymbol) -> Self {
SymInfo {
flags: 0,
info: SymInfoFields::Data(data_symbol),
}
})
}
}
impl Serialize for SymInfo {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(match self.info {
SymInfoFields::Function(_) => 0,
SymInfoFields::Data(_) => 1,
SymInfoFields::Global(_) => 2,
SymInfoFields::Section(_) => 3,
SymInfoFields::Event(_) => 4,
SymInfoFields::Table(_) => 5,
buffer.append_u8(match self {
Self::Function(_) => 0,
Self::Data(_) => 1,
Self::Global(_) => 2,
Self::Section(_) => 3,
Self::Event(_) => 4,
Self::Table(_) => 5,
});
buffer.encode_u32(self.flags);
match &self.info {
SymInfoFields::Function(x) => x.serialize(buffer),
SymInfoFields::Data(x) => x.serialize(buffer),
SymInfoFields::Global(x) => x.serialize(buffer),
SymInfoFields::Section(SectionIndex(x)) => {
buffer.encode_u32(*x);
}
SymInfoFields::Event(x) => x.serialize(buffer),
SymInfoFields::Table(x) => x.serialize(buffer),
match self {
Self::Function(x) => x.serialize(buffer),
Self::Data(x) => x.serialize(buffer),
Self::Global(x) => x.serialize(buffer),
Self::Section(x) => x.serialize(buffer),
Self::Event(x) => x.serialize(buffer),
Self::Table(x) => x.serialize(buffer),
};
}
}

View File

@ -107,9 +107,9 @@ pub struct TypeSection<'a> {
}
impl<'a> TypeSection<'a> {
pub fn new(arena: &'a Bump) -> Self {
pub fn new(arena: &'a Bump, capacity: usize) -> Self {
TypeSection {
signatures: Vec::with_capacity_in(8, arena),
signatures: Vec::with_capacity_in(capacity, arena),
}
}
@ -229,13 +229,12 @@ pub struct FunctionSection<'a> {
}
impl<'a> FunctionSection<'a> {
pub fn new(arena: &'a Bump) -> Self {
pub fn new(arena: &'a Bump, capacity: usize) -> Self {
FunctionSection {
signature_indices: Vec::with_capacity_in(8, arena),
signature_indices: Vec::with_capacity_in(capacity, arena),
}
}
}
impl<'a> Serialize for FunctionSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
serialize_vector_section(buffer, SectionId::Function, &self.signature_indices);
@ -373,14 +372,6 @@ pub struct GlobalSection<'a> {
pub entries: Vec<'a, Global>,
}
impl<'a> GlobalSection<'a> {
pub fn new(arena: &'a Bump) -> Self {
GlobalSection {
entries: Vec::with_capacity_in(1, arena),
}
}
}
impl<'a> Serialize for GlobalSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
serialize_vector_section(buffer, SectionId::Global, &self.entries);
@ -507,12 +498,6 @@ pub struct DataSection<'a> {
}
impl<'a> DataSection<'a> {
pub fn new(arena: &'a Bump) -> Self {
DataSection {
segments: Vec::with_capacity_in(1, arena),
}
}
fn is_empty(&self) -> bool {
self.segments.is_empty() || self.segments.iter().all(|seg| seg.init.is_empty())
}