From d220ed7a23a92378f1a94c8983f6ccbecd622c64 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Fri, 5 Nov 2021 11:55:14 +0000 Subject: [PATCH 1/4] Refactor module section serialization --- compiler/gen_wasm/src/wasm_module/sections.rs | 134 ++++++++++++------ 1 file changed, 87 insertions(+), 47 deletions(-) diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index a31a1e32ef..da43361ed1 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -526,11 +526,38 @@ impl Serialize for DataSection<'_> { } } -fn write_data_count_section<'a, T: SerialBuffer>(buffer: &mut T, data_section: &DataSection<'a>) { - if !data_section.is_empty() { - let header_indices = write_section_header(buffer, SectionId::DataCount); - buffer.encode_u32(data_section.segments.len() as u32); - update_section_size(buffer, header_indices); +/******************************************************************* + * + * Data Count section + * + * Pre-declares the number of segments in the Data section. + * This helps the runtime to validate the module in a single pass. + * The order of sections is DataCount -> Code -> Data + * + *******************************************************************/ + +struct DataCountSection { + count: u32, +} + +impl DataCountSection { + fn new(data_section: &DataSection<'_>) -> Self { + let count = data_section + .segments + .iter() + .filter(|seg| !seg.init.is_empty()) + .count() as u32; + DataCountSection { count } + } +} + +impl Serialize for DataCountSection { + fn serialize(&self, buffer: &mut T) { + if self.count > 0 { + let header_indices = write_section_header(buffer, SectionId::DataCount); + buffer.encode_u32(self.count); + update_section_size(buffer, header_indices); + } } } @@ -541,6 +568,34 @@ fn write_data_count_section<'a, T: SerialBuffer>(buffer: &mut T, data_section: & * https://webassembly.github.io/spec/core/binary/modules.html * *******************************************************************/ + +/// Helper struct to count non-empty sections. +/// Needed to generate linking data, which refers to target sections by index. +struct SectionCounter { + buffer_size: usize, + section_index: u32, +} + +impl SectionCounter { + /// Update the section counter if buffer size increased since last call + fn update(&mut self, buffer: &mut SB) { + let new_size = buffer.size(); + if new_size > self.buffer_size { + self.section_index += 1; + self.buffer_size = new_size; + } + } + + fn serialize_and_count( + &mut self, + buffer: &mut SB, + section: &S, + ) { + section.serialize(buffer); + self.update(buffer); + } +} + pub struct WasmModule<'a> { pub types: TypeSection<'a>, pub import: ImportSection<'a>, @@ -576,58 +631,43 @@ impl<'a> WasmModule<'a> { buffer.append_slice("asm".as_bytes()); buffer.write_unencoded_u32(Self::WASM_VERSION); - let mut index: u32 = 0; - let mut prev_size = buffer.size(); + // Keep track of (non-empty) section indices for linking + let mut counter = SectionCounter { + buffer_size: buffer.size(), + section_index: 0, + }; - self.types.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + counter.serialize_and_count(buffer, &self.types); + counter.serialize_and_count(buffer, &self.import); + counter.serialize_and_count(buffer, &self.function); + counter.serialize_and_count(buffer, &self.table); + counter.serialize_and_count(buffer, &self.memory); + counter.serialize_and_count(buffer, &self.global); + counter.serialize_and_count(buffer, &self.export); + counter.serialize_and_count(buffer, &self.start); + counter.serialize_and_count(buffer, &self.element); - self.import.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + // Data Count section forward-declares the size of the Data section + // so that Code section can be validated in one pass + let data_count_section = DataCountSection::new(&self.data); + counter.serialize_and_count(buffer, &data_count_section); - self.function.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.table.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.memory.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.global.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.export.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.start.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.element.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - // Data count section has no independent data, just helps the runtime - // to validate code references to the data section in a single pass - write_data_count_section(buffer, &self.data); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - self.reloc_code.target_section_index = Some(index); + // Code section mutates its linker relocation data during serialization + let code_section_index = counter.section_index; self.code .serialize_mut(buffer, &mut self.reloc_code.entries); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + counter.update(buffer); + // Data section is the last one before linking, so we can stop counting + let data_section_index = counter.section_index; self.data.serialize(buffer); - self.reloc_data.target_section_index = Some(index); self.linking.serialize(buffer); + + self.reloc_code.target_section_index = Some(code_section_index); self.reloc_code.serialize(buffer); + + self.reloc_data.target_section_index = Some(data_section_index); self.reloc_data.serialize(buffer); } } - -fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) { - if size > *prev_size { - *index += 1; - *prev_size = size; - } -} From 01b47a2ec72f32a19a5e9708889115ef74c2b90c Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Fri, 5 Nov 2021 11:59:25 +0000 Subject: [PATCH 2/4] Add inline directives to ensure unit placeholders are optimised away --- compiler/gen_wasm/src/wasm_module/sections.rs | 2 ++ compiler/gen_wasm/src/wasm_module/serialize.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index da43361ed1..46c07d888e 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -578,6 +578,7 @@ struct SectionCounter { impl SectionCounter { /// Update the section counter if buffer size increased since last call + #[inline] fn update(&mut self, buffer: &mut SB) { let new_size = buffer.size(); if new_size > self.buffer_size { @@ -586,6 +587,7 @@ impl SectionCounter { } } + #[inline] fn serialize_and_count( &mut self, buffer: &mut SB, diff --git a/compiler/gen_wasm/src/wasm_module/serialize.rs b/compiler/gen_wasm/src/wasm_module/serialize.rs index 2f051553db..9445d8c30b 100644 --- a/compiler/gen_wasm/src/wasm_module/serialize.rs +++ b/compiler/gen_wasm/src/wasm_module/serialize.rs @@ -27,6 +27,7 @@ impl Serialize for u32 { // Unit is used as a placeholder in parts of the Wasm spec we don't use yet impl Serialize for () { + #[inline(always)] fn serialize(&self, _buffer: &mut T) {} } From e5a0738681ac18d8c5874c8153fbfa03bba6f774 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Fri, 5 Nov 2021 12:25:27 +0000 Subject: [PATCH 3/4] Rename CodeSection serializer, since it doesn't actually mutate --- compiler/gen_wasm/src/wasm_module/sections.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index 46c07d888e..d6f6b4d458 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -452,15 +452,15 @@ impl<'a> CodeSection<'a> { } /// Serialize the code builders for all functions, and get code relocations with final offsets - pub fn serialize_mut( - &mut self, + pub fn serialize_with_relocs( + &self, buffer: &mut T, relocations: &mut Vec<'a, RelocationEntry>, ) { let header_indices = write_section_header(buffer, SectionId::Code); buffer.encode_u32(self.code_builders.len() as u32); - for code_builder in self.code_builders.iter_mut() { + for code_builder in self.code_builders.iter() { code_builder.serialize_with_relocs(buffer, relocations, header_indices.body_index); } @@ -657,7 +657,7 @@ impl<'a> WasmModule<'a> { // Code section mutates its linker relocation data during serialization let code_section_index = counter.section_index; self.code - .serialize_mut(buffer, &mut self.reloc_code.entries); + .serialize_with_relocs(buffer, &mut self.reloc_code.entries); counter.update(buffer); // Data section is the last one before linking, so we can stop counting From 5f998f3707ad13d475a7824eb2dc488f36e98823 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Fri, 5 Nov 2021 12:26:05 +0000 Subject: [PATCH 4/4] Minor refactor in CodeBuilder --- compiler/gen_wasm/src/wasm_module/code_builder.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/compiler/gen_wasm/src/wasm_module/code_builder.rs b/compiler/gen_wasm/src/wasm_module/code_builder.rs index 38561a986e..28d4d7b6cd 100644 --- a/compiler/gen_wasm/src/wasm_module/code_builder.rs +++ b/compiler/gen_wasm/src/wasm_module/code_builder.rs @@ -378,7 +378,7 @@ impl<'a> CodeBuilder<'a> { } /// Serialize all byte vectors in the right order - /// Also update relocation offsets relative to the provided base offset in the buffer + /// Also update relocation offsets relative to the base offset (code section body start) pub fn serialize_with_relocs( &self, buffer: &mut T, @@ -395,7 +395,10 @@ impl<'a> CodeBuilder<'a> { loop { let next_insert = insert_iter.next(); - let next_pos = next_insert.map(|i| i.at).unwrap_or_else(|| self.code.len()); + let next_pos = match next_insert { + Some(Insertion { at, .. }) => *at, + None => self.code.len(), + }; // Relocation offset needs to be an index into the body of the code section, but // at this point it is an index into self.code. Need to adjust for all previous functions