diff --git a/compiler/gen_wasm/src/wasm_module/dead_code.rs b/compiler/gen_wasm/src/wasm_module/dead_code.rs index c950484cf1..0b18173ed0 100644 --- a/compiler/gen_wasm/src/wasm_module/dead_code.rs +++ b/compiler/gen_wasm/src/wasm_module/dead_code.rs @@ -2,7 +2,8 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; use super::opcodes::OpCode; -use super::serialize::{parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes}; +use super::parse::{parse_u32_or_panic, SkipBytes}; +use super::serialize::{SerialBuffer, Serialize}; use super::CodeBuilder; /* diff --git a/compiler/gen_wasm/src/wasm_module/mod.rs b/compiler/gen_wasm/src/wasm_module/mod.rs index 63a2cf5468..428e682006 100644 --- a/compiler/gen_wasm/src/wasm_module/mod.rs +++ b/compiler/gen_wasm/src/wasm_module/mod.rs @@ -4,6 +4,7 @@ pub mod linking; pub mod opcodes; pub mod sections; pub mod serialize; +pub mod parse; use bumpalo::{collections::Vec, Bump}; pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState}; diff --git a/compiler/gen_wasm/src/wasm_module/opcodes.rs b/compiler/gen_wasm/src/wasm_module/opcodes.rs index 04f779dfbf..059912b78b 100644 --- a/compiler/gen_wasm/src/wasm_module/opcodes.rs +++ b/compiler/gen_wasm/src/wasm_module/opcodes.rs @@ -1,6 +1,6 @@ use roc_error_macros::internal_error; -use super::serialize::{parse_u32_or_panic, SkipBytes}; +use super::parse::{parse_u32_or_panic, SkipBytes}; #[repr(u8)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] diff --git a/compiler/gen_wasm/src/wasm_module/parse.rs b/compiler/gen_wasm/src/wasm_module/parse.rs new file mode 100644 index 0000000000..2185e2477a --- /dev/null +++ b/compiler/gen_wasm/src/wasm_module/parse.rs @@ -0,0 +1,91 @@ +use super::serialize::MAX_SIZE_ENCODED_U32; +use bumpalo::Bump; +use roc_error_macros::internal_error; + +/// Skip over serialized bytes for a type +/// This may, or may not, require looking at the byte values +pub trait SkipBytes { + fn skip_bytes(bytes: &[u8], cursor: &mut usize); +} + +/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format +/// Return the integer itself and the offset after it ends +pub fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> { + let mut value = 0; + let mut shift = 0; + for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() { + value += ((byte & 0x7f) as u32) << shift; + if (byte & 0x80) == 0 { + return Ok((value, i + 1)); + } + shift += 7; + } + Err(format!( + "Failed to decode u32 as LEB-128 from bytes: {:2x?}", + std::vec::Vec::from_iter(bytes.iter().take(MAX_SIZE_ENCODED_U32)) + )) +} + +pub fn parse_u32_or_panic(bytes: &[u8], cursor: &mut usize) -> u32 { + let (value, len) = decode_u32(&bytes[*cursor..]).unwrap_or_else(|e| internal_error!("{}", e)); + *cursor += len; + value +} + +pub fn parse_string_bytes<'a>(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> &'a [u8] { + let len = parse_u32_or_panic(bytes, cursor); + let end = *cursor + len as usize; + let bytes: &[u8] = &bytes[*cursor..end]; + let copy = arena.alloc_slice_copy(bytes); + *cursor = end; + copy +} + +impl SkipBytes for u32 { + fn skip_bytes(bytes: &[u8], cursor: &mut usize) { + const MAX_LEN: usize = 5; + for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) { + if byte & 0x80 == 0 { + *cursor = i + 1; + return; + } + } + internal_error!("Invalid LEB encoding"); + } +} + +impl SkipBytes for u64 { + fn skip_bytes(bytes: &[u8], cursor: &mut usize) { + const MAX_LEN: usize = 10; + for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) { + if byte & 0x80 == 0 { + *cursor = i + 1; + return; + } + } + internal_error!("Invalid LEB encoding"); + } +} + +impl SkipBytes for u8 { + fn skip_bytes(_bytes: &[u8], cursor: &mut usize) { + *cursor += 1; + } +} + +/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str! +impl SkipBytes for String { + fn skip_bytes(bytes: &[u8], cursor: &mut usize) { + let len = parse_u32_or_panic(bytes, cursor); + + if false { + let str_bytes = &bytes[*cursor..(*cursor + len as usize)]; + println!( + "Skipping string {:?}", + std::str::from_utf8(str_bytes).unwrap() + ); + } + + *cursor += len as usize; + } +} diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index 8a4dc106a1..ac33ce12df 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -11,10 +11,8 @@ use super::dead_code::{ }; use super::linking::RelocationEntry; use super::opcodes::OpCode; -use super::serialize::{ - parse_string_bytes, parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes, - MAX_SIZE_ENCODED_U32, -}; +use super::parse::{parse_string_bytes, parse_u32_or_panic, SkipBytes}; +use super::serialize::{SerialBuffer, Serialize, MAX_SIZE_ENCODED_U32}; use super::{CodeBuilder, ValueType}; /******************************************************************* diff --git a/compiler/gen_wasm/src/wasm_module/serialize.rs b/compiler/gen_wasm/src/wasm_module/serialize.rs index fc59a69d37..1531447dc3 100644 --- a/compiler/gen_wasm/src/wasm_module/serialize.rs +++ b/compiler/gen_wasm/src/wasm_module/serialize.rs @@ -1,7 +1,5 @@ -use std::{fmt::Debug, iter::FromIterator}; - -use bumpalo::{collections::vec::Vec, Bump}; -use roc_error_macros::internal_error; +use bumpalo::collections::vec::Vec; +use std::fmt::Debug; /// In the WebAssembly binary format, all integers are variable-length encoded (using LEB-128) /// A small value like 3 or 100 is encoded as 1 byte. The value 128 needs 2 bytes, etc. @@ -238,97 +236,10 @@ impl<'a> SerialBuffer for Vec<'a, u8> { } } -/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format -/// Return the integer itself and the offset after it ends -pub fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> { - let mut value = 0; - let mut shift = 0; - for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() { - value += ((byte & 0x7f) as u32) << shift; - if (byte & 0x80) == 0 { - return Ok((value, i + 1)); - } - shift += 7; - } - Err(format!( - "Failed to decode u32 as LEB-128 from bytes: {:2x?}", - std::vec::Vec::from_iter(bytes.iter().take(MAX_SIZE_ENCODED_U32)) - )) -} - -pub fn parse_u32_or_panic(bytes: &[u8], cursor: &mut usize) -> u32 { - let (value, len) = decode_u32(&bytes[*cursor..]).unwrap_or_else(|e| internal_error!("{}", e)); - *cursor += len; - value -} - -pub fn parse_string_bytes<'a>(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> &'a [u8] { - let len = parse_u32_or_panic(bytes, cursor); - let end = *cursor + len as usize; - let bytes: &[u8] = &bytes[*cursor..end]; - let copy = arena.alloc_slice_copy(bytes); - *cursor = end; - copy -} - -/// Skip over serialized bytes for a type -/// This may, or may not, require looking at the byte values -pub trait SkipBytes { - fn skip_bytes(bytes: &[u8], cursor: &mut usize); -} - -impl SkipBytes for u32 { - fn skip_bytes(bytes: &[u8], cursor: &mut usize) { - const MAX_LEN: usize = 5; - for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) { - if byte & 0x80 == 0 { - *cursor = i + 1; - return; - } - } - internal_error!("Invalid LEB encoding"); - } -} - -impl SkipBytes for u64 { - fn skip_bytes(bytes: &[u8], cursor: &mut usize) { - const MAX_LEN: usize = 10; - for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) { - if byte & 0x80 == 0 { - *cursor = i + 1; - return; - } - } - internal_error!("Invalid LEB encoding"); - } -} - -impl SkipBytes for u8 { - fn skip_bytes(_bytes: &[u8], cursor: &mut usize) { - *cursor += 1; - } -} - -/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str! -impl SkipBytes for String { - fn skip_bytes(bytes: &[u8], cursor: &mut usize) { - let len = parse_u32_or_panic(bytes, cursor); - - if false { - let str_bytes = &bytes[*cursor..(*cursor + len as usize)]; - println!( - "Skipping string {:?}", - std::str::from_utf8(str_bytes).unwrap() - ); - } - - *cursor += len as usize; - } -} - #[cfg(test)] mod tests { use super::*; + use crate::wasm_module::parse::{decode_u32, parse_u32_or_panic}; use bumpalo::{self, collections::Vec, Bump}; fn help_u32(arena: &Bump, value: u32) -> Vec<'_, u8> {