Support expr when use typescript_custom_section attribute (#3901)

* feat: support expr when use `typescript_custom_section` attribute
* test: update typescript-tests
* chore: update "APPROVED_SCHEMA_FILE_HASH" of shared lib
* chore: cargo fmt
* Apply suggestions from code review include fix typo and adding whitespace to ensure consistent code style. Co-authored-by: Liam Murphy <liampm32@gmail.com>
* chore(backend): fix typo
* chore(typescript-tests): rename custom_section_type to custom_section_type.d.ts
* fix(backend/codegen): change method flat_slices to flat_byte_slices in order to avoid unsafe code
* fix(backend/codegen): use dynamic wasm_bindgen path as import entry
* chore(typescript-tests): ignore *.d.ts file when test
* chore(shared/program): rename CustomSection to LitOrExpr
* doc(shared/lib): add doc for program[typescript_custom_sections], explain why there are different types of LitOrExpr when encoding and decoding
* chore(shared): update "APPROVED_SCHEMA_FILE_HASH" of shared lib
* doc: add docs for method encode_u32_to_fixed_len_bytes
* refactor(backend/encode): rename method shared_typescript_custom_section to shared_lit_or_expr
* refactor(__rt): extract methods from nested mod directly into `__rt`
* chore: cargo fmt
* chore(__rt): remove unnecessary TODO
* chore(changelog): update change log Support Expressions when using the `typescript_custom_section` attribute[#3901]
* Update CHANGELOG.md
2024-11-30 12:33:54 +03:00 · 2024-04-10 14:43:15 +08:00 · 2024-04-10 14:43:15 +08:00 · 7d0b11c80e
commit 7d0b11c80e
parent d25a68eaa7
14 changed files with 220 additions and 41 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,9 @@

 ### Added

+* Added support for arbitrary expressions when using `#[wasm_bindgen(typescript_custom_section)]`.
+  [#3901](https://github.com/rustwasm/wasm-bindgen/pull/3901)
+
 * Implement `From<NonNull<T>>` for `JsValue`.
  [#3877](https://github.com/rustwasm/wasm-bindgen/pull/3877)

--- a/crates/backend/src/ast.rs
+++ b/crates/backend/src/ast.rs
@ -24,7 +24,7 @@ pub struct Program {
    /// rust structs
    pub structs: Vec<Struct>,
    /// custom typescript sections to be included in the definition file
-    pub typescript_custom_sections: Vec<String>,
+    pub typescript_custom_sections: Vec<LitOrExpr>,
    /// Inline JS snippets
    pub inline_js: Vec<String>,
    /// Path to wasm_bindgen
@ -460,6 +460,16 @@ pub enum TypeLocation {
    ExportRet,
 }

+/// An enum representing either a literal value (`Lit`) or an expression (`syn::Expr`).
+#[cfg_attr(feature = "extra-traits", derive(Debug))]
+#[derive(Clone)]
+pub enum LitOrExpr {
+    /// Represents an expression that needs to be evaluated before it can be encoded
+    Expr(syn::Expr),
+    /// Represents a literal string that can be directly encoded.
+    Lit(String),
+}
+
 impl Export {
    /// Mangles a rust -> javascript export, so that the created Ident will be unique over function
    /// name and class name, if the function belongs to a javascript class.
--- a/crates/backend/src/codegen.rs
+++ b/crates/backend/src/codegen.rs
@ -1,5 +1,6 @@
 use crate::ast;
 use crate::encode;
+use crate::encode::EncodeChunk;
 use crate::Diagnostic;
 use once_cell::sync::Lazy;
 use proc_macro2::{Ident, Literal, Span, TokenStream};
@ -94,17 +95,51 @@ impl TryToTokens for ast::Program {
            shared::SCHEMA_VERSION,
            shared::version()
        );
-        let encoded = encode::encode(self)?;
-        let len = prefix_json.len() as u32;
-        let bytes = [
-            &len.to_le_bytes()[..],
-            prefix_json.as_bytes(),
-            &encoded.custom_section,
-        ]
-        .concat();

-        let generated_static_length = bytes.len();
-        let generated_static_value = syn::LitByteStr::new(&bytes, Span::call_site());
+        let wasm_bindgen = &self.wasm_bindgen;
+
+        let encoded = encode::encode(self)?;
+
+        let encoded_chunks: Vec<_> = encoded
+            .custom_section
+            .iter()
+            .map(|chunk| match chunk {
+                EncodeChunk::EncodedBuf(buf) => {
+                    let buf = syn::LitByteStr::new(buf.as_slice(), Span::call_site());
+                    quote!(#buf)
+                }
+                EncodeChunk::StrExpr(expr) => {
+                    // encode expr as str
+                    quote!({
+                        use #wasm_bindgen::__rt::{encode_u32_to_fixed_len_bytes};
+                        const _STR_EXPR: &str = #expr;
+                        const _STR_EXPR_BYTES: &[u8] = _STR_EXPR.as_bytes();
+                        const _STR_EXPR_BYTES_LEN: usize = _STR_EXPR_BYTES.len() + 5;
+                        const _ENCODED_BYTES: [u8; _STR_EXPR_BYTES_LEN] = flat_byte_slices([
+                            &encode_u32_to_fixed_len_bytes(_STR_EXPR_BYTES.len() as u32),
+                            _STR_EXPR_BYTES,
+                        ]);
+                        &_ENCODED_BYTES
+                    })
+                }
+            })
+            .collect();
+
+        let chunk_len = encoded_chunks.len();
+
+        // Concatenate all encoded chunks and write their total length in front of them.
+        let encode_bytes = quote!({
+            const _CHUNK_SLICES: [&[u8]; #chunk_len] = [
+                #(#encoded_chunks,)*
+            ];
+            const _CHUNK_LEN: usize = flat_len(_CHUNK_SLICES);
+            const _CHUNKS: [u8; _CHUNK_LEN] = flat_byte_slices(_CHUNK_SLICES);
+
+            const _LEN_BYTES: [u8; 4] = (_CHUNK_LEN as u32).to_le_bytes();
+            const _ENCODED_BYTES_LEN: usize = _CHUNK_LEN + 4;
+            const _ENCODED_BYTES: [u8; _ENCODED_BYTES_LEN] = flat_byte_slices([&_LEN_BYTES, &_CHUNKS]);
+            &_ENCODED_BYTES
+        });

        // We already consumed the contents of included files when generating
        // the custom section, but we want to make sure that updates to the
@ -119,15 +154,26 @@ impl TryToTokens for ast::Program {
            quote! { include_str!(#file) }
        });

+        let len = prefix_json.len() as u32;
+        let prefix_json_bytes = [&len.to_le_bytes()[..], prefix_json.as_bytes()].concat();
+        let prefix_json_bytes = syn::LitByteStr::new(&prefix_json_bytes, Span::call_site());
+
        (quote! {
            #[cfg(target_arch = "wasm32")]
            #[automatically_derived]
            const _: () = {
+                use #wasm_bindgen::__rt::{flat_len, flat_byte_slices};
+
                static _INCLUDED_FILES: &[&str] = &[#(#file_dependencies),*];

+                const _ENCODED_BYTES: &[u8] = #encode_bytes;
+                const _PREFIX_JSON_BYTES: &[u8] = #prefix_json_bytes;
+                const _ENCODED_BYTES_LEN: usize  = _ENCODED_BYTES.len();
+                const _PREFIX_JSON_BYTES_LEN: usize =  _PREFIX_JSON_BYTES.len();
+                const _LEN: usize = _PREFIX_JSON_BYTES_LEN + _ENCODED_BYTES_LEN;
+
                #[link_section = "__wasm_bindgen_unstable"]
-                pub static _GENERATED: [u8; #generated_static_length] =
-                    *#generated_static_value;
+                static _GENERATED: [u8; _LEN] = flat_byte_slices([_PREFIX_JSON_BYTES, _ENCODED_BYTES]);
            };
        })
        .to_tokens(tokens);
--- a/crates/backend/src/encode.rs
+++ b/crates/backend/src/encode.rs
@ -9,8 +9,15 @@ use std::path::PathBuf;
 use crate::ast;
 use crate::Diagnostic;

+#[derive(Clone)]
+pub enum EncodeChunk {
+    EncodedBuf(Vec<u8>),
+    StrExpr(syn::Expr),
+    // TODO: support more expression types.
+}
+
 pub struct EncodeResult {
-    pub custom_section: Vec<u8>,
+    pub custom_section: Vec<EncodeChunk>,
    pub included_files: Vec<PathBuf>,
 }

@ -144,7 +151,7 @@ fn shared_program<'a>(
        typescript_custom_sections: prog
            .typescript_custom_sections
            .iter()
-            .map(|x| -> &'a str { x })
+            .map(|x| shared_lit_or_expr(x, intern))
            .collect(),
        linked_modules: prog
            .linked_modules
@ -253,6 +260,13 @@ fn shared_import<'a>(i: &'a ast::Import, intern: &'a Interner) -> Result<Import<
    })
 }

+fn shared_lit_or_expr<'a>(i: &'a ast::LitOrExpr, _intern: &'a Interner) -> LitOrExpr<'a> {
+    match i {
+        ast::LitOrExpr::Lit(lit) => LitOrExpr::Lit(lit),
+        ast::LitOrExpr::Expr(expr) => LitOrExpr::Expr(expr),
+    }
+}
+
 fn shared_linked_module<'a>(
    name: &str,
    i: &'a ast::ImportModule,
@ -358,24 +372,48 @@ trait Encode {
 }

 struct Encoder {
-    dst: Vec<u8>,
+    dst: Vec<EncodeChunk>,
+}
+
+enum LitOrExpr<'a> {
+    Expr(&'a syn::Expr),
+    Lit(&'a str),
+}
+
+impl<'a> Encode for LitOrExpr<'a> {
+    fn encode(&self, dst: &mut Encoder) {
+        match self {
+            LitOrExpr::Expr(expr) => {
+                dst.dst.push(EncodeChunk::StrExpr((*expr).clone()));
+            }
+            LitOrExpr::Lit(s) => s.encode(dst),
+        }
+    }
 }

 impl Encoder {
    fn new() -> Encoder {
-        Encoder {
-            dst: vec![0, 0, 0, 0],
-        }
+        Encoder { dst: vec![] }
    }

-    fn finish(mut self) -> Vec<u8> {
-        let len = (self.dst.len() - 4) as u32;
-        self.dst[..4].copy_from_slice(&len.to_le_bytes()[..]);
+    fn finish(self) -> Vec<EncodeChunk> {
        self.dst
    }

    fn byte(&mut self, byte: u8) {
-        self.dst.push(byte);
+        if let Some(EncodeChunk::EncodedBuf(buf)) = self.dst.last_mut() {
+            buf.push(byte);
+        } else {
+            self.dst.push(EncodeChunk::EncodedBuf(vec![byte]));
+        }
+    }
+
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        if let Some(EncodeChunk::EncodedBuf(buf)) = self.dst.last_mut() {
+            buf.extend_from_slice(slice);
+        } else {
+            self.dst.push(EncodeChunk::EncodedBuf(slice.to_owned()));
+        }
    }
 }

@ -407,7 +445,7 @@ impl Encode for usize {
 impl<'a> Encode for &'a [u8] {
    fn encode(&self, dst: &mut Encoder) {
        self.len().encode(dst);
-        dst.dst.extend_from_slice(self);
+        dst.extend_from_slice(self);
    }
 }

--- a/crates/cli-support/src/decode.rs
+++ b/crates/cli-support/src/decode.rs
@ -1,4 +1,4 @@
-use std::str;
+use std::{ops::Deref, str};

 pub trait Decode<'src>: Sized {
    fn decode(data: &mut &'src [u8]) -> Self;
@ -10,12 +10,30 @@ pub trait Decode<'src>: Sized {
    }
 }

+pub struct LitOrExpr<'src> {
+    str: &'src str,
+}
+
 fn get(b: &mut &[u8]) -> u8 {
    let r = b[0];
    *b = &b[1..];
    r
 }

+impl<'src> Deref for LitOrExpr<'src> {
+    type Target = str;
+    fn deref(&self) -> &Self::Target {
+        self.str
+    }
+}
+
+impl<'src> Decode<'src> for LitOrExpr<'src> {
+    fn decode(data: &mut &'src [u8]) -> Self {
+        let str = <&'src str>::decode(data);
+        Self { str }
+    }
+}
+
 impl<'src> Decode<'src> for bool {
    fn decode(data: &mut &'src [u8]) -> Self {
        get(data) != 0
--- a/crates/cli-support/src/wit/mod.rs
+++ b/crates/cli-support/src/wit/mod.rs
@ -455,7 +455,7 @@ impl<'a> Context<'a> {
            self.struct_(struct_)?;
        }
        for section in typescript_custom_sections {
-            self.aux.extra_typescript.push_str(section);
+            self.aux.extra_typescript.push_str(&section);
            self.aux.extra_typescript.push_str("\n\n");
        }
        self.aux
@ -1536,14 +1536,14 @@ version of wasm-bindgen that uses a different bindgen format than this binary:
     this binary schema version: {my_version}

 Currently the bindgen format is unstable enough that these two schema versions
-must exactly match. You can accomplish this by either updating this binary or 
+must exactly match. You can accomplish this by either updating this binary or
 the wasm-bindgen dependency in the Rust project.

 You should be able to update the wasm-bindgen dependency with:

    cargo update -p wasm-bindgen --precise {my_version}

-don't forget to recompile your wasm file! Alternatively, you can update the 
+don't forget to recompile your wasm file! Alternatively, you can update the
 binary with:

    cargo install -f wasm-bindgen-cli --version {their_version}
--- a/crates/macro-support/src/parser.rs
+++ b/crates/macro-support/src/parser.rs
@ -1404,17 +1404,17 @@ impl MacroParse<BindgenAttrs> for syn::ItemConst {
            bail_span!(self, "#[wasm_bindgen] will not work on constants unless you are defining a #[wasm_bindgen(typescript_custom_section)].");
        }

-        match get_expr(&self.expr) {
+        let typescript_custom_section = match get_expr(&self.expr) {
            syn::Expr::Lit(syn::ExprLit {
                lit: syn::Lit::Str(litstr),
                ..
-            }) => {
-                program.typescript_custom_sections.push(litstr.value());
-            }
-            expr => {
-                bail_span!(expr, "Expected a string literal to be used with #[wasm_bindgen(typescript_custom_section)].");
-            }
-        }
+            }) => ast::LitOrExpr::Lit(litstr.value()),
+            expr => ast::LitOrExpr::Expr(expr.clone()),
+        };
+
+        program
+            .typescript_custom_sections
+            .push(typescript_custom_section);

        opts.check_used();

--- a/crates/shared/src/lib.rs
+++ b/crates/shared/src/lib.rs
@ -17,7 +17,11 @@ macro_rules! shared_api {
            enums: Vec<Enum<'a>>,
            imports: Vec<Import<'a>>,
            structs: Vec<Struct<'a>>,
-            typescript_custom_sections: Vec<&'a str>,
+            // NOTE: Originally, typescript_custom_sections were just plain strings.
+            // However, an expression can only be evaluated to a string during compilation.
+            // So when encoding, LitOrExpr has two variants: a literal that can be encoded directly, and an expression that is evaluated to a string during compilation.
+            // When decoding, LitOrExpr is always decoded as a string.
+            typescript_custom_sections: Vec<LitOrExpr<'a>>,
            local_modules: Vec<LocalModule<'a>>,
            inline_js: Vec<&'a str>,
            unique_crate_identifier: &'a str,
--- a/crates/shared/src/schema_hash_approval.rs
+++ b/crates/shared/src/schema_hash_approval.rs
@ -8,7 +8,7 @@
 // If the schema in this library has changed then:
 //  1. Bump the version in `crates/shared/Cargo.toml`
 //  2. Change the `SCHEMA_VERSION` in this library to this new Cargo.toml version
-const APPROVED_SCHEMA_FILE_HASH: &str = "11955579329744078753";
+const APPROVED_SCHEMA_FILE_HASH: &str = "10197913343580353876";

 #[test]
 fn schema_version() {
--- a/crates/typescript-tests/jest.config.cjs
+++ b/crates/typescript-tests/jest.config.cjs
@ -4,7 +4,7 @@ module.exports = {
  testEnvironment: 'node',
  extensionsToTreatAsEsm: [".ts"],
  verbose: true,
-  testMatch: ['**/src/*.ts'],
+  testMatch: ['**/src/*.ts', '!**/src/*.d.ts'],
  // TODO: migrate all test files and remove this
  testPathIgnorePatterns: [
    ".*/src/custom_section.ts$",
--- a/crates/typescript-tests/src/custom_section.rs
+++ b/crates/typescript-tests/src/custom_section.rs
@ -5,6 +5,13 @@ const TS_INTERFACE_EXPORT: &'static str = r"
  interface Height { height: number; }
 ";

+#[wasm_bindgen(typescript_custom_section)]
+const TS_INTERFACE_EXPORT1: &'static str = include_str!("./custom_section_types.d.ts");
+
+const TS_INTERFACE_EXPORT2: &str = "interface Person2 { height: number; }";
+#[wasm_bindgen(typescript_custom_section)]
+const _: &str = TS_INTERFACE_EXPORT2;
+
 #[wasm_bindgen]
 pub struct Person {
    pub height: u32,
--- a/crates/typescript-tests/src/custom_section.ts
+++ b/crates/typescript-tests/src/custom_section.ts
@ -1,3 +1,7 @@
-import * as wbg from '../pkg/typescript_tests';
+import * as wbg from "../pkg/typescript_tests"

-const height: wbg.Height = new wbg.Person();
+const height: wbg.Height = new wbg.Person()
+
+const height1: wbg.Person1 = new wbg.Person()
+
+const height2: wbg.Person2 = new wbg.Person()
--- a/crates/typescript-tests/src/custom_section_types.d.ts
+++ b/crates/typescript-tests/src/custom_section_types.d.ts
@ -0,0 +1,3 @@
+interface Person1 {
+  height: number
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1820,6 +1820,52 @@ pub mod __rt {
        }
    }

+    pub const fn flat_len<const SIZE: usize, T>(slices: [&[T]; SIZE]) -> usize {
+        let mut len = 0;
+        let mut i = 0;
+        while i < slices.len() {
+            len += slices[i].len();
+            i += 1;
+        }
+        len
+    }
+
+    pub const fn flat_byte_slices<const RESULT_LEN: usize, const SIZE: usize>(
+        slices: [&[u8]; SIZE],
+    ) -> [u8; RESULT_LEN] {
+        let mut result = [0; RESULT_LEN];
+
+        let mut slice_index = 0;
+        let mut result_offset = 0;
+
+        while slice_index < slices.len() {
+            let mut i = 0;
+            let slice = slices[slice_index];
+            while i < slice.len() {
+                result[result_offset] = slice[i];
+                i += 1;
+                result_offset += 1;
+            }
+            slice_index += 1;
+        }
+
+        result
+    }
+
+    // NOTE: This function is used to encode a u32 as a variable-length integer at compile time.
+    // Generally speaking, the length of an encoded variable-length integer depends on the size of the integer,
+    // but the maximum length (5 bytes) is always used here to keep the compile-time code simple.
+    pub const fn encode_u32_to_fixed_len_bytes(value: u32) -> [u8; 5] {
+        let mut result: [u8; 5] = [0; 5];
+        let mut i = 0;
+        while i < 4 {
+            result[i] = ((value >> (7 * i)) | 0x80) as u8;
+            i += 1;
+        }
+        result[4] = (value >> (7 * 4)) as u8;
+        result
+    }
+
    if_std! {
        use core::mem;
        use std::boxed::Box;