From bc66078251f39228e7a82ef9e6e695f2bcfee092 Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Wed, 6 Jul 2022 19:46:42 -0700 Subject: [PATCH] Parser: Transpile Rust AST types to Java types (#3555) Implement generation of Java AST types from the Rust AST type definitions, with support for deserializing in Java syntax trees created in Rust. ### New Libraries #### `enso-reflect` Implements a `#[derive(Reflect)]` macro to enable runtime analysis of datatypes. Macro interface includes helper attributes; **the Rust types and the `reflect` attributes applied to them fully determine the Java types** ultimately produced (by `enso-metamodel`). This is the most important API, as it is used in the subject crates (`enso-parser`, and dependencies with types used in the AST). [Module docs](https://github.com/enso-org/enso/blob/wip/kw/parser/ast-transpiler/lib/rust/reflect/macros/src/lib.rs). #### `enso-metamodel` Provides data models for data models in Rust/Java/Meta (a highly-abstracted language-independent model--I have referred to it before as the "generic representation", but that was an overloaded term). The high-level interface consists of operations on data models, and between them. For example, the only operations needed by [the binary that drives datatype transpilation](https://github.com/enso-org/enso/blob/wip/kw/parser/ast-transpiler/lib/rust/parser/generate-java/src/main.rs) are: `rust::to_meta`, `java::from_meta`, `java::transform::optional_to_null`, `java::to_syntax`. The low-level interface consists of direct usage of the datatypes; this is used by [the module that implements some serialization overrides](https://github.com/enso-org/enso/blob/wip/kw/parser/ast-transpiler/lib/rust/parser/generate-java/src/serialization.rs) (so that the Java interface to `Code` references can produce `String`s on demand based on serialized offset/length pairs). 
The serialization override mechanism is based on customizing, not replacing, the generated deserialization methods, so as to be as robust as possible to changes in the Rust source or in the transpilation process. ### Important Notes - Rust/Java serialization is exhaustively tested for structural compatibility. A function [`metamodel::meta::serialization::testcases`](https://github.com/enso-org/enso/blob/wip/kw/parser/ast-transpiler/lib/rust/metamodel/src/meta/serialization.rs) uses `reflect`-derived data to generate serialized representations of ASTs to use as test cases. Its should-accept cases cover every type a tree can contain; it also produces a representative set of should-reject cases. A Rust `#[test]` confirms that these cases are accepted/rejected as expected, and generated Java tests (see Binaries below) check the generated Java deserialization code against the same test cases. - Deserializing `Code` is untested. The mechanism is in place (in Rust, we serialize only the offset/length of the `Cow`; in Java, during deserialization we obtain a context object holding a buffer for all string data; the accessor generated in Java uses the buffer and the offset/length to return `String`s), but it will be easier to test once we have implemented actually parsing something and instantiating the `Cow`s with source code. - `#[tagged_enum]` [now supports](https://github.com/enso-org/enso/blob/wip/kw/parser/ast-transpiler/lib/rust/shapely/macros/src/tagged_enum.rs#L36-L51) control over what is done with container-level attributes; they can be applied to the container and variants (default), only to the container, or only to variants. - Generation of `sealed` classes is supported, but currently disabled by `TARGET_VERSION` in `metamodel::java::syntax` so that tests don't require Java 15 to run. (The same logic is run either way; there is a shallow difference in output.) 
### Binaries The `enso-parser-generate-java` crate defines several binaries: - `enso-parser-generate-java`: Performs the transpilation; after integration, this will be invoked by the build script. - `java-tests`: Generates the Java code that tests format deserialization; after integration this command will be invoked by the build script, and its Java output compiled and run during testing. - `graph-rust`/`graph-meta`/`graph-java`: Produce GraphViz representations of data models in different typesystems; these are for developing and understanding model transformations. Until integration, a **script regenerates the Java and runs the format tests: `./tools/parser_generate_java.sh`**. The generated code can be browsed in `target/generated_java`. --- .github/CODEOWNERS | 8 +- Cargo.lock | 46 +- Cargo.toml | 1 + build-config.yaml | 2 +- lib/rust/metamodel/Cargo.toml | 17 + lib/rust/metamodel/src/data_structures.rs | 216 ++++++ lib/rust/metamodel/src/graphviz.rs | 122 ++++ lib/rust/metamodel/src/java/bincode.rs | 336 +++++++++ lib/rust/metamodel/src/java/from_meta.rs | 170 +++++ lib/rust/metamodel/src/java/graphviz.rs | 48 ++ lib/rust/metamodel/src/java/implementation.rs | 336 +++++++++ lib/rust/metamodel/src/java/mod.rs | 250 +++++++ lib/rust/metamodel/src/java/syntax.rs | 239 +++++++ lib/rust/metamodel/src/java/transform.rs | 43 ++ lib/rust/metamodel/src/lib.rs | 75 ++ lib/rust/metamodel/src/meta/graphviz.rs | 71 ++ lib/rust/metamodel/src/meta/mod.rs | 410 +++++++++++ lib/rust/metamodel/src/meta/serialization.rs | 649 ++++++++++++++++++ lib/rust/metamodel/src/meta/transform.rs | 118 ++++ lib/rust/metamodel/src/rust/graphviz.rs | 73 ++ lib/rust/metamodel/src/rust/mod.rs | 364 ++++++++++ lib/rust/metamodel/src/rust/to_meta.rs | 287 ++++++++ lib/rust/parser/Cargo.toml | 10 +- lib/rust/parser/generate-java/Cargo.toml | 17 + .../enso/syntax2/serialization/Either.java | 16 + .../serialization/FormatException.java | 11 + .../enso/syntax2/serialization/Message.java | 53 ++ 
lib/rust/parser/generate-java/run.sh | 15 + .../generate-java/src/bin/graph-java.rs | 30 + .../generate-java/src/bin/graph-meta.rs | 25 + .../generate-java/src/bin/graph-rust.rs | 22 + .../generate-java/src/bin/java-tests.rs | 81 +++ lib/rust/parser/generate-java/src/lib.rs | 83 +++ lib/rust/parser/generate-java/src/main.rs | 55 ++ .../parser/generate-java/src/serialization.rs | 103 +++ lib/rust/parser/src/main.rs | 4 + lib/rust/parser/src/serialization.rs | 91 +++ lib/rust/parser/src/source/code.rs | 5 +- lib/rust/parser/src/source/span.rs | 14 +- lib/rust/parser/src/syntax/token.rs | 9 +- lib/rust/parser/src/syntax/tree.rs | 22 +- lib/rust/prelude/Cargo.toml | 1 + lib/rust/prelude/src/data/non_empty_vec.rs | 5 +- lib/rust/prelude/src/lib.rs | 2 + lib/rust/reflect/Cargo.toml | 13 + lib/rust/reflect/macros/Cargo.toml | 18 + lib/rust/reflect/macros/src/analyze.rs | 313 +++++++++ lib/rust/reflect/macros/src/lib.rs | 267 +++++++ lib/rust/reflect/macros/src/runtime.rs | 190 +++++ lib/rust/reflect/src/lib.rs | 308 +++++++++ lib/rust/reflect/tests/test.rs | 39 ++ lib/rust/shapely/macros/src/tagged_enum.rs | 104 ++- lib/rust/types/Cargo.toml | 2 + lib/rust/types/src/unit2.rs | 9 +- 54 files changed, 5787 insertions(+), 31 deletions(-) create mode 100644 lib/rust/metamodel/Cargo.toml create mode 100644 lib/rust/metamodel/src/data_structures.rs create mode 100644 lib/rust/metamodel/src/graphviz.rs create mode 100644 lib/rust/metamodel/src/java/bincode.rs create mode 100644 lib/rust/metamodel/src/java/from_meta.rs create mode 100644 lib/rust/metamodel/src/java/graphviz.rs create mode 100644 lib/rust/metamodel/src/java/implementation.rs create mode 100644 lib/rust/metamodel/src/java/mod.rs create mode 100644 lib/rust/metamodel/src/java/syntax.rs create mode 100644 lib/rust/metamodel/src/java/transform.rs create mode 100644 lib/rust/metamodel/src/lib.rs create mode 100644 lib/rust/metamodel/src/meta/graphviz.rs create mode 100644 lib/rust/metamodel/src/meta/mod.rs create mode 
100644 lib/rust/metamodel/src/meta/serialization.rs create mode 100644 lib/rust/metamodel/src/meta/transform.rs create mode 100644 lib/rust/metamodel/src/rust/graphviz.rs create mode 100644 lib/rust/metamodel/src/rust/mod.rs create mode 100644 lib/rust/metamodel/src/rust/to_meta.rs create mode 100644 lib/rust/parser/generate-java/Cargo.toml create mode 100644 lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java create mode 100644 lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java create mode 100644 lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java create mode 100755 lib/rust/parser/generate-java/run.sh create mode 100644 lib/rust/parser/generate-java/src/bin/graph-java.rs create mode 100644 lib/rust/parser/generate-java/src/bin/graph-meta.rs create mode 100644 lib/rust/parser/generate-java/src/bin/graph-rust.rs create mode 100644 lib/rust/parser/generate-java/src/bin/java-tests.rs create mode 100644 lib/rust/parser/generate-java/src/lib.rs create mode 100644 lib/rust/parser/generate-java/src/main.rs create mode 100644 lib/rust/parser/generate-java/src/serialization.rs create mode 100644 lib/rust/parser/src/serialization.rs create mode 100644 lib/rust/reflect/Cargo.toml create mode 100644 lib/rust/reflect/macros/Cargo.toml create mode 100644 lib/rust/reflect/macros/src/analyze.rs create mode 100644 lib/rust/reflect/macros/src/lib.rs create mode 100644 lib/rust/reflect/macros/src/runtime.rs create mode 100644 lib/rust/reflect/src/lib.rs create mode 100644 lib/rust/reflect/tests/test.rs diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5258c3ffb3..7bad69198d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,12 +7,12 @@ CHANGELOG.md # Rust Libraries and Related Files rust-toolchain.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon rustfmt.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon -Cargo.lock @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo -Cargo.toml 
@MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo -/lib/rust/ @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo +Cargo.lock @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw +Cargo.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw +/lib/rust/ @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw /lib/rust/ensogl/ @MichaelMauderer @wdanilo @farmaazon /lib/rust/profiler/ @kazcw @MichaelMauderer @wdanilo -/integration-test/ @MichaelMauderer @wdanilo @farmaazon +/integration-test/ @MichaelMauderer @wdanilo @farmaazon @kazcw /tools/build-performance/ @kazcw @mwu-tow @wdanilo # Scala Libraries diff --git a/Cargo.lock b/Cargo.lock index 9c7aaad2d9..23b2b286d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2026,6 +2026,15 @@ dependencies = [ "wasm-bindgen-test", ] +[[package]] +name = "enso-metamodel" +version = "0.1.0" +dependencies = [ + "bincode", + "derivative", + "derive_more", +] + [[package]] name = "enso-optics" version = "0.2.0" @@ -2037,12 +2046,26 @@ dependencies = [ name = "enso-parser" version = "0.1.0" dependencies = [ + "bincode", "enso-data-structures", "enso-parser-syntax-tree-builder", "enso-parser-syntax-tree-visitor", "enso-prelude", + "enso-reflect", "enso-shapely-macros", "enso-types", + "serde", +] + +[[package]] +name = "enso-parser-generate-java" +version = "0.1.0" +dependencies = [ + "derivative", + "enso-metamodel", + "enso-parser", + "enso-prelude", + "enso-reflect", ] [[package]] @@ -2078,6 +2101,7 @@ dependencies = [ "derivative", "derive_more", "enclose", + "enso-reflect", "enso-shapely", "failure", "futures 0.3.21", @@ -2157,6 +2181,24 @@ dependencies = [ "syn", ] +[[package]] +name = "enso-reflect" +version = "0.1.0" +dependencies = [ + "derivative", + "enso-metamodel", + "enso-reflect-macros", +] + +[[package]] +name = "enso-reflect-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "enso-shapely" version = "0.2.0" @@ -2215,9 +2257,11 @@ dependencies = [ name = 
"enso-types" version = "0.1.0" dependencies = [ + "enso-reflect", "nalgebra 0.26.2", "num-traits", "paste 1.0.7", + "serde", ] [[package]] @@ -3603,7 +3647,7 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5617e92fc2f2501c3e2bc6ce547cad841adba2bae5b921c7e52510beca6d084c" dependencies = [ - "base64 0.10.1", + "base64 0.13.0", "bytes 1.1.0", "http", "httpdate 0.3.2", diff --git a/Cargo.toml b/Cargo.toml index 2b9d303e84..2ae03f58fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "lib/rust/*", "lib/rust/parser/src/syntax/tree/visitor", "lib/rust/parser/src/syntax/tree/builder", + "lib/rust/parser/generate-java", "lib/rust/profiler/data", "integration-test" ] diff --git a/build-config.yaml b/build-config.yaml index 232e317073..829a342702 100644 --- a/build-config.yaml +++ b/build-config.yaml @@ -1,6 +1,6 @@ # Options intended to be common for all developers. -wasm-size-limit: 4.99 MiB +wasm-size-limit: 5.05 MiB required-versions: cargo-watch: ^8.1.1 diff --git a/lib/rust/metamodel/Cargo.toml b/lib/rust/metamodel/Cargo.toml new file mode 100644 index 0000000000..9bfe40cfa5 --- /dev/null +++ b/lib/rust/metamodel/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "enso-metamodel" +version = "0.1.0" +edition = "2021" +authors = ["Enso Team "] + +[dependencies] +derivative = "2.2" +derive_more = "0.99" + +[dev-dependencies] +bincode = "1.3" + +[features] +graphviz = [] +java = [] +rust = [] diff --git a/lib/rust/metamodel/src/data_structures.rs b/lib/rust/metamodel/src/data_structures.rs new file mode 100644 index 0000000000..fa1437d559 --- /dev/null +++ b/lib/rust/metamodel/src/data_structures.rs @@ -0,0 +1,216 @@ +//! Data structures used in the crate implementation. + +use vecmap::*; + +use derivative::Derivative; +use std::marker::PhantomData; + + + +// =========== +// === IDs === +// =========== + +/// A globally unique identifier, with a type-tag. 
+#[derive(Derivative)] +#[derivative(Copy(bound = ""))] +#[derivative(Clone(bound = ""))] +#[derivative(Debug(bound = ""))] +#[derivative(Eq(bound = ""))] +#[derivative(PartialEq(bound = ""))] +#[derivative(Ord(bound = ""))] +#[derivative(PartialOrd(bound = ""))] +#[derivative(Hash(bound = ""))] +pub struct Id { + value: u32, + marker: PhantomData<*const T>, +} + +impl Id { + /// Assign a new ID. + pub fn new() -> Self { + use std::sync::atomic; + static NEXT_ID: atomic::AtomicU32 = atomic::AtomicU32::new(0); + let value = NEXT_ID.fetch_add(1, atomic::Ordering::Relaxed); + let marker = Default::default(); + Self { value, marker } + } +} + +impl Default for Id { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Display for Id { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.value) + } +} + + + +// ========================== +// === Densely-stored map === +// ========================== + +/// Densely-stored map from internally-produced keys. +/// +/// This is similar in implementation to `enso_data_structures::OptVec`, however there is a core +/// design difference: `OptVec` is a self-keying map created to be *more efficient* than the +/// standard map types; this is a self-keying map created to be *safer* than the standard map types, +/// and also efficient for the expected workload. +/// +/// `OptVec` uses a freelist to reuse keys and remain dense during mixed remove/create workloads; +/// `VecMap` statically disallows key reuse--values can be explicitly mutated, but once removed +/// cannot be rebound. This improves the failure mode of broken references: Rather than likely +/// become apparently-valid references to the wrong values, attempts to access removed elements will +/// fail, and be detected. +#[derive(Debug, Derivative, Clone)] +#[derivative(Default(bound = ""))] +pub struct VecMap { + data: Vec>, +} + +impl VecMap { + /// Obtain a new key, with no bound value. 
+ pub fn unbound_key(&mut self) -> Key { + let id = Key::new(self.data.len()); + self.data.push(None); + id + } + + /// Set the value bound to a key. + pub fn bind(&mut self, key: Key, value: T) -> Key { + assert!(self.data[key.index].is_none()); + self.data[key.index] = Some(value); + Key::new(key.index) + } + + /// Add a value; return its newly-assigned key. + pub fn insert(&mut self, value: T) -> Key { + let key = self.unbound_key(); + self.bind(key, value) + } + + /// Remove a value from the graph; its ID will be permanently unoccupied. + pub fn remove(&mut self, key: Key) -> T { + self.data[key.index].take().unwrap() + } + + /// Get a reference to a value, if present. + pub fn get(&self, key: Key) -> Option<&T> { + self.data[key.index].as_ref() + } + + /// Get a mutable reference to a value, if present. + pub fn get_mut(&mut self, key: Key) -> Option<&mut T> { + self.data[key.index].as_mut() + } + + /// Iterate all key with values set. + pub fn keys(&self) -> impl Iterator> + '_ { + self.data.iter().enumerate().filter_map(|(i, val)| val.as_ref().map(|_| Key::new(i))) + } + + /// Iterate values. + pub fn values(&self) -> impl Iterator { + self.data.iter().filter_map(|val| val.as_ref()) + } + + /// Iterate values mutably. + pub fn values_mut(&mut self) -> impl Iterator { + self.data.iter_mut().filter_map(|val| val.as_mut()) + } + + /// Iterate entries. + pub fn iter<'s>(&'s self) -> impl Iterator, &'s T)> { + let map_key = |(i, val): (usize, &'s Option)| val.as_ref().map(|val| (Key::new(i), val)); + self.data.iter().enumerate().filter_map(map_key) + } + + /// Iterate entries mutably. 
+ pub fn iter_mut<'s>(&'s mut self) -> impl Iterator, &'s mut T)> { + let map_key = + |(i, val): (usize, &'s mut Option)| val.as_mut().map(|val| (Key::new(i), val)); + self.data.iter_mut().enumerate().filter_map(map_key) + } +} + +impl std::ops::Index> for VecMap { + type Output = T; + fn index(&self, key: Key) -> &Self::Output { + self.get(key).unwrap() + } +} +impl std::ops::Index<&Key> for VecMap { + type Output = T; + fn index(&self, key: &Key) -> &Self::Output { + &self[*key] + } +} +impl std::ops::IndexMut> for VecMap { + fn index_mut(&mut self, key: Key) -> &mut Self::Output { + self.get_mut(key).unwrap() + } +} +impl std::ops::IndexMut<&Key> for VecMap { + fn index_mut(&mut self, key: &Key) -> &mut Self::Output { + &mut self[*key] + } +} + +/// Types used by `VecMap`. +pub mod vecmap { + use super::*; + + /// Marker indicating a key that may or may not currently be bound. + #[derive(Copy, Clone, Debug)] + pub struct MaybeBound; + /// Marker indicating a key that is not yet bound. + #[allow(missing_copy_implementations)] // Type is one-shot promise. + #[derive(Debug)] + pub struct Unbound; + + /// Identifies a location within a `VecMap`. + #[derive(Derivative)] + #[derivative(Copy(bound = "State: Copy"))] + #[derivative(Clone(bound = "State: Clone"))] + #[derivative(Debug(bound = ""))] + #[derivative(Eq(bound = ""))] + #[derivative(PartialEq(bound = ""))] + #[derivative(Ord(bound = ""))] + #[derivative(PartialOrd(bound = ""))] + #[derivative(Hash(bound = ""))] + pub struct Key { + pub(super) index: usize, + #[derivative(Debug = "ignore")] + marker: PhantomData<*const T>, + #[derivative(Debug = "ignore")] + state: PhantomData<*const State>, + } + + impl Key { + pub(super) fn new(index: usize) -> Self { + let marker = Default::default(); + let state = Default::default(); + Self { index, marker, state } + } + } + + /// Identifies a location within a `VecMap` that does not yet have a value bound. 
+ pub type UnboundKey = Key; + + impl From<&'_ Key> for Key { + fn from(key: &Key) -> Self { + Self::new(key.index) + } + } + + impl std::fmt::Display for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.index) + } + } +} diff --git a/lib/rust/metamodel/src/graphviz.rs b/lib/rust/metamodel/src/graphviz.rs new file mode 100644 index 0000000000..681119ebc1 --- /dev/null +++ b/lib/rust/metamodel/src/graphviz.rs @@ -0,0 +1,122 @@ +//! Rendering graphical representations of data models with GraphViz. + +use std::collections::BTreeSet; + + + +/// Hide data fields that don't reference any types outside the builtin set. +const PRUNE_PRIMITIVE_LEAFS: bool = true; + + + +// ============= +// === Graph === +// ============= + +/// A GraphViz graph of relationships between types. +#[derive(Default, Debug)] +pub struct Graph { + pub(crate) nodes: std::collections::HashMap, + pub(crate) edges: Vec<(String, String, EdgeType)>, +} + +#[derive(Debug)] +pub(crate) struct Node { + pub label: String, + pub node_type: NodeType, + pub primitive: bool, +} + +#[derive(Debug)] +pub(crate) enum NodeType { + Struct, + Enum, + Variant, + AbstractStruct, +} + +#[derive(Debug)] +pub(crate) enum EdgeType { + Variant, + Field, + OptionalField, + Subtype, +} + +impl std::fmt::Display for Graph { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let variant_color = "#7EA3CC"; + let primitive_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#262626"), + format!("fontcolor={:?}", "white"), + ]; + let enum_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#255C99"), + format!("fontcolor={:?}", "white"), + ]; + let variant_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", variant_color), + format!("shape=oval"), + ]; + let struct_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#B3001B"), + format!("fontcolor={:?}", "white"), + ]; + let 
abstract_struct_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#6D1321"), + format!("fontcolor={:?}", "white"), + ]; + let variant_edge_attrs = vec![format!("color={:?}", variant_color)]; + let field_edge_attrs = vec![]; + let optional_field_edge_attrs = vec![format!("style=dashed")]; + let subtype_edge_attrs = vec![format!("arrowhead=dot")]; + writeln!(f, "digraph refs {{")?; + let non_leafs: BTreeSet<_> = self.edges.iter().map(|(x, _, _)| x).cloned().collect(); + let mut pruned = BTreeSet::new(); + for (id, node) in &self.nodes { + let mut attrs; + if node.primitive { + if PRUNE_PRIMITIVE_LEAFS && !non_leafs.contains(id) { + pruned.insert(id.clone()); + continue; + } + attrs = primitive_attrs.clone(); + } else { + match node.node_type { + NodeType::Struct => attrs = struct_attrs.clone(), + NodeType::Enum => attrs = enum_attrs.clone(), + NodeType::Variant => attrs = variant_attrs.clone(), + NodeType::AbstractStruct => attrs = abstract_struct_attrs.clone(), + } + } + attrs.push(format!("label={:?}", node.label)); + let shape = match node.node_type { + NodeType::Enum => "diamond", + NodeType::Variant => "oval", + NodeType::Struct => "box", + NodeType::AbstractStruct => "diamond", + }; + attrs.push(format!("shape={}", shape)); + writeln!(f, "{:?} [{}];", id, attrs.join(","))?; + } + for (x, y, edgetype) in &self.edges { + if pruned.contains(x) || pruned.contains(y) { + continue; + } + let attrs = match edgetype { + EdgeType::Variant => &variant_edge_attrs, + EdgeType::Field => &field_edge_attrs, + EdgeType::OptionalField => &optional_field_edge_attrs, + EdgeType::Subtype => &subtype_edge_attrs, + }; + writeln!(f, "{:?} -> {:?} [{}];", x, y, attrs.join(","))?; + } + writeln!(f, "}}")?; + Ok(()) + } +} diff --git a/lib/rust/metamodel/src/java/bincode.rs b/lib/rust/metamodel/src/java/bincode.rs new file mode 100644 index 0000000000..e7b1e38215 --- /dev/null +++ b/lib/rust/metamodel/src/java/bincode.rs @@ -0,0 +1,336 @@ +//! 
Derivation of bincode[1] serialization for Java types. +//! [1]: https://github.com/bincode-org/bincode +//! +//! # Compatibility +//! +//! The generated deserialization methods support the same format as Rust's `serde-bincode` for an +//! analogous tree of types, with the following configuration: +//! ``` +//! # let data = &[0u8; 0]; +//! use bincode::Options; +//! let options = bincode::DefaultOptions::new().with_fixint_encoding(); +//! let serialized = options.serialize(data); +//! ``` +//! +//! # Nullability +//! +//! The [`crate::java`] model distinguishes between non-null fields, and fields that may be null. +//! If a field is *not* non-null, or if a type is wrapped in a `java.util.Optional`, whether it's +//! present is encoded compatibly with Rust's `Option` type (i.e. with a 1-byte discriminant). +//! +//! # Basic types +//! +//! Basic types (e.g. integer types, `boolean`, `String`) are encoded compatibly with the +//! corresponding types in Rust. +//! +//! # Sequence types +//! +//! A sequence (e.g. as encoded for a Rust `Vec`) is represented idiomatically in Java: +//! internally its implementation type is `java.util.ArrayList`, but in public interfaces it is +//! exposed as a `java.util.List`. +//! +//! # `Result` +//! +//! In Java, an `Either` type is used to represent a `Result` as used in Rust. `Either` +//! is similar to `Result`, with the main difference being that the `Ok` case is the `Right` value +//! of an `Either`, and the `Err` case is the `Left`. +//! +//! # Overrides +//! +//! The default deserialization can be replaced or modified per-field; see the +//! [`DeserializerBuilder`] interface for details. +//! +//! # Deserialization errors +//! +//! The only runtime error possible is `FormatException`, defined in the Java `serialization` +//! support package; it is a `RuntimeException` rather than a checked exception, as deserialization +//! is extensively tested to succeed for any types that may be serialized in `Rust`. 
+ +use crate::java::implementation::*; +use crate::java::*; + +use derivative::Derivative; +use std::fmt::Write; + + + +// ========================== +// === Derive Deserialize === +// ========================== + +/// Supports configuring deserialization for a type. +#[derive(Derivative)] +#[derivative(Debug)] +pub struct DeserializerBuilder { + root: ClassId, + #[derivative(Debug = "ignore")] + materializers: BTreeMap, + #[derivative(Debug = "ignore")] + mappers: BTreeMap, + support: String, + either_type: String, +} + +impl DeserializerBuilder { + /// Create a deserializer builder. + /// - `root`: The type to deserialize. + /// - `support`: The serialization support package. + /// - `either_type`: The fully-qualified name of the type that implements `Either`. + pub fn new(root: ClassId, support: impl Into, either_type: impl Into) -> Self { + let materializers = Default::default(); + let mappers = Default::default(); + let support = support.into(); + let either_type = either_type.into(); + Self { root, materializers, mappers, support, either_type } + } + + /// Configure the specified field to be produced according to an expression, instead of by + /// standard deserialization. The expression will be produced by the given function. + pub fn materialize(&mut self, field: FieldId, materializer: F) + where F: for<'a> FnOnce(MaterializerInput<'a>) -> String + 'static { + self.materializers.insert(field, Box::new(materializer)); + } + + /// Configure the specified field to be modified by an expression, after being deserialized. + /// The expression will be produced by the given function. + pub fn map(&mut self, field: FieldId, mapper: F) + where F: for<'a, 'b> FnOnce(MapperInput<'a, 'b>) -> String + 'static { + self.mappers.insert(field, Box::new(mapper)); + } + + /// Generate the deserialization method. 
+ pub fn build(mut self, graph: &TypeGraph) -> Method { + let method = match graph[self.root].abstract_ { + true => self.deserialize_abstract(graph), + false => self.deserialize_concrete(graph), + }; + Method::Raw(method) + } +} + +type Materializer = Box FnOnce(MaterializerInput<'a>) -> String>; +type Mapper = Box FnOnce(MapperInput<'a, 'b>) -> String>; + +/// Input to a function that produces an expression that deserializes a field. +#[derive(Debug)] +pub struct MaterializerInput<'a> { + /// Identifier of the serialized message object. + pub message: &'a str, +} + +/// Input to a function that produces an expression that modifies a field after deserialization. +#[derive(Debug)] +pub struct MapperInput<'a, 'b> { + /// Identifier of the serialized message object. + pub message: &'a str, + /// Identifier of the field's value, after producing with standard deserialization. + pub value: &'b str, +} + + +// === Product Types === + +impl DeserializerBuilder { + /// Deserialize a `Class` of a fixed type (not dependant on further runtime data). 
+ fn deserialize_concrete(&mut self, graph: &TypeGraph) -> syntax::Method { + let class = &graph[self.root]; + let message = "message"; + let mut body = String::new(); + let mut next_temp_variable_number = 0; + let mut get_temp = || { + let prefix = "generatedTemp"; + let result = format!("{}{}", prefix, next_temp_variable_number); + next_temp_variable_number += 1; + result + }; + let fields = class_fields(graph, class); + for field in &fields { + let ty_name = quote_type(graph, &field.data); + let expr = if let Some(materializer) = self.materializers.remove(&field.id()) { + (materializer)(MaterializerInput { message }) + } else { + match &field.data { + FieldData::Object { type_, non_null } => { + let value = get_temp(); + if *non_null { + self.deserialize_object( + graph, + *type_, + message, + &value, + &mut get_temp, + &mut body, + ); + } else { + self.deserialize_nullable( + graph, + *type_, + message, + &value, + &mut get_temp, + &mut body, + ); + } + value + } + FieldData::Primitive(Primitive::Int { .. }) => format!("{}.get32()", message), + FieldData::Primitive(Primitive::Long { .. 
}) => format!("{}.get64()", message), + FieldData::Primitive(Primitive::Bool) => format!("{}.getBoolean()", message), + } + }; + let expr = match self.mappers.remove(&field.id()) { + Some(mapper) => { + let value = get_temp(); + writeln!(body, "{} {} = {};", ty_name, &value, expr).unwrap(); + (mapper)(MapperInput { message, value: &value }) + } + None => expr, + }; + writeln!(body, "{} {} = {};", ty_name, &field.name, expr).unwrap(); + } + let constructor_args: Vec<_> = + fields.into_iter().map(|field| field.name.as_str()).collect(); + let constructor_args = constructor_args.join(", "); + writeln!(body, "return new {}({});", &class.name, constructor_args).unwrap(); + let message_ty = syntax::Type::named(format!("{}.Message", &self.support)); + let mut method = syntax::Method::new("deserialize", quote_class_type(graph, self.root)); + method.static_ = true; + method.body = body; + method.arguments = vec![(message_ty, message.to_owned())]; + method + } + + /// Deserialize an optional object; if it is not present, use the Java `null` value. + fn deserialize_nullable( + &self, + graph: &TypeGraph, + id: ClassId, + message: &str, + output: &str, + get_temp: &mut F, + body: &mut String, + ) where + F: FnMut() -> String, + { + let ty_name = quote_class_type(graph, id); + writeln!(body, "{ty_name} {output} = null;").unwrap(); + writeln!(body, "if ({message}.getBoolean()) {{").unwrap(); + let value = get_temp(); + self.deserialize_object(graph, id, message, &value, get_temp, body); + writeln!(body, "{output} = {value};").unwrap(); + writeln!(body, "}}").unwrap(); + } + + /// Deserialize an object that is non-optional (unconditionally present in the serialized data). 
+ fn deserialize_object( + &self, + graph: &TypeGraph, + id: ClassId, + message: &str, + output: &str, + get_temp: &mut F, + body: &mut String, + ) where + F: FnMut() -> String, + { + let ty = &graph[id]; + let ty_name = quote_class_type(graph, id); + if !ty.builtin { + writeln!(body, "{ty_name} {output} = {ty_name}.deserialize({message});").unwrap(); + return; + } + match ty.name.as_str() { + STRING => writeln!(body, "{ty_name} {output} = {message}.getString();").unwrap(), + OPTIONAL => { + let base = ty.params[0]; + let present = get_temp(); + writeln!(body, "{ty_name} {output};").unwrap(); + writeln!(body, "boolean {present} = {message}.getBoolean();").unwrap(); + writeln!(body, "if ({present}) {{").unwrap(); + let value = get_temp(); + self.deserialize_object(graph, base, message, &value, get_temp, body); + writeln!(body, "{output} = {OPTIONAL}.of({value});").unwrap(); + writeln!(body, "}} else {output} = {OPTIONAL}.empty();").unwrap(); + } + LIST => { + let base = ty.params[0]; + let count = get_temp(); + writeln!(body, "int {count} = (int){message}.get64();").unwrap(); + let list_impl = get_temp(); + let params_ = quote_params(graph, &ty.params); + let impl_ty = syntax::Type::generic("java.util.ArrayList", params_); + writeln!(body, "{impl_ty} {list_impl} = new {impl_ty}({count});").unwrap(); + let unmodifiable_list = "java.util.Collections.unmodifiableList"; + writeln!(body, "for (int i=0; i<{count}; i++) {{").unwrap(); + let value = get_temp(); + self.deserialize_object(graph, base, message, &value, get_temp, body); + writeln!(body, "{list_impl}.add({value});").unwrap(); + writeln!(body, "}}").unwrap(); + writeln!(body, "{ty_name} {output} = {unmodifiable_list}({list_impl});").unwrap(); + } + x if x == self.either_type => { + let t0 = ty.params[0]; + let t1 = ty.params[1]; + let t0 = quote_class_type(graph, t0); + let t1 = quote_class_type(graph, t1); + let name = &ty.name; + let discriminant = get_temp(); + writeln!(body, "{ty_name} {output};").unwrap(); + 
writeln!(body, "int {discriminant} = {message}.get32();").unwrap(); + writeln!(body, "switch ({discriminant}) {{").unwrap(); + writeln!( + body, + "case 0: {output} = {name}.right({t1}.deserialize({message})); break;" + ) + .unwrap(); + writeln!( + body, + "case 1: {output} = {name}.left({t0}.deserialize({message})); break;" + ) + .unwrap(); + let err = format!("Unknown discriminant in {ty_name}."); + let serialization = &self.support; + writeln!(body, "default: throw new {serialization}.FormatException({err:?}); }}") + .unwrap(); + } + _ => unimplemented!("Deserialize builtin: {}", &ty.name), + } + } +} + + +// === Sum Types === + +impl DeserializerBuilder { + /// Deserialize a `Class` of known supertype, with concrete type encoded in the serialized data. + fn deserialize_abstract(&self, graph: &TypeGraph) -> syntax::Method { + let class = &graph[self.root]; + let message = "message"; + let mut n = 0; + let mut get_temp = |base| { + let suffix = "GeneratedTemp"; + let result = format!("{}{}{}", base, suffix, n); + n += 1; + result + }; + let mut body = String::new(); + let discriminant = get_temp("discriminant"); + writeln!(body, "int {discriminant} = {message}.get32();").unwrap(); + writeln!(body, "switch ({discriminant}) {{").unwrap(); + for (key, id) in &class.discriminants { + let ty = quote_class_type(graph, *id); + writeln!(body, "case {key}: return {ty}.deserialize({message});").unwrap(); + } + let ty_name = quote_class_type(graph, self.root); + let err = format!("Unknown discriminant in {ty_name}."); + let serialization = &self.support; + writeln!(body, "default: throw new {serialization}.FormatException({:?});", err).unwrap(); + writeln!(body, "}}").unwrap(); + let message_ty = syntax::Type::named(format!("{serialization}.Message")); + let mut method = syntax::Method::new("deserialize", ty_name); + method.static_ = true; + method.body = body; + method.arguments = vec![(message_ty, message.to_owned())]; + method + } +} diff --git 
a/lib/rust/metamodel/src/java/from_meta.rs b/lib/rust/metamodel/src/java/from_meta.rs new file mode 100644 index 0000000000..ad4b96b7dd --- /dev/null +++ b/lib/rust/metamodel/src/java/from_meta.rs @@ -0,0 +1,170 @@ +//! Translating a data model in the highly-abstracted `meta` representation to a data model in the +//! `crate::java` representation. +//! +//! As the `meta` and `java` models are similar, this is a straightforward translation. The main +//! differences are: +//! - In Java, there is a distinction between a few types that are unboxed primitives and all other +//! types, which are reference types. +//! - In Java, all classes are expected to implement certain methods. These methods are attached in +//! this stage, although [`Dynamic`] methods are used so that if any classes are modified before +//! the model is rendered to syntax, the generated methods will reflect the changes. + +use crate::java::*; + +use crate::meta; + + + +// ====================== +// === Java from Meta === +// ====================== + +/// Translate a data model in the [`meta`] representation to a data model in the Java typesystem. +pub fn from_meta( + graph: &meta::TypeGraph, + either_type: impl Into, +) -> (TypeGraph, BTreeMap) { + let primitives = Default::default(); + let mut java = TypeGraph::default(); + let mut class_promises: BTreeMap<_, _> = + graph.types.keys().map(|id| (id, java.classes.unbound_key())).collect(); + let meta_to_java = class_promises.iter().map(|(key, value)| (*key, value.into())).collect(); + let either_type = either_type.into(); + let mut from_meta = FromMeta { java, meta_to_java, primitives, either_type }; + // Translate primitives first, because in Java we need to know whether a type is primitive when + // we reference the type. 
+ let mut unbound_ids: Vec<_> = class_promises.keys().copied().collect(); + for &id_ in &unbound_ids { + if let meta::Data::Primitive(ty) = &graph[id_].data { + match from_meta.primitive(ty) { + Ok(prim) => { + from_meta.primitives.insert(id_, prim); + } + Err(class) => { + from_meta.java.classes.bind(class_promises.remove(&id_).unwrap(), class); + } + } + } + } + unbound_ids.clear(); + unbound_ids.extend(class_promises.keys().copied()); + // Translate structs. + for id_ in unbound_ids { + let ty = &graph[id_]; + let fields_ = match &ty.data { + meta::Data::Primitive(_) => continue, + meta::Data::Struct(fields_) => fields_, + }; + let class = from_meta.class(ty, fields_); + from_meta.java.classes.bind(class_promises.remove(&id_).unwrap(), class); + } + let FromMeta { java, meta_to_java, .. } = from_meta; + (java, meta_to_java) +} + +#[derive(Debug)] +struct FromMeta { + java: TypeGraph, + meta_to_java: BTreeMap, + primitives: BTreeMap, + either_type: String, +} + +impl FromMeta { + /// Translate a primitive in the [`meta`] model to either a Java primitive, or a Java class. + fn primitive(&self, ty: &meta::Primitive) -> Result { + match ty { + meta::Primitive::Bool => Ok(Primitive::Bool), + meta::Primitive::U64 => Ok(Primitive::Long { unsigned: true }), + meta::Primitive::U32 => Ok(Primitive::Int { unsigned: true }), + meta::Primitive::String => Err(Class::string()), + meta::Primitive::Option(t0_) => Err(Class::optional(self.meta_to_java[t0_])), + meta::Primitive::Sequence(t0_) => Err(Class::list(self.meta_to_java[t0_])), + meta::Primitive::Result(t0_, t1_) => { + let t0 = self.meta_to_java[t0_]; + let t1 = self.meta_to_java[t1_]; + Err(Class::builtin(&self.either_type, vec![t1, t0])) + } + } + } + + /// Translate a type in the [`meta`] model to a Java class. 
+ fn class<'f>( + &self, + ty: &meta::Type, + fields_: impl IntoIterator, + ) -> Class { + let name = ty.name.to_pascal_case(); + let abstract_ = ty.abstract_; + let sealed = ty.closed; + let parent = ty.parent.as_ref().map(|id| self.meta_to_java[id]); + let mut methods = match abstract_ { + true => abstract_methods(), + false => standard_methods(), + }; + let fields_ = fields_.into_iter(); + let mut fields = Vec::with_capacity(fields_.size_hint().0); + for field in fields_ { + let meta::Field { name, type_, hide, .. } = field; + let name = name.to_camel_case().expect("Unimplemented: Tuples."); + let field = match self.primitives.get(type_) { + Some(primitive) => Field::primitive(name, *primitive), + None => Field::object(name, self.meta_to_java[type_], true), + }; + if !hide { + methods.push(Method::Dynamic(Dynamic::Getter(field.id()))); + } + fields.push(field); + } + let discriminants = + ty.discriminants.iter().map(|(key, id)| (*key, self.meta_to_java[id])).collect(); + let child_field = ty.child_field; + Class { + name, + parent, + abstract_, + sealed, + fields, + methods, + discriminants, + child_field, + ..Default::default() + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_converting_graph() { + let mut meta = meta::TypeGraph::new(); + let u32_name = meta::TypeName::from_pascal_case("U32"); + let u32_ty = meta::Type::new(u32_name, meta::Data::Primitive(meta::Primitive::U32)); + let u32_ = meta.types.insert(u32_ty); + let inner_field_name = meta::FieldName::from_snake_case("inner_field"); + let inner_fields = vec![meta::Field::named(inner_field_name, u32_)]; + let inner_name = meta::TypeName::from_pascal_case("Inner"); + let inner = + meta.types.insert(meta::Type::new(inner_name, meta::Data::Struct(inner_fields))); + let outer_field_inner_name = meta::FieldName::from_snake_case("inner"); + let outer_name = meta::TypeName::from_pascal_case("Outer"); + let outer_fields = vec![meta::Field::named(outer_field_inner_name, inner)]; + let 
outer_ty = meta::Type::new(outer_name, meta::Data::Struct(outer_fields)); + let outer = meta.types.insert(outer_ty); + let (java, meta_to_java) = from_meta(&meta, "Either"); + let outer_ = meta_to_java[&outer]; + let inner_ = meta_to_java[&inner]; + assert_eq!(java[outer_].name, "Outer"); + assert_eq!(java[inner_].name, "Inner"); + assert_eq!(java[outer_].fields[0].data, FieldData::Object { + type_: inner_, + non_null: true, + }); + assert_eq!( + java[inner_].fields[0].data, + FieldData::Primitive(Primitive::Int { unsigned: true }) + ); + } +} diff --git a/lib/rust/metamodel/src/java/graphviz.rs b/lib/rust/metamodel/src/java/graphviz.rs new file mode 100644 index 0000000000..ce4266114a --- /dev/null +++ b/lib/rust/metamodel/src/java/graphviz.rs @@ -0,0 +1,48 @@ +//! Generating graphical representations of Java type systems. + +use super::*; + +use crate::graphviz::EdgeType; +use crate::graphviz::Graph; +use crate::graphviz::Node; +use crate::graphviz::NodeType; + + + +// ========================= +// === Graphviz Graphing === +// ========================= + +/// Produce a graphviz graph of the datatypes. 
+pub fn graph(java: &TypeGraph) -> Graph { + let mut graph = Graph::default(); + let classes = &java.classes; + for (id, ty) in classes.iter() { + let sname = format!("{}{}", ty.name, id); + let node_type = match &ty.abstract_ { + true => NodeType::AbstractStruct, + false => NodeType::Struct, + }; + let label = ty.name.clone(); + let primitive = ty.builtin; + graph.nodes.insert(sname.clone(), Node { primitive, node_type, label }); + if let Some(&parent) = ty.parent.as_ref() { + let sparent = format!("{}{}", classes[id].name, parent); + graph.edges.push((sparent.clone(), sname.clone(), EdgeType::Subtype)); + } + for field in &ty.fields { + match &field.data { + FieldData::Object { type_, non_null } => { + let sname2 = format!("{}{}", classes[id].name, type_); + let edgetype = match non_null { + false => EdgeType::OptionalField, + true => EdgeType::Field, + }; + graph.edges.push((sname.clone(), sname2, edgetype)); + } + FieldData::Primitive(_) => {} + } + } + } + graph +} diff --git a/lib/rust/metamodel/src/java/implementation.rs b/lib/rust/metamodel/src/java/implementation.rs new file mode 100644 index 0000000000..2244a9f021 --- /dev/null +++ b/lib/rust/metamodel/src/java/implementation.rs @@ -0,0 +1,336 @@ +//! Given a [`java`] representation of a data model, produce a [`java::syntax`] tree that can be +//! rendered to Java code implementing the data model. + +use crate::java::*; + +use std::fmt::Write; + + + +// =================================== +// === Implementing Java Datatypes === +// =================================== + +/// Produce Java syntax implement all the types modeled in a [`TypeGraph`]. 
+pub fn implement(graph: &TypeGraph, package: &str) -> Vec { + let mut implementations = BTreeMap::new(); + for (id, class) in graph.classes.iter() { + if !class.builtin { + implementations.insert(id, implement_class(graph, id)); + } + } + for (id, class) in graph.classes.iter() { + if let Some(parent) = class.parent { + let mut inner = implementations.remove(&id).unwrap(); + inner.static_ = true; + implementations.get_mut(&parent).unwrap().nested.push(inner); + } + } + for class in implementations.values_mut() { + class.package = Some(package.to_owned()); + } + implementations.into_values().collect() +} + +/// For some [`Class`] (identified by ID) in a [`TypeGraph`], get its qualified name, relative to +/// its package. If it is not a nested class, this will be the same as its unqualified name; if it +/// is a nested class, this will include the hierarchy of classes containing it as part of its +/// namespace. +/// +/// # Examples +/// +/// For a [`Class`] equivalent to the following: +/// ```java +/// class Token { +/// static class Ident { } +/// }; +/// ``` +/// The `path` would be "Token.Ident". +/// +/// For a non-nested [`Class`], like this: +/// ```java +/// class Error { +/// String message; +/// }; +/// ``` +/// The `path` would be "Error". +pub fn path(graph: &TypeGraph, id: ClassId) -> String { + let mut components = vec![]; + let mut next_id = Some(id); + while let Some(id) = next_id { + let ty = &graph[id]; + components.push(ty.name.as_str()); + next_id = ty.parent; + } + components.reverse(); + components.join(".") +} + +/// Get the fields owned by a class, including its own fields and the fields of its supertypes. 
+pub fn class_fields<'v, 's: 'v, 'c: 'v>(graph: &'s TypeGraph, class: &'c Class) -> Vec<&'v Field> { + let mut out = vec![]; + class_fields_(graph, class, &mut out, None, None); + out +} + +fn class_fields_<'v, 's: 'v, 'c: 'v>( + graph: &'s TypeGraph, + class: &'c Class, + out: &mut Vec<&'v Field>, + start: Option, + end: Option, +) { + let mut fields = &class.fields[..]; + if let Some(end) = end { + fields = &fields[..end]; + } + if let Some(start) = start { + fields = &fields[start..]; + } else if let Some(parent) = class.parent { + let index = Some(graph[parent].child_field.unwrap()); + class_fields_(graph, &graph[parent], out, None, index); + out.extend(fields); + class_fields_(graph, &graph[parent], out, index, None); + return; + } + out.extend(fields); +} + +/// Given a [`TypeGraph`] and a definition of a field's contents ([`FieldData`]), produce what is +/// referred to in the Java AST specification as an an `UnannType`[1]. This value is suitable for +/// use as the type portion of a field declaration, local variable declaration, formal parameter, or +/// return type specification. +/// +/// [1]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannType +pub fn quote_type(graph: &TypeGraph, data: &FieldData) -> syntax::Type { + let class = match data { + FieldData::Object { type_, .. } => return quote_class_type(graph, *type_), + FieldData::Primitive(Primitive::Int { .. }) => "int", + FieldData::Primitive(Primitive::Bool) => "boolean", + FieldData::Primitive(Primitive::Long { .. }) => "long", + }; + syntax::Type::named(class) +} + +/// Given a [`TypeGraph`] and an ID identifying a [`Class`], produce what is referred to in the Java +/// AST specification as an an `UnannClassOrInterfaceType`[1]. This value is suitable for +/// use anywhere an `UnannType`[2] is expected. 
+/// +/// [1]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannClassOrInterfaceType +/// [2]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannType +pub fn quote_class_type(graph: &TypeGraph, id: ClassId) -> syntax::Type { + let class = path(graph, id); + let params = quote_params(graph, &graph[id].params); + syntax::Type { class, params } +} + +/// Render a parameter list. +pub fn quote_params<'a>( + graph: &TypeGraph, + params: impl IntoIterator, +) -> Vec { + params.into_iter().map(|ty| path(graph, *ty)).collect() +} + + +// === Helpers === + +/// Given a model of a field ([`Field`]), create a representation of the Java syntax defining a +/// class field with name, type, and attributes as specified in the model. +fn quote_field(graph: &TypeGraph, field: &Field) -> syntax::Field { + let Field { name, data, id: _ } = field; + let type_ = quote_type(graph, data); + let name = name.clone(); + let final_ = true; + syntax::Field { type_, name, final_ } +} + +/// Given a model of a method ([`Method`]), create a representation of the Java syntax implementing +/// the method. +fn method(graph: &TypeGraph, method: &Method, class: &Class) -> syntax::Method { + match method { + Method::Dynamic(method) => implement_method(graph, method, class), + Method::Raw(method) => method.clone(), + } +} + +/// Produce a representation of Java syntax implementing the specified [`Dynamic`] method, for the +/// specified [`Class`] within the specified [`TypeGraph`]. 
+fn implement_method(graph: &TypeGraph, method: &Dynamic, class: &Class) -> syntax::Method { + match method { + Dynamic::Constructor => implement_constructor(graph, class), + Dynamic::HashCode => implement_hash_code(graph, class), + Dynamic::Equals => implement_equals(graph, class), + Dynamic::ToString => implement_to_string(graph, class), + Dynamic::Getter(field) => implement_getter(graph, class, *field), + } +} + +/// Produce a representation of Java syntax implementing a constructor for the given [`Class`]. +/// +/// The constructor will accept a value for each of its fields, and for all fields of any classes +/// it extends, in an order that matches the order they appear in serialized formats. +/// +/// For all field that have the `non_null` property sets (see [`FieldData`]), the constructor will +/// produce `requireNonNull`[1] statements validating the corresponding inputs. +/// +/// [1]: https://docs.oracle.com/javase/8/docs/api/java/util/Objects.html#requireNonNull-T- +fn implement_constructor(graph: &TypeGraph, class: &Class) -> syntax::Method { + let suffix = "__GeneratedArgument"; + let arguments = class_fields(graph, class) + .into_iter() + .map(|field| (quote_type(graph, &field.data), format!("{}{}", &field.name, &suffix))) + .collect(); + let mut body = vec![]; + if let Some(parent) = class.parent { + let suffix = |field: &Field| format!("{}{}", &field.name, &suffix); + let fields: Vec<_> = class_fields(graph, &graph[parent]).into_iter().map(suffix).collect(); + body.push(format!("super({});", fields.join(", "))); + } + for field in &class.fields { + if let FieldData::Object { non_null: true, .. 
} = &field.data { + body.push(format!("java.util.Objects.requireNonNull({}{});", &field.name, &suffix)); + } + } + let own_field_initializers = + class.fields.iter().map(|field| format!("{} = {}{};", &field.name, &field.name, &suffix)); + body.extend(own_field_initializers); + let mut method = syntax::Method::constructor(class.name.clone()); + method.arguments = arguments; + method.body = body.join("\n"); + method +} + +/// Produce a representation of Java syntax implementing a method overriding `Object.hashCode`[1] +/// for the specified [`Class`]. +/// +/// The implementation will pass all fields of the class, and of any superclasses, to +/// `java.util.Objects.hash`[2] and return the result. +/// +/// [1]: https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html#hashCode() +/// [2]: https://docs.oracle.com/javase/8/docs/api/java/util/Objects.html#hash-java.lang.Object...- +fn implement_hash_code(graph: &TypeGraph, class: &Class) -> syntax::Method { + let fields: Vec<_> = + class_fields(graph, class).into_iter().map(|field| field.name.as_str()).collect(); + let fields = fields.join(", "); + let body = format!("return java.util.Objects.hash({});", fields); + let return_ = FieldData::Primitive(Primitive::Int { unsigned: false }); + let return_ = quote_type(graph, &return_); + let mut method = syntax::Method::new("hashCode", return_); + method.override_ = true; + method.body = body; + method +} + +/// Produce a representation of Java syntax implementing a method overriding `Object.equals`[1] +/// for the specified [`Class`]. +/// +/// The implementation: +/// - Returns `true` if the objects are identity-equal. +/// - Returns `false` if the other object is not of the same type as this object. +/// Otherwise, returns a boolean-and of a field-by-field comparison: +/// - Primitive fields are compared with `==`. +/// - Reference-type fields are compared with `Object.equals`. 
+/// +/// [1]: https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html#equals(java.lang.Object) +fn implement_equals(graph: &TypeGraph, class: &Class) -> syntax::Method { + let object = "object"; + let that = "that"; + let compare = + |field: &Field| field.data.fmt_equals(&field.name, &format!("{that}.{}", &field.name)); + let field_comparisons = class_fields(graph, class).into_iter().map(compare); + let mut values = vec!["true".to_string()]; + values.extend(field_comparisons); + let expr = values.join(" && "); + let body = vec![ + format!("if ({} == this) return true;", &object), + format!("if (!({} instanceof {})) return false;", &object, &class.name), + format!("{} {} = ({}){};", &class.name, &that, &class.name, &object), + format!("return {};", expr), + ]; + let return_ = FieldData::Primitive(Primitive::Bool); + let return_ = quote_type(graph, &return_); + let mut method = syntax::Method::new("equals", return_); + method.override_ = true; + method.arguments = vec![(syntax::Type::named("Object"), object.to_string())]; + method.body = body.join("\n"); + method +} + +/// Produce a representation of Java syntax implementing a method overriding `Object.toString`[1] +/// for the specified [`Class`]. +/// +/// The generated `toString` formats all the object's fields in the same manner as would be done by +/// a Java `record`[2] with the same fields. 
+/// +/// [1]: https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html#toString() +/// [2]: https://openjdk.org/jeps/395 +fn implement_to_string(graph: &TypeGraph, class: &Class) -> syntax::Method { + let string_builder = "stringBuilder"; + let stringify = + |field: &Field| format!("{string_builder}.append(String.valueOf({}));", field.name); + let fields: Vec<_> = class_fields(graph, class).into_iter().map(stringify).collect(); + let mut body = String::new(); + let ty_name = &class.name; + writeln!(body, "StringBuilder {string_builder} = new StringBuilder();").unwrap(); + writeln!(body, "{string_builder}.append(\"{ty_name}[\");").unwrap(); + writeln!(body, "{}", fields.join(&format!("\n{string_builder}.append(\", \");\n"))).unwrap(); + writeln!(body, "{string_builder}.append(\"]\");").unwrap(); + writeln!(body, "return {string_builder}.toString();").unwrap(); + let return_ = syntax::Type::named("String"); + let mut method = syntax::Method::new("toString", return_); + method.override_ = true; + method.body = body; + method +} + +/// Produce a representation of Java syntax implementing a method returning the value of a field +/// (identified by [`FieldId`]) of the specified [`Class`]. +fn implement_getter(graph: &TypeGraph, class: &Class, id: FieldId) -> syntax::Method { + let field = class.fields.iter().find(|field| field.id() == id).unwrap(); + getter(graph, field) +} + +/// Produce a representation of Java syntax implementing a method returning the value of the +/// specified [`Field`]. The method must be attached to the same [`syntax::Class`] in which the +/// [`Field`] is defined. 
+fn getter(graph: &TypeGraph, field: &Field) -> syntax::Method { + let getter_name = |field| { + let field = crate::meta::Identifier::from_camel_case(field); + let mut name = crate::meta::Identifier::from_camel_case("get"); + name.append(field); + name.to_camel_case() + }; + let type_ = quote_type(graph, &field.data); + let mut method = syntax::Method::new(getter_name(&field.name), type_); + method.body = format!("return {};", &field.name); + method +} + +/// Produce a representation of Java syntax defining a `class` as specified by the given [`Class`] +/// (identified by its [`ClassId`]). +fn implement_class(graph: &TypeGraph, id: ClassId) -> syntax::Class { + let class = &graph[id]; + let name = class.name.clone(); + let abstract_ = class.abstract_; + let final_ = !abstract_; + let static_ = false; + let parent = class.parent.map(|id| quote_class_type(graph, id)); + let fields = class.fields.iter().map(|field| quote_field(graph, field)).collect(); + let nested = vec![]; + let methods = class.methods.iter().map(|m| method(graph, m, class)).collect(); + let package = Default::default(); + let sealed = class.sealed.then(Default::default); + syntax::Class { + package, + name, + abstract_, + final_, + static_, + parent, + fields, + methods, + nested, + sealed, + } +} diff --git a/lib/rust/metamodel/src/java/mod.rs b/lib/rust/metamodel/src/java/mod.rs new file mode 100644 index 0000000000..a0827fd045 --- /dev/null +++ b/lib/rust/metamodel/src/java/mod.rs @@ -0,0 +1,250 @@ +//! Representation of datatype definitions in the Java typesystem. 
+ + +// ============== +// === Export === +// ============== + +pub mod bincode; + + + +mod from_meta; +#[cfg(feature = "graphviz")] +mod graphviz; +mod implementation; +pub mod syntax; +pub mod transform; + +use crate::data_structures::VecMap; +use derive_more::Index; +use derive_more::IndexMut; +pub use from_meta::from_meta; +pub use implementation::implement as to_syntax; +use std::collections::BTreeMap; + + + +// ===================== +// === Java Builtins === +// ===================== + +/// Fully-qualified name of Java's `Optional` type. +pub const OPTIONAL: &str = "java.util.Optional"; +/// Fully-qualified name of Java's `List` type. +pub const LIST: &str = "java.util.List"; +/// Fully-qualified name of Java's `String` type. +pub const STRING: &str = "String"; + + + +// ============================== +// === Type Parameterizations === +// ============================== + +/// Globally unique, stable identifier for a `Field`. +pub type FieldId = crate::data_structures::Id; +/// Identifies a Java class within a `TypeGraph`. +pub type ClassId = crate::data_structures::vecmap::Key; +/// Identifier for a class whose value hasn't been set yet. +pub type UnboundClassId = crate::data_structures::vecmap::UnboundKey; + + + +// ====================== +// === Datatype Types === +// ====================== + +/// A Java class. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct Class { + /// The name of the class, not including package. + pub name: String, + /// Parameters of a generic class. + pub params: Vec, + /// The parent class, if any. + pub parent: Option, + /// Whether this class is `abstract`. + pub abstract_: bool, + /// Whether this class is `sealed`. + pub sealed: bool, + /// The data fields. + pub fields: Vec, + /// The class's methods. + pub methods: Vec, + builtin: bool, + // Attributes + discriminants: BTreeMap, + child_field: Option, +} + +impl Class { + /// Create a new "builtin" class. 
+ pub fn builtin(name: &str, fields: impl IntoIterator) -> Self { + let params: Vec<_> = fields.into_iter().collect(); + let name = name.to_owned(); + let builtin = true; + let fields = params.iter().map(|&type_| Field::object("data", type_, true)).collect(); + Class { name, params, builtin, fields, ..Default::default() } + } + + /// Define a type for Java's `Optional` instantiated with a type. + pub fn optional(param: ClassId) -> Self { + Self::builtin(OPTIONAL, Some(param)) + } + + /// Define a type for Java's `List` instantiated with a type. + pub fn list(param: ClassId) -> Self { + Self::builtin(LIST, Some(param)) + } + + /// Define a type for Java's `String` instantiated with a type. + pub fn string() -> Self { + Self::builtin(STRING, []) + } + + /// Get a field by name. + pub fn find_field(&self, name: &str) -> Option<&Field> { + self.fields.iter().find(|field| field.name == name) + } +} + +/// A method of a class. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Method { + /// A `Dynamic` method. + Dynamic(Dynamic), + /// A literal method implementation. + Raw(syntax::Method), +} + +/// A method that is rendered to syntax on demand. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Dynamic { + /// A constructor. + Constructor, + /// `hashCode` method. + HashCode, + /// `equals` method. + Equals, + /// `toString` method. + ToString, + /// A read-accessor for a field. + Getter(FieldId), +} + +impl From for Method { + fn from(method: Dynamic) -> Self { + Method::Dynamic(method) + } +} + +fn abstract_methods() -> Vec { + vec![Dynamic::Constructor.into()] +} + +fn standard_methods() -> Vec { + vec![ + Dynamic::Constructor.into(), + Dynamic::HashCode.into(), + Dynamic::Equals.into(), + Dynamic::ToString.into(), + ] +} + +/// A data field of a class. 
+#[derive(Debug, PartialEq, Eq)] +pub struct Field { + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub data: FieldData, + id: FieldId, +} + +impl Field { + /// Create a field referencing a `Class` of a specified type. + pub fn object(name: impl Into, type_: ClassId, non_null: bool) -> Self { + let name = name.into(); + let data = FieldData::Object { type_, non_null }; + let id = Default::default(); + Self { name, data, id } + } + + /// Create a field holding primitive data. + pub fn primitive(name: impl Into, primitive: Primitive) -> Self { + let name = name.into(); + let data = FieldData::Primitive(primitive); + let id = Default::default(); + Self { name, data, id } + } + + #[allow(missing_docs)] + pub fn id(&self) -> FieldId { + self.id + } +} + +/// A field's data contents. +#[derive(Debug, Clone, PartialEq, Eq, Copy, PartialOrd, Ord, Hash)] +pub enum FieldData { + /// A reference to an object. + Object { + #[allow(missing_docs)] + type_: ClassId, + /// If `true`, this field should be subject to null-checking in constructors, and can be + /// assumed always to be present. + non_null: bool, + }, + /// An unboxed primitive. + Primitive(Primitive), +} + +impl FieldData { + fn fmt_equals(&self, a: &str, b: &str) -> String { + match self { + FieldData::Object { .. } => format!("{}.equals({})", a, b), + FieldData::Primitive(_) => format!("({} == {})", a, b), + } + } +} + +/// An unboxed type; i.e. a type that is not a subtype of `java.lang.Object`. +#[derive(Debug, Clone, PartialEq, Eq, Copy, PartialOrd, Ord, Hash)] +pub enum Primitive { + /// Java's `boolean` + Bool, + /// Java's `int` + Int { + /// If `true`, arithmetic on this value is to be performed with unsigned operations. + unsigned: bool, + }, + /// Java's `long` + Long { + /// If `true`, arithmetic on this value is to be performed with unsigned operations. 
+ unsigned: bool, + }, +} + + + +// ============================ +// === Systems of Datatypes === +// ============================ + +/// A system of Java `Class`es. +#[derive(Debug, Default, Index, IndexMut)] +pub struct TypeGraph { + #[allow(missing_docs)] + pub classes: VecMap, +} + + +// === GraphViz support === + +#[cfg(feature = "graphviz")] +impl From<&'_ TypeGraph> for crate::graphviz::Graph { + fn from(graph: &'_ TypeGraph) -> Self { + graphviz::graph(graph) + } +} diff --git a/lib/rust/metamodel/src/java/syntax.rs b/lib/rust/metamodel/src/java/syntax.rs new file mode 100644 index 0000000000..911291425b --- /dev/null +++ b/lib/rust/metamodel/src/java/syntax.rs @@ -0,0 +1,239 @@ +//! Java syntax. + +use std::fmt; + + + +const TARGET_VERSION: usize = 14; + + + +// =================== +// === Syntax Data === +// =================== + +/// A class definition. +#[derive(Debug)] +pub struct Class { + #[allow(missing_docs)] + pub package: Option, + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub abstract_: bool, + #[allow(missing_docs)] + pub final_: bool, + #[allow(missing_docs)] + pub static_: bool, + #[allow(missing_docs)] + pub parent: Option, + #[allow(missing_docs)] + pub fields: Vec, + #[allow(missing_docs)] + pub methods: Vec, + #[allow(missing_docs)] + pub sealed: Option>, + /// Classes defined in the scope of this class. + pub nested: Vec, +} + +/// A class field definition. +#[derive(Debug)] +pub struct Field { + #[allow(missing_docs)] + pub type_: Type, + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub final_: bool, +} + +/// Identifies a type; this corresponds to `UnannType`[1] in the Java specification. +/// It is suitable for use as the type portion of a field declaration, local variable declaration, +/// formal parameter, or return type specification. 
+/// +/// [1]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannType +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Type { + /// Class name. + pub class: String, + /// Parameter list. + pub params: Vec, +} + +impl Type { + /// A simple type. + pub fn named(name: impl Into) -> Self { + let class = name.into(); + let params = vec![]; + Type { class, params } + } + + /// A generic type. + pub fn generic(name: impl Into, params: Vec) -> Self { + let class = name.into(); + Type { class, params } + } +} + +/// A method. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Method { + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub arguments: Vec<(Type, String)>, + /// Return value, unless this is a constructor. + pub return_: Option, + #[allow(missing_docs)] + pub static_: bool, + #[allow(missing_docs)] + pub final_: bool, + /// Literal body, not including brackets. + pub body: String, + #[allow(missing_docs)] + pub override_: bool, + #[allow(missing_docs)] + pub throws: Vec, +} + + +// === Constructors === + +impl Method { + /// Create a method. + pub fn new(name: impl Into, return_: Type) -> Self { + let name = name.into(); + let return_ = Some(return_); + let arguments = Default::default(); + let static_ = Default::default(); + let final_ = Default::default(); + let body = Default::default(); + let override_ = Default::default(); + let throws = Default::default(); + Method { name, arguments, return_, static_, final_, body, override_, throws } + } + + /// Create a constructor. 
+ pub fn constructor(name: impl Into) -> Self { + let name = name.into(); + let arguments = Default::default(); + let return_ = Default::default(); + let static_ = Default::default(); + let final_ = Default::default(); + let body = Default::default(); + let override_ = Default::default(); + let throws = Default::default(); + Method { name, arguments, return_, static_, final_, body, override_, throws } + } +} + + + +// ========================= +// === Rendering to Text === +// ========================= + +impl fmt::Display for Class { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Class { + package, + name, + abstract_, + final_, + static_, + parent, + fields, + methods, + nested, + sealed, + } = &self; + if let Some(package) = package { + writeln!(f, "package {};", package)?; + } + let mut modifiers = vec!["public".to_string()]; + static_.then(|| modifiers.push("static".to_string())); + final_.then(|| modifiers.push("final".to_string())); + abstract_.then(|| modifiers.push("abstract".to_string())); + if TARGET_VERSION >= 15 && sealed.is_some() { + modifiers.push("sealed".to_string()) + } + let mut tokens = modifiers; + tokens.push("class".to_string()); + tokens.push(name.to_string()); + if let Some(parent) = parent { + tokens.push("extends".to_string()); + tokens.push(parent.to_string()); + } + if let Some(sealed) = sealed { + if !sealed.is_empty() { + let types: Vec<_> = sealed.iter().map(|ty| ty.to_string()).collect(); + tokens.push(format!("permits {}", types.join(", "))); + } + } + let tokens = tokens.join(" "); + writeln!(f, "{} {{", tokens)?; + for field in fields { + write!(f, "{}", field)?; + } + for method in methods { + write!(f, "{}", method)?; + } + for class in nested { + write!(f, "{}", class)?; + } + writeln!(f, "}}")?; + Ok(()) + } +} + +impl fmt::Display for Field { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Field { type_, name, final_ } = &self; + let mut tokens = vec!["protected".to_string()]; + 
final_.then(|| tokens.push("final".to_string())); + tokens.push(type_.to_string()); + tokens.push(name.clone()); + let tokens = tokens.join(" "); + writeln!(f, "{};", tokens) + } +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", &self.class)?; + if !self.params.is_empty() { + write!(f, "<{}>", self.params.join(", "))?; + } + Ok(()) + } +} + +impl fmt::Display for Method { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Method { name, arguments, return_, static_, final_, body, override_, throws } = &self; + let mut tokens = vec![]; + override_.then(|| tokens.push("@Override".to_string())); + tokens.push("public".to_string()); + static_.then(|| tokens.push("static".to_string())); + final_.then(|| tokens.push("final".to_string())); + if let Some(return_) = return_ { + tokens.push(return_.to_string()); + } + tokens.push(name.to_string()); + let tokens = tokens.join(" "); + let arguments: Vec<_> = + arguments.iter().map(|(ty, name)| format!("{} {}", ty, name)).collect(); + let arguments = arguments.join(", "); + writeln!(f, "{}({})", tokens, arguments)?; + if !throws.is_empty() { + let types: Vec<_> = throws.iter().map(|ty| ty.to_string()).collect(); + let types = types.join(", "); + writeln!(f, "throws {types}")?; + } + writeln!(f, "{{")?; + writeln!(f, "{body}")?; + writeln!(f, "}}")?; + Ok(()) + } +} diff --git a/lib/rust/metamodel/src/java/transform.rs b/lib/rust/metamodel/src/java/transform.rs new file mode 100644 index 0000000000..8ff18168e6 --- /dev/null +++ b/lib/rust/metamodel/src/java/transform.rs @@ -0,0 +1,43 @@ +//! Transformations of Java datamodels. + +use crate::java::*; + + + +// ======================== +// === Optional to Null === +// ======================== + +/// Rewrite the typegraph to eliminate `Optional` and instead allow some fields to be `null`. 
+/// +/// `TypeId` validity: +/// `TypeId`s that referred to `Optional` types: No long resolvable after transformation. +/// All other `TypeId`s: Unaffected. +pub fn optional_to_null(mut graph: TypeGraph) -> TypeGraph { + let mut optional_to_class = BTreeMap::new(); + for (id, class) in graph.classes.iter() { + if class.builtin && class.name == OPTIONAL { + let wrapped = class.params[0]; + optional_to_class.insert(id, wrapped); + } + } + let no_multilevel = "Handling of multi-level nullability has not been implemented."; + for class in optional_to_class.values() { + assert!(!optional_to_class.contains_key(class), "{}", no_multilevel); + } + for class in graph.classes.values_mut() { + for field in &mut class.fields { + if let FieldData::Object { type_, non_null } = &mut field.data { + if let Some(mapped) = optional_to_class.get(type_) { + assert!(*non_null, "{}", no_multilevel); + *non_null = false; + *type_ = *mapped; + } + } + } + } + for &id in optional_to_class.keys() { + graph.classes.remove(id); + } + graph +} diff --git a/lib/rust/metamodel/src/lib.rs b/lib/rust/metamodel/src/lib.rs new file mode 100644 index 0000000000..1e80bcca4b --- /dev/null +++ b/lib/rust/metamodel/src/lib.rs @@ -0,0 +1,75 @@ +//! A *metamodel* is a data model for data models in a particular typesystem. This crate defines a +//! few metamodels, some operations for inspecting and transforming data models within a metamodel, +//! and operations for translating a data model from one metamodel to another. +//! +//! # Modules +//! +//! The core modules define the metamodels, and operations on them: +//! - [`rust`]: A metamodel representing data models in the Rust typesystem. +//! - [`java`]: A metamodel representing data models in the Java typesystem. +//! - [`meta`]: An abstract metamodel, used to perform language-independent analysis of data models, +//! and as an intermediate when translating data models between language-specific metamodels. +//! +//! Some accessory modules: +//! 
- [`graphviz`]: Support for rendering graphical representations of data models with GraphViz. +//! This serves primarily to support developing and understanding transformations on and between +//! metamodels. +//! +//! # Use cases +//! +//! The primary use case currently supported by this crate is Rust-to-Java datatype transpilation: +//! - Accept a Rust data model as an input (which may be obtained by the [`enso_reflect`] crate). +//! - Translate the data model to a Java data model (via the [`meta`] intermediate representation), +//! using [`rust::to_meta`] and [`java::from_meta`]. +//! - Derive deserialization for the Java data model, using [`java::bincode`]. +//! - Generate Java code implementing the data model, using [`java::to_syntax`]. +//! +//! Other use cases supported include: +//! - Analyze a data model's serialization to generate exhaustive test cases, using +//! [`meta::serialization::testcases`]. +//! - Produce graphs of type relationships, using [`graphviz::Graph`]. +//! +//! # Design +//! +//! A major design principle of this crate is: Operate on the most abstracted representation +//! possible. Primarily, this means we don't try to analyze or reason about *syntax* any more than +//! necessary. The [`rust`] data produced by [`enso_reflect`] is much higher-level than the [`syn`] +//! trees it is created from; it is easier to reason about a graph of datatypes than the tree of +//! tokens that implements it. The [`meta`] intermediate representation is even more abstract, and +//! simpler to operate on than Rust or Java. When we manipulate the data in Java terms (i.e. using +//! [`java::transform::optional_to_null`] to rewrite `Optional` types to nullable types), we do so +//! on the [`java`] graph of types. It is not until we are done with analysis and transformation +//! that we generate a [`java::syntax`] tree from the [`java`] types. [`java::syntax`] is treated as +//! 
write-only; we never try to inspect it, but just use its [`Display`] implementation to produce +//! Java code after all computation is completed. + +// === Features === +#![feature(map_first_last)] +#![feature(associated_type_defaults)] +#![feature(option_get_or_insert_default)] +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + + + +mod data_structures; +#[cfg(feature = "graphviz")] +pub mod graphviz; +#[cfg(feature = "java")] +pub mod java; +pub mod meta; +#[cfg(feature = "rust")] +pub mod rust; diff --git a/lib/rust/metamodel/src/meta/graphviz.rs b/lib/rust/metamodel/src/meta/graphviz.rs new file mode 100644 index 0000000000..3f69e517e6 --- /dev/null +++ b/lib/rust/metamodel/src/meta/graphviz.rs @@ -0,0 +1,71 @@ +//! Graphical representation of a `TypeGraph` with GraphViz. + +use super::*; + +use crate::graphviz::EdgeType; +use crate::graphviz::Graph; +use crate::graphviz::Node; +use crate::graphviz::NodeType; + + + +// ============= +// === Graph === +// ============= + +/// Produce a GraphViz graph representation of the relationships between the types. 
+pub fn graph(typegraph: &TypeGraph) -> Graph { + let mut graph = Graph::default(); + let types = &typegraph.types; + for (id, ty) in types.iter() { + let sname = format!("{}{}", ty.name, id); + let node_type = match &ty.data { + Data::Struct(_) if ty.abstract_ => NodeType::AbstractStruct, + Data::Struct(_) if ty.abstract_ && ty.closed => NodeType::Enum, + Data::Struct(_) => NodeType::Struct, + Data::Primitive(_) => NodeType::Struct, + }; + let primitive = matches!(&ty.data, Data::Primitive(_)); + let label = ty.name.to_string(); + graph.nodes.insert(sname.clone(), Node { primitive, node_type, label }); + let parentlike = ty.parent.iter().chain(&ty.mixins); + for id in parentlike { + let sparent = format!("{}{}", types[id].name, id); + graph.edges.push((sparent.clone(), sname.clone(), EdgeType::Subtype)); + } + match &ty.data { + Data::Struct(fields) => + for Field { type_, name: _, hide: _, id: _ } in fields { + let sname2 = format!("{}{}", types[type_].name, type_); + graph.edges.push((sname.clone(), sname2, EdgeType::Field)); + }, + Data::Primitive(Primitive::U32) + | Data::Primitive(Primitive::Bool) + | Data::Primitive(Primitive::U64) + | Data::Primitive(Primitive::String) => {} + Data::Primitive(Primitive::Sequence(t0)) => graph.edges.push(( + sname.clone(), + format!("{}{}", types[t0].name, t0), + EdgeType::Field, + )), + Data::Primitive(Primitive::Option(t0)) => graph.edges.push(( + sname.clone(), + format!("{}{}", types[t0].name, t0), + EdgeType::Field, + )), + Data::Primitive(Primitive::Result(t0, t1)) => { + graph.edges.push(( + sname.clone(), + format!("{}{}", types[t0].name, t0), + EdgeType::Field, + )); + graph.edges.push(( + sname.clone(), + format!("{}{}", types[t1].name, t1), + EdgeType::Field, + )); + } + } + } + graph +} diff --git a/lib/rust/metamodel/src/meta/mod.rs b/lib/rust/metamodel/src/meta/mod.rs new file mode 100644 index 0000000000..a0130ecaf1 --- /dev/null +++ b/lib/rust/metamodel/src/meta/mod.rs @@ -0,0 +1,410 @@ +//! 
//! A language-independent metamodel for representing data models.
//!
//! This is used as an intermediate representation in translation from Rust to Java to:
//! - Decouple the complexities of the source language from those of the target language.
//! - Provide a simple representation in which to apply transformations.
//!
//! It is also used for language-independent analysis of data models.



#[cfg(feature = "graphviz")]
mod graphviz;
pub mod serialization;
pub mod transform;

use crate::data_structures::VecMap;
use derive_more::Index;
use derive_more::IndexMut;
use std::collections::BTreeMap;
use std::collections::BTreeSet;



// ==============================
// === Type Parameterizations ===
// ==============================

// NOTE(review): generic arguments (on `Id`, `Key`, `Option`, `Vec`, `BTreeMap`, ...) look stripped
// in this copy of the patch; declarations are reproduced as found.

/// Globally unique, stable identifier for a `Field`.
pub type FieldId = crate::data_structures::Id;

/// Identifies a type within a `TypeGraph`.
pub type TypeId = crate::data_structures::vecmap::Key;
/// Identifies an unbound type within a `TypeGraph`.
pub type UnboundTypeId = crate::data_structures::vecmap::UnboundKey;



// ======================
// === Datatype Types ===
// ======================

/// A datatype.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Type {
    /// The type's name.
    pub name: TypeName,
    /// The type's data content.
    pub data: Data,
    /// The parent type, if any.
    pub parent: Option,
    /// Types that this type inherits from that are not the parent.
    pub mixins: Vec,
    /// If true, this type cannot be instantiated.
    pub abstract_: bool,
    /// If true, this type is not open to extension by children outside those defined with it.
    pub closed: bool,
    /// When serializing/deserializing, indicates the index of the field in a `Type` before which a
    /// child object's data will be placed/expected.
    pub child_field: Option,
    /// When serializing/deserializing, indicates the available concrete types and the values used
    /// to identify them.
    pub discriminants: BTreeMap,
}

impl Type {
    /// Create a new datatype, with defaults for most fields.
    ///
    /// Only `name` and `data` are taken from the arguments; the parent, mixins, flags, child-field
    /// index and discriminants all start at their `Default` values.
    pub fn new(name: TypeName, data: Data) -> Self {
        let parent = Default::default();
        let mixins = Default::default();
        let abstract_ = Default::default();
        let closed = Default::default();
        let child_field = Default::default();
        let discriminants = Default::default();
        Type { name, data, parent, mixins, abstract_, closed, child_field, discriminants }
    }
}

/// A datatype's data.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum Data {
    /// A type with fields.
    Struct(Vec),
    /// A builtin type.
    Primitive(Primitive),
}

/// Standard types.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
pub enum Primitive {
    /// A boolean value.
    Bool,
    /// An unsigned 32-bit integer.
    U32,
    /// An unsigned 64-bit integer.
    U64,
    /// A UTF-8-encoded string.
    String,
    /// Zero or more values of a type.
    Sequence(TypeId),
    /// Zero or one value of a type.
    Option(TypeId),
    /// A value that may be one type in a success case, or another type in a failure case.
    Result(TypeId, TypeId),
}

/// A data field of a `Type`.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Field {
    /// The field's `Type`.
    pub type_: TypeId,
    /// The field's name.
    pub name: FieldName,
    /// Whether the field should be private in generated code.
    pub hide: bool,
    // Private: only readable through `Field::id`, and only ever set from `Default` here.
    id: FieldId,
}

impl Field {
    /// Create a new named field.
    ///
    /// The field is not hidden, and its id is the `Default` value of `FieldId`.
    pub fn named(name: FieldName, type_: TypeId) -> Self {
        let hide = Default::default();
        let id = Default::default();
        Self { type_, name, hide, id }
    }

    /// Create a new unnamed field.
    ///
    /// The name is the `Default` (empty) `FieldName`.
    pub fn unnamed(type_: TypeId) -> Self {
        let name = Default::default();
        let hide = Default::default();
        let id = Default::default();
        Self { name, type_, hide, id }
    }

    /// Get the field's `FieldId`.
+ pub fn id(&self) -> FieldId { + self.id + } +} + + + +// =================== +// === Identifiers === +// =================== + +/// An identifier, in a naming convention-agnostic representation. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct Identifier { + segments: Vec, +} + +impl Identifier { + fn new(segments: Vec) -> Self { + for segment in &segments { + assert!(!segment.is_empty()); + } + Self { segments } + } + + fn segments_len(&self) -> usize { + let mut n = 0; + for segment in &self.segments { + n += segment.len(); + } + n + } + + /// Render in PascalCase. + pub fn to_pascal_case(&self) -> String { + let mut pascal = String::with_capacity(self.segments_len() + self.segments.len()); + for segment in &self.segments { + let mut chars = segment.chars(); + pascal.push(chars.next().unwrap().to_ascii_uppercase()); + pascal.extend(chars); + } + pascal + } + + /// Render in camelCase. + pub fn to_camel_case(&self) -> String { + let mut camel = String::with_capacity(self.segments_len()); + let (head, tail) = self.segments.split_first().unwrap(); + camel.push_str(head); + for segment in tail { + let mut chars = segment.chars(); + camel.push(chars.next().unwrap().to_ascii_uppercase()); + camel.extend(chars); + } + camel + } + + /// Render in snake_case. + pub fn to_snake_case(&self) -> String { + self.segments.join("_") + } + + /// Parse an identifier expected to be in snake_case. + pub fn from_snake_case(s: &str) -> Self { + let segments = s.split('_').map(|s| s.to_string()).collect(); + Self::new(segments) + } + + /// Parse an identifier expected to be in camelCase. + pub fn from_camel_case(s: &str) -> Self { + Self::from_pascal_case(s) + } + + /// Parse an identifier expected to be in PascalCase. 
+ pub fn from_pascal_case(s: &str) -> Self { + let mut segments = vec![]; + let mut current = String::new(); + for c in s.chars() { + if c.is_ascii_uppercase() && !current.is_empty() { + segments.push(std::mem::take(&mut current)); + } + current.push(c.to_ascii_lowercase()); + } + segments.push(current); + Self::new(segments) + } + + /// Append another `Identifier` to the end of `self`; when rendered, the boundary between the + /// old and new components will be indicated in a manner determined by the naming convention + /// chosen at rendering time. + pub fn append(&mut self, other: Self) { + self.segments.extend(other.segments) + } +} + + +// === Type Names === + +/// The name of a type, e.g. a `struct` in Rust or a `class` in Java. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TypeName(Identifier); + +impl std::fmt::Display for TypeName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0.to_pascal_case()) + } +} + +impl TypeName { + /// Parse from PascalCase. + pub fn from_pascal_case(s: &str) -> Self { + Self(Identifier::from_pascal_case(s)) + } + /// Render in PascalCase. + pub fn to_pascal_case(&self) -> String { + self.0.to_pascal_case() + } + /// Append another `TypeName` to the end of `self`. See `Identifier::append`. + pub fn append(&mut self, other: Self) { + self.0.append(other.0) + } +} + + +// === Field Names === + +/// The name of a field, e.g. the data members of a Rust struct or Java class. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct FieldName(Identifier); + +impl std::fmt::Display for FieldName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0.to_camel_case()) + } +} + +impl FieldName { + /// Parse from snake_case. + pub fn from_snake_case(s: &str) -> Self { + Self(Identifier::from_snake_case(s)) + } + /// Render in camelCase. 
+ pub fn to_camel_case(&self) -> Option { + match self.0.to_camel_case() { + ident if ident.is_empty() => None, + ident => Some(ident), + } + } + /// Append another `FieldName` to the end of `self`. See `Identifier::append`. + pub fn append(&mut self, other: Self) { + self.0.append(other.0) + } +} + + + +// =========================== +// === System of Datatypes === +// =========================== + +/// A collection of [`Type`]s. The [`TypeGraph`] owns its types; they do not refer to each other +/// directly, but through [`TypeId`]s, which must be looked up in the graph (its [`Index`] +/// implementation provides a convenient interface). +#[derive(Debug, Default, Clone, Index, IndexMut)] +pub struct TypeGraph { + #[index] + #[index_mut] + #[allow(missing_docs)] + pub types: VecMap, +} + +impl TypeGraph { + /// Create a new, empty [`TypeGraph`]. + pub fn new() -> Self { + Default::default() + } + + /// For every `(id0, id1)` pair in the input, replace all occurrences of `id0` in any type + /// definition with the corresponding `id1`. 
+ pub fn apply_aliases<'a>(&mut self, aliases: impl IntoIterator) { + let mut canonical = BTreeMap::new(); + for (from_, to_) in aliases.into_iter() { + canonical.insert(*from_, *to_); + } + let rewrite = |id: &mut TypeId| { + if let Some(id_) = canonical.get(id) { + *id = *id_; + } + }; + for ty in self.types.values_mut() { + if let Some(parent) = &mut ty.parent { + rewrite(parent); + } + for parent in &mut ty.mixins { + rewrite(parent); + } + match &mut ty.data { + Data::Struct(fields) => + for field in fields { + rewrite(&mut field.type_); + }, + Data::Primitive(Primitive::Sequence(t0)) + | Data::Primitive(Primitive::Option(t0)) => rewrite(t0), + Data::Primitive(Primitive::Result(t0, t1)) => { + rewrite(t0); + rewrite(t1); + } + Data::Primitive(Primitive::U32) + | Data::Primitive(Primitive::Bool) + | Data::Primitive(Primitive::U64) + | Data::Primitive(Primitive::String) => {} + } + } + } + + /// Eliminate types that are not in the referential transitive closure of the given collection + /// of roots. 
+ pub fn gc(&mut self, roots: impl IntoIterator) { + let mut visited = BTreeSet::new(); + let mut to_visit = BTreeSet::new(); + to_visit.extend(roots); + while let Some(id) = to_visit.pop_last() { + let Type { + name: _, + data, + parent, + mixins, + abstract_: _, + closed: _, + child_field: _, + discriminants, + } = &self.types[id]; + let already_visited = !visited.insert(id); + if already_visited { + continue; + } + if let Some(parent) = parent { + to_visit.insert(*parent); + } + to_visit.extend(mixins); + to_visit.extend(discriminants.values()); + match data { + Data::Struct(fields) => to_visit.extend(fields.iter().map(|field| field.type_)), + Data::Primitive(Primitive::Sequence(t0)) + | Data::Primitive(Primitive::Option(t0)) => { + to_visit.insert(*t0); + } + Data::Primitive(Primitive::Result(t0, t1)) => { + to_visit.insert(*t0); + to_visit.insert(*t1); + } + Data::Primitive(Primitive::U32) + | Data::Primitive(Primitive::Bool) + | Data::Primitive(Primitive::U64) + | Data::Primitive(Primitive::String) => {} + } + } + let live = |id: &TypeId| visited.contains(id); + let ids: Vec<_> = self.types.keys().collect(); + for id in ids { + if !live(&id) { + self.types.remove(id); + } + } + } +} + + +// === GraphViz support === + +#[cfg(feature = "graphviz")] +impl From<&'_ TypeGraph> for crate::graphviz::Graph { + fn from(graph: &'_ TypeGraph) -> Self { + graphviz::graph(graph) + } +} diff --git a/lib/rust/metamodel/src/meta/serialization.rs b/lib/rust/metamodel/src/meta/serialization.rs new file mode 100644 index 0000000000..4b3a7cff1e --- /dev/null +++ b/lib/rust/metamodel/src/meta/serialization.rs @@ -0,0 +1,649 @@ +//! Serialization analysis on meta representations. +//! +//! # Test Case Generation +//! +//! The [`testcases`] function supports generation of deserialization test cases that cover all +//! types reachable from some root type in a [`TypeGraph`]. +//! +//! The implementation is based on computing a test program built from a small set of operation, +//! 
and then interpreting the program to generate all the needed test cases. +//! +//! ## Test programs +//! +//! Abstractly, a test program can be considered to be equivalent to a tree, where each node has +//! three possibilities (the implementation is equivalent, but more efficient to execute): +//! - `Constant`: Evaluates to some constant data. The value affects the output but is irrelevant to +//! control flow. (In the implementation, this is [`Op::U8(_)`], [`Op::U32(_)`], etc.) +//! - `Concat(A, B)`: Evaluates to the concatenation of the evaluation of its two child nodes. (In +//! the implementation, this operator is implicit in program order.) +//! - `Amb(A, B)`: In every evaluation, this must evaluate to either the value of `A` or the value +//! of `B`. For completeness, there must be at least one evaluation of the whole program in which +//! this is evaluated to `A`, and at least one evaluation where it is evaluated to `B`. (In the +//! implementation, this is an n-ary operator expressed with [`Op::SwitchPush`] / +//! [`Op::SwitchPop`] / [`Op::Case(_)`].) +//! +//! ## Program generation +//! +//! The input typegraph may contain cycles. The first step of program generation is to select a +//! *basecase* for every sum type such that the type graph, when excluding non-basecase +//! possibilities from every sum type, does not contain any cycles. For details on this problem and +//! the algorithm solving it, see [`select_basecase`]. +//! +//! Once we have the information necessary to avoid trying to emit cyclic structures, program +//! generation is straightforward: For product types, we use the equivalent of the `Concat` +//! operation described above; for sum types, the `Amb` operation. Compound primitives like `Option` +//! and `Result` are treated as similar user-defined sum types would be. +//! +//! ## Program interpretation +//! +//! Program interpretation is better described in terms of the sequence of [`Op`]s than the more +//! 
//! abstract tree representation described above. The interpreter advances a program counter over
//! every [`Op`] once, in sequence. It maintains a stack of the [`Op::SwitchPop`] corresponding to
//! every [`Op::SwitchPush`]--that is, the join points for the n-ary `Amb` operators that the
//! program counter is currently within. For each [`Op::Case`] (i.e. one possibility of an `Amb`), a
//! test case is generated: The test case will consist of the basecase-mode evaluation of the whole
//! program up to the active [`Op::Case`] in each open switch (this value is maintained as execution
//! proceeds), the present case in each open switch, and then the output of basecase-mode execution
//! from the join point of the switch on top of stack to the end of the program--thus efficiently
//! producing one test case for every [`Op::Case`] in the input, with each case composed of the
//! output of the whole program, using basecase values for all switches not in the stack at the
//! point the case is reached.

use crate::meta::*;

use std::fmt::Write;



/// When `true`, [`testcases`] prints the generated program to stderr before interpreting it.
const DEBUG: bool = false;



// ============================
// === Test Case Generation ===
// ============================

// NOTE(review): generic arguments (on `Vec`, `BTreeMap`) look stripped in this copy of the patch;
// declarations are reproduced as found.

/// A set of *accept* and *reject* tests for a serialization format.
#[derive(Debug, Clone)]
pub struct TestCases {
    /// Inputs that a deserializer should accept.
    pub accept: Vec>,
    /// Inputs that a deserializer should reject.
    pub reject: Vec>,
    // The generating program and its per-op annotations, kept so that `TestCases::program` can
    // render them on request.
    program: Vec,
    debuginfo: BTreeMap,
}

/// Generate test cases.
///
/// Produces 100% coverage of valid structures (i.e. every variant of every enum occurs in some
/// `accept` case), and a representative set of `reject` cases.
pub fn testcases(graph: &TypeGraph, root: TypeId) -> TestCases {
    let mut builder = ProgramBuilder::new(graph, root);
    // `Default::default()` is `false` here: the root is generated in exhaustive (non-basecase)
    // mode.
    builder.type_(root, Default::default());
    let ProgramBuilder { program, debuginfo, .. } = builder;
    if DEBUG {
        eprintln!("{}", fmt_program(&program, &debuginfo));
    }
    let (accept, reject) = Interpreter::run(&program);
    TestCases { accept, reject, program, debuginfo }
}

impl TestCases {
    /// Produce a JSON representation of test case data.
    ///
    /// The output is an object with two keys, `"accept"` and `"reject"`, each holding a list in
    /// which every case is written with its `Debug` formatting, one case per line.
    pub fn to_json(&self) -> String {
        let accept: Vec<_> = self.accept.iter().map(|case| format!("{:?}", case)).collect();
        let accept = accept.join(", \n\t");
        let reject: Vec<_> = self.reject.iter().map(|case| format!("{:?}", case)).collect();
        let reject = reject.join(", \n\t");
        let mut out = String::new();
        // Writing to a `String` cannot fail, hence the `unwrap`s.
        writeln!(out, "{{").unwrap();
        writeln!(out, "\"accept\": [").unwrap();
        writeln!(out, "\t{accept}").unwrap();
        writeln!(out, "],").unwrap();
        writeln!(out, "\"reject\": [").unwrap();
        writeln!(out, "\t{reject}").unwrap();
        writeln!(out, "]}}").unwrap();
        out
    }

    /// Render a debug representation of the test program used to generate the cases.
    pub fn program(&self) -> String {
        fmt_program(&self.program, &self.debuginfo)
    }
}

/// Produce a debug representation of a program.
///
/// One line is written per operation: its index, indentation reflecting switch nesting, the
/// operation itself, then any debug annotation (`-- text`), any continuation (`[target]`), and a
/// running `# acceptN` / `# rejectN` label on case operations.
fn fmt_program(program: &[Op], debuginfo: &BTreeMap) -> String {
    let mut out = String::new();
    let mut indent = 0;
    let continuations = collect_continuations(program);
    let mut accept = 0;
    let mut reject = 0;
    for (i, op) in program.iter().enumerate() {
        // Dedent before printing, so a `SwitchPop` lines up with its `SwitchPush`.
        if *op == Op::SwitchPop {
            indent -= 1
        }
        write!(out, "{:>4}: ", i).unwrap();
        for _ in 0..indent {
            write!(out, "  ").unwrap();
        }
        write!(out, "{:?}", op).unwrap();
        if let Some(debuginfo) = debuginfo.get(&i) {
            write!(out, " -- {}", debuginfo).unwrap();
        }
        if let Some(continuation) = continuations.get(&i) {
            write!(out, " [{}]", continuation).unwrap();
        }
        if *op == Op::Case(Case::Accept) {
            write!(out, " # accept{accept}").unwrap();
            accept += 1;
        }
        if *op == Op::Case(Case::Reject) {
            write!(out, " # reject{reject}").unwrap();
            reject += 1;
        }
        // Indent after printing, so only the contents of the switch are nested.
        if *op == Op::SwitchPush {
            indent += 1
        }
        writeln!(out).unwrap();
    }
    out
}



// ==========================
// === Program Operations ===
// ==========================

/// Operations for a test-case-generating program.
///
/// `U8`/`U32`/`U64` are the constant-data operations described in the module documentation;
/// `SwitchPush`, `SwitchPop` and `Case` together express its n-ary `Amb` operator.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Op {
    U8(u8),
    U32(u32),
    U64(u64),
    SwitchPush,
    SwitchPop,
    Case(Case),
}

/// Whether the data leading up to an [`Op::Case`] is expected to be accepted or rejected by a
/// deserializer.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Case {
    Accept,
    Reject,
}



// ==========================
// === Program Generation ===
// ==========================

/// Generates test-case-generating program for a type graph.
// NOTE(review): generic arguments (on `BTreeSet`, `BTreeMap`, `Vec`) look stripped in this copy of
// the patch; declarations are reproduced as found.
#[derive(Debug)]
struct ProgramBuilder<'g> {
    graph: &'g TypeGraph,
    // Types that occur as a struct field or as a discriminant target of some reachable type.
    will_visit: BTreeSet,
    // Types for which `type_` has already been called.
    visited: BTreeSet,
    // Annotations keyed by index into `program`.
    debuginfo: BTreeMap,
    program: Vec,
    // Filled in by `select_basecase`; indexed by type id when a sum type needs its basecase.
    basecase_discriminant: BTreeMap,
}

impl<'g> ProgramBuilder<'g> {
    /// Prepare a builder for the types reachable from `root`.
    ///
    /// Reachability is computed on a garbage-collected clone of the graph; the builder itself
    /// keeps referring to the original `graph`.
    fn new(graph: &'g TypeGraph, root: TypeId) -> Self {
        let mut graph_ = graph.clone();
        graph_.gc(vec![root]);
        let program = Default::default();
        let visited = Default::default();
        let mut will_visit = BTreeSet::new();
        let mut basecase_discriminant = BTreeMap::new();
        let mut sb_visited = BTreeSet::new();
        for (id, ty) in graph_.types.iter() {
            if let Data::Struct(fields) = &ty.data {
                will_visit.extend(fields.iter().map(|field| field.type_));
                will_visit.extend(ty.discriminants.values());
            }
            select_basecase(graph, id, &mut basecase_discriminant, &mut sb_visited);
            // The scratch set is reused across calls and reset after each one.
            sb_visited.clear();
        }
        let debuginfo = Default::default();
        Self { graph, program, visited, will_visit, debuginfo, basecase_discriminant }
    }

    /// Append an operation to the program.
    fn emit(&mut self, op: Op) {
        self.program.push(op);
    }

    /// Attach a debug annotation to the operation that will be emitted next.
    fn debug_next(&mut self, debug: impl std::fmt::Display) {
        let n = self.program.len();
        self.debuginfo.insert(n, debug.to_string());
    }

    /// Attach a debug annotation to the most recently emitted operation.
    ///
    /// Must not be called before anything has been emitted (the index computation would
    /// underflow).
    fn debug_prev(&mut self, debug: impl std::fmt::Display) {
        let n = self.program.len() - 1;
        self.debuginfo.insert(n, debug.to_string());
    }

    /// Whether `id` has already been visited or is in the `will_visit` set, i.e. whether a
    /// reference to it may be emitted in basecase form.
    fn basecase(&self, id: TypeId) -> bool {
        self.visited.contains(&id) || self.will_visit.contains(&id)
    }

    /// Emit operations for the type `id`, dispatching on whether it is a struct or a primitive.
    ///
    /// Basecase mode is forced if the type has been visited before (`insert` returns `false`).
    fn type_(&mut self, id: TypeId, basecase: bool) {
        let basecase = basecase || !self.visited.insert(id);
        let ty = &self.graph[id];
        match &ty.data {
            Data::Struct(_) => self.object(id, basecase),
            Data::Primitive(primitive) => self.primitive(*primitive, basecase, id),
        }
    }

    /// Emit [`Op`]s reflecting the data of a [`Primitive`].
    ///
    /// # Simple primitives
    ///
    /// If the [`Primitive`] is scalar data, like an integer or bool, operations producing an
    /// arbitrary example value will be emitted.
    ///
    /// # Compound primitives
    ///
    /// For all compound primitives (primitives referring to other types), the `basecase` parameter
    /// determines whether the output is minimal (as appropriate for previously-encountered types),
    /// or exhaustive.
    ///
    /// If the input is an option:
    /// - If `basecase` is `true`, only the `None` representation will be emitted.
    /// - If `basecase` is `false`, an alternation of the `None` representation, the `Some`
    ///   representation, and a reject-case with an invalid discriminant will be emitted.
    ///
    /// If the input is a sequence:
    /// - If `basecase` is `true`, a zero-length sequence will be emitted.
    /// - If `basecase` is `false`, an alternation of an empty sequence and a 1-object sequence will
    ///   be emitted (this tests the correspondence between the encoded length and number of
    ///   elements). Although an `Option` also allows 0 or 1 objects, an `Option` is encoded with a
    ///   smaller (1-byte) length field, so they are encoded distinctly.
    ///
    /// If the input is a result:
    /// - If `basecase` is `true`, a type that has been determined not to cause recursion will be
    ///   selected.
    /// - If `basecase` is `false`, an alternation of the two possible types will be emitted, along
    ///   with a reject case with an invalid discriminant.
    fn primitive(&mut self, primitive: Primitive, basecase: bool, id: TypeId) {
        match primitive {
            // Value doesn't matter, but this will be recognizable in the output, and will tend not
            // to encode compatibly with other types.
            Primitive::U32 => self.emit(Op::U32(1234567890)),
            // Value 1 chosen to detect errors better: 0 encodes the same way as Option::None.
            Primitive::Bool => self.emit(Op::U8(1)),
            // Value doesn't matter, but this will be recognizable in the output, and will tend not
            // to encode compatibly with other types.
            Primitive::U64 => self.emit(Op::U64(1234567890123456789)),
            // The example string is empty: only its 64-bit length (0) is emitted.
            Primitive::String => self.emit(Op::U64("".len() as u64)),
            Primitive::Sequence(_) if basecase => self.emit(Op::U64(0)),
            Primitive::Sequence(t0) => {
                self.emit(Op::SwitchPush);
                self.emit(Op::U64(0));
                self.emit(Op::Case(Case::Accept));
                self.emit(Op::U64(1));
                self.type_(t0, basecase);
                self.emit(Op::Case(Case::Accept));
                self.emit(Op::SwitchPop);
            }
            Primitive::Option(_) if basecase => self.emit(Op::U8(0)),
            Primitive::Option(t0) => {
                // NOTE(review): only one accept case is emitted here (`None` if `t0` is a
                // basecase type, `Some` otherwise), while the doc comment above describes an
                // alternation of both; confirm which is intended.
                self.emit(Op::SwitchPush);
                if self.basecase(t0) {
                    self.emit(Op::U8(0));
                } else {
                    self.emit(Op::U8(1));
                    self.type_(t0, basecase);
                }
                self.emit(Op::Case(Case::Accept));
                self.emit(Op::U8(2));
                self.emit(Op::Case(Case::Reject));
                self.emit(Op::SwitchPop);
            }
            Primitive::Result(t0, t1) => {
                // Reorder so that `t0`/`i0` name the basecase alternative and `t1`/`i1` the other.
                let basecase_index = self.basecase_discriminant[&id];
                let types = [t0, t1];
                let t0 = types[basecase_index];
                let t1 = types[1 - basecase_index];
                let i0 = basecase_index as u32;
                let i1 = 1 - basecase_index as u32;
                if basecase {
                    self.emit(Op::U32(i0));
                    self.type_(t0, basecase);
                } else {
                    self.emit(Op::SwitchPush);
                    // The basecase alternative is emitted unless it alone is a basecase type;
                    // this guarantees at least one accept case in the switch.
                    if !self.basecase(t0) || self.basecase(t1) {
                        self.emit(Op::U32(i0));
                        self.type_(t0, basecase);
                        self.emit(Op::Case(Case::Accept));
                    }
                    if !self.basecase(t1) {
                        self.emit(Op::U32(i1));
                        self.type_(t1, basecase);
                        self.emit(Op::Case(Case::Accept));
                    }
                    // 2 is not a valid discriminant for a two-alternative result.
                    self.emit(Op::U32(2));
                    self.emit(Op::Case(Case::Reject));
                    self.emit(Op::SwitchPop);
                }
            }
        }
    }

    /// Emit [`Op`]s reflecting the data of a [`Type`], as identified by ID.
    ///
    /// If `basecase` is true: An example of the specified type will be created that is intended to
    /// be no larger than necessary, and that avoids infinite recursion; this is appropriate when
    /// emitting data for a type that has already been exercised with `basecase=false`, or for a
    /// type that has been determined to occur unconditionally as a field of another type.
+ /// + /// If `basecase` is false, if the type has child types, an alternation of all possible child + /// types will be emitted, along with a reject-case including a discriminant higher than the + /// highest valid discriminant, and reject-cases for any invalid discriminants lower than the + /// highest valid discriminant. + fn object(&mut self, id: TypeId, basecase: bool) { + let mut hierarchy = vec![id]; + let mut id = id; + while let Some(id_) = self.graph[id].parent { + id = id_; + hierarchy.push(id); + } + self.object_(&mut hierarchy, basecase); + assert_eq!(&hierarchy, &[]) + } + + /// Emit [`Op`]s reflecting the data of a [`Type`], as identified by a `Vec` `hierarchy` in + /// which: + /// - `hierarchy[0]` is a concrete [`Type`]. + /// - `hierarchy[i]` is the parent of `hierarchy[i-1]`. + /// - `hierarchy[hierarchy.len() - 1]` identifies a type that doesn't have any parent type. + /// + /// For a design description see the primary interface, [`Self::object`]. + fn object_(&mut self, hierarchy: &mut Vec, basecase: bool) { + let id = hierarchy.pop().unwrap(); + let ty = &self.graph[id]; + let fields = match &ty.data { + Data::Struct(fields) => fields, + _ => panic!(), + }; + for (i, field) in fields.iter().enumerate() { + if ty.child_field == Some(i) { + if hierarchy.is_empty() { + let basecase_discriminant = self.basecase_discriminant[&id]; + let discriminants = &ty.discriminants; + let basecase_ty = discriminants[&basecase_discriminant]; + hierarchy.push(basecase_ty); + if basecase { + self.emit(Op::U32(basecase_discriminant as u32)); + self.object_(hierarchy, basecase); + } else { + let (&max, _) = discriminants.last_key_value().unwrap(); + self.emit(Op::SwitchPush); + self.emit(Op::U32(basecase_discriminant as u32)); + self.debug_prev(&self.graph[basecase_ty].name); + self.object_(hierarchy, basecase); + self.emit(Op::Case(Case::Accept)); + for i in 0..=(max + 1) { + if i == basecase_discriminant { + continue; + } + self.emit(Op::U32(i as u32)); + match 
discriminants.get(&i) { + Some(id) => { + hierarchy.push(*id); + self.debug_prev(&self.graph[*id].name); + self.object_(hierarchy, basecase); + self.emit(Op::Case(Case::Accept)); + } + None => self.emit(Op::Case(Case::Reject)), + } + } + self.emit(Op::SwitchPop); + } + } else { + self.object_(hierarchy, basecase); + } + } + self.type_(field.type_, basecase); + self.debug_prev(format!(".{}", &field.name)); + } + } +} + +/// Choose a discriminant for the specified type, and if necessary for some other types reachable +/// from it in the composition graph, so that the composition graph for the type is non-recursive. +/// +/// If any child type doesn't own any sum-types, we select it. Otherwise, selections are made +/// according to the following recursive algorithm: +/// - If we have a child that doesn't own any sum-type fields, choose it and return Ok. +/// - Otherwise, recurse into each child; if one returns Ok, choose it and return Ok. +/// - If no child returns Ok, we got here by recursing into a bad choice; return Err. +/// - If we reach a type we have already visited, this choice contains a cycle; return Err. (Because +/// we only visit each type once, the time complexity of this algorithm is linear in the number of +/// types we need to select discriminants for). +/// +/// The top-level call will always return Ok because: There must be a sum type in our descendants +/// that has a child that doesn't own any sum-type fields, or there would be a type in the input +/// that is only possible to instantiate with cyclic or infinite data. +fn select_basecase( + graph: &TypeGraph, + id: TypeId, + out: &mut BTreeMap, + visited: &mut BTreeSet, +) { + select_basecase_(graph, id, out, visited).unwrap() +} + +/// Implementation. See the documentation for [`select_basecase`]. 
+fn select_basecase_( + graph: &TypeGraph, + id: TypeId, + out: &mut BTreeMap, + visited: &mut BTreeSet, +) -> Result<(), ()> { + if out.contains_key(&id) { + return Ok(()); + } + if !visited.insert(id) { + return Err(()); + } + let mut result_discriminants = BTreeMap::new(); + let discriminants = match &graph[id].data { + Data::Primitive(Primitive::Result(t0, t1)) => { + result_discriminants.insert(0, *t0); + result_discriminants.insert(1, *t1); + &result_discriminants + } + _ => &graph[id].discriminants, + }; + if discriminants.is_empty() { + return Ok(()); + } + let mut descendants = BTreeMap::<_, Vec<_>>::new(); + let mut child_fields = BTreeSet::new(); + let mut child_sums = BTreeSet::new(); + for (&i, &child) in discriminants { + child_fields.clear(); + child_sums.clear(); + child_fields.insert(child); + while let Some(child_) = child_fields.pop_last() { + let ty = &graph[child_]; + if ty.child_field.is_some() { + child_sums.insert(child_); + } + match &ty.data { + Data::Struct(fields) => child_fields.extend(fields.iter().map(|field| field.type_)), + Data::Primitive(Primitive::Result(_, _)) => { + child_sums.insert(child_); + } + Data::Primitive(_) => (), + } + } + if child_sums.is_empty() { + out.insert(id, i); + return Ok(()); + } + descendants.insert(i, child_sums.iter().copied().collect()); + } + for (i, descendants) in descendants { + let is_ok = |id: &TypeId| select_basecase_(graph, *id, out, visited).is_ok(); + if descendants.iter().all(is_ok) { + out.insert(id, i); + return Ok(()); + } + } + Err(()) +} + + + +// ================= +// === Execution === +// ================= + +/// Runs a test-case-generating program. +#[derive(Debug, Default)] +struct Interpreter<'p> { + program: &'p [Op], + continuations: BTreeMap, +} + +/// A control-stack frame of the interpreted program. +#[derive(Debug, Default, PartialEq, Eq)] +struct Frame { + /// A return address, as an index into the sequence of [`Op`]s. + return_: usize, + /// A height of the data stack. 
+ prefix_len: usize, +} + +impl<'p> Interpreter<'p> { + /// Interpret a program, producing collections of accept-cases and reject-cases. + fn run(program: &'p [Op]) -> (Vec>, Vec>) { + let continuations = collect_continuations(program); + let self_ = Self { program, continuations }; + self_.run_() + } + + /// Interpret every instruction in the program, in order. For every case in each switch, emit an + /// (accept or reject) output consisting of the basecase interpretation of all data before the + /// given switch, the switch case's data, and then the basecase interpretation of all data after + /// the switch. + fn run_(&self) -> (Vec>, Vec>) { + let mut accept: Vec> = Default::default(); + let mut reject: Vec> = Default::default(); + let mut prefix: Vec = Default::default(); + let mut stack: Vec = Default::default(); + for (pc, op) in self.program.iter().enumerate() { + match op { + Op::SwitchPush => stack + .push(Frame { return_: self.continuations[&pc], prefix_len: prefix.len() }), + Op::SwitchPop => { + let Frame { prefix_len, .. } = stack.pop().unwrap(); + prefix.truncate(prefix_len); + let cont_stack = vec![self.continuations[&pc]]; + if DEBUG { + eprintln!("- delimited continuation: {pc} -> {cont_stack:?}"); + } + self.run_continuation(cont_stack, &mut prefix); + } + Op::U8(data) => prefix.push(*data), + Op::U32(data) => prefix.extend(&data.to_le_bytes()), + Op::U64(data) => prefix.extend(&data.to_le_bytes()), + Op::Case(case) => { + if DEBUG { + match case { + Case::Accept => eprint!("accept{}: ", accept.len()), + Case::Reject => eprint!("reject{}: ", reject.len()), + }; + } + let results = match case { + Case::Accept => &mut accept, + Case::Reject => &mut reject, + }; + let Frame { prefix_len, .. 
} = stack.last().unwrap(); + let stack = stack.iter().map(|frame| frame.return_).collect(); + let mut data = prefix.clone(); + if DEBUG { + eprintln!("{pc} -> {stack:?}"); + } + let final_pc = self.run_continuation(stack, &mut data); + let returned = "Returned from escape continuation"; + assert_eq!(final_pc, self.program.len(), "{returned} at {final_pc}."); + results.push(data); + prefix.truncate(*prefix_len); + } + } + } + assert_eq!(&stack, &[]); + (accept, reject) + } + + /// Given an initial return stack, run the program until the last stack frame is exited, + /// running only basecase cases of each switch encountered, emitting the data to the `Vec` + /// passed in the `out` parameter. + /// + /// The return value is the program counter when the last stack frame was exited. + /// + /// If the given stack is the full stack at a certain point in program execution, the + /// continuation is an escape continuation that will run the program until completion. + /// + /// If the given stack is a consecutive slice of the stack at a certain point in program + /// execution, the continuation is a delimited continuation. + fn run_continuation(&self, mut stack: Vec, out: &mut Vec) -> usize { + let mut pc = stack.pop().unwrap(); + while let Some(op) = self.program.get(pc) { + match op { + Op::SwitchPush => stack.push(self.continuations[&pc]), + Op::SwitchPop => panic!("Fell through a switch at {pc}."), + Op::U8(data) => out.push(*data), + Op::U32(data) => out.extend(&data.to_le_bytes()), + Op::U64(data) => out.extend(&data.to_le_bytes()), + Op::Case(Case::Accept) => { + if let Some(pc_) = stack.pop() { + if DEBUG { + eprintln!("- ret: {pc} -> {pc_}"); + } + pc = pc_; + continue; + } + return pc; + } + Op::Case(Case::Reject) => panic!("Rejected base case at {}.", pc), + } + pc += 1; + } + assert_eq!(&stack, &[]); + pc + } +} + +/// Analyze a program to calculate the index of the target of each [`Op`] that implicitly refers to +/// another location in the program. 
+fn collect_continuations(program: &[Op]) -> BTreeMap { + let mut continuations = BTreeMap::new(); + let mut switch_stack = vec![]; + for (pc, op) in program.iter().enumerate() { + match op { + Op::SwitchPush => switch_stack.push(pc), + Op::SwitchPop => { + let push_pc = switch_stack.pop().unwrap(); + let pop_pc = pc; + // A `SwitchPush` pushes its continuation onto the return stack; the return address + // for an `Accept`/`Reject` is after the switch. + continuations.insert(push_pc, pop_pc + 1); + // When we "fall through" a switch after executing all the `Accept`/`Reject` cases, + // we re-run the switch's first (delimited) continuation in basecase mode before + // proceeding. + continuations.insert(pop_pc, push_pc + 1); + } + _ => (), + } + } + assert_eq!(&switch_stack, &[]); + continuations +} diff --git a/lib/rust/metamodel/src/meta/transform.rs b/lib/rust/metamodel/src/meta/transform.rs new file mode 100644 index 0000000000..3e4513ff04 --- /dev/null +++ b/lib/rust/metamodel/src/meta/transform.rs @@ -0,0 +1,118 @@ +//! Transformations on the meta representation. + +use crate::meta::*; + +use derivative::Derivative; + + + +// =============== +// === Flatten === +// =============== + +/// `flatten` the specified fields into their containing structs, transitively. +/// +/// Each inserted field will have its name prepended with the name of its eliminated container. +/// If the `hide` property is set for the container, it will be inherited by its child fields. +/// +/// This implements the [`reflect(flatten)`](../enso_reflect_macros/#reflectflatten-field-attribute) +/// attribute of the `#[derive(Reflect)]` macro; see the `enso_reflect_macros` documentation for an +/// example of the usage and results of the transformation. 
+pub fn flatten(graph: &mut TypeGraph, ids: &mut BTreeSet) { + let order = toposort(graph.types.keys(), TypeGraphDependencyVisitor { graph, ids }); + for id in order { + flatten_(graph, ids, id); + } +} + +/// `flatten` the fields specified in `to_flatten` into the type identified by `outer`. +/// +/// For design notes, see [`flatten`]. +fn flatten_(graph: &mut TypeGraph, to_flatten: &mut BTreeSet, outer: TypeId) { + let outer_fields = match &mut graph[outer].data { + Data::Struct(ref mut fields) => std::mem::take(fields), + _ => return, + }; + let mut child_field = graph[outer].child_field; + let mut flattened = Vec::with_capacity(outer_fields.len()); + for (i, field) in outer_fields.into_iter().enumerate() { + let inner = field.type_; + if to_flatten.remove(&field.id) { + let inner_ty = &graph[inner]; + let inner_fields = match &inner_ty.data { + Data::Struct(fields) => fields, + Data::Primitive(_) => panic!("Cannot flatten a primitive field."), + }; + let flatten_field = |inner_: &Field| { + let mut name = field.name.clone(); + name.append(inner_.name.clone()); + let mut flat = Field::named(name, inner_.type_); + flat.hide = field.hide || inner_.hide; + flat + }; + flattened.extend(inner_fields.iter().map(flatten_field)); + } else { + flattened.push(field); + } + if child_field == Some(i + 1) { + child_field = Some(flattened.len()); + } + } + graph[outer].child_field = child_field; + match &mut graph[outer].data { + Data::Struct(fields) => *fields = flattened, + _ => unreachable!(), + }; +} + + +// === Topologic Sort === + +fn toposort(iter: impl IntoIterator, dependencies: V) -> Vec +where + T: Copy + Ord, + V: DependencyVisitor, { + let mut sort = TopoSort::default(); + for id in iter { + sort.visit(id, &dependencies); + } + sort.order +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +struct TopoSort { + visited: BTreeSet, + order: Vec, +} + +impl TopoSort { + fn visit(&mut self, t: T, visitor: &impl DependencyVisitor) + where T: Copy + Ord { + 
if self.visited.insert(t) { + visitor.visit(self, t); + self.order.push(t); + } + } +} + +trait DependencyVisitor { + fn visit(&self, sort: &mut TopoSort, t: T); +} + +struct TypeGraphDependencyVisitor<'g, 'i> { + graph: &'g TypeGraph, + ids: &'i BTreeSet, +} + +impl DependencyVisitor for TypeGraphDependencyVisitor<'_, '_> { + fn visit(&self, sort: &mut TopoSort, id: TypeId) { + if let Data::Struct(fields) = &self.graph[id].data { + for field in fields { + if self.ids.contains(&field.id) { + sort.visit(field.type_, self); + } + } + } + } +} diff --git a/lib/rust/metamodel/src/rust/graphviz.rs b/lib/rust/metamodel/src/rust/graphviz.rs new file mode 100644 index 0000000000..9e8dd0e319 --- /dev/null +++ b/lib/rust/metamodel/src/rust/graphviz.rs @@ -0,0 +1,73 @@ +//! Graphical representation of Rust type relationships with GraphViz. + +use super::*; + +use crate::graphviz::EdgeType; +use crate::graphviz::Graph; +use crate::graphviz::Node; +use crate::graphviz::NodeType; + +use std::collections::BTreeMap; + + + +// ============= +// === Graph === +// ============= + +/// Generate a graph of the given type's relationships with other types. 
+pub fn graph(root: LazyType) -> Graph { + let mut to_visit = vec![root]; + let mut types = BTreeMap::new(); + while let Some(type_) = to_visit.pop() { + let id = type_.id; + if types.contains_key(&id) { + continue; + } + let type_ = type_.evaluate(); + to_visit.extend(type_.referenced_types().into_iter()); + types.insert(id, type_); + } + let mut graph = Graph::default(); + let mut numbers = BTreeMap::new(); + let mut next_id = 0; + let mut number = |key: TypeId| { + *numbers.entry(key).or_insert_with(|| { + let id = next_id; + next_id += 1; + id + }) + }; + for type_ in types.values() { + let sname = format!("{}{}", type_.name, number(type_.id)); + let primitive = type_.is_primitive(); + let node_type = match type_.type_type() { + TypeType::Sum => NodeType::Enum, + TypeType::Product => NodeType::Struct, + }; + let label = type_.name.clone(); + graph.nodes.insert(sname.clone(), Node { primitive, node_type, label }); + if let Data::Enum(enum_) = &type_.data { + for variant in &enum_.variants { + let svariant = format!("{}_{}", sname, variant.ident); + let primitive = false; + let node_type = NodeType::Variant; + let label = variant.ident.clone(); + graph.nodes.insert(svariant.clone(), Node { primitive, node_type, label }); + graph.edges.push((sname.clone(), svariant.clone(), EdgeType::Variant)); + for ty in variant.fields.referenced_types() { + let ty = &types[&ty.id]; + let sname2 = format!("{}{}", ty.name, number(ty.id)); + graph.edges.push((svariant.clone(), sname2, EdgeType::Field)); + } + } + } else { + for ty in type_.referenced_types() { + let ty = &types[&ty.id]; + let sname2 = format!("{}{}", ty.name, number(ty.id)); + graph.edges.push((sname.clone(), sname2, EdgeType::Field)); + } + } + } + graph +} diff --git a/lib/rust/metamodel/src/rust/mod.rs b/lib/rust/metamodel/src/rust/mod.rs new file mode 100644 index 0000000000..b17576867e --- /dev/null +++ b/lib/rust/metamodel/src/rust/mod.rs @@ -0,0 +1,364 @@ +//! 
Representation of data models in the Rust typesystem. +//! +//! Unlike the other metamodels in this crate, the Rust model uses a lazy-evaluation representation +//! of type graphs. While this representation doesn't support analysis as easily as the +//! `crate::data_structures::VecMap` representation, it can be created by a context-free translation +//! from Rust syntax, so it can be built directly by a proc macro, like [`enso_reflect`]. + + +// ============== +// === Export === +// ============== + +pub use to_meta::to_meta; + + + +#[cfg(feature = "graphviz")] +mod graphviz; +mod to_meta; + + + +// ================== +// === Data model === +// ================== + +/// A type. +#[derive(Debug, Clone)] +pub struct TypeData { + /// A value uniquely identifying the type. + pub id: TypeId, + /// The Rust identifier of the type. + pub name: String, + /// The type's contents. + pub data: Data, + /// A value uniquely-identifying the type up to the type of a certain field. + pub subtype_erased: GenericTypeId, +} + +/// A type's data content. +#[derive(Debug, Clone)] +pub enum Data { + /// A `struct`. + Struct(Struct), + /// An `enum`. + Enum(Enum), + /// Builtins, including basic types and generics. + Primitive(Primitive), +} + +/// An `enum`. +#[derive(Debug, Clone)] +pub struct Enum { + /// The variants. + pub variants: Vec, +} + +/// A possible value of an `enum`. +#[derive(Debug, Clone)] +pub struct Variant { + /// The variant's name. + pub ident: String, + /// The variant's data. + pub fields: Fields, + /// If true, when abstracting to the `meta` representation, rather than generate a type for + /// this variant, its (sole) field will become a child of the parent enum. + pub inline: bool, +} + +/// A `struct`. +#[derive(Debug, Clone)] +pub struct Struct { + /// The fields. + pub fields: Fields, + /// If true, this struct should be passed-through to its (sole) field when abstracting to the + /// `meta` representation. 
+ pub transparent: bool, +} + +/// A field with a name. +#[derive(Debug, Clone)] +pub struct NamedField { + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub type_: LazyType, + /// If true, this type should become the parent of the type in this field. + pub subtype: bool, + /// If true, the fields of this field should be inserted in place of it. + pub flatten: bool, + /// If true, this field should be hidden in generated code, in a target-language-dependent + /// manner. + pub hide: bool, +} + +/// A field in a tuple struct or tuple variant. +#[derive(Debug, Copy, Clone)] +pub struct UnnamedField { + #[allow(missing_docs)] + pub type_: LazyType, +} + +/// The data of a struct or variant. +#[derive(Debug, Clone)] +pub enum Fields { + #[allow(missing_docs)] + Named(Vec), + #[allow(missing_docs)] + Unnamed(Vec), + #[allow(missing_docs)] + Unit, +} + +/// Rust standard types. +#[derive(Debug, Clone, Copy)] +pub enum Primitive { + /// A `bool`. + Bool, + /// A `usize`. + Usize, + /// A `u32`. + U32, + /// A `String`. + String, + /// A `Vec<_>`. + Vec(LazyType), + /// An `Option<_>`. + Option(LazyType), + /// A `Result<_, _>`. + Result(LazyType, LazyType), +} + + + +// ======================= +// === Type references === +// ======================= + +/// Uniquely identifies a type. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct TypeId(std::any::TypeId); + +impl TypeId { + #[allow(missing_docs)] + pub fn new(id: std::any::TypeId) -> Self { + Self(id) + } +} + +/// Distinguishes a type, irrespective of any sole type parameter present in the field marked +/// `#[reflect(subtype)]`, if any. Used in the implementation of the `subtype` transform. 
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct GenericTypeId(std::any::TypeId); + +impl GenericTypeId { + #[allow(missing_docs)] + pub fn new(id: std::any::TypeId) -> Self { + Self(id) + } +} + +/// Identifies a type, and can be evaluated to obtain the type's definition. +/// +/// This is used for the fields of a type's `TypeData` because type graphs may contain cycles. +#[derive(Copy, Clone, Debug)] +pub struct LazyType { + #[allow(missing_docs)] + pub id: TypeId, + evaluate: Thunk, +} + +impl LazyType { + #[allow(missing_docs)] + pub fn new(id: TypeId, evaluate: Thunk) -> Self { + Self { id, evaluate } + } + + /// Obtain the type's definition. + pub fn evaluate(&self) -> TypeData { + (self.evaluate)() + } +} + +type Thunk = fn() -> T; + + +// === Reference Traversal === + +/// Type reference traversal. +pub trait ReferencedTypes { + /// Identify all the types this type contains references to. + fn referenced_types(&self) -> Vec; +} + +impl ReferencedTypes for Primitive { + fn referenced_types(&self) -> Vec { + match self { + Primitive::Bool | Primitive::Usize | Primitive::String | Primitive::U32 => vec![], + Primitive::Vec(ty) | Primitive::Option(ty) => vec![*ty], + Primitive::Result(ty0, ty1) => vec![*ty0, *ty1], + } + } +} + +impl ReferencedTypes for TypeData { + fn referenced_types(&self) -> Vec { + self.data.referenced_types() + } +} + +impl ReferencedTypes for Data { + fn referenced_types(&self) -> Vec { + match self { + Data::Struct(struct_) => struct_.referenced_types(), + Data::Enum(enum_) => enum_.referenced_types(), + Data::Primitive(primitive) => primitive.referenced_types(), + } + } +} + +impl ReferencedTypes for Enum { + fn referenced_types(&self) -> Vec { + let mut referenced = vec![]; + for variant in &self.variants { + referenced.extend(variant.referenced_types()); + } + referenced + } +} + +impl ReferencedTypes for Struct { + fn referenced_types(&self) -> Vec { + self.fields.referenced_types() + } +} + +impl 
ReferencedTypes for Variant { + fn referenced_types(&self) -> Vec { + self.fields.referenced_types() + } +} + +impl ReferencedTypes for Fields { + fn referenced_types(&self) -> Vec { + match self { + Fields::Named(fields) => fields.iter().map(|field| field.type_).collect(), + Fields::Unnamed(fields) => fields.iter().map(|field| field.type_).collect(), + Fields::Unit => vec![], + } + } +} + +impl ReferencedTypes for NamedField { + fn referenced_types(&self) -> Vec { + vec![self.type_] + } +} + +impl ReferencedTypes for UnnamedField { + fn referenced_types(&self) -> Vec { + vec![self.type_] + } +} + + + +// ==================================== +// === Abstractions over data model === +// ==================================== + +/// Categorizes types by the nature of their composition operators. +#[derive(Copy, Clone, Debug)] +pub enum TypeType { + /// A type like an `enum`, that only contains data for one of its constituent types. + Sum, + /// A type like a `struct` or tuple, that contains data for all of its constituent types. + Product, +} + +impl TypeData { + /// Get information about the composition operator relating the types this type is composed of. + pub fn type_type(&self) -> TypeType { + match &self.data { + Data::Struct(_) => TypeType::Product, + Data::Enum(_) => TypeType::Sum, + Data::Primitive(primitive) => primitive.type_type(), + } + } +} + +impl Primitive { + /// Get information about the composition operator relating the types this type is composed of. + pub fn type_type(&self) -> TypeType { + match &self { + Primitive::Bool + | Primitive::Usize + | Primitive::U32 + | Primitive::String + | Primitive::Vec(_) => TypeType::Product, + Primitive::Option(_) | Primitive::Result(_, _) => TypeType::Sum, + } + } +} + + + +// ================================ +// === Operations on data model === +// ================================ + +impl Fields { + /// Get the sole field this type contains, if it has exactly one. 
+ pub fn as_wrapped_type(&self) -> Option { + match self { + Fields::Named(fields) if fields.len() == 1 => Some(fields[0].type_), + Fields::Unnamed(fields) if fields.len() == 1 => Some(fields[0].type_), + _ => None, + } + } +} + +impl TypeData { + /// Return whether this type is a `Primitive`. + pub fn is_primitive(&self) -> bool { + matches!(&self.data, Data::Primitive(_)) + } +} + +impl NamedField { + #[allow(missing_docs)] + pub fn type_id(&self) -> TypeId { + self.type_.id + } + + #[allow(missing_docs)] + pub fn type_(&self) -> TypeData { + self.type_.evaluate() + } +} + +impl UnnamedField { + #[allow(missing_docs)] + pub fn type_id(&self) -> TypeId { + self.type_.id + } + + #[allow(missing_docs)] + pub fn type_(&self) -> TypeData { + self.type_.evaluate() + } +} + + + +// ======================== +// === GraphViz support === +// ======================== + +#[cfg(feature = "graphviz")] +impl From for crate::graphviz::Graph { + fn from(root: LazyType) -> Self { + graphviz::graph(root) + } +} diff --git a/lib/rust/metamodel/src/rust/to_meta.rs b/lib/rust/metamodel/src/rust/to_meta.rs new file mode 100644 index 0000000000..29c21778d5 --- /dev/null +++ b/lib/rust/metamodel/src/rust/to_meta.rs @@ -0,0 +1,287 @@ +//! Abstracting Rust data models to the `meta` representation. + +use crate::rust::*; + +use crate::meta; + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::mem::take; + + + +// ==================== +// === Rust to Meta === +// ==================== + +/// Abstract the data model to the meta representation. 
+pub fn to_meta(ty: TypeData) -> (meta::TypeGraph, BTreeMap) { + let mut to_meta = ToMeta::new(); + let root_ = to_meta.run(ty); + to_meta.graph.gc(vec![root_]); + (to_meta.graph, to_meta.rust_to_meta) +} + +#[derive(Debug, Default)] +struct ToMeta { + // Outputs + rust_to_meta: BTreeMap, + graph: meta::TypeGraph, + // Intermediate state + interfaces: Vec<(meta::TypeId, meta::TypeId)>, + parent_types: BTreeMap, + subtypings: Vec<(GenericTypeId, TypeId, meta::UnboundTypeId)>, + flatten: BTreeSet, +} + +impl ToMeta { + fn new() -> Self { + Default::default() + } +} + +impl ToMeta { + fn named_struct<'f>( + &mut self, + id_: meta::UnboundTypeId, + name: &str, + fields: impl IntoIterator, + erased: Option, + ) { + let mut body = vec![]; + let mut child_field = None; + for (i, field) in fields.into_iter().enumerate() { + assert!(!(field.flatten && field.subtype)); + if field.subtype { + assert_eq!(child_field, None); + child_field = Some((i, field.type_.id)); + continue; + } + let type_ = self.rust_to_meta[&field.type_.id]; + let name = field_name(&field.name); + let mut field_ = meta::Field::named(name, type_); + if field.flatten { + self.flatten.insert(field_.id()); + } + field_.hide = field.hide; + body.push(field_); + } + let data = meta::Data::Struct(body); + let name = type_name(name); + if let Some((index, field)) = child_field { + let erased = erased.unwrap(); + self.parent_types.insert(erased, (name, data, index)); + self.subtypings.push((erased, field, id_)); + return; + } + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } + + fn unnamed_struct(&mut self, id_: meta::UnboundTypeId, name: &str, fields: &[UnnamedField]) { + let abstract_field = + |field: &UnnamedField| meta::Field::unnamed(self.rust_to_meta[&field.type_.id]); + let data = fields.iter().map(abstract_field).collect(); + let data = meta::Data::Struct(data); + let name = type_name(name); + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } + + fn 
struct_( + &mut self, + id_: meta::UnboundTypeId, + name: &str, + fields: &Fields, + erased: Option, + ) { + match fields { + Fields::Named(fields) => self.named_struct(id_, name, fields, erased), + Fields::Unnamed(fields) => self.unnamed_struct(id_, name, fields), + Fields::Unit => self.unit_struct(id_, name), + } + } + + fn unit_struct(&mut self, id_: meta::UnboundTypeId, name: &str) { + let data = meta::Data::Struct(vec![]); + let name = type_name(name); + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } + + fn enum_(&mut self, id_: meta::UnboundTypeId, name: &str, variants: &[Variant]) { + let name = type_name(name); + let children = variants.iter().map(|Variant { ident, fields, inline: transparent }| { + if *transparent { + let field = &fields.as_wrapped_type().unwrap().id; + let field_ = self.rust_to_meta[field]; + self.interfaces.push(((&id_).into(), field_)); + field_ + } else { + let promise = self.graph.types.unbound_key(); + let new_ = meta::TypeId::from(&promise); + self.struct_(promise, ident, fields, None); + self.graph[new_].parent = Some((&id_).into()); + new_ + } + }); + let data = meta::Data::Struct(vec![]); + let mut ty = meta::Type::new(name, data); + ty.abstract_ = true; + ty.closed = true; + ty.discriminants = children.enumerate().collect(); + self.graph.types.bind(id_, ty); + } + + fn primitive(&mut self, id_: meta::UnboundTypeId, name: &str, primitive: &Primitive) { + let primitive = match primitive { + Primitive::Bool => meta::Primitive::Bool, + Primitive::U32 => meta::Primitive::U32, + // In platform-independent formats, a `usize` is serialized as 64 bits. 
+ Primitive::Usize => meta::Primitive::U64, + Primitive::String => meta::Primitive::String, + Primitive::Vec(t0) => meta::Primitive::Sequence(self.rust_to_meta[&t0.id]), + Primitive::Option(t0) => meta::Primitive::Option(self.rust_to_meta[&t0.id]), + Primitive::Result(t0, t1) => + meta::Primitive::Result(self.rust_to_meta[&t0.id], self.rust_to_meta[&t1.id]), + }; + let data = meta::Data::Primitive(primitive); + let name = type_name(name); + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } +} + +impl ToMeta { + fn remove_transparent( + &mut self, + types: &mut BTreeMap, + ) -> BTreeMap { + let mut alias = BTreeMap::new(); + types.retain(|id, TypeData { data, .. }| { + let sole_field = + "`#[reflect(transparent)]` can only be applied to types with exactly one field."; + let target = match data { + Data::Struct(Struct { fields, transparent }) if *transparent => + fields.as_wrapped_type().expect(sole_field).id, + _ => return true, + }; + alias.insert(*id, target); + false + }); + let entries: Vec<_> = alias.iter().map(|(k, v)| (*k, *v)).collect(); + for (key, mut value) in entries { + while let Some(value_) = alias.get(&value).copied() { + alias.insert(key, value_); + value = value_; + } + } + alias + } + + /// Perform the transformation for the reference-closure of the given type. 
+    pub fn run(&mut self, ty: TypeData) -> meta::TypeId {
+        let root_rust_id = ty.id;
+        let mut rust_types = collect_types(ty);
+        let aliases = self.remove_transparent(&mut rust_types); // wrapper id -> wrapped id
+        let mut meta_promises: BTreeMap<_, _> =
+            rust_types.keys().map(|id| (*id, self.graph.types.unbound_key())).collect();
+        self.rust_to_meta =
+            meta_promises.iter().map(|(k, v)| (*k, meta::TypeId::from(v))).collect();
+        for (id, target) in aliases {
+            let target_ = self.rust_to_meta[&target];
+            self.rust_to_meta.insert(id, target_);
+        }
+        for (&id, rust) in &rust_types {
+            let name = &rust.name;
+            let id_ = meta_promises.remove(&id).unwrap();
+            let erased = Some(rust.subtype_erased);
+            match &rust.data {
+                Data::Struct(Struct { fields, transparent: _ }) =>
+                    self.struct_(id_, name, fields, erased),
+                Data::Enum(Enum { variants }) => self.enum_(id_, name, variants),
+                Data::Primitive(primitive) => self.primitive(id_, name, primitive),
+            };
+        }
+        for (parent_, child_) in self.interfaces.drain(..) {
+            let old_parent = self.graph[child_].parent.replace(parent_);
+            assert_eq!(None, old_parent); // a child must not already have a parent
+        }
+        self.generate_subtypes(&rust_types);
+        meta::transform::flatten(&mut self.graph, &mut self.flatten);
+        self.rust_to_meta[&root_rust_id]
+    }
+
+    fn generate_subtypes(&mut self, rust_types: &BTreeMap<TypeId, TypeData>) {
+        let mut parent_ids = BTreeMap::new();
+        let mut aliases = vec![];
+        let subtypings = take(&mut self.subtypings);
+        let mut concrete_subtypes = vec![];
+        for (erased, field, promise) in subtypings {
+            let id_ = meta::TypeId::from(&promise);
+            let field_ty = &rust_types[&field];
+            match &field_ty.data {
+                Data::Enum(_) => {
+                    let field_ = self.rust_to_meta[&field];
+                    let (name, wrapper_data, index) = self.parent_types.remove(&erased).unwrap();
+                    // Move the Enum: We're merging the wrapper data into it, so any reference
+                    // to it that wasn't through the wrapper must be an error.
+                    // Note: This approach won't allow types that are subsetted by multiple enums.
+                    let mut enum_ty_ = self.graph.types.remove(field_);
+                    enum_ty_.name = name;
+                    enum_ty_.data = wrapper_data;
+                    enum_ty_.child_field = Some(index);
+                    let children_: Vec<_> = enum_ty_.discriminants.values().copied().collect();
+                    self.graph.types.bind(promise, enum_ty_);
+                    for child_ in children_ {
+                        let old_parent = self.graph[child_].parent.replace(id_);
+                        assert_eq!(old_parent, Some(field_));
+                    }
+                    parent_ids.insert(erased, id_);
+                }
+                Data::Struct(_) => {
+                    concrete_subtypes.push((erased, field, id_)); // aliased after the loop
+                    continue;
+                }
+                Data::Primitive(_) => panic!("Cannot transform a builtin to a subtype."),
+            };
+        }
+        for (_erased, field, id_) in concrete_subtypes {
+            let variants_only = "Applying `#[reflect(subtype)]` to a field that does not occur \
+                in a variant of an enum used to instantiate the field is not supported.";
+            let id = *self.rust_to_meta.get(&field).expect(variants_only);
+            aliases.push((id_, id));
+        }
+        self.graph.apply_aliases(&aliases);
+    }
+}
+
+/// Gather the Rust type IDs and definitions for the given type and its closure in the type
+/// graph.
+fn collect_types(root: TypeData) -> BTreeMap<TypeId, TypeData> {
+    let mut to_visit = BTreeMap::new();
+    let mut new_types = BTreeMap::new();
+    for lazy in root.referenced_types() {
+        to_visit.insert(lazy.id, lazy);
+    }
+    let root_id = root.id;
+    new_types.insert(root_id, root);
+    while let Some((id, lazy)) = to_visit.pop_last() {
+        new_types.entry(id).or_insert_with(|| {
+            let type_ = lazy.evaluate();
+            debug_assert_eq!(id, type_.id);
+            let refs = type_.referenced_types().into_iter().map(|lazy: LazyType| (lazy.id, lazy));
+            to_visit.extend(refs);
+            type_
+        });
+    }
+    new_types
+}
+
+fn field_name(s: &str) -> meta::FieldName {
+    meta::FieldName::from_snake_case(s)
+}
+
+fn type_name(s: &str) -> meta::TypeName {
+    meta::TypeName::from_pascal_case(s)
+}
diff --git a/lib/rust/parser/Cargo.toml b/lib/rust/parser/Cargo.toml
index c5fd1eaff6..91f67619ea 100644
--- a/lib/rust/parser/Cargo.toml
+++ b/lib/rust/parser/Cargo.toml
@@ -10,9 +10,15 @@ repository = "https://github.com/enso-org/enso"
 license-file = "../../LICENSE"
 
 [dependencies]
-enso-prelude = { path = "../prelude" }
+enso-prelude = { path = "../prelude", features = ["serde"] }
+enso-reflect = { path = "../reflect" }
 enso-data-structures = { path = "../data-structures" }
-enso-types = { path = "../types" }
+enso-types = { path = "../types", features = ["serde"] }
 enso-shapely-macros = { path = "../shapely/macros" }
 enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" }
 enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" }
+serde = { version = "1.0", features = ["derive"] }
+bincode = "1.3"
+
+[lib]
+path = "src/main.rs"
diff --git a/lib/rust/parser/generate-java/Cargo.toml b/lib/rust/parser/generate-java/Cargo.toml
new file mode 100644
index 0000000000..53069e6e71
--- /dev/null
+++ b/lib/rust/parser/generate-java/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "enso-parser-generate-java"
+version = "0.1.0"
+authors = ["Enso Team <contact@enso.org>"]
+edition = "2021"
+description = "Generates Java bindings and deserialization for Enso Parser AST types."
+readme = "README.md"
+homepage = "https://github.com/enso-org/enso"
+repository = "https://github.com/enso-org/enso"
+license-file = "../../LICENSE"
+
+[dependencies]
+enso-metamodel = { path = "../../metamodel", features = ["rust", "java"] }
+enso-prelude = { path = "../../prelude" }
+enso-parser = { path = ".." }
+enso-reflect = { path = "../../reflect", features = ["graphviz"] }
+derivative = "2.2"
diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java
new file mode 100644
index 0000000000..d8938af696
--- /dev/null
+++ b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java
@@ -0,0 +1,16 @@
+package org.enso.syntax2.serialization;
+
+public class Either<Left, Right> {
+  protected Left left;
+  protected Right right;
+  protected Either(Left leftIn, Right rightIn) {
+    left = leftIn;
+    right = rightIn;
+  }
+  public static final <L, R> Either<L, R> left(L left) {
+    return new Either<L, R>(left, null);
+  }
+  public static final <L, R> Either<L, R> right(R right) {
+    return new Either<L, R>(null, right);
+  }
+}
diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java
new file mode 100644
index 0000000000..dfea92731f
--- /dev/null
+++ b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java
@@ -0,0 +1,11 @@
+package org.enso.syntax2.serialization;
+
+public class FormatException
+    extends RuntimeException {
+  public FormatException(String errorMessage, Throwable err) {
+    super(errorMessage, err);
+  }
+  public FormatException(String errorMessage) {
+    super(errorMessage);
+  }
+}
diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java
new file mode 100644
index 0000000000..f0f29ec627
--- /dev/null
+++ b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java
@@ -0,0 +1,54 @@
+package org.enso.syntax2.serialization;
+
+/** Read cursor over a serialized message, with an associated context buffer and base value. */
+public final class Message {
+  private final java.nio.ByteBuffer buffer;
+  private final java.nio.ByteBuffer context;
+  private final long base;
+
+  public Message(java.nio.ByteBuffer bufferIn, java.nio.ByteBuffer contextIn, long baseIn) {
+    buffer = bufferIn;
+    context = contextIn;
+    base = baseIn;
+  }
+
+  public long get64() {
+    return buffer.getLong();
+  }
+
+  public int get32() {
+    return buffer.getInt();
+  }
+
+  public boolean getBoolean() {
+    switch (buffer.get()) {
+      case 0: return false;
+      case 1: return true;
+      default: throw new FormatException("Boolean out of range");
+    }
+  }
+
+  /** Read a string encoded as a 64-bit byte length followed by that many UTF-8 bytes. */
+  public String getString() {
+    // Validate before narrowing to `int`, so a corrupt length is reported as a format error.
+    long len = get64();
+    if (len < 0 || len > buffer.remaining())
+      throw new FormatException("String length out of range");
+    byte[] dst = new byte[(int)len];
+    buffer.get(dst);
+    return new String(dst, java.nio.charset.StandardCharsets.UTF_8);
+  }
+
+  public java.nio.ByteBuffer context() {
+    return context;
+  }
+
+  public int offset(int xLow32) {
+    // Given the low bits of `x`, the value of `base`, and the invariant `x >= base`,
+    // return `x - base`.
+    long tmp = xLow32 - base;
+    if (tmp < 0)
+      tmp += 0x0000000100000000L;
+    return (int)tmp;
+  }
+}
diff --git a/lib/rust/parser/generate-java/run.sh b/lib/rust/parser/generate-java/run.sh
new file mode 100755
index 0000000000..1c4809071e
--- /dev/null
+++ b/lib/rust/parser/generate-java/run.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -e
+
+echo "$0" | grep -q lib/rust || { echo "This tool must be run from the repo root, as lib/rust/parser/generate-java/run.sh" >&2; exit 1; }
+
+BASE=target/generated_java
+OUT=$BASE/org/enso/syntax2
+LIB=lib/rust/parser/generate-java/java
+mkdir -p $OUT
+cargo test -p enso-parser-generate-java
+cargo run -p enso-parser-generate-java --bin enso-parser-generate-java -- $OUT
+cargo run -p enso-parser-generate-java --bin java-tests > $BASE/GeneratedFormatTests.java
+javac -classpath "$LIB:$BASE" -d $BASE $BASE/GeneratedFormatTests.java
+java -classpath $BASE GeneratedFormatTests
diff --git a/lib/rust/parser/generate-java/src/bin/graph-java.rs b/lib/rust/parser/generate-java/src/bin/graph-java.rs
new file mode 100644
index 0000000000..462a515298
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/bin/graph-java.rs
@@ -0,0 +1,30 @@
+//! Generate a GraphViz graph of parser datatype relationships in the Java type system.
+//!
+//! Usage:
+//! ```console
+//! graph-java > java.dot
+//! dot -Tx11 java.dot
+//! ```
+
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+
+use enso_metamodel::graphviz;
+use enso_metamodel::java;
+use enso_metamodel::rust;
+use enso_reflect::Reflect;
+
+
+
+// ===========================
+// === Graphing Java types ===
+// ===========================
+
+fn main() {
+    let (graph, _) = rust::to_meta(enso_parser::syntax::Tree::reflect());
+    let (graph, _) = java::from_meta(&graph, enso_parser_generate_java::EITHER_TYPE);
+    let graph = java::transform::optional_to_null(graph);
+    let rendered = graphviz::Graph::from(&graph);
+    println!("{}", rendered);
+}
diff --git a/lib/rust/parser/generate-java/src/bin/graph-meta.rs b/lib/rust/parser/generate-java/src/bin/graph-meta.rs
new file mode 100644
index 0000000000..a4d5b9611e
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/bin/graph-meta.rs
@@ -0,0 +1,25 @@
+//! Generate a GraphViz graph of parser datatype relationships in the `meta` metamodel.
+//!
+//! Usage:
+//! ```console
+//! graph-meta > meta.dot
+//! dot -Tx11 meta.dot
+//! ```
+
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+
+use enso_reflect::Reflect;
+
+
+
+// =============================
+// === Graphing `meta` types ===
+// =============================
+
+fn main() {
+    let (graph, _) = enso_metamodel::rust::to_meta(enso_parser::syntax::Tree::reflect());
+    let rendered = enso_metamodel::graphviz::Graph::from(&graph);
+    println!("{}", rendered);
+}
diff --git a/lib/rust/parser/generate-java/src/bin/graph-rust.rs b/lib/rust/parser/generate-java/src/bin/graph-rust.rs
new file mode 100644
index 0000000000..c925cf9124
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/bin/graph-rust.rs
@@ -0,0 +1,22 @@
+//! Generate a GraphViz graph of parser datatype relationships in the Rust type system.
+//!
+//! Usage:
+//! ```console
+//! graph-rust > rust.dot
+//! dot -Tx11 rust.dot
+//! ```
+
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+
+
+
+// ===========================
+// === Graphing Rust types ===
+// ===========================
+
+fn main() {
+    let rendered = enso_reflect::graph::<enso_parser::syntax::Tree>();
+    println!("{}", rendered);
+}
diff --git a/lib/rust/parser/generate-java/src/bin/java-tests.rs b/lib/rust/parser/generate-java/src/bin/java-tests.rs
new file mode 100644
index 0000000000..ec930d1fa6
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/bin/java-tests.rs
@@ -0,0 +1,81 @@
+//! Generates Java format tests.
+//!
+//! Usage:
+//! ```console
+//! java-tests > GeneratedFormatTests.java
+//! javac -d generated-java/ GeneratedFormatTests.java && java GeneratedFormatTests
+//! ```
+
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+
+
+
+// ============================
+// === Java Test Generation ===
+// ============================
+
+fn main() {
+    let cases = enso_parser_generate_java::generate_testcases();
+    let fmt_cases = |cases: &[Vec<u8>]| {
+        let cases: Vec<_> = cases
+            .iter()
+            .map(|case| {
+                let case: Vec<_> = case.iter().map(|byte| (*byte as i8).to_string()).collect();
+                format!("{{{}}}", case.join(", "))
+            })
+            .collect();
+        cases.join(", ")
+    };
+    let accept = fmt_cases(&cases.accept);
+    let reject = fmt_cases(&cases.reject);
+    let package = enso_parser_generate_java::PACKAGE;
+    let serialization = enso_parser_generate_java::SERIALIZATION_SUPPORT;
+    println!("import {package}.Tree;");
+    println!("import {serialization}.Message;");
+    println!("import java.nio.ByteBuffer;");
+    println!("import java.nio.ByteOrder;");
+    println!();
+    println!("class GeneratedFormatTests {{");
+    println!("    public static void main(String[] args) {{");
+    println!("        byte[][] accept = {{{accept}}};");
+    println!("        byte[][] reject = {{{reject}}};");
+    println!("        int result = 0;");
+    println!("        for (int i = 0; i < accept.length; i++) {{");
+    println!("            ByteBuffer buffer = ByteBuffer.wrap(accept[i]);");
+    println!("            buffer.order(ByteOrder.LITTLE_ENDIAN);");
+    println!("            ByteBuffer context = ByteBuffer.allocate(0);");
+    println!("            Message message = new Message(buffer, context, 0);");
+    println!("            try {{");
+    println!("                Tree tree = Tree.deserialize(message);");
+    println!("                System.out.print(\"- pass: \");");
+    println!("                System.out.println(tree.toString());");
+    println!("            }} catch (RuntimeException e) {{");
+    println!("                System.out.println(\"- fail:\");");
+    println!("                e.printStackTrace();");
+    println!("                result = 1;");
+    println!("            }}");
+    println!("        }}");
+    println!("        for (int i = 0; i < reject.length; i++) {{");
+    println!("            ByteBuffer buffer = ByteBuffer.wrap(reject[i]);");
+    println!("            buffer.order(ByteOrder.LITTLE_ENDIAN);");
+    println!("            ByteBuffer context = ByteBuffer.allocate(0);");
+    println!("            Message message = new Message(buffer, context, 0);");
+    println!("            try {{");
+    println!("                Tree tree = Tree.deserialize(message);");
+    println!("                System.out.print(\"- fail: accepted: \");");
+    println!("                System.out.println(tree.toString());");
+    println!("                result = 1;");
+    println!("            }} catch ({serialization}.FormatException e) {{");
+    println!("                System.out.println(\"- pass: (rejected)\");");
+    println!("            }} catch (RuntimeException e) {{");
+    println!("                System.out.println(\"- fail: wrong exception: \");");
+    println!("                e.printStackTrace();");
+    println!("                result = 1;");
+    println!("            }}");
+    println!("        }}");
+    println!("        System.exit(result);");
+    println!("    }}");
+    println!("}}");
+}
diff --git a/lib/rust/parser/generate-java/src/lib.rs b/lib/rust/parser/generate-java/src/lib.rs
new file mode 100644
index 0000000000..3059d34abf
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/lib.rs
@@ -0,0 +1,83 @@
+//! Supports generation of Java types corresponding to `enso-parser`'s AST types, and testing and
+//! debugging the translation process.
+
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+// === Non-Standard Linter Configuration ===
+#![allow(clippy::option_map_unit_fn)]
+#![allow(clippy::precedence)]
+#![allow(dead_code)]
+#![deny(unconditional_recursion)]
+#![warn(missing_copy_implementations)]
+#![warn(missing_debug_implementations)]
+#![warn(missing_docs)]
+#![warn(trivial_casts)]
+#![warn(trivial_numeric_casts)]
+#![warn(unused_import_braces)]
+#![warn(unused_qualifications)]
+
+use enso_metamodel::meta;
+use enso_reflect::Reflect;
+
+
+// ==============
+// === Export ===
+// ==============
+
+pub mod serialization;
+
+
+
+// =====================
+// === Configuration ===
+// =====================
+
+/// The package for the generated code.
+pub const PACKAGE: &str = "org.enso.syntax2";
+/// The package for the non-generated serialization support code.
+pub const SERIALIZATION_SUPPORT: &str = "org.enso.syntax2.serialization";
+/// The fully-qualified name of an `Either` type.
+pub const EITHER_TYPE: &str = "org.enso.syntax2.serialization.Either";
+
+
+
+// ==================
+// === Test Cases ===
+// ==================
+
+use enso_parser::syntax;
+
+/// Generate accept/reject test case set for the parser types rooted at `syntax::Tree`.
+pub fn generate_testcases() -> meta::serialization::TestCases {
+    let root = syntax::Tree::reflect();
+    let root_id = root.id;
+    let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(root);
+    let root = rust_to_meta[&root_id];
+    meta::serialization::testcases(&graph, root)
+}
+
+
+
+// ===========================
+// === Rust Format Testing ===
+// ===========================
+
+#[cfg(test)]
+mod test {
+    /// Check Rust deserialization against test cases.
+    #[test]
+    fn test_format() {
+        let cases = super::generate_testcases();
+        for (i, case) in cases.accept.iter().enumerate() {
+            if let Err(err) = enso_parser::serialization::deserialize_tree(case) {
+                panic!("accept{i} fail: {err:?}");
+            }
+        }
+        for (i, case) in cases.reject.iter().enumerate() {
+            if let Ok(tree) = enso_parser::serialization::deserialize_tree(case) {
+                panic!("reject{i} fail: accepted: {tree:?}");
+            }
+        }
+    }
+}
diff --git a/lib/rust/parser/generate-java/src/main.rs b/lib/rust/parser/generate-java/src/main.rs
new file mode 100644
index 0000000000..5325f4697d
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/main.rs
@@ -0,0 +1,57 @@
+//! Generate the Java types corresponding to `enso-parser`'s AST types.
+//!
+//! # Usage
+//!
+//! Generated files will be placed in the directory given as an argument:
+//! ```console
+//! generate-java org/enso/syntax2/
+//! ```
+
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+// === Non-Standard Linter Configuration ===
+#![allow(clippy::option_map_unit_fn)]
+#![allow(clippy::precedence)]
+#![allow(dead_code)]
+#![deny(unconditional_recursion)]
+#![warn(missing_copy_implementations)]
+#![warn(missing_debug_implementations)]
+#![warn(missing_docs)]
+#![warn(trivial_casts)]
+#![warn(trivial_numeric_casts)]
+#![warn(unused_import_braces)]
+#![warn(unused_qualifications)]
+
+use enso_metamodel::java;
+use enso_metamodel::rust;
+use enso_parser_generate_java::serialization;
+use enso_reflect::Reflect;
+
+
+
+// =======================
+// === Java Generation ===
+// =======================
+
+fn main() {
+    let ast = enso_parser::syntax::Tree::reflect();
+    // Take the root ID from `ast` itself rather than reflecting `Tree` a second time.
+    let tree = ast.id;
+    let token = enso_parser::syntax::Token::<enso_parser::syntax::token::Variant>::reflect().id;
+    let (graph, rust_to_meta) = rust::to_meta(ast);
+    let (graph, meta_to_java) = java::from_meta(&graph, enso_parser_generate_java::EITHER_TYPE);
+    let mut graph = java::transform::optional_to_null(graph);
+    let rust_to_java = |id| meta_to_java[&rust_to_meta[&id]];
+    let (tree, token) = (rust_to_java(tree), rust_to_java(token));
+    serialization::derive(&mut graph, tree, token);
+    let graph = java::to_syntax(&graph, enso_parser_generate_java::PACKAGE);
+    let dir = std::env::args().nth(1).expect("Usage: generate-java <output-dir>");
+    let dir = std::path::Path::new(&dir);
+    for class in graph {
+        let code = class.to_string();
+        let path = dir.join(format!("{}.java", class.name));
+        std::fs::write(&path, code)
+            .unwrap_or_else(|err| panic!("Failed to write {}: {err}", path.display()));
+    }
+}
diff --git a/lib/rust/parser/generate-java/src/serialization.rs b/lib/rust/parser/generate-java/src/serialization.rs
new file mode 100644
index 0000000000..cdc4677b04
--- /dev/null
+++ b/lib/rust/parser/generate-java/src/serialization.rs
@@ -0,0 +1,103 @@
+//! Serialization overrides for the `enso_parser` types.
+
+use enso_metamodel::java::*;
+
+use enso_metamodel::java::bincode::MapperInput;
+use enso_metamodel::java::bincode::MaterializerInput;
+
+
+
+// ==============================
+// === Derive Deserialization ===
+// ==============================
+
+// FIXME: After we have implemented a transformation from the raw `Reflect` output to a
+// `rust::TypeGraph`, at which time we can assign unique `FieldId`s: We should identify
+// generated fields in Java classes by starting from a `str -> rust::FieldId` query on Rust
+// type data, and mapping fields analogously to `rust_to_java` for types.
+const CODE_GETTER: &str = "codeRepr";
+const TREE_BEGIN: &str = "spanLeftOffsetCodeReprBegin";
+const TREE_LEN: &str = "spanLeftOffsetCodeReprLen";
+const TOKEN_BEGIN: &str = "codeReprBegin";
+const TOKEN_LEN: &str = "codeReprLen";
+const TOKEN_OFFSET_BEGIN: &str = "leftOffsetCodeReprBegin";
+//const TOKEN_OFFSET_LEN: &str = "leftOffsetCodeReprLen";
+
+/// Derive deserialization for all types in the typegraph.
+pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
+    let source = "source";
+    impl_deserialize(graph, tree, token, source);
+    graph[token].methods.push(impl_getter(CODE_GETTER, source, TOKEN_BEGIN, TOKEN_LEN));
+    graph[tree].methods.push(impl_getter(CODE_GETTER, source, TREE_BEGIN, TREE_LEN));
+}
+
+
+// === Deserialization Methods ===
+
+fn impl_deserialize(graph: &mut TypeGraph, tree: ClassId, token: ClassId, source: &str) {
+    // Add source field to parent types.
+    let buffer = Class::builtin("java.nio.ByteBuffer", vec![]);
+    let buffer = graph.classes.insert(buffer);
+    let tree_source_ = Field::object(source, buffer, true);
+    let tree_source = tree_source_.id();
+    graph[tree].fields.push(tree_source_);
+    let token_source_ = Field::object(source, buffer, true);
+    let token_source = token_source_.id();
+    graph[token].fields.push(token_source_);
+    let ids: Vec<_> = graph.classes.keys().collect();
+    for id in ids {
+        let class = &graph[id];
+        let mut deserialization =
+            bincode::DeserializerBuilder::new(id, crate::SERIALIZATION_SUPPORT, crate::EITHER_TYPE);
+        match () {
+            // Base classes: Map the code repr fields.
+            _ if id == tree => {
+                let code_begin = class.find_field(TREE_BEGIN).unwrap().id();
+                deserialization.map(code_begin, offset_mapper());
+            }
+            _ if id == token => {
+                let code_begin = class.find_field(TOKEN_BEGIN).unwrap().id();
+                let offset_begin = class.find_field(TOKEN_OFFSET_BEGIN).unwrap().id();
+                deserialization.map(code_begin, offset_mapper());
+                deserialization.map(offset_begin, offset_mapper());
+            }
+            // Child classes: Pass context object from deserializer to parent.
+            _ if class.parent == Some(tree) =>
+                deserialization.materialize(tree_source, context_materializer()),
+            _ if class.parent == Some(token) =>
+                deserialization.materialize(token_source, context_materializer()),
+            // Everything else: Standard deserialization.
+            _ => (),
+        }
+        let deserializer = deserialization.build(graph);
+        graph[id].methods.push(deserializer);
+    }
+}
+
+fn context_materializer() -> impl for<'a> FnOnce(MaterializerInput<'a>) -> String + 'static {
+    |MaterializerInput { message }| format!("{message}.context()")
+}
+fn offset_mapper() -> impl for<'a, 'b> FnOnce(MapperInput<'a, 'b>) -> String + 'static {
+    |MapperInput { message, value }| format!("{message}.offset({value})")
+}
+
+
+// === Source Code Getters ===
+
+fn impl_getter(name: &str, buffer: &str, begin: &str, len: &str) -> Method {
+    use std::fmt::Write;
+    let mut body = String::new();
+    let serialization = crate::SERIALIZATION_SUPPORT;
+    let exception = format!("{serialization}.FormatException");
+    writeln!(body, "byte[] dst = new byte[{len}];").unwrap();
+    writeln!(body, "{buffer}.position({begin});").unwrap();
+    writeln!(body, "{buffer}.get(dst);").unwrap();
+    writeln!(body, "try {{").unwrap();
+    writeln!(body, "    return new String(dst, \"UTF-8\");").unwrap();
+    writeln!(body, "}} catch (java.io.UnsupportedEncodingException e) {{").unwrap();
+    writeln!(body, "    throw new {exception}(\"Expected UTF-8\", e);").unwrap();
+    writeln!(body, "}}").unwrap();
+    let mut method = syntax::Method::new(name, syntax::Type::named("String"));
+    method.body = body;
+    Method::Raw(method)
+}
diff --git a/lib/rust/parser/src/main.rs b/lib/rust/parser/src/main.rs
index cfd38dbf7c..825e2eea33 100644
--- a/lib/rust/parser/src/main.rs
+++ b/lib/rust/parser/src/main.rs
@@ -107,6 +107,7 @@ use crate::prelude::*;
 
 pub mod lexer;
 pub mod macros;
+pub mod serialization;
 pub mod source;
 pub mod syntax;
 
@@ -114,7 +115,10 @@ pub mod syntax;
 
 /// Popular utilities, imported by most modules of this crate.
pub mod prelude { + pub use enso_prelude::serde_reexports::*; pub use enso_prelude::*; + pub use enso_reflect as reflect; + pub use enso_reflect::Reflect; pub use enso_types::traits::*; pub use enso_types::unit2::Bytes; } diff --git a/lib/rust/parser/src/serialization.rs b/lib/rust/parser/src/serialization.rs new file mode 100644 index 0000000000..c823646648 --- /dev/null +++ b/lib/rust/parser/src/serialization.rs @@ -0,0 +1,91 @@ +//! Serialization/deserialization support. +//! +//! Deserialization is used only for testing, but it is used by dependent crates, so it cannot be +//! gated with `#[cfg(test)]`. + +use crate::prelude::*; + + + +// ============ +// === Tree === +// ============ + +/// Deserialize a `Tree` from its binary representation. +pub fn deserialize_tree(data: &[u8]) -> Result { + use bincode::Options; + let options = bincode::DefaultOptions::new().with_fixint_encoding(); + options.deserialize(data) +} + + + +// ============ +// === Code === +// ============ + +/// Serialized representation of a source code `Cow`. +#[derive(Serialize, Reflect)] +pub(crate) struct Code { + #[reflect(hide)] + begin: u32, + #[reflect(hide)] + len: u32, +} + +/// Serde wrapper to serialize a `Cow` as the `Code` representation. +#[allow(clippy::ptr_arg)] // This is the signature required by serde. +pub(crate) fn serialize_cow(cow: &Cow<'_, str>, ser: S) -> Result +where S: serde::Serializer { + let s = match cow { + Cow::Borrowed(s) => *s, + Cow::Owned(_) => panic!(), + }; + let begin = s.as_ptr() as u32; + let len = s.len() as u32; + let serializable = Code { begin, len }; + serializable.serialize(ser) +} + +pub(crate) fn deserialize_cow<'c, 'de, D>(deserializer: D) -> Result, D::Error> +where D: serde::Deserializer<'de> { + let _ = deserializer.deserialize_u64(DeserializeU64); + Ok(Cow::Owned(String::new())) +} + + + +// ============= +// === Error === +// ============= + +/// Deserialization type for `crate::syntax::tree::Error`. 
+#[derive(Deserialize, Debug, Clone)] +pub(crate) struct Error(String); + +impl From for crate::syntax::tree::Error { + fn from(_: Error) -> Self { + crate::syntax::tree::Error { message: "" } + } +} + + + +// ================ +// === Visitors === +// ================ + +struct DeserializeU64; + +impl<'de> serde::de::Visitor<'de> for DeserializeU64 { + type Value = u64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "An unsigned 64-bit integer.") + } + + fn visit_u64(self, i: u64) -> Result + where E: serde::de::Error { + Ok(i) + } +} diff --git a/lib/rust/parser/src/source/code.rs b/lib/rust/parser/src/source/code.rs index b3530c7534..affbaad783 100644 --- a/lib/rust/parser/src/source/code.rs +++ b/lib/rust/parser/src/source/code.rs @@ -9,10 +9,13 @@ use crate::prelude::*; // ============ /// A code representation. It can either be a borrowed source code or a modified owned one. -#[derive(Clone, Default, Eq, PartialEq, From, Into, Shrinkwrap)] +#[derive(Clone, Default, Eq, PartialEq, From, Into, Shrinkwrap, Serialize, Reflect, Deserialize)] #[shrinkwrap(mutable)] #[allow(missing_docs)] pub struct Code<'s> { + #[serde(serialize_with = "crate::serialization::serialize_cow")] + #[serde(deserialize_with = "crate::serialization::deserialize_cow")] + #[reflect(as = "crate::serialization::Code", flatten)] pub repr: Cow<'s, str>, } diff --git a/lib/rust/parser/src/source/span.rs b/lib/rust/parser/src/source/span.rs index 5228b7aa4f..110d2e3305 100644 --- a/lib/rust/parser/src/source/span.rs +++ b/lib/rust/parser/src/source/span.rs @@ -23,11 +23,10 @@ pub mod traits { /// A strongly typed visible offset size. For example, a space character has value of 1, while the /// tab character has value of 4. For other space-like character sizes, refer to the lexer /// implementation. 
-#[derive( - Clone, Copy, Debug, Default, From, Into, Add, AddAssign, Sub, PartialEq, Eq, Hash, PartialOrd, - Ord -)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(From, Into, Add, AddAssign, Sub, Reflect, Serialize, Deserialize)] #[allow(missing_docs)] +#[reflect(transparent)] pub struct VisibleOffset { pub width_in_spaces: usize, } @@ -60,10 +59,11 @@ impl From<&str> for VisibleOffset { /// Offset information. In most cases it is used to express the left-hand-side whitespace offset /// for tokens and AST nodes. -#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Offset<'s> { pub visible: VisibleOffset, + #[reflect(flatten)] pub code: Code<'s>, } @@ -129,11 +129,13 @@ impl<'s> std::ops::AddAssign<&Offset<'s>> for Offset<'s> { /// element. This is done in order to not duplicate the data. For example, some AST nodes contain a /// lot of tokens. They need to remember their span, but they do not need to remember their code, /// because it is already stored in the tokens. -#[derive(Clone, Debug, Default, Eq, PartialEq)] +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Span<'s> { + #[reflect(flatten)] pub left_offset: Offset<'s>, /// The length of the code, excluding [`left_offset`]. + #[reflect(hide)] pub code_length: Bytes, } diff --git a/lib/rust/parser/src/syntax/token.rs b/lib/rust/parser/src/syntax/token.rs index 0dd073ecc3..b8c4e1784c 100644 --- a/lib/rust/parser/src/syntax/token.rs +++ b/lib/rust/parser/src/syntax/token.rs @@ -103,13 +103,16 @@ use enso_shapely_macros::tagged_enum; // ============= /// The lexical token definition. See the module docs to learn more about its usage scenarios. 
-#[derive(Clone, Deref, DerefMut, Eq, PartialEq)] +#[derive(Clone, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Token<'s, T = Variant> { #[deref] #[deref_mut] + #[reflect(subtype)] pub variant: T, + #[reflect(flatten, hide)] pub left_offset: Offset<'s>, + #[reflect(flatten, hide)] pub code: Code<'s>, } @@ -241,8 +244,10 @@ impl<'s, 'a, T: Debug> Debug for Ref<'s, 'a, T> { macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)* /// Elements that can be found in the source code. #[tagged_enum] - #[derive(Clone, Copy, PartialEq, Eq)] + #[derive(Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] + #[tagged_enum(apply_attributes_to = "variants")] + #[reflect(inline)] pub enum Variant { Newline, Symbol, diff --git a/lib/rust/parser/src/syntax/tree.rs b/lib/rust/parser/src/syntax/tree.rs index 22ef4c1b41..ca90c84187 100644 --- a/lib/rust/parser/src/syntax/tree.rs +++ b/lib/rust/parser/src/syntax/tree.rs @@ -16,12 +16,14 @@ use enso_shapely_macros::tagged_enum; // ============ /// The Abstract Syntax Tree of the language. -#[derive(Clone, Deref, DerefMut, Eq, PartialEq)] +#[derive(Clone, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Tree<'s> { #[deref] #[deref_mut] + #[reflect(subtype)] pub variant: Box>, + #[reflect(flatten)] pub span: Span<'s>, } @@ -57,7 +59,9 @@ impl<'s> AsRef> for Tree<'s> { macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)* /// [`Tree`] variants definition. See its docs to learn more. #[tagged_enum] - #[derive(Clone, Eq, PartialEq, Visitor)] + #[derive(Clone, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] + #[tagged_enum(apply_attributes_to = "variants")] + #[reflect(inline)] pub enum Variant<'s> { /// Invalid [`Tree`] fragment with an attached [`Error`]. 
Invalid { @@ -146,9 +150,12 @@ with_ast_definition!(generate_ast_definition()); // === Invalid === /// Error of parsing attached to an [`Tree`] node. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] +#[reflect(transparent)] +#[serde(from = "crate::serialization::Error")] pub struct Error { + #[serde(skip_deserializing)] pub message: &'static str, } @@ -179,7 +186,7 @@ impl<'s> span::Builder<'s> for Error { pub type OperatorOrError<'s> = Result, MultipleOperatorError<'s>>; /// Error indicating multiple operators found next to each other, like `a + * b`. -#[derive(Clone, Debug, Eq, PartialEq, Visitor)] +#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct MultipleOperatorError<'s> { pub operators: NonEmptyVec>, @@ -195,7 +202,7 @@ impl<'s> span::Builder<'s> for MultipleOperatorError<'s> { // === MultiSegmentApp === /// A segment of [`MultiSegmentApp`], like `if cond` in the `if cond then ok else fail` expression. -#[derive(Clone, Debug, Eq, PartialEq, Visitor)] +#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct MultiSegmentAppSegment<'s> { pub header: Token<'s>, @@ -374,7 +381,10 @@ macro_rules! define_visitor_for_tokens { ( $(#$kind_meta:tt)* pub enum $kind:ident { - $( $variant:ident $({$($args:tt)*})? ),* $(,)? + $( + $(#$variant_meta:tt)* + $variant:ident $({$($args:tt)*})? + ),* $(,)? 
} ) => { impl<'s, 'a> TreeVisitable<'s, 'a> for token::$kind {} diff --git a/lib/rust/prelude/Cargo.toml b/lib/rust/prelude/Cargo.toml index 77ab0d5177..e1825d880f 100644 --- a/lib/rust/prelude/Cargo.toml +++ b/lib/rust/prelude/Cargo.toml @@ -16,6 +16,7 @@ publish = true crate-type = ["cdylib", "rlib"] [dependencies] +enso-reflect = { path = "../reflect" } enso-shapely = { version = "^0.2.0", path = "../shapely" } anyhow = "1.0.37" assert_approx_eq = { version = "1.1.0" } diff --git a/lib/rust/prelude/src/data/non_empty_vec.rs b/lib/rust/prelude/src/data/non_empty_vec.rs index d8f1449868..c24ca0eae6 100644 --- a/lib/rust/prelude/src/data/non_empty_vec.rs +++ b/lib/rust/prelude/src/data/non_empty_vec.rs @@ -14,7 +14,10 @@ use std::vec::Splice; /// A version of [`std::vec::Vec`] that can't be empty. #[allow(missing_docs)] -#[derive(Clone, Debug, Eq, PartialEq, Deref, DerefMut)] +#[derive(Clone, Debug, Eq, PartialEq, Deref, DerefMut, Reflect)] +#[reflect(transparent)] +#[cfg_attr(feature = "serde", derive(crate::serde_reexports::Serialize))] +#[cfg_attr(feature = "serde", derive(crate::serde_reexports::Deserialize))] pub struct NonEmptyVec { pub elems: Vec, } diff --git a/lib/rust/prelude/src/lib.rs b/lib/rust/prelude/src/lib.rs index f592adcdc5..8eee7dda6b 100644 --- a/lib/rust/prelude/src/lib.rs +++ b/lib/rust/prelude/src/lib.rs @@ -91,6 +91,8 @@ pub use std::collections::hash_map::DefaultHasher; pub use std::hash::Hash; pub use std::hash::Hasher; +pub use enso_reflect::prelude::*; + use std::cell::UnsafeCell; diff --git a/lib/rust/reflect/Cargo.toml b/lib/rust/reflect/Cargo.toml new file mode 100644 index 0000000000..65345a8e05 --- /dev/null +++ b/lib/rust/reflect/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "enso-reflect" +version = "0.1.0" +edition = "2021" +authors = ["Enso Team "] + +[dependencies] +enso-reflect-macros = { path = "macros" } +enso-metamodel = { path = "../metamodel", features = ["rust"] } +derivative = "2.2" + +[features] +graphviz = 
["enso-metamodel/graphviz"] diff --git a/lib/rust/reflect/macros/Cargo.toml b/lib/rust/reflect/macros/Cargo.toml new file mode 100644 index 0000000000..ab864a0efb --- /dev/null +++ b/lib/rust/reflect/macros/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "enso-reflect-macros" +version = "0.1.0" +edition = "2021" +authors = ["Enso Team "] + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +syn = { version = "1.0", features = [ + "full", + "extra-traits", + "printing", + "parsing" +] } diff --git a/lib/rust/reflect/macros/src/analyze.rs b/lib/rust/reflect/macros/src/analyze.rs new file mode 100644 index 0000000000..4a3115993c --- /dev/null +++ b/lib/rust/reflect/macros/src/analyze.rs @@ -0,0 +1,313 @@ +//! Parse syntax into (macro execution-time) representations of data structure definitions. + +use super::*; + +use quote::ToTokens; +use syn::punctuated::Punctuated; +use syn::DeriveInput; +use syn::GenericParam; +use syn::Token; + + + +// =============== +// === Analyze === +// =============== + +/// Parse `syn` syntax and produce type definitions. 
+pub(crate) fn analyze(input: TokenStream) -> Type { + let input = syn::parse2::(input).unwrap(); + let ident = input.ident; + let attrs: ContainerAttrs = input.attrs.iter().collect(); + let mut lifetimes: Punctuated<_, Token![,]> = Punctuated::new(); + let mut generic_params: Punctuated<_, Token![,]> = Punctuated::new(); + for param in input.generics.params { + match param { + GenericParam::Type(type_) => generic_params.push(type_.ident.to_token_stream()), + GenericParam::Lifetime(lifetime) => lifetimes.push(lifetime.lifetime.to_token_stream()), + GenericParam::Const(_) => unimplemented!("Reflect for const generics."), + } + } + let mut generics = lifetimes.clone(); + generics.extend(generic_params.clone()); + let data = match input.data { + syn::Data::Struct(struct_) => Data::Struct(parse_fields(struct_.fields)), + syn::Data::Enum(enum_) => + Data::Enum(enum_.variants.into_iter().map(Variant::from).collect()), + syn::Data::Union(_) => unimplemented!("Reflect for `union`s."), + }; + Type { ident, generics, lifetimes, generic_params, data, attrs } +} + + + +// =============== +// === Parsing === +// =============== + +fn parse_fields(fields: syn::Fields) -> Fields { + match fields { + syn::Fields::Named(syn_fields) => { + let mut fields = vec![]; + 'fields: for field in syn_fields.named { + let mut field_ = NamedField::new(field.ident.unwrap(), field.ty); + let mut annotations = Default::default(); + for attr in field.attrs { + parse_field_attrs(&attr, &mut annotations); + } + for annotation in annotations { + match annotation { + FieldAttr::Flatten => field_.flatten = true, + FieldAttr::Hide => field_.hide = true, + FieldAttr::Subtype => field_.subtype = true, + FieldAttr::As(ty) => field_.refer = Some(ty), + // NOTE: Implementing `skip` at analysis time makes our Rust information + // incomplete. 
For `reflect` to be used to generate Rust deserialization + // code, we'd need to emit a field with a type that is a marker type, + // which we'd filter out when abstracting. + FieldAttr::Skip => continue 'fields, + } + } + fields.push(field_); + } + Fields::Named { fields } + } + syn::Fields::Unnamed(fields) => + Fields::Unnamed(fields.unnamed.into_iter().map(UnnamedField::from).collect()), + syn::Fields::Unit => Fields::Unit, + } +} + +impl From for UnnamedField { + fn from(field: syn::Field) -> Self { + let type_ = field.ty; + UnnamedField { type_ } + } +} + +impl From for Variant { + fn from(variant: syn::Variant) -> Self { + if variant.discriminant.is_some() { + unimplemented!("Explicit discriminators."); + } + let fields = parse_fields(variant.fields); + let mut transparent = false; + let mut annotations = Default::default(); + for attr in &variant.attrs { + parse_variant_attrs(attr, &mut annotations); + } + for annotation in annotations { + match annotation { + VariantAttr::Inline => transparent = true, + } + } + let ident = variant.ident; + Variant { ident, fields, transparent } + } +} + + + +// ========================= +// === Helper attributes === +// ========================= + +/// Helper attribute identifier. Must match the value `attributes(_)` parameter in the +/// `proc_macro_derive` annotation on this crate's entry point. 
+const HELPER_ATTRIBUTE_PATH: &str = "reflect"; +const INVALID_HELPER_SYNTAX: &str = "Unknown helper attribute syntax."; +const UNKNOWN_HELPER: &str = "Unknown helper attribute."; + + +// === Field Attributes === + +#[derive(PartialEq, Eq)] +enum FieldAttr { + Flatten, + Hide, + Skip, + Subtype, + As(Box), +} + +fn parse_field_attrs(attr: &syn::Attribute, out: &mut Vec) { + if attr.style != syn::AttrStyle::Outer { + return; + } + match attr.path.get_ident() { + Some(ident) if ident == HELPER_ATTRIBUTE_PATH => (), + _ => return, + } + let meta = attr.parse_meta().expect(INVALID_HELPER_SYNTAX); + match meta { + syn::Meta::List(metalist) => + out.extend(metalist.nested.iter().map(|meta| parse_field_annotation(meta, attr))), + syn::Meta::Path(_) | syn::Meta::NameValue(_) => + panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.to_token_stream()), + } +} + +fn parse_field_annotation(meta: &syn::NestedMeta, attr: &syn::Attribute) -> FieldAttr { + let meta = match meta { + syn::NestedMeta::Meta(meta) => meta, + _ => panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.into_token_stream()), + }; + match meta { + syn::Meta::Path(path) => { + let ident = path.get_ident().expect(INVALID_HELPER_SYNTAX); + match ident.to_string().as_str() { + "flatten" => FieldAttr::Flatten, + "hide" => FieldAttr::Hide, + "skip" => FieldAttr::Skip, + "subtype" => FieldAttr::Subtype, + _ => panic!("{}: {}.", UNKNOWN_HELPER, ident.into_token_stream()), + } + } + syn::Meta::NameValue(syn::MetaNameValue { path, lit: syn::Lit::Str(lit), .. 
}) => { + let ident = path.get_ident().expect(INVALID_HELPER_SYNTAX); + match ident.to_string().as_str() { + "as" => FieldAttr::As(Box::new(lit.parse().expect(INVALID_HELPER_SYNTAX))), + _ => panic!("{}: {}.", UNKNOWN_HELPER, ident.into_token_stream()), + } + } + _ => panic!("{}: {}.", INVALID_HELPER_SYNTAX, attr.into_token_stream()), + } +} + + +// === Variant Attributes === + +#[derive(PartialEq, Eq)] +enum VariantAttr { + Inline, +} + +fn parse_variant_attrs(attr: &syn::Attribute, out: &mut Vec) { + if attr.style != syn::AttrStyle::Outer { + return; + } + match attr.path.get_ident() { + Some(ident) if ident == HELPER_ATTRIBUTE_PATH => (), + _ => return, + } + let meta = attr.parse_meta().expect(INVALID_HELPER_SYNTAX); + match meta { + syn::Meta::List(metalist) => { + let parse = |meta| match parse_meta_ident(meta).to_string().as_str() { + "inline" => VariantAttr::Inline, + _ => panic!("{}: {}.", UNKNOWN_HELPER, meta.into_token_stream()), + }; + out.extend(metalist.nested.iter().map(parse)); + } + syn::Meta::Path(_) | syn::Meta::NameValue(_) => + panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.into_token_stream()), + } +} + + +// === Container Attributes === + +#[derive(PartialEq, Eq)] +enum ContainerAttr { + Transparent, +} + +fn parse_container_attrs(attr: &syn::Attribute, out: &mut Vec) { + if attr.style != syn::AttrStyle::Outer { + return; + } + match attr.path.get_ident() { + Some(ident) if ident == HELPER_ATTRIBUTE_PATH => (), + _ => return, + } + let meta = attr.parse_meta().expect(INVALID_HELPER_SYNTAX); + match meta { + syn::Meta::List(metalist) => { + let parse = |meta| match parse_meta_ident(meta).to_string().as_str() { + "transparent" => ContainerAttr::Transparent, + _ => panic!("{}: {}.", UNKNOWN_HELPER, attr.into_token_stream()), + }; + out.extend(metalist.nested.iter().map(parse)); + } + syn::Meta::Path(_) | syn::Meta::NameValue(_) => + panic!("{}: {}.", INVALID_HELPER_SYNTAX, attr.into_token_stream()), + } +} + +impl<'a> FromIterator<&'a 
syn::Attribute> for ContainerAttrs { + fn from_iter>(iter: T) -> Self { + let mut transparent = false; + let mut annotations = Default::default(); + for attr in iter { + parse_container_attrs(attr, &mut annotations); + } + for annotation in annotations { + match annotation { + ContainerAttr::Transparent => transparent = true, + } + } + ContainerAttrs { transparent } + } +} + + +// === Helpers === + +fn parse_meta_ident(meta: &syn::NestedMeta) -> &syn::Ident { + let path = match meta { + syn::NestedMeta::Meta(syn::Meta::Path(path)) => path, + _ => panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.into_token_stream()), + }; + path.get_ident().expect(INVALID_HELPER_SYNTAX) +} + + + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +mod tests { + use super::analyze::analyze; + use quote::quote; + + #[test] + fn accept_inputs() { + let inputs = [ + quote! { + struct Foo; + }, + quote! { + struct Bar { + bar: u32, + baar: &'static str, + } + }, + quote! { + enum Baz { + Bar(Bar), + Baz, + } + }, + quote! { + struct Quux { + quux: T, + } + }, + quote! { + struct Quuux { + quux: Box, + } + }, + quote! { + struct Code<'s> { + repr: std::borrow::Cow<'s, str>, + } + }, + ]; + for input in inputs { + analyze(input); + } + } +} diff --git a/lib/rust/reflect/macros/src/lib.rs b/lib/rust/reflect/macros/src/lib.rs new file mode 100644 index 0000000000..fc53258d13 --- /dev/null +++ b/lib/rust/reflect/macros/src/lib.rs @@ -0,0 +1,267 @@ +//! # Rust reflection +//! +//! This crate implements a macro, `#[derive(Reflect)]`, which adds runtime reflection support to +//! datatype definitions. Its main motivation is type-driven code generation. +//! +//! ## General Attributes +//! +//! ### `#[reflect(skip)]` (field attribute) +//! The field will be excluded from reflection data. +//! When this attribute is present, the field's type does not need to implement `Reflect`. +//! +//! ### `#[reflect(as = "OtherType")]` (field attribute) +//! 
The field's type in the reflection data will be `OtherType` rather than the field's real type. +//! When this attribute is present, the field's real type does not need to implement `Reflect`. The +//! alternative type specified must implement `Reflect`. +//! +//! ## Attributes for Abstraction Customization +//! +//! Application of `#[derive(Reflect)]` to data types is enough to enable reflection over Rust +//! types. However, if the types will be abstracted with `enso_metamodel::meta` (i.e. for +//! transpilation to another language), some customization is likely: Direct translation into +//! another language would reproduce Rust patterns where they are likely not necessary (on top of +//! the target-language patterns introduced by the translation), resulting in an overly-complex +//! data model. In order to avert this (without using heuristics, which would result in +//! unpredictable output), this crate supports helper attributes to inform the abstractor about the +//! use of Rust patterns that can be erased in translation. +//! +//! ### `#[reflect(transparent)]` (struct attribute) +//! Only applicable to single-field `struct`s. The type will not appear in abstracted reflection +//! data; all references will appear as references to the contained type. +//! +//! ### `#[reflect(hide)]` (field attribute) +//! In target languages that support it, the field will be hidden from direct access. In the Java +//! target, this prevents the generation of accessors. +//! +//! ### `#[reflect(flatten)]` (field attribute) +//! In abstracted reflection data, the field will be replaced in this `struct` with the contents of +//! its type, which must be a `struct` type. +//! +//! To reduce the chance of name conflicts, the names of inserted fields will be created by +//! prepending the name of the flattened-away field to the names of the fields originating from the +//! inner type. Other field attributes such as [`hide`](#reflecthide-field-attribute) that were +//!
applied to the flattened field will be inherited by the inserted fields. +//! +//! #### Example: +//! This input code: +//! ```ignore +//! #[derive(Reflect)] +//! struct Outer { +//! first: u32, +//! #[reflect(flatten, hide)] +//! inner: Inner, +//! last: u32, +//! } +//! +//! #[derive(Reflect)] +//! struct Inner { +//! value0: u32, +//! value1: u32, +//! } +//! ``` +//! +//! Will be represented the same as this input: +//! ```ignore +//! #[derive(Reflect)] +//! struct Outer { +//! first: u32, +//! #[reflect(hide)] +//! inner_value0: u32, +//! #[reflect(hide)] +//! inner_value1: u32, +//! last: u32, +//! } +//! ``` +//! +//! ### `#[reflect(subtype)]` (field attribute) +//! In the abstracted representation, the containing type will be made the parent of the field's +//! type. There must be no references to the field's type except through the containing type. The +//! field's type must be an `enum`, or a generic parameter. +//! If the field's type is a generic parameter, the parameter must be instantiated with one `enum`, +//! and may be instantiated with any types that are members of the `enum` (this can only occur with +//! `#[reflect(inline)]`; see below). References to the type instantiated with the `enum` will +//! become references to the resulting parent type; references to `struct` instantiations will become +//! references to the resulting child types. +//! +//! ### `#[reflect(inline)]` (`enum` variant attribute) +//! In the abstracted representation, no type will be generated for the variant (which must be a +//! single-field variant); the contained type will instead be treated as a member of the `enum`. +//! +//! # Using `#[derive(Reflect)]` versus writing a proc macro +//! +//! Proc macros have some limitations. A proc macro should be: +//! - A pure function +//! - from syntax to syntax +//! - operating on each item in isolation. +//! +//! This crate doesn't have these limitations; it supports reasoning about the whole typegraph, and +//!
has no restrictions about side effects. However, a user of this crate must depend on its subject +//! code to obtain the reflection data at runtime; so automatic generation of Rust code during +//! compilation requires use of a build script to perform the reflection/code-generation step. + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +use proc_macro2::TokenStream; +use quote::quote; +use quote::ToTokens; +use syn::punctuated::Punctuated; +use syn::Token; + + + +mod analyze; +mod runtime; + +use runtime::Quote; + + + +// ======================== +// === Type Definitions === +// ======================== + +/// Represents a type definition. 
+#[derive(Debug)] +pub(crate) struct Type { + ident: syn::Ident, + generics: Punctuated, + lifetimes: Punctuated, + generic_params: Punctuated, + data: Data, + attrs: ContainerAttrs, +} + +#[derive(Debug)] +enum Data { + Struct(Fields), + Enum(Vec), +} + +#[derive(Debug)] +struct NamedField { + name: syn::Ident, + type_: syn::Type, + subtype: bool, + refer: Option>, + flatten: bool, + hide: bool, +} + +impl NamedField { + pub fn new(name: syn::Ident, type_: syn::Type) -> Self { + let subtype = Default::default(); + let refer = Default::default(); + let flatten = Default::default(); + let hide = Default::default(); + Self { name, type_, subtype, refer, flatten, hide } + } +} + +#[derive(Debug)] +struct UnnamedField { + type_: syn::Type, +} + +#[derive(Debug)] +enum Fields { + Named { fields: Vec }, + Unnamed(Vec), + Unit, +} + +#[derive(Debug)] +struct Variant { + ident: syn::Ident, + fields: Fields, + transparent: bool, +} + +#[derive(Debug, Default)] +struct ContainerAttrs { + /// If true, the container must have exactly one field; the container will not appear in + /// reflection data; all references to it are treated as references to the contained type. + transparent: bool, +} + + + +// ====================== +// === Derive Reflect === +// ====================== + +/// Derive a function providing information at runtime about the type's definition. See [`crate`] +/// for detailed documentation. +#[proc_macro_derive(Reflect, attributes(reflect))] +pub fn derive_reflect(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let type_ = analyze::analyze(input.into()); + let ident = &type_.ident; + let generics = &type_.generics; + let mut generic_bounds = type_.lifetimes.clone(); + let with_bound = |param| (quote! 
{ #param: enso_reflect::Reflect }).into_token_stream(); + let type_bounds = type_.generic_params.iter().map(with_bound); + generic_bounds.extend(type_bounds); + let type_expr = type_.quote(); + let static_lifetimes: Vec<_> = type_.lifetimes.iter().map(|_| quote! { 'static }).collect(); + let to_static = + |param| (quote! { <#param as enso_reflect::Reflect>::Static }).into_token_stream(); + let static_types = type_.generic_params.iter().map(to_static); + let mut static_params = vec![]; + static_params.extend(static_lifetimes.iter().cloned()); + static_params.extend(static_types); + let mut subtype_erased = quote! { Self::Static }; + if let Some(ty) = subtype_field_type(&type_.data) { + let erased_types = type_.generic_params.iter().cloned().map(|param| { + let param_ty: syn::Type = syn::parse2(param.clone()).unwrap(); + if param_ty == ty { + quote! { () } + } else { + param + } + }); + let mut erased_params = vec![]; + erased_params.extend(static_lifetimes.iter().cloned()); + erased_params.extend(erased_types); + subtype_erased = quote! { #ident<#(#erased_params),*> }; + } + let impl_reflect = quote! { + impl<#generic_bounds> enso_reflect::Reflect for #ident<#generics> { + type Static = #ident<#(#static_params),*>; + type SubtypeErased = #subtype_erased; + fn reflect() -> enso_reflect::metamodel::rust::TypeData { + #type_expr + } + } + }; + impl_reflect.into() +} + +fn subtype_field_type(data: &Data) -> Option { + match data { + Data::Struct(Fields::Named { fields }) => { + let mut type_ = None; + for field in fields { + if field.subtype { + let err = "A struct cannot have more than one field with #[reflect(subtype)]."; + assert_eq!(type_, None, "{}", err); + type_ = Some(field.type_.clone()); + } + } + type_ + } + _ => None, + } +} diff --git a/lib/rust/reflect/macros/src/runtime.rs b/lib/rust/reflect/macros/src/runtime.rs new file mode 100644 index 0000000000..99222dfeed --- /dev/null +++ b/lib/rust/reflect/macros/src/runtime.rs @@ -0,0 +1,190 @@ +//! 
Convert compile-time type definitions to syntax that evaluates to corresponding runtime values. + +use super::*; + +use syn::punctuated::Punctuated; +use syn::Token; + + + +// ============= +// === Quote === +// ============= + +/// Convert a value to syntax that evaluates to an analogous value at runtime. +pub(crate) trait Quote { + fn quote(&self) -> TokenStream; +} + + +// === Implementations === + +impl Quote for Type { + fn quote(&self) -> TokenStream { + let ident = &self.ident; + let generics = &self.generics; + let data = self.data.quote(self.attrs.transparent); + let name = self.ident.to_string(); + quote! { + enso_reflect::metamodel::rust::TypeData { + id: enso_reflect::type_id::<#ident<#generics>>(), + name: #name.to_owned(), + data: #data, + subtype_erased: enso_reflect::generic_id::(), + } + } + } +} + +impl Data { + fn quote(&self, transparent: bool) -> TokenStream { + match self { + Data::Struct(fields) => { + let fields = fields.quote(); + quote! { + enso_reflect::metamodel::rust::Data::Struct(enso_reflect::metamodel::rust::Struct { + fields: #fields, + transparent: #transparent, + }) + } + } + Data::Enum(variants) => { + assert!(!transparent, "`#[reflect(transparent)]` is not applicable to `enum`s."); + let variants: Punctuated<_, Token![,]> = + variants.iter().map(Quote::quote).collect(); + quote! { + enso_reflect::metamodel::rust::Data::Enum(enso_reflect::metamodel::rust::Enum { + variants: vec![#variants], + }) + } + } + } + } +} + +impl Quote for Fields { + fn quote(&self) -> TokenStream { + match self { + Fields::Named { fields } => { + let fields: Punctuated<_, Token![,]> = fields.iter().map(Quote::quote).collect(); + quote! { enso_reflect::metamodel::rust::Fields::Named(vec![#fields]) } + } + Fields::Unnamed(fields) => { + let fields: Punctuated<_, Token![,]> = fields.iter().map(Quote::quote).collect(); + quote! { enso_reflect::metamodel::rust::Fields::Unnamed(vec![#fields]) } + } + Fields::Unit => quote! 
{ enso_reflect::metamodel::rust::Fields::Unit }, + } + } +} + +impl Quote for NamedField { + fn quote(&self) -> TokenStream { + let name = self.name.to_string(); + let typename = match &self.refer { + Some(ty) => ty, + None => &self.type_, + }; + let subtype = self.subtype; + let flatten = self.flatten; + let hide = self.hide; + quote! { + enso_reflect::metamodel::rust::NamedField { + name: #name.to_owned(), + type_: enso_reflect::reflect_lazy::<#typename>(), + subtype: #subtype, + flatten: #flatten, + hide: #hide, + } + } + } +} + +impl Quote for UnnamedField { + fn quote(&self) -> TokenStream { + let typename = &self.type_; + quote! { + enso_reflect::metamodel::rust::UnnamedField { + type_: enso_reflect::reflect_lazy::<#typename>(), + } + } + } +} + +impl Quote for Variant { + fn quote(&self) -> TokenStream { + let ident = self.ident.to_string(); + let fields = self.fields.quote(); + let inline = self.transparent; + let quoted = quote! { + enso_reflect::metamodel::rust::Variant { + ident: #ident.to_owned(), + fields: #fields, + inline: #inline, + } + }; + quoted + } +} + + + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +mod tests { + use super::analyze::analyze; + use crate::Quote; + use quote::quote; + + #[test] + fn accept_simple_inputs() { + let inputs = [ + quote! { + struct Foo; + }, + quote! { + struct Bar { + bar: Foo, + baar: &'static str, + } + }, + ]; + for input in inputs { + analyze(input).quote(); + } + } + + #[test] + fn accept_generics() { + let inputs = [ + quote! { + struct Quux { + quux: T, + } + }, + quote! { + struct Quuux { + quux: Box, + } + }, + ]; + for input in inputs { + analyze(input).quote(); + } + } + + #[test] + fn accept_generic_lifetimes() { + let inputs = [quote! 
{ + struct Code<'s> { + repr: std::borrow::Cow<'s, str>, + } + }]; + for input in inputs { + analyze(input).quote(); + } + } +} diff --git a/lib/rust/reflect/src/lib.rs b/lib/rust/reflect/src/lib.rs new file mode 100644 index 0000000000..d1e5bf7cad --- /dev/null +++ b/lib/rust/reflect/src/lib.rs @@ -0,0 +1,308 @@ +//! Runtime support crate for [`enso_reflect_macros`]. +//! +//! For data producers: See the docs of [`enso_reflect_macros`] for how to derive [`Reflect`]. It is +//! not recommended to explicitly `impl Reflect`; the derived implementation can be extensively +//! customized through attributes. The meanings of the associated types required by the trait are +//! rather obtuse, and the trait itself should not be considered a stable interface--expect that +//! new, even more obtuse associated types will be added to the trait in the future. +//! +//! For data consumers: The `Reflect` trait can be used to obtain reflection data; after that, the +//! [`enso_metamodel`] crate supports working with it--see the docs there. +//! +//! # Implementation +//! +//! The functionality exposed by the `Reflect` trait is to report information about type +//! relationships--what fields compose a `struct`, what variants compose an `enum`, etc. +//! +//! The chief design constraint of the `Reflect` trait is: It must be possible for a pure function +//! from Rust syntax to Rust syntax, operating on each data type in isolation (e.g. a proc macro) +//! to generate a `Reflect` implementation for any type. +//! +//! ## Producing a type graph from syntax +//! +//! Because Rust doesn't have value-level type information (i.e. it doesn't have a native reflection +//! mechanism), and a `Reflect` implementation must be generatable from syntax, when the `Reflect` +//! implementation needs to refer to another type, it does so by creating an invocation of the +//! `Reflect` method of the type being referred to. However, it cannot call these functions +//! 
directly--type graphs often contain cycles. To address this, the [`enso_metamodel::rust`] +//! representation is based on *lazy* graphs: A reference to a type contains a thunk that can be +//! evaluated to obtain type information. +//! +//! ## Associating unique identifiers with types +//! +//! This solves the problem of producing references between types, potentially cyclic, from syntax. +//! However, the consumer of the data needs more information not to be stymied by the cyclic nature +//! of type graphs; without attaching some notion of identity to the type references, it would be +//! impossible for a data consumer to tell whether they are following a cycle (repeatedly visiting +//! types they've encountered before), or encountering new, similarly-shaped types. +//! +//! Assigning IDs is not straightforward: How does a pure function from the syntax representing a +//! type name to the syntax representing an expression produce something that will evaluate to a +//! value uniquely identifying a type? +//! +//! Referring to the address of the type's `Reflect::reflect` function might seem like a solution, +//! but that isn't reliable--if two `reflect` function bodies compiled to the same code, LLVM might +//! implement them both with one function; conversely, one function in the source code can have +//! multiple addresses, for example if a generic type is instantiated with the same parameters in +//! different compilation units. +//! +//! Fortunately, there is an answer: `std::any::TypeId::of::<T>()` returns a value uniquely identifying +//! any type `T`. "Wait," you ask--"std::any::TypeId::of::<T> has a `T: 'static` bound! How could we +//! possibly use it to implement a trait for types that may be non-`'static`?" And so, we have come +//! to the motivation for the first obtuse associated type, `Reflect::Static`: +//! ``` +//! pub trait Reflect { +//! type Static: 'static; +//! // ... +//! } +//! ``` +//!
While a function operating on syntax has extremely limited ability to reason about types, one +//! thing it can do is tell a `'static` type from a non-`'static` type: A type is always `'static` +//! unless it is parameterized with some type parameter other than `'static`. Thus, a proc macro is +//! able to, for any type, name a type that is the same except with only `'static` lifetime +//! parameters, and therefore a `'static` type. [`enso_reflect_macros`] uses this life-extension +//! approach to provide `Reflect::Static` types, and `Reflect` uses the `std::any::TypeId` of its +//! associated `Static` to attach an identifier to a type, or to a lazy reference to a type. +//! +//! ## Adding a little parametric polymorphism +//! +//! Due to its syntax-transformation implementation, `reflect` sees types post-monomorphization: For +//! example, in the following type definition: +//! ```ignore +//! #[derive(Reflect)] +//! struct Foo<T> { +//! field: T, +//! } +//! ``` +//! Syntactically, the reflect implementation for `Foo` will refer to the type of `field` by the +//! name of its parameter `T`--but when its `reflect` function is run to collect the data, +//! monomorphization has already occurred; the resulting data will not be able to distinguish +//! between a field with a parametric type `T` that has been instantiated with, e.g. `u32`, and a +//! field with a concrete type that is always `u32`. +//! +//! However, to support a Rust pattern in [`enso_parser`], it was necessary for +//! [`enso_reflect_macros`] to provide the `#[reflect(subtype)]` attribute. If you refer to the +//! documentation for that field, you may notice that its implementation requires identifying when a +//! generic type with a field whose type is a type parameter is instantiated with different types +//! for that parameter. A certain amount of type-erasure is called for. +//! +//! And so, we come to the second obtuse associated type of `Reflect`: +//! ``` +//! pub trait Reflect { +//! // ... +//!
type SubtypeErased: 'static; +//! // ... +//! } +//! ``` +//! The `SubtypeErased` type is used to obtain a `TypeId` that does not depend on the parameter of +//! the type of the field annotated with `#[reflect(subtype)]`, if any. This is accomplished by a +//! similar approach to the way lifetimes are erased: Starting with the lifetime-erased type used +//! for `Reflect::Static`, identify the relevant parameter instantiation within the type, and +//! replace it with a constant type, to obtain a type that is invariant in one parameter, and +//! covariant in all the others. The implementation uses `()` for the constant type. (Thus, it is +//! not currently supported for types to apply the `subtype` transform to a field with a parameter +//! that has arbitrary bounds. While this could be achieved to some extent by using a `Box<dyn Trait>` +//! for the invariant parameter, that has other complications--we'd need to identify associated +//! types of the trait in question used in other fields--and [`enso_parser`] hasn't had a need for +//! it, and probably never will.) + +// === Features === +#![feature(map_first_last)] +#![feature(associated_type_defaults)] +#![feature(option_get_or_insert_default)] +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +use enso_metamodel::rust::*; + + +// ============== +// === Export === +// ============== + +pub use enso_metamodel as metamodel; + + + +/// Imports for crates that `#[derive(Reflect)]`. 
+pub mod prelude { + pub use enso_reflect_macros::Reflect; +} + + + +// =============== +// === Reflect === +// =============== + +/// Supports obtaining descriptions of the definitions of types at runtime. +pub trait Reflect { + /// This must be a type that uniquely identifies `Self`, ignoring any lifetime parameters. + type Static: 'static; + /// This must be a type that uniquely identifies `Self`, ignoring any lifetime parameters, and + /// invariant to any one generic parameter that may occur in the definition of a field marked + /// `#[reflect(subtype)]`, if present. The type used for the erased parameter can be any type + /// that satisfies `Self`'s bounds. + type SubtypeErased: 'static; + /// Get information about the type's definition. + fn reflect() -> TypeData; + /// Get information about type, identified by a reference. + fn reflect_type(&self) -> TypeData { + Self::reflect() + } +} + + +// === Implementations for standard types === + +impl Reflect for std::borrow::Cow<'_, str> { + type Static = String; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + ::reflect() + } +} + +impl Reflect for std::rc::Rc +where T: Reflect +{ + type Static = T::Static; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + T::reflect() + } +} + +impl Reflect for Box +where T: Reflect +{ + type Static = T::Static; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + T::reflect() + } +} + +impl Reflect for Option +where T: Reflect +{ + type Static = Option; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::(); + let name = "Option".to_owned(); + let data = Data::Primitive(Primitive::Option(reflect_lazy::())); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } +} + +impl Reflect for Result +where + T: Reflect, + E: Reflect, +{ + type Static = Result; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::(); + let name = 
"Result".to_owned(); + let ok = reflect_lazy::(); + let err = reflect_lazy::(); + let data = Data::Primitive(Primitive::Result(ok, err)); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } +} + +impl Reflect for &'_ str { + type Static = String; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + ::reflect() + } +} + +impl Reflect for Vec +where T: Reflect +{ + type Static = Vec; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::>(); + let name = "Vec".to_owned(); + let data = Data::Primitive(Primitive::Vec(reflect_lazy::())); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } +} + +macro_rules! reflect_primitive { + ($ty: ty, $primitive: expr) => { + impl Reflect for $ty { + type Static = Self; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::<$ty>(); + let name = stringify!($ty).to_owned(); + let data = Data::Primitive($primitive); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } + } + }; +} + +reflect_primitive!(bool, Primitive::Bool); +reflect_primitive!(usize, Primitive::Usize); +reflect_primitive!(u32, Primitive::U32); +reflect_primitive!(String, Primitive::String); + + + +// ================== +// === Reflectors === +// ================== + +/// Return a value that can be used to obtain type information. +pub fn reflect_lazy() -> LazyType { + let id = type_id::(); + let evaluate = ::reflect; + LazyType::new(id, evaluate) +} + +/// Get an identifier that uniquely identifies the type, up to the instantiation of the parameter +/// of any field marked with the attribute `#[reflect(subtype)]` +pub fn generic_id() -> GenericTypeId { + GenericTypeId::new(std::any::TypeId::of::()) +} + +/// Obtain a unique identifier for a type. 
+pub fn type_id() -> TypeId { + TypeId::new(std::any::TypeId::of::()) +} + + + +// ================ +// === GraphViz === +// ================ + +/// Generate a graph of the given type's relationships with other types. +#[cfg(feature = "graphviz")] +pub fn graph() -> metamodel::graphviz::Graph { + reflect_lazy::().into() +} diff --git a/lib/rust/reflect/tests/test.rs b/lib/rust/reflect/tests/test.rs new file mode 100644 index 0000000000..3ddf700b35 --- /dev/null +++ b/lib/rust/reflect/tests/test.rs @@ -0,0 +1,39 @@ +// The type definitions in this crate exercise `#[derive(Reflect)]`. + +// === Non-Standard Linter Configuration === +#![allow(dead_code)] + +use enso_reflect as reflect; +use enso_reflect_macros::Reflect; + + + +#[derive(Reflect)] +struct Foo; + +#[derive(Reflect)] +struct Bar { + bar: Foo, +} + +#[derive(Reflect)] +enum Baz { + Bar(Bar), + Baz, +} + +#[derive(Reflect)] +struct Quux { + _quux: T, +} + +#[derive(Reflect)] +pub struct Code<'s> { + pub _repr: std::borrow::Cow<'s, str>, +} + +#[test] +fn test() { + use reflect::Reflect; + let _type = Baz::reflect(); +} diff --git a/lib/rust/shapely/macros/src/tagged_enum.rs b/lib/rust/shapely/macros/src/tagged_enum.rs index b95f59f726..4293f43169 100644 --- a/lib/rust/shapely/macros/src/tagged_enum.rs +++ b/lib/rust/shapely/macros/src/tagged_enum.rs @@ -1,6 +1,8 @@ use crate::prelude::*; use inflector::cases::snakecase::to_snake_case; +use syn::AttrStyle; +use syn::Attribute; use syn::Data; use syn::DeriveInput; use syn::Fields; @@ -32,8 +34,22 @@ use syn::Fields; /// ``` /// /// # Attributes -/// All attributes defined before the `#[tagged_enum]` one will be applied to the enum only, while -/// all other attributes will be applied to both the enum and all the variant structs. 
+/// Attributes defined after `#[tagged_enum]` and not in a section (see below) will be applied to +/// the enum and also all the variant structs produced; this is the default because it is +/// appropriate for common attributes like `#[derive(..)]`. +/// +/// The attribute `#[tagged_enum(apply_attributes_to = "enum")]` starts an attribute section; any +/// attributes in the section will be applied only to the enum itself. +/// +/// The attribute `#[tagged_enum(apply_attributes_to = "variant-types")]` starts an attribute section; any +/// attributes in the section will be applied only to the variant structs produced (a `"variants"` section instead applies its attributes to the variants of the enum itself). +/// +/// An attribute can be placed before the `#[tagged_enum]` if its proc macro needs to operate on +/// the enum before expanding `tagged_enum`; otherwise, to avoid confusion, attributes should not be +/// placed before `#[tagged_enum]`, as the results would differ for *active* or *inert* +/// attributes[1]. +/// [1]: https://doc.rust-lang.org/reference/attributes.html#active-and-inert-attributes + pub fn run( attr: proc_macro::TokenStream, input: proc_macro::TokenStream, @@ -45,7 +61,9 @@ pub fn run( } else if !attrs.is_empty() { panic!("Unsupported attributes: {:?}", attrs); } - let decl = syn::parse_macro_input!(input as DeriveInput); + let mut decl = syn::parse_macro_input!(input as DeriveInput); + let (enum_attrs, variant_types_attrs, variants_attrs) = + split_attr_sections(std::mem::take(&mut decl.attrs)); let (impl_generics, ty_generics, inherent_where_clause_opt) = &decl.generics.split_for_impl(); let mut where_clause = enso_macro_utils::new_where_clause(vec![]); for inherent_where_clause in inherent_where_clause_opt { @@ -80,7 +98,6 @@ pub fn run( // } let vis = &decl.vis; let enum_name = &decl.ident; - let enum_attrs = &decl.attrs; let variant_names: Vec<_> = data.variants.iter().map(|v| &v.ident).collect(); let variant_bodies = variant_names.iter().map(|v| { if is_boxed { @@ -89,11 +106,15 @@ pub fn run( quote!(#v #ty_generics) } }); + let 
variants_attrs = quote! { #(#variants_attrs)* }; output.push(quote! { #(#enum_attrs)* #[allow(missing_docs)] #vis enum #enum_name #ty_generics #where_clause { - #(#variant_names(#variant_bodies)),* + #( + #variants_attrs + #variant_names(#variant_bodies) + ),* } impl #impl_generics Debug for #enum_name #ty_generics #where_clause { @@ -183,7 +204,7 @@ pub fn run( let fields = &variant.fields; let fields = if fields.is_empty() { quote!({}) } else { quote!(#fields) }; output.push(quote! { - #(#enum_attrs)* + #(#variant_types_attrs)* #(#variant_attrs)* #[derive(Debug)] #[allow(missing_docs)] @@ -318,3 +339,74 @@ pub fn run( output.into() } + + + +// ================== +// === Attributes === +// ================== + +/// The path used to identify helper-attributes that configure the macro. +/// E.g. `tagged_enum` in `#[tagged_enum(apply_attributes_to = "variants")]` +const HELPER_ATTRIBUTE_PATH: &str = "tagged_enum"; + +enum Attr { + ApplyAttributesTo(ApplyAttributesTo), +} + +enum ApplyAttributesTo { + Enum, + VariantTypes, + Variants, +} + +fn parse_attr(attr: &Attribute) -> Option { + if attr.style != AttrStyle::Outer { + return None; + } + if attr.path.get_ident()? != HELPER_ATTRIBUTE_PATH { + return None; + } + let name_value = "Parsing name-value argument"; + let syn::MetaNameValue { lit, path, .. 
} = attr.parse_args().expect(name_value); + match path.get_ident().expect("Unsupported helper-attribute name").to_string().as_str() { + "apply_attributes_to" => Some(Attr::ApplyAttributesTo({ + let value = match lit { + syn::Lit::Str(lit_str) => lit_str.value(), + _ => panic!("Expected a LitStr in argument to helper-attribute."), + }; + match value.as_str() { + "enum" => ApplyAttributesTo::Enum, + "variant-types" => ApplyAttributesTo::VariantTypes, + "variants" => ApplyAttributesTo::Variants, + _ => panic!("Unexpected value in string argument to helper-attribute."), + } + })), + _ => panic!("Unsupported helper-attribute name: {:?}.", path), + } +} + +fn split_attr_sections(attrs: Vec) -> (Vec, Vec, Vec) { + let mut enum_attrs = vec![]; + let mut variant_types_attrs = vec![]; + let mut variants_attrs = vec![]; + let mut attr_section = None; + for attr in attrs { + if let Some(attr) = parse_attr(&attr) { + match attr { + Attr::ApplyAttributesTo(apply_to) => attr_section = Some(apply_to), + } + continue; + } + match attr_section { + None => { + enum_attrs.push(attr.clone()); + variant_types_attrs.push(attr); + } + Some(ApplyAttributesTo::Enum) => enum_attrs.push(attr), + Some(ApplyAttributesTo::VariantTypes) => variant_types_attrs.push(attr), + Some(ApplyAttributesTo::Variants) => variants_attrs.push(attr), + } + } + (enum_attrs, variant_types_attrs, variants_attrs) +} diff --git a/lib/rust/types/Cargo.toml b/lib/rust/types/Cargo.toml index 5eec9c7b7c..fc7173c75c 100644 --- a/lib/rust/types/Cargo.toml +++ b/lib/rust/types/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" [lib] [dependencies] +enso-reflect = { path = "../reflect" } nalgebra = { version = "0.26.1" } num-traits = { version = "0.2" } paste = "1.0.7" +serde = { version = "1.0", features = ["derive"], optional = true } diff --git a/lib/rust/types/src/unit2.rs b/lib/rust/types/src/unit2.rs index 9c2f38e6b6..3a4465d4f7 100644 --- a/lib/rust/types/src/unit2.rs +++ b/lib/rust/types/src/unit2.rs @@ -6,6 +6,8 @@ //! 
[`Duration`] or a number, respectfully. You are allowed to define any combination of operators //! and rules of how the result inference should be performed. +use enso_reflect::prelude::*; + use paste::paste; use std::borrow::Cow; use std::marker::PhantomData; @@ -94,8 +96,12 @@ pub trait Variant { /// Internal representation of every unit. #[repr(transparent)] +#[derive(Reflect)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[reflect(transparent)] pub struct UnitData { repr: R, + #[reflect(skip)] variant: PhantomData, } @@ -513,7 +519,8 @@ macro_rules! define { $(#$meta)* pub type $name = $crate::unit2::Unit<[<$name:snake:upper>]>; $(#$meta)* - #[derive(Debug, Clone, Copy)] + #[derive(Debug, Clone, Copy, Reflect)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct [<$name:snake:upper>]; impl $crate::unit2::Variant for [<$name:snake:upper>] {