diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5258c3ffb3..7bad69198d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,12 +7,12 @@ CHANGELOG.md # Rust Libraries and Related Files rust-toolchain.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon rustfmt.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon -Cargo.lock @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo -Cargo.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo -/lib/rust/ @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo +Cargo.lock @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw +Cargo.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw +/lib/rust/ @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw /lib/rust/ensogl/ @MichaelMauderer @wdanilo @farmaazon /lib/rust/profiler/ @kazcw @MichaelMauderer @wdanilo -/integration-test/ @MichaelMauderer @wdanilo @farmaazon +/integration-test/ @MichaelMauderer @wdanilo @farmaazon @kazcw /tools/build-performance/ @kazcw @mwu-tow @wdanilo # Scala Libraries diff --git a/Cargo.lock b/Cargo.lock index 9c7aaad2d9..23b2b286d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2026,6 +2026,15 @@ dependencies = [ "wasm-bindgen-test", ] +[[package]] +name = "enso-metamodel" +version = "0.1.0" +dependencies = [ + "bincode", + "derivative", + "derive_more", +] + [[package]] name = "enso-optics" version = "0.2.0" @@ -2037,12 +2046,26 @@ dependencies = [ name = "enso-parser" version = "0.1.0" dependencies = [ + "bincode", "enso-data-structures", "enso-parser-syntax-tree-builder", "enso-parser-syntax-tree-visitor", "enso-prelude", + "enso-reflect", "enso-shapely-macros", "enso-types", + "serde", +] + +[[package]] +name = "enso-parser-generate-java" +version = "0.1.0" +dependencies = [ + "derivative", + "enso-metamodel", + "enso-parser", + "enso-prelude", + "enso-reflect", ] [[package]] @@ -2078,6 +2101,7 @@ dependencies = [ "derivative", "derive_more", "enclose", + "enso-reflect", "enso-shapely", "failure", "futures 0.3.21", @@ -2157,6 
+2181,24 @@ dependencies = [ "syn", ] +[[package]] +name = "enso-reflect" +version = "0.1.0" +dependencies = [ + "derivative", + "enso-metamodel", + "enso-reflect-macros", +] + +[[package]] +name = "enso-reflect-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "enso-shapely" version = "0.2.0" @@ -2215,9 +2257,11 @@ dependencies = [ name = "enso-types" version = "0.1.0" dependencies = [ + "enso-reflect", "nalgebra 0.26.2", "num-traits", "paste 1.0.7", + "serde", ] [[package]] @@ -3603,7 +3647,7 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5617e92fc2f2501c3e2bc6ce547cad841adba2bae5b921c7e52510beca6d084c" dependencies = [ - "base64 0.10.1", + "base64 0.13.0", "bytes 1.1.0", "http", "httpdate 0.3.2", diff --git a/Cargo.toml b/Cargo.toml index 2b9d303e84..2ae03f58fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "lib/rust/*", "lib/rust/parser/src/syntax/tree/visitor", "lib/rust/parser/src/syntax/tree/builder", + "lib/rust/parser/generate-java", "lib/rust/profiler/data", "integration-test" ] diff --git a/build-config.yaml b/build-config.yaml index 232e317073..829a342702 100644 --- a/build-config.yaml +++ b/build-config.yaml @@ -1,6 +1,6 @@ # Options intended to be common for all developers. 
-wasm-size-limit: 4.99 MiB +wasm-size-limit: 5.05 MiB required-versions: cargo-watch: ^8.1.1 diff --git a/lib/rust/metamodel/Cargo.toml b/lib/rust/metamodel/Cargo.toml new file mode 100644 index 0000000000..9bfe40cfa5 --- /dev/null +++ b/lib/rust/metamodel/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "enso-metamodel" +version = "0.1.0" +edition = "2021" +authors = ["Enso Team "] + +[dependencies] +derivative = "2.2" +derive_more = "0.99" + +[dev-dependencies] +bincode = "1.3" + +[features] +graphviz = [] +java = [] +rust = [] diff --git a/lib/rust/metamodel/src/data_structures.rs b/lib/rust/metamodel/src/data_structures.rs new file mode 100644 index 0000000000..fa1437d559 --- /dev/null +++ b/lib/rust/metamodel/src/data_structures.rs @@ -0,0 +1,216 @@ +//! Data structures used in the crate implementation. + +use vecmap::*; + +use derivative::Derivative; +use std::marker::PhantomData; + + + +// =========== +// === IDs === +// =========== + +/// A globally unique identifier, with a type-tag. +#[derive(Derivative)] +#[derivative(Copy(bound = ""))] +#[derivative(Clone(bound = ""))] +#[derivative(Debug(bound = ""))] +#[derivative(Eq(bound = ""))] +#[derivative(PartialEq(bound = ""))] +#[derivative(Ord(bound = ""))] +#[derivative(PartialOrd(bound = ""))] +#[derivative(Hash(bound = ""))] +pub struct Id { + value: u32, + marker: PhantomData<*const T>, +} + +impl Id { + /// Assign a new ID. 
+ pub fn new() -> Self { + use std::sync::atomic; + static NEXT_ID: atomic::AtomicU32 = atomic::AtomicU32::new(0); + let value = NEXT_ID.fetch_add(1, atomic::Ordering::Relaxed); + let marker = Default::default(); + Self { value, marker } + } +} + +impl Default for Id { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Display for Id { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.value) + } +} + + + +// ========================== +// === Densely-stored map === +// ========================== + +/// Densely-stored map from internally-produced keys. +/// +/// This is similar in implementation to `enso_data_structures::OptVec`, however there is a core +/// design difference: `OptVec` is a self-keying map created to be *more efficient* than the +/// standard map types; this is a self-keying map created to be *safer* than the standard map types, +/// and also efficient for the expected workload. +/// +/// `OptVec` uses a freelist to reuse keys and remain dense during mixed remove/create workloads; +/// `VecMap` statically disallows key reuse--values can be explicitly mutated, but once removed +/// cannot be rebound. This improves the failure mode of broken references: Rather than likely +/// become apparently-valid references to the wrong values, attempts to access removed elements will +/// fail, and be detected. +#[derive(Debug, Derivative, Clone)] +#[derivative(Default(bound = ""))] +pub struct VecMap { + data: Vec>, +} + +impl VecMap { + /// Obtain a new key, with no bound value. + pub fn unbound_key(&mut self) -> Key { + let id = Key::new(self.data.len()); + self.data.push(None); + id + } + + /// Set the value bound to a key. + pub fn bind(&mut self, key: Key, value: T) -> Key { + assert!(self.data[key.index].is_none()); + self.data[key.index] = Some(value); + Key::new(key.index) + } + + /// Add a value; return its newly-assigned key. 
+ pub fn insert(&mut self, value: T) -> Key { + let key = self.unbound_key(); + self.bind(key, value) + } + + /// Remove a value from the graph; its ID will be permanently unoccupied. + pub fn remove(&mut self, key: Key) -> T { + self.data[key.index].take().unwrap() + } + + /// Get a reference to a value, if present. + pub fn get(&self, key: Key) -> Option<&T> { + self.data[key.index].as_ref() + } + + /// Get a mutable reference to a value, if present. + pub fn get_mut(&mut self, key: Key) -> Option<&mut T> { + self.data[key.index].as_mut() + } + + /// Iterate all key with values set. + pub fn keys(&self) -> impl Iterator> + '_ { + self.data.iter().enumerate().filter_map(|(i, val)| val.as_ref().map(|_| Key::new(i))) + } + + /// Iterate values. + pub fn values(&self) -> impl Iterator { + self.data.iter().filter_map(|val| val.as_ref()) + } + + /// Iterate values mutably. + pub fn values_mut(&mut self) -> impl Iterator { + self.data.iter_mut().filter_map(|val| val.as_mut()) + } + + /// Iterate entries. + pub fn iter<'s>(&'s self) -> impl Iterator, &'s T)> { + let map_key = |(i, val): (usize, &'s Option)| val.as_ref().map(|val| (Key::new(i), val)); + self.data.iter().enumerate().filter_map(map_key) + } + + /// Iterate entries mutably. 
+ pub fn iter_mut<'s>(&'s mut self) -> impl Iterator, &'s mut T)> { + let map_key = + |(i, val): (usize, &'s mut Option)| val.as_mut().map(|val| (Key::new(i), val)); + self.data.iter_mut().enumerate().filter_map(map_key) + } +} + +impl std::ops::Index> for VecMap { + type Output = T; + fn index(&self, key: Key) -> &Self::Output { + self.get(key).unwrap() + } +} +impl std::ops::Index<&Key> for VecMap { + type Output = T; + fn index(&self, key: &Key) -> &Self::Output { + &self[*key] + } +} +impl std::ops::IndexMut> for VecMap { + fn index_mut(&mut self, key: Key) -> &mut Self::Output { + self.get_mut(key).unwrap() + } +} +impl std::ops::IndexMut<&Key> for VecMap { + fn index_mut(&mut self, key: &Key) -> &mut Self::Output { + &mut self[*key] + } +} + +/// Types used by `VecMap`. +pub mod vecmap { + use super::*; + + /// Marker indicating a key that may or may not currently be bound. + #[derive(Copy, Clone, Debug)] + pub struct MaybeBound; + /// Marker indicating a key that is not yet bound. + #[allow(missing_copy_implementations)] // Type is one-shot promise. + #[derive(Debug)] + pub struct Unbound; + + /// Identifies a location within a `VecMap`. + #[derive(Derivative)] + #[derivative(Copy(bound = "State: Copy"))] + #[derivative(Clone(bound = "State: Clone"))] + #[derivative(Debug(bound = ""))] + #[derivative(Eq(bound = ""))] + #[derivative(PartialEq(bound = ""))] + #[derivative(Ord(bound = ""))] + #[derivative(PartialOrd(bound = ""))] + #[derivative(Hash(bound = ""))] + pub struct Key { + pub(super) index: usize, + #[derivative(Debug = "ignore")] + marker: PhantomData<*const T>, + #[derivative(Debug = "ignore")] + state: PhantomData<*const State>, + } + + impl Key { + pub(super) fn new(index: usize) -> Self { + let marker = Default::default(); + let state = Default::default(); + Self { index, marker, state } + } + } + + /// Identifies a location within a `VecMap` that does not yet have a value bound. 
+ pub type UnboundKey = Key; + + impl From<&'_ Key> for Key { + fn from(key: &Key) -> Self { + Self::new(key.index) + } + } + + impl std::fmt::Display for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.index) + } + } +} diff --git a/lib/rust/metamodel/src/graphviz.rs b/lib/rust/metamodel/src/graphviz.rs new file mode 100644 index 0000000000..681119ebc1 --- /dev/null +++ b/lib/rust/metamodel/src/graphviz.rs @@ -0,0 +1,122 @@ +//! Rendering graphical representations of data models with GraphViz. + +use std::collections::BTreeSet; + + + +/// Hide data fields that don't reference any types outside the builtin set. +const PRUNE_PRIMITIVE_LEAFS: bool = true; + + + +// ============= +// === Graph === +// ============= + +/// A GraphViz graph of relationships between types. +#[derive(Default, Debug)] +pub struct Graph { + pub(crate) nodes: std::collections::HashMap, + pub(crate) edges: Vec<(String, String, EdgeType)>, +} + +#[derive(Debug)] +pub(crate) struct Node { + pub label: String, + pub node_type: NodeType, + pub primitive: bool, +} + +#[derive(Debug)] +pub(crate) enum NodeType { + Struct, + Enum, + Variant, + AbstractStruct, +} + +#[derive(Debug)] +pub(crate) enum EdgeType { + Variant, + Field, + OptionalField, + Subtype, +} + +impl std::fmt::Display for Graph { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let variant_color = "#7EA3CC"; + let primitive_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#262626"), + format!("fontcolor={:?}", "white"), + ]; + let enum_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#255C99"), + format!("fontcolor={:?}", "white"), + ]; + let variant_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", variant_color), + format!("shape=oval"), + ]; + let struct_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#B3001B"), + format!("fontcolor={:?}", "white"), + ]; + let 
abstract_struct_attrs = vec![ + format!("style=filled"), + format!("fillcolor={:?}", "#6D1321"), + format!("fontcolor={:?}", "white"), + ]; + let variant_edge_attrs = vec![format!("color={:?}", variant_color)]; + let field_edge_attrs = vec![]; + let optional_field_edge_attrs = vec![format!("style=dashed")]; + let subtype_edge_attrs = vec![format!("arrowhead=dot")]; + writeln!(f, "digraph refs {{")?; + let non_leafs: BTreeSet<_> = self.edges.iter().map(|(x, _, _)| x).cloned().collect(); + let mut pruned = BTreeSet::new(); + for (id, node) in &self.nodes { + let mut attrs; + if node.primitive { + if PRUNE_PRIMITIVE_LEAFS && !non_leafs.contains(id) { + pruned.insert(id.clone()); + continue; + } + attrs = primitive_attrs.clone(); + } else { + match node.node_type { + NodeType::Struct => attrs = struct_attrs.clone(), + NodeType::Enum => attrs = enum_attrs.clone(), + NodeType::Variant => attrs = variant_attrs.clone(), + NodeType::AbstractStruct => attrs = abstract_struct_attrs.clone(), + } + } + attrs.push(format!("label={:?}", node.label)); + let shape = match node.node_type { + NodeType::Enum => "diamond", + NodeType::Variant => "oval", + NodeType::Struct => "box", + NodeType::AbstractStruct => "diamond", + }; + attrs.push(format!("shape={}", shape)); + writeln!(f, "{:?} [{}];", id, attrs.join(","))?; + } + for (x, y, edgetype) in &self.edges { + if pruned.contains(x) || pruned.contains(y) { + continue; + } + let attrs = match edgetype { + EdgeType::Variant => &variant_edge_attrs, + EdgeType::Field => &field_edge_attrs, + EdgeType::OptionalField => &optional_field_edge_attrs, + EdgeType::Subtype => &subtype_edge_attrs, + }; + writeln!(f, "{:?} -> {:?} [{}];", x, y, attrs.join(","))?; + } + writeln!(f, "}}")?; + Ok(()) + } +} diff --git a/lib/rust/metamodel/src/java/bincode.rs b/lib/rust/metamodel/src/java/bincode.rs new file mode 100644 index 0000000000..e7b1e38215 --- /dev/null +++ b/lib/rust/metamodel/src/java/bincode.rs @@ -0,0 +1,336 @@ +//! 
Derivation of bincode[1] serialization for Java types. +//! [1]: https://github.com/bincode-org/bincode +//! +//! # Compatibility +//! +//! The generated deserialization methods support the same format as Rust's `serde-bincode` for an +//! analogous tree of types, with the following configuration: +//! ``` +//! # let data = &[0u8; 0]; +//! use bincode::Options; +//! let options = bincode::DefaultOptions::new().with_fixint_encoding(); +//! let serialized = options.serialize(data); +//! ``` +//! +//! # Nullability +//! +//! The [`crate::java`] model distinguishes between non-null fields, and fields that may be null. +//! If a field is *not* non-null, or if a type is wrapped in a `java.util.Optional`, whether it's +//! present is encoded compatibly with Rust's `Option` type (i.e. with a 1-byte discriminant). +//! +//! # Basic types +//! +//! Basic types (e.g. integer types, `boolean`, `String`) are encoded compatibly with the +//! corresponding types in Rust. +//! +//! # Sequence types +//! +//! A sequence (e.g. as encoded for a Rust `Vec`) is represented idiomatically in Java: +//! internally its implementation type is `java.util.ArrayList`, but in public interfaces it is +//! exposed as a `java.util.List`. +//! +//! # `Result` +//! +//! In Java, an `Either` type is used to represent a `Result` as used in Rust. `Either` +//! is similar to `Result`, with the main difference being that the `Ok` case is the `Right` value +//! of an `Either`, and the `Err` case is the `Left`. +//! +//! # Overrides +//! +//! The default deserialization can be replaced or modified per-field; see the +//! [`DeserializerBuilder`] interface for details. +//! +//! # Deserialization errors +//! +//! The only runtime error possible is `FormatException`, defined in the Java `serialization` +//! support package; it is a `RuntimeException` rather than a checked exception, as deserialization +//! is extensively tested to succeed for any types that may be serialized in `Rust`. 
+ +use crate::java::implementation::*; +use crate::java::*; + +use derivative::Derivative; +use std::fmt::Write; + + + +// ========================== +// === Derive Deserialize === +// ========================== + +/// Supports configuring deserialization for a type. +#[derive(Derivative)] +#[derivative(Debug)] +pub struct DeserializerBuilder { + root: ClassId, + #[derivative(Debug = "ignore")] + materializers: BTreeMap, + #[derivative(Debug = "ignore")] + mappers: BTreeMap, + support: String, + either_type: String, +} + +impl DeserializerBuilder { + /// Create a deserializer builder. + /// - `root`: The type to deserialize. + /// - `support`: The serialization support package. + /// - `either_type`: The fully-qualified name of the type that implements `Either`. + pub fn new(root: ClassId, support: impl Into, either_type: impl Into) -> Self { + let materializers = Default::default(); + let mappers = Default::default(); + let support = support.into(); + let either_type = either_type.into(); + Self { root, materializers, mappers, support, either_type } + } + + /// Configure the specified field to be produced according to an expression, instead of by + /// standard deserialization. The expression will be produced by the given function. + pub fn materialize(&mut self, field: FieldId, materializer: F) + where F: for<'a> FnOnce(MaterializerInput<'a>) -> String + 'static { + self.materializers.insert(field, Box::new(materializer)); + } + + /// Configure the specified field to be modified by an expression, after being deserialized. + /// The expression will be produced by the given function. + pub fn map(&mut self, field: FieldId, mapper: F) + where F: for<'a, 'b> FnOnce(MapperInput<'a, 'b>) -> String + 'static { + self.mappers.insert(field, Box::new(mapper)); + } + + /// Generate the deserialization method. 
+ pub fn build(mut self, graph: &TypeGraph) -> Method { + let method = match graph[self.root].abstract_ { + true => self.deserialize_abstract(graph), + false => self.deserialize_concrete(graph), + }; + Method::Raw(method) + } +} + +type Materializer = Box FnOnce(MaterializerInput<'a>) -> String>; +type Mapper = Box FnOnce(MapperInput<'a, 'b>) -> String>; + +/// Input to a function that produces an expression that deserializes a field. +#[derive(Debug)] +pub struct MaterializerInput<'a> { + /// Identifier of the serialized message object. + pub message: &'a str, +} + +/// Input to a function that produces an expression that modifies a field after deserialization. +#[derive(Debug)] +pub struct MapperInput<'a, 'b> { + /// Identifier of the serialized message object. + pub message: &'a str, + /// Identifier of the field's value, after producing with standard deserialization. + pub value: &'b str, +} + + +// === Product Types === + +impl DeserializerBuilder { + /// Deserialize a `Class` of a fixed type (not dependant on further runtime data). 
+ fn deserialize_concrete(&mut self, graph: &TypeGraph) -> syntax::Method { + let class = &graph[self.root]; + let message = "message"; + let mut body = String::new(); + let mut next_temp_variable_number = 0; + let mut get_temp = || { + let prefix = "generatedTemp"; + let result = format!("{}{}", prefix, next_temp_variable_number); + next_temp_variable_number += 1; + result + }; + let fields = class_fields(graph, class); + for field in &fields { + let ty_name = quote_type(graph, &field.data); + let expr = if let Some(materializer) = self.materializers.remove(&field.id()) { + (materializer)(MaterializerInput { message }) + } else { + match &field.data { + FieldData::Object { type_, non_null } => { + let value = get_temp(); + if *non_null { + self.deserialize_object( + graph, + *type_, + message, + &value, + &mut get_temp, + &mut body, + ); + } else { + self.deserialize_nullable( + graph, + *type_, + message, + &value, + &mut get_temp, + &mut body, + ); + } + value + } + FieldData::Primitive(Primitive::Int { .. }) => format!("{}.get32()", message), + FieldData::Primitive(Primitive::Long { .. 
}) => format!("{}.get64()", message), + FieldData::Primitive(Primitive::Bool) => format!("{}.getBoolean()", message), + } + }; + let expr = match self.mappers.remove(&field.id()) { + Some(mapper) => { + let value = get_temp(); + writeln!(body, "{} {} = {};", ty_name, &value, expr).unwrap(); + (mapper)(MapperInput { message, value: &value }) + } + None => expr, + }; + writeln!(body, "{} {} = {};", ty_name, &field.name, expr).unwrap(); + } + let constructor_args: Vec<_> = + fields.into_iter().map(|field| field.name.as_str()).collect(); + let constructor_args = constructor_args.join(", "); + writeln!(body, "return new {}({});", &class.name, constructor_args).unwrap(); + let message_ty = syntax::Type::named(format!("{}.Message", &self.support)); + let mut method = syntax::Method::new("deserialize", quote_class_type(graph, self.root)); + method.static_ = true; + method.body = body; + method.arguments = vec![(message_ty, message.to_owned())]; + method + } + + /// Deserialize an optional object; if it is not present, use the Java `null` value. + fn deserialize_nullable( + &self, + graph: &TypeGraph, + id: ClassId, + message: &str, + output: &str, + get_temp: &mut F, + body: &mut String, + ) where + F: FnMut() -> String, + { + let ty_name = quote_class_type(graph, id); + writeln!(body, "{ty_name} {output} = null;").unwrap(); + writeln!(body, "if ({message}.getBoolean()) {{").unwrap(); + let value = get_temp(); + self.deserialize_object(graph, id, message, &value, get_temp, body); + writeln!(body, "{output} = {value};").unwrap(); + writeln!(body, "}}").unwrap(); + } + + /// Deserialize an object that is non-optional (unconditionally present in the serialized data). 
+ fn deserialize_object( + &self, + graph: &TypeGraph, + id: ClassId, + message: &str, + output: &str, + get_temp: &mut F, + body: &mut String, + ) where + F: FnMut() -> String, + { + let ty = &graph[id]; + let ty_name = quote_class_type(graph, id); + if !ty.builtin { + writeln!(body, "{ty_name} {output} = {ty_name}.deserialize({message});").unwrap(); + return; + } + match ty.name.as_str() { + STRING => writeln!(body, "{ty_name} {output} = {message}.getString();").unwrap(), + OPTIONAL => { + let base = ty.params[0]; + let present = get_temp(); + writeln!(body, "{ty_name} {output};").unwrap(); + writeln!(body, "boolean {present} = {message}.getBoolean();").unwrap(); + writeln!(body, "if ({present}) {{").unwrap(); + let value = get_temp(); + self.deserialize_object(graph, base, message, &value, get_temp, body); + writeln!(body, "{output} = {OPTIONAL}.of({value});").unwrap(); + writeln!(body, "}} else {output} = {OPTIONAL}.empty();").unwrap(); + } + LIST => { + let base = ty.params[0]; + let count = get_temp(); + writeln!(body, "int {count} = (int){message}.get64();").unwrap(); + let list_impl = get_temp(); + let params_ = quote_params(graph, &ty.params); + let impl_ty = syntax::Type::generic("java.util.ArrayList", params_); + writeln!(body, "{impl_ty} {list_impl} = new {impl_ty}({count});").unwrap(); + let unmodifiable_list = "java.util.Collections.unmodifiableList"; + writeln!(body, "for (int i=0; i<{count}; i++) {{").unwrap(); + let value = get_temp(); + self.deserialize_object(graph, base, message, &value, get_temp, body); + writeln!(body, "{list_impl}.add({value});").unwrap(); + writeln!(body, "}}").unwrap(); + writeln!(body, "{ty_name} {output} = {unmodifiable_list}({list_impl});").unwrap(); + } + x if x == self.either_type => { + let t0 = ty.params[0]; + let t1 = ty.params[1]; + let t0 = quote_class_type(graph, t0); + let t1 = quote_class_type(graph, t1); + let name = &ty.name; + let discriminant = get_temp(); + writeln!(body, "{ty_name} {output};").unwrap(); + 
writeln!(body, "int {discriminant} = {message}.get32();").unwrap(); + writeln!(body, "switch ({discriminant}) {{").unwrap(); + writeln!( + body, + "case 0: {output} = {name}.right({t1}.deserialize({message})); break;" + ) + .unwrap(); + writeln!( + body, + "case 1: {output} = {name}.left({t0}.deserialize({message})); break;" + ) + .unwrap(); + let err = format!("Unknown discriminant in {ty_name}."); + let serialization = &self.support; + writeln!(body, "default: throw new {serialization}.FormatException({err:?}); }}") + .unwrap(); + } + _ => unimplemented!("Deserialize builtin: {}", &ty.name), + } + } +} + + +// === Sum Types === + +impl DeserializerBuilder { + /// Deserialize a `Class` of known supertype, with concrete type encoded in the serialized data. + fn deserialize_abstract(&self, graph: &TypeGraph) -> syntax::Method { + let class = &graph[self.root]; + let message = "message"; + let mut n = 0; + let mut get_temp = |base| { + let suffix = "GeneratedTemp"; + let result = format!("{}{}{}", base, suffix, n); + n += 1; + result + }; + let mut body = String::new(); + let discriminant = get_temp("discriminant"); + writeln!(body, "int {discriminant} = {message}.get32();").unwrap(); + writeln!(body, "switch ({discriminant}) {{").unwrap(); + for (key, id) in &class.discriminants { + let ty = quote_class_type(graph, *id); + writeln!(body, "case {key}: return {ty}.deserialize({message});").unwrap(); + } + let ty_name = quote_class_type(graph, self.root); + let err = format!("Unknown discriminant in {ty_name}."); + let serialization = &self.support; + writeln!(body, "default: throw new {serialization}.FormatException({:?});", err).unwrap(); + writeln!(body, "}}").unwrap(); + let message_ty = syntax::Type::named(format!("{serialization}.Message")); + let mut method = syntax::Method::new("deserialize", ty_name); + method.static_ = true; + method.body = body; + method.arguments = vec![(message_ty, message.to_owned())]; + method + } +} diff --git 
a/lib/rust/metamodel/src/java/from_meta.rs b/lib/rust/metamodel/src/java/from_meta.rs new file mode 100644 index 0000000000..ad4b96b7dd --- /dev/null +++ b/lib/rust/metamodel/src/java/from_meta.rs @@ -0,0 +1,170 @@ +//! Translating a data model in the highly-abstracted `meta` representation to a data model in the +//! `crate::java` representation. +//! +//! As the `meta` and `java` models are similar, this is a straightforward translation. The main +//! differences are: +//! - In Java, there is a distinction between a few types that are unboxed primitives and all other +//! types, which are reference types. +//! - In Java, all classes are expected to implement certain methods. These methods are attached in +//! this stage, although [`Dynamic`] methods are used so that if any classes are modified before +//! the model is rendered to syntax, the generated methods will reflect the changes. + +use crate::java::*; + +use crate::meta; + + + +// ====================== +// === Java from Meta === +// ====================== + +/// Translate a data model in the [`meta`] representation to a data model in the Java typesystem. +pub fn from_meta( + graph: &meta::TypeGraph, + either_type: impl Into, +) -> (TypeGraph, BTreeMap) { + let primitives = Default::default(); + let mut java = TypeGraph::default(); + let mut class_promises: BTreeMap<_, _> = + graph.types.keys().map(|id| (id, java.classes.unbound_key())).collect(); + let meta_to_java = class_promises.iter().map(|(key, value)| (*key, value.into())).collect(); + let either_type = either_type.into(); + let mut from_meta = FromMeta { java, meta_to_java, primitives, either_type }; + // Translate primitives first, because in Java we need to know whether a type is primitive when + // we reference the type. 
+ let mut unbound_ids: Vec<_> = class_promises.keys().copied().collect(); + for &id_ in &unbound_ids { + if let meta::Data::Primitive(ty) = &graph[id_].data { + match from_meta.primitive(ty) { + Ok(prim) => { + from_meta.primitives.insert(id_, prim); + } + Err(class) => { + from_meta.java.classes.bind(class_promises.remove(&id_).unwrap(), class); + } + } + } + } + unbound_ids.clear(); + unbound_ids.extend(class_promises.keys().copied()); + // Translate structs. + for id_ in unbound_ids { + let ty = &graph[id_]; + let fields_ = match &ty.data { + meta::Data::Primitive(_) => continue, + meta::Data::Struct(fields_) => fields_, + }; + let class = from_meta.class(ty, fields_); + from_meta.java.classes.bind(class_promises.remove(&id_).unwrap(), class); + } + let FromMeta { java, meta_to_java, .. } = from_meta; + (java, meta_to_java) +} + +#[derive(Debug)] +struct FromMeta { + java: TypeGraph, + meta_to_java: BTreeMap, + primitives: BTreeMap, + either_type: String, +} + +impl FromMeta { + /// Translate a primitive in the [`meta`] model to either a Java primitive, or a Java class. + fn primitive(&self, ty: &meta::Primitive) -> Result { + match ty { + meta::Primitive::Bool => Ok(Primitive::Bool), + meta::Primitive::U64 => Ok(Primitive::Long { unsigned: true }), + meta::Primitive::U32 => Ok(Primitive::Int { unsigned: true }), + meta::Primitive::String => Err(Class::string()), + meta::Primitive::Option(t0_) => Err(Class::optional(self.meta_to_java[t0_])), + meta::Primitive::Sequence(t0_) => Err(Class::list(self.meta_to_java[t0_])), + meta::Primitive::Result(t0_, t1_) => { + let t0 = self.meta_to_java[t0_]; + let t1 = self.meta_to_java[t1_]; + Err(Class::builtin(&self.either_type, vec![t1, t0])) + } + } + } + + /// Translate a type in the [`meta`] model to a Java class. 
+ fn class<'f>( + &self, + ty: &meta::Type, + fields_: impl IntoIterator, + ) -> Class { + let name = ty.name.to_pascal_case(); + let abstract_ = ty.abstract_; + let sealed = ty.closed; + let parent = ty.parent.as_ref().map(|id| self.meta_to_java[id]); + let mut methods = match abstract_ { + true => abstract_methods(), + false => standard_methods(), + }; + let fields_ = fields_.into_iter(); + let mut fields = Vec::with_capacity(fields_.size_hint().0); + for field in fields_ { + let meta::Field { name, type_, hide, .. } = field; + let name = name.to_camel_case().expect("Unimplemented: Tuples."); + let field = match self.primitives.get(type_) { + Some(primitive) => Field::primitive(name, *primitive), + None => Field::object(name, self.meta_to_java[type_], true), + }; + if !hide { + methods.push(Method::Dynamic(Dynamic::Getter(field.id()))); + } + fields.push(field); + } + let discriminants = + ty.discriminants.iter().map(|(key, id)| (*key, self.meta_to_java[id])).collect(); + let child_field = ty.child_field; + Class { + name, + parent, + abstract_, + sealed, + fields, + methods, + discriminants, + child_field, + ..Default::default() + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_converting_graph() { + let mut meta = meta::TypeGraph::new(); + let u32_name = meta::TypeName::from_pascal_case("U32"); + let u32_ty = meta::Type::new(u32_name, meta::Data::Primitive(meta::Primitive::U32)); + let u32_ = meta.types.insert(u32_ty); + let inner_field_name = meta::FieldName::from_snake_case("inner_field"); + let inner_fields = vec![meta::Field::named(inner_field_name, u32_)]; + let inner_name = meta::TypeName::from_pascal_case("Inner"); + let inner = + meta.types.insert(meta::Type::new(inner_name, meta::Data::Struct(inner_fields))); + let outer_field_inner_name = meta::FieldName::from_snake_case("inner"); + let outer_name = meta::TypeName::from_pascal_case("Outer"); + let outer_fields = vec![meta::Field::named(outer_field_inner_name, inner)]; + let 
outer_ty = meta::Type::new(outer_name, meta::Data::Struct(outer_fields)); + let outer = meta.types.insert(outer_ty); + let (java, meta_to_java) = from_meta(&meta, "Either"); + let outer_ = meta_to_java[&outer]; + let inner_ = meta_to_java[&inner]; + assert_eq!(java[outer_].name, "Outer"); + assert_eq!(java[inner_].name, "Inner"); + assert_eq!(java[outer_].fields[0].data, FieldData::Object { + type_: inner_, + non_null: true, + }); + assert_eq!( + java[inner_].fields[0].data, + FieldData::Primitive(Primitive::Int { unsigned: true }) + ); + } +} diff --git a/lib/rust/metamodel/src/java/graphviz.rs b/lib/rust/metamodel/src/java/graphviz.rs new file mode 100644 index 0000000000..ce4266114a --- /dev/null +++ b/lib/rust/metamodel/src/java/graphviz.rs @@ -0,0 +1,48 @@ +//! Generating graphical representations of Java type systems. + +use super::*; + +use crate::graphviz::EdgeType; +use crate::graphviz::Graph; +use crate::graphviz::Node; +use crate::graphviz::NodeType; + + + +// ========================= +// === Graphviz Graphing === +// ========================= + +/// Produce a graphviz graph of the datatypes. 
+pub fn graph(java: &TypeGraph) -> Graph { + let mut graph = Graph::default(); + let classes = &java.classes; + for (id, ty) in classes.iter() { + let sname = format!("{}{}", ty.name, id); + let node_type = match &ty.abstract_ { + true => NodeType::AbstractStruct, + false => NodeType::Struct, + }; + let label = ty.name.clone(); + let primitive = ty.builtin; + graph.nodes.insert(sname.clone(), Node { primitive, node_type, label }); + if let Some(&parent) = ty.parent.as_ref() { + let sparent = format!("{}{}", classes[id].name, parent); + graph.edges.push((sparent.clone(), sname.clone(), EdgeType::Subtype)); + } + for field in &ty.fields { + match &field.data { + FieldData::Object { type_, non_null } => { + let sname2 = format!("{}{}", classes[id].name, type_); + let edgetype = match non_null { + false => EdgeType::OptionalField, + true => EdgeType::Field, + }; + graph.edges.push((sname.clone(), sname2, edgetype)); + } + FieldData::Primitive(_) => {} + } + } + } + graph +} diff --git a/lib/rust/metamodel/src/java/implementation.rs b/lib/rust/metamodel/src/java/implementation.rs new file mode 100644 index 0000000000..2244a9f021 --- /dev/null +++ b/lib/rust/metamodel/src/java/implementation.rs @@ -0,0 +1,336 @@ +//! Given a [`java`] representation of a data model, produce a [`java::syntax`] tree that can be +//! rendered to Java code implementing the data model. + +use crate::java::*; + +use std::fmt::Write; + + + +// =================================== +// === Implementing Java Datatypes === +// =================================== + +/// Produce Java syntax implement all the types modeled in a [`TypeGraph`]. 
+pub fn implement(graph: &TypeGraph, package: &str) -> Vec { + let mut implementations = BTreeMap::new(); + for (id, class) in graph.classes.iter() { + if !class.builtin { + implementations.insert(id, implement_class(graph, id)); + } + } + for (id, class) in graph.classes.iter() { + if let Some(parent) = class.parent { + let mut inner = implementations.remove(&id).unwrap(); + inner.static_ = true; + implementations.get_mut(&parent).unwrap().nested.push(inner); + } + } + for class in implementations.values_mut() { + class.package = Some(package.to_owned()); + } + implementations.into_values().collect() +} + +/// For some [`Class`] (identified by ID) in a [`TypeGraph`], get its qualified name, relative to +/// its package. If it is not a nested class, this will be the same as its unqualified name; if it +/// is a nested class, this will include the hierarchy of classes containing it as part of its +/// namespace. +/// +/// # Examples +/// +/// For a [`Class`] equivalent to the following: +/// ```java +/// class Token { +/// static class Ident { } +/// }; +/// ``` +/// The `path` would be "Token.Ident". +/// +/// For a non-nested [`Class`], like this: +/// ```java +/// class Error { +/// String message; +/// }; +/// ``` +/// The `path` would be "Error". +pub fn path(graph: &TypeGraph, id: ClassId) -> String { + let mut components = vec![]; + let mut next_id = Some(id); + while let Some(id) = next_id { + let ty = &graph[id]; + components.push(ty.name.as_str()); + next_id = ty.parent; + } + components.reverse(); + components.join(".") +} + +/// Get the fields owned by a class, including its own fields and the fields of its supertypes. 
+pub fn class_fields<'v, 's: 'v, 'c: 'v>(graph: &'s TypeGraph, class: &'c Class) -> Vec<&'v Field> { + let mut out = vec![]; + class_fields_(graph, class, &mut out, None, None); + out +} + +fn class_fields_<'v, 's: 'v, 'c: 'v>( + graph: &'s TypeGraph, + class: &'c Class, + out: &mut Vec<&'v Field>, + start: Option, + end: Option, +) { + let mut fields = &class.fields[..]; + if let Some(end) = end { + fields = &fields[..end]; + } + if let Some(start) = start { + fields = &fields[start..]; + } else if let Some(parent) = class.parent { + let index = Some(graph[parent].child_field.unwrap()); + class_fields_(graph, &graph[parent], out, None, index); + out.extend(fields); + class_fields_(graph, &graph[parent], out, index, None); + return; + } + out.extend(fields); +} + +/// Given a [`TypeGraph`] and a definition of a field's contents ([`FieldData`]), produce what is +/// referred to in the Java AST specification as an an `UnannType`[1]. This value is suitable for +/// use as the type portion of a field declaration, local variable declaration, formal parameter, or +/// return type specification. +/// +/// [1]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannType +pub fn quote_type(graph: &TypeGraph, data: &FieldData) -> syntax::Type { + let class = match data { + FieldData::Object { type_, .. } => return quote_class_type(graph, *type_), + FieldData::Primitive(Primitive::Int { .. }) => "int", + FieldData::Primitive(Primitive::Bool) => "boolean", + FieldData::Primitive(Primitive::Long { .. }) => "long", + }; + syntax::Type::named(class) +} + +/// Given a [`TypeGraph`] and an ID identifying a [`Class`], produce what is referred to in the Java +/// AST specification as an an `UnannClassOrInterfaceType`[1]. This value is suitable for +/// use anywhere an `UnannType`[2] is expected. 
+/// +/// [1]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannClassOrInterfaceType +/// [2]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannType +pub fn quote_class_type(graph: &TypeGraph, id: ClassId) -> syntax::Type { + let class = path(graph, id); + let params = quote_params(graph, &graph[id].params); + syntax::Type { class, params } +} + +/// Render a parameter list. +pub fn quote_params<'a>( + graph: &TypeGraph, + params: impl IntoIterator, +) -> Vec { + params.into_iter().map(|ty| path(graph, *ty)).collect() +} + + +// === Helpers === + +/// Given a model of a field ([`Field`]), create a representation of the Java syntax defining a +/// class field with name, type, and attributes as specified in the model. +fn quote_field(graph: &TypeGraph, field: &Field) -> syntax::Field { + let Field { name, data, id: _ } = field; + let type_ = quote_type(graph, data); + let name = name.clone(); + let final_ = true; + syntax::Field { type_, name, final_ } +} + +/// Given a model of a method ([`Method`]), create a representation of the Java syntax implementing +/// the method. +fn method(graph: &TypeGraph, method: &Method, class: &Class) -> syntax::Method { + match method { + Method::Dynamic(method) => implement_method(graph, method, class), + Method::Raw(method) => method.clone(), + } +} + +/// Produce a representation of Java syntax implementing the specified [`Dynamic`] method, for the +/// specified [`Class`] within the specified [`TypeGraph`]. 
+fn implement_method(graph: &TypeGraph, method: &Dynamic, class: &Class) -> syntax::Method { + match method { + Dynamic::Constructor => implement_constructor(graph, class), + Dynamic::HashCode => implement_hash_code(graph, class), + Dynamic::Equals => implement_equals(graph, class), + Dynamic::ToString => implement_to_string(graph, class), + Dynamic::Getter(field) => implement_getter(graph, class, *field), + } +} + +/// Produce a representation of Java syntax implementing a constructor for the given [`Class`]. +/// +/// The constructor will accept a value for each of its fields, and for all fields of any classes +/// it extends, in an order that matches the order they appear in serialized formats. +/// +/// For all field that have the `non_null` property sets (see [`FieldData`]), the constructor will +/// produce `requireNonNull`[1] statements validating the corresponding inputs. +/// +/// [1]: https://docs.oracle.com/javase/8/docs/api/java/util/Objects.html#requireNonNull-T- +fn implement_constructor(graph: &TypeGraph, class: &Class) -> syntax::Method { + let suffix = "__GeneratedArgument"; + let arguments = class_fields(graph, class) + .into_iter() + .map(|field| (quote_type(graph, &field.data), format!("{}{}", &field.name, &suffix))) + .collect(); + let mut body = vec![]; + if let Some(parent) = class.parent { + let suffix = |field: &Field| format!("{}{}", &field.name, &suffix); + let fields: Vec<_> = class_fields(graph, &graph[parent]).into_iter().map(suffix).collect(); + body.push(format!("super({});", fields.join(", "))); + } + for field in &class.fields { + if let FieldData::Object { non_null: true, .. 
} = &field.data { + body.push(format!("java.util.Objects.requireNonNull({}{});", &field.name, &suffix)); + } + } + let own_field_initializers = + class.fields.iter().map(|field| format!("{} = {}{};", &field.name, &field.name, &suffix)); + body.extend(own_field_initializers); + let mut method = syntax::Method::constructor(class.name.clone()); + method.arguments = arguments; + method.body = body.join("\n"); + method +} + +/// Produce a representation of Java syntax implementing a method overriding `Object.hashCode`[1] +/// for the specified [`Class`]. +/// +/// The implementation will pass all fields of the class, and of any superclasses, to +/// `java.util.Objects.hash`[2] and return the result. +/// +/// [1]: https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html#hashCode() +/// [2]: https://docs.oracle.com/javase/8/docs/api/java/util/Objects.html#hash-java.lang.Object...- +fn implement_hash_code(graph: &TypeGraph, class: &Class) -> syntax::Method { + let fields: Vec<_> = + class_fields(graph, class).into_iter().map(|field| field.name.as_str()).collect(); + let fields = fields.join(", "); + let body = format!("return java.util.Objects.hash({});", fields); + let return_ = FieldData::Primitive(Primitive::Int { unsigned: false }); + let return_ = quote_type(graph, &return_); + let mut method = syntax::Method::new("hashCode", return_); + method.override_ = true; + method.body = body; + method +} + +/// Produce a representation of Java syntax implementing a method overriding `Object.equals`[1] +/// for the specified [`Class`]. +/// +/// The implementation: +/// - Returns `true` if the objects are identity-equal. +/// - Returns `false` if the other object is not of the same type as this object. +/// Otherwise, returns a boolean-and of a field-by-field comparison: +/// - Primitive fields are compared with `==`. +/// - Reference-type fields are compared with `Object.equals`. 
+/// +/// [1]: https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html#equals(java.lang.Object) +fn implement_equals(graph: &TypeGraph, class: &Class) -> syntax::Method { + let object = "object"; + let that = "that"; + let compare = + |field: &Field| field.data.fmt_equals(&field.name, &format!("{that}.{}", &field.name)); + let field_comparisons = class_fields(graph, class).into_iter().map(compare); + let mut values = vec!["true".to_string()]; + values.extend(field_comparisons); + let expr = values.join(" && "); + let body = vec![ + format!("if ({} == this) return true;", &object), + format!("if (!({} instanceof {})) return false;", &object, &class.name), + format!("{} {} = ({}){};", &class.name, &that, &class.name, &object), + format!("return {};", expr), + ]; + let return_ = FieldData::Primitive(Primitive::Bool); + let return_ = quote_type(graph, &return_); + let mut method = syntax::Method::new("equals", return_); + method.override_ = true; + method.arguments = vec![(syntax::Type::named("Object"), object.to_string())]; + method.body = body.join("\n"); + method +} + +/// Produce a representation of Java syntax implementing a method overriding `Object.toString`[1] +/// for the specified [`Class`]. +/// +/// The generated `toString` formats all the object's fields in the same manner as would be done by +/// a Java `record`[2] with the same fields. 
+/// +/// [1]: https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html#toString() +/// [2]: https://openjdk.org/jeps/395 +fn implement_to_string(graph: &TypeGraph, class: &Class) -> syntax::Method { + let string_builder = "stringBuilder"; + let stringify = + |field: &Field| format!("{string_builder}.append(String.valueOf({}));", field.name); + let fields: Vec<_> = class_fields(graph, class).into_iter().map(stringify).collect(); + let mut body = String::new(); + let ty_name = &class.name; + writeln!(body, "StringBuilder {string_builder} = new StringBuilder();").unwrap(); + writeln!(body, "{string_builder}.append(\"{ty_name}[\");").unwrap(); + writeln!(body, "{}", fields.join(&format!("\n{string_builder}.append(\", \");\n"))).unwrap(); + writeln!(body, "{string_builder}.append(\"]\");").unwrap(); + writeln!(body, "return {string_builder}.toString();").unwrap(); + let return_ = syntax::Type::named("String"); + let mut method = syntax::Method::new("toString", return_); + method.override_ = true; + method.body = body; + method +} + +/// Produce a representation of Java syntax implementing a method returning the value of a field +/// (identified by [`FieldId`]) of the specified [`Class`]. +fn implement_getter(graph: &TypeGraph, class: &Class, id: FieldId) -> syntax::Method { + let field = class.fields.iter().find(|field| field.id() == id).unwrap(); + getter(graph, field) +} + +/// Produce a representation of Java syntax implementing a method returning the value of the +/// specified [`Field`]. The method must be attached to the same [`syntax::Class`] in which the +/// [`Field`] is defined. 
+fn getter(graph: &TypeGraph, field: &Field) -> syntax::Method { + let getter_name = |field| { + let field = crate::meta::Identifier::from_camel_case(field); + let mut name = crate::meta::Identifier::from_camel_case("get"); + name.append(field); + name.to_camel_case() + }; + let type_ = quote_type(graph, &field.data); + let mut method = syntax::Method::new(getter_name(&field.name), type_); + method.body = format!("return {};", &field.name); + method +} + +/// Produce a representation of Java syntax defining a `class` as specified by the given [`Class`] +/// (identified by its [`ClassId`]). +fn implement_class(graph: &TypeGraph, id: ClassId) -> syntax::Class { + let class = &graph[id]; + let name = class.name.clone(); + let abstract_ = class.abstract_; + let final_ = !abstract_; + let static_ = false; + let parent = class.parent.map(|id| quote_class_type(graph, id)); + let fields = class.fields.iter().map(|field| quote_field(graph, field)).collect(); + let nested = vec![]; + let methods = class.methods.iter().map(|m| method(graph, m, class)).collect(); + let package = Default::default(); + let sealed = class.sealed.then(Default::default); + syntax::Class { + package, + name, + abstract_, + final_, + static_, + parent, + fields, + methods, + nested, + sealed, + } +} diff --git a/lib/rust/metamodel/src/java/mod.rs b/lib/rust/metamodel/src/java/mod.rs new file mode 100644 index 0000000000..a0827fd045 --- /dev/null +++ b/lib/rust/metamodel/src/java/mod.rs @@ -0,0 +1,250 @@ +//! Representation of datatype definitions in the Java typesystem. 
+ + +// ============== +// === Export === +// ============== + +pub mod bincode; + + + +mod from_meta; +#[cfg(feature = "graphviz")] +mod graphviz; +mod implementation; +pub mod syntax; +pub mod transform; + +use crate::data_structures::VecMap; +use derive_more::Index; +use derive_more::IndexMut; +pub use from_meta::from_meta; +pub use implementation::implement as to_syntax; +use std::collections::BTreeMap; + + + +// ===================== +// === Java Builtins === +// ===================== + +/// Fully-qualified name of Java's `Optional` type. +pub const OPTIONAL: &str = "java.util.Optional"; +/// Fully-qualified name of Java's `List` type. +pub const LIST: &str = "java.util.List"; +/// Fully-qualified name of Java's `String` type. +pub const STRING: &str = "String"; + + + +// ============================== +// === Type Parameterizations === +// ============================== + +/// Globally unique, stable identifier for a `Field`. +pub type FieldId = crate::data_structures::Id; +/// Identifies a Java class within a `TypeGraph`. +pub type ClassId = crate::data_structures::vecmap::Key; +/// Identifier for a class whose value hasn't been set yet. +pub type UnboundClassId = crate::data_structures::vecmap::UnboundKey; + + + +// ====================== +// === Datatype Types === +// ====================== + +/// A Java class. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct Class { + /// The name of the class, not including package. + pub name: String, + /// Parameters of a generic class. + pub params: Vec, + /// The parent class, if any. + pub parent: Option, + /// Whether this class is `abstract`. + pub abstract_: bool, + /// Whether this class is `sealed`. + pub sealed: bool, + /// The data fields. + pub fields: Vec, + /// The class's methods. + pub methods: Vec, + builtin: bool, + // Attributes + discriminants: BTreeMap, + child_field: Option, +} + +impl Class { + /// Create a new "builtin" class. 
+ pub fn builtin(name: &str, fields: impl IntoIterator) -> Self { + let params: Vec<_> = fields.into_iter().collect(); + let name = name.to_owned(); + let builtin = true; + let fields = params.iter().map(|&type_| Field::object("data", type_, true)).collect(); + Class { name, params, builtin, fields, ..Default::default() } + } + + /// Define a type for Java's `Optional` instantiated with a type. + pub fn optional(param: ClassId) -> Self { + Self::builtin(OPTIONAL, Some(param)) + } + + /// Define a type for Java's `List` instantiated with a type. + pub fn list(param: ClassId) -> Self { + Self::builtin(LIST, Some(param)) + } + + /// Define a type for Java's `String` instantiated with a type. + pub fn string() -> Self { + Self::builtin(STRING, []) + } + + /// Get a field by name. + pub fn find_field(&self, name: &str) -> Option<&Field> { + self.fields.iter().find(|field| field.name == name) + } +} + +/// A method of a class. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Method { + /// A `Dynamic` method. + Dynamic(Dynamic), + /// A literal method implementation. + Raw(syntax::Method), +} + +/// A method that is rendered to syntax on demand. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Dynamic { + /// A constructor. + Constructor, + /// `hashCode` method. + HashCode, + /// `equals` method. + Equals, + /// `toString` method. + ToString, + /// A read-accessor for a field. + Getter(FieldId), +} + +impl From for Method { + fn from(method: Dynamic) -> Self { + Method::Dynamic(method) + } +} + +fn abstract_methods() -> Vec { + vec![Dynamic::Constructor.into()] +} + +fn standard_methods() -> Vec { + vec![ + Dynamic::Constructor.into(), + Dynamic::HashCode.into(), + Dynamic::Equals.into(), + Dynamic::ToString.into(), + ] +} + +/// A data field of a class. 
+#[derive(Debug, PartialEq, Eq)] +pub struct Field { + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub data: FieldData, + id: FieldId, +} + +impl Field { + /// Create a field referencing a `Class` of a specified type. + pub fn object(name: impl Into, type_: ClassId, non_null: bool) -> Self { + let name = name.into(); + let data = FieldData::Object { type_, non_null }; + let id = Default::default(); + Self { name, data, id } + } + + /// Create a field holding primitive data. + pub fn primitive(name: impl Into, primitive: Primitive) -> Self { + let name = name.into(); + let data = FieldData::Primitive(primitive); + let id = Default::default(); + Self { name, data, id } + } + + #[allow(missing_docs)] + pub fn id(&self) -> FieldId { + self.id + } +} + +/// A field's data contents. +#[derive(Debug, Clone, PartialEq, Eq, Copy, PartialOrd, Ord, Hash)] +pub enum FieldData { + /// A reference to an object. + Object { + #[allow(missing_docs)] + type_: ClassId, + /// If `true`, this field should be subject to null-checking in constructors, and can be + /// assumed always to be present. + non_null: bool, + }, + /// An unboxed primitive. + Primitive(Primitive), +} + +impl FieldData { + fn fmt_equals(&self, a: &str, b: &str) -> String { + match self { + FieldData::Object { .. } => format!("{}.equals({})", a, b), + FieldData::Primitive(_) => format!("({} == {})", a, b), + } + } +} + +/// An unboxed type; i.e. a type that is not a subtype of `java.lang.Object`. +#[derive(Debug, Clone, PartialEq, Eq, Copy, PartialOrd, Ord, Hash)] +pub enum Primitive { + /// Java's `boolean` + Bool, + /// Java's `int` + Int { + /// If `true`, arithmetic on this value is to be performed with unsigned operations. + unsigned: bool, + }, + /// Java's `long` + Long { + /// If `true`, arithmetic on this value is to be performed with unsigned operations. 
use std::fmt;



/// Java language version the rendered code must be accepted by.
const TARGET_VERSION: usize = 14;
/// Lowest [`TARGET_VERSION`] for which `sealed` / `permits` are rendered.
const SEALED_CLASSES_VERSION: usize = 15;



// ===================
// === Syntax Data ===
// ===================

/// A class definition.
#[derive(Debug)]
pub struct Class {
    /// If present, a `package` declaration is rendered before the class.
    pub package:   Option<String>,
    /// The (unqualified) class name.
    pub name:      String,
    /// Whether to render the `abstract` modifier.
    pub abstract_: bool,
    /// Whether to render the `final` modifier.
    pub final_:    bool,
    /// Whether to render the `static` modifier.
    pub static_:   bool,
    /// The type rendered after `extends`, if any.
    pub parent:    Option<Type>,
    /// The class's fields, in rendering order.
    pub fields:    Vec<Field>,
    /// The class's methods, in rendering order.
    pub methods:   Vec<Method>,
    /// If present, the class is sealed; a non-empty list is rendered as a `permits` clause.
    /// Nothing is rendered for this when [`TARGET_VERSION`] does not support sealed classes.
    pub sealed:    Option<Vec<Type>>,
    /// Classes defined in the scope of this class.
    pub nested:    Vec<Class>,
}

/// A class field definition.
#[derive(Debug)]
pub struct Field {
    /// The field's type.
    pub type_:  Type,
    /// The field's name.
    pub name:   String,
    /// Whether to render the `final` modifier.
    pub final_: bool,
}

/// Identifies a type; this corresponds to `UnannType`[1] in the Java specification.
/// It is suitable for use as the type portion of a field declaration, local variable declaration,
/// formal parameter, or return type specification.
///
/// [1]: https://docs.oracle.com/javase/specs/jls/se18/html/jls-8.html#jls-UnannType
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Type {
    /// Class name.
    pub class:  String,
    /// Parameter list.
    pub params: Vec<String>,
}

impl Type {
    /// A simple type.
    pub fn named(name: impl Into<String>) -> Self {
        Type { class: name.into(), params: vec![] }
    }

    /// A generic type.
    pub fn generic(name: impl Into<String>, params: Vec<String>) -> Self {
        Type { class: name.into(), params }
    }
}

/// A method.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Method {
    /// The method's name; for a constructor, the class name.
    pub name:      String,
    /// The formal parameters, as (type, name) pairs.
    pub arguments: Vec<(Type, String)>,
    /// Return value, unless this is a constructor.
    pub return_:   Option<Type>,
    /// Whether to render the `static` modifier.
    pub static_:   bool,
    /// Whether to render the `final` modifier.
    pub final_:    bool,
    /// Literal body, not including brackets.
    pub body:      String,
    /// Whether to render an `@Override` annotation.
    pub override_: bool,
    /// Types rendered in a `throws` clause, if non-empty.
    pub throws:    Vec<Type>,
}


// === Constructors ===

impl Method {
    /// Create a method with the given name and return type; everything else is empty/unset.
    pub fn new(name: impl Into<String>, return_: Type) -> Self {
        Method { return_: Some(return_), ..Self::constructor(name) }
    }

    /// Create a constructor: a method without return type; everything else is empty/unset.
    pub fn constructor(name: impl Into<String>) -> Self {
        Method {
            name:      name.into(),
            arguments: Default::default(),
            return_:   Default::default(),
            static_:   Default::default(),
            final_:    Default::default(),
            body:      Default::default(),
            override_: Default::default(),
            throws:    Default::default(),
        }
    }
}



// =========================
// === Rendering to Text ===
// =========================

impl fmt::Display for Class {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Class {
            package,
            name,
            abstract_,
            final_,
            static_,
            parent,
            fields,
            methods,
            nested,
            sealed,
        } = self;
        if let Some(package) = package {
            writeln!(f, "package {};", package)?;
        }
        // `permits` is only legal on a class declared `sealed`, so both are subject to the same
        // version check: either both may be rendered, or neither.
        let sealed = sealed.as_ref().filter(|_| TARGET_VERSION >= SEALED_CLASSES_VERSION);
        let mut tokens = vec!["public".to_string()];
        if *static_ {
            tokens.push("static".to_string());
        }
        if *final_ {
            tokens.push("final".to_string());
        }
        if *abstract_ {
            tokens.push("abstract".to_string());
        }
        if sealed.is_some() {
            tokens.push("sealed".to_string());
        }
        tokens.push("class".to_string());
        tokens.push(name.to_string());
        if let Some(parent) = parent {
            tokens.push("extends".to_string());
            tokens.push(parent.to_string());
        }
        if let Some(permitted) = sealed.filter(|types| !types.is_empty()) {
            let types: Vec<_> = permitted.iter().map(|ty| ty.to_string()).collect();
            tokens.push(format!("permits {}", types.join(", ")));
        }
        writeln!(f, "{} {{", tokens.join(" "))?;
        for field in fields {
            write!(f, "{}", field)?;
        }
        for method in methods {
            write!(f, "{}", method)?;
        }
        for class in nested {
            write!(f, "{}", class)?;
        }
        writeln!(f, "}}")?;
        Ok(())
    }
}

impl fmt::Display for Field {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Field { type_, name, final_ } = self;
        let mut tokens = vec!["protected".to_string()];
        if *final_ {
            tokens.push("final".to_string());
        }
        tokens.push(type_.to_string());
        tokens.push(name.clone());
        writeln!(f, "{};", tokens.join(" "))
    }
}

impl fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", &self.class)?;
        if !self.params.is_empty() {
            write!(f, "<{}>", self.params.join(", "))?;
        }
        Ok(())
    }
}

impl fmt::Display for Method {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Method { name, arguments, return_, static_, final_, body, override_, throws } = self;
        let mut tokens = vec![];
        if *override_ {
            tokens.push("@Override".to_string());
        }
        tokens.push("public".to_string());
        if *static_ {
            tokens.push("static".to_string());
        }
        if *final_ {
            tokens.push("final".to_string());
        }
        if let Some(return_) = return_ {
            tokens.push(return_.to_string());
        }
        tokens.push(name.to_string());
        let tokens = tokens.join(" ");
        let arguments: Vec<_> =
            arguments.iter().map(|(ty, name)| format!("{} {}", ty, name)).collect();
        let arguments = arguments.join(", ");
        writeln!(f, "{}({})", tokens, arguments)?;
        if !throws.is_empty() {
            let types: Vec<_> = throws.iter().map(|ty| ty.to_string()).collect();
            let types = types.join(", ");
            writeln!(f, "throws {types}")?;
        }
        writeln!(f, "{{")?;
        writeln!(f, "{body}")?;
        writeln!(f, "}}")?;
        Ok(())
    }
}
+/// +/// `TypeId` validity: +/// `TypeId`s that referred to `Optional` types: No long resolvable after transformation. +/// All other `TypeId`s: Unaffected. +pub fn optional_to_null(mut graph: TypeGraph) -> TypeGraph { + let mut optional_to_class = BTreeMap::new(); + for (id, class) in graph.classes.iter() { + if class.builtin && class.name == OPTIONAL { + let wrapped = class.params[0]; + optional_to_class.insert(id, wrapped); + } + } + let no_multilevel = "Handling of multi-level nullability has not been implemented."; + for class in optional_to_class.values() { + assert!(!optional_to_class.contains_key(class), "{}", no_multilevel); + } + for class in graph.classes.values_mut() { + for field in &mut class.fields { + if let FieldData::Object { type_, non_null } = &mut field.data { + if let Some(mapped) = optional_to_class.get(type_) { + assert!(*non_null, "{}", no_multilevel); + *non_null = false; + *type_ = *mapped; + } + } + } + } + for &id in optional_to_class.keys() { + graph.classes.remove(id); + } + graph +} diff --git a/lib/rust/metamodel/src/lib.rs b/lib/rust/metamodel/src/lib.rs new file mode 100644 index 0000000000..1e80bcca4b --- /dev/null +++ b/lib/rust/metamodel/src/lib.rs @@ -0,0 +1,75 @@ +//! A *metamodel* is a data model for data models in a particular typesystem. This crate defines a +//! few metamodels, some operations for inspecting and transforming data models within a metamodel, +//! and operations for translating a data model from one metamodel to another. +//! +//! # Modules +//! +//! The core modules define the metamodels, and operations on them: +//! - [`rust`]: A metamodel representing data models in the Rust typesystem. +//! - [`java`]: A metamodel representing data models in the Java typesystem. +//! - [`meta`]: An abstract metamodel, used to perform language-independent analysis of data models, +//! and as an intermediate when translating data models between language-specific metamodels. +//! +//! Some accessory modules: +//! 
- [`graphviz`]: Support for rendering graphical representations of data models with GraphViz. +//! This serves primarily to support developing and understanding transformations on and between +//! metamodels. +//! +//! # Use cases +//! +//! The primary use case currently supported by this crate is Rust-to-Java datatype transpilation: +//! - Accept a Rust data model as an input (which may be obtained by the [`enso_reflect`] crate). +//! - Translate the data model to a Java data model (via the [`meta`] intermediate representation), +//! using [`rust::to_meta`] and [`java::from_meta`]. +//! - Derive deserialization for the Java data model, using [`java::bincode`]. +//! - Generate Java code implementing the data model, using [`java::to_syntax`]. +//! +//! Other use cases supported include: +//! - Analyze a data model's serialization to generate exhaustive test cases, using +//! [`meta::serialization::testcases`]. +//! - Produce graphs of type relationships, using [`graphviz::Graph`]. +//! +//! # Design +//! +//! A major design principle of this crate is: Operate on the most abstracted representation +//! possible. Primarily, this means we don't try to analyze or reason about *syntax* any more than +//! necessary. The [`rust`] data produced by [`enso_reflect`] is much higher-level than the [`syn`] +//! trees it is created from; it is easier to reason about a graph of datatypes than the tree of +//! tokens that implements it. The [`meta`] intermediate representation is even more abstract, and +//! simpler to operate on than Rust or Java. When we manipulate the data in Java terms (i.e. using +//! [`java::transform::optional_to_null`] to rewrite `Optional` types to nullable types), we do so +//! on the [`java`] graph of types. It is not until we are done with analysis and transformation +//! that we generate a [`java::syntax`] tree from the [`java`] types. [`java::syntax`] is treated as +//!
write-only; we never try to inspect it, but just use its [`Display`] implementation to produce +//! Java code after all computation is completed. + +// === Features === +#![feature(map_first_last)] +#![feature(associated_type_defaults)] +#![feature(option_get_or_insert_default)] +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + + + +mod data_structures; +#[cfg(feature = "graphviz")] +pub mod graphviz; +#[cfg(feature = "java")] +pub mod java; +pub mod meta; +#[cfg(feature = "rust")] +pub mod rust; diff --git a/lib/rust/metamodel/src/meta/graphviz.rs b/lib/rust/metamodel/src/meta/graphviz.rs new file mode 100644 index 0000000000..3f69e517e6 --- /dev/null +++ b/lib/rust/metamodel/src/meta/graphviz.rs @@ -0,0 +1,71 @@ +//! Graphical representation of a `TypeGraph` with GraphViz. + +use super::*; + +use crate::graphviz::EdgeType; +use crate::graphviz::Graph; +use crate::graphviz::Node; +use crate::graphviz::NodeType; + + + +// ============= +// === Graph === +// ============= + +/// Produce a GraphViz graph representation of the relationships between the types. 
+pub fn graph(typegraph: &TypeGraph) -> Graph {
+    let mut graph = Graph::default();
+    let types = &typegraph.types;
+    for (id, ty) in types.iter() {
+        let sname = format!("{}{}", ty.name, id);
+        // Match arms are tried in order, so the more specific guard must come first; otherwise
+        // the plain `abstract_` arm would shadow it and `NodeType::Enum` could never be chosen.
+        let node_type = match &ty.data {
+            Data::Struct(_) if ty.abstract_ && ty.closed => NodeType::Enum,
+            Data::Struct(_) if ty.abstract_ => NodeType::AbstractStruct,
+            Data::Struct(_) => NodeType::Struct,
+            Data::Primitive(_) => NodeType::Struct,
+        };
+        let primitive = matches!(&ty.data, Data::Primitive(_));
+        let label = ty.name.to_string();
+        graph.nodes.insert(sname.clone(), Node { primitive, node_type, label });
+        // Inheritance edges run from each parent or mixin to this type.
+        let parentlike = ty.parent.iter().chain(&ty.mixins);
+        for id in parentlike {
+            let sparent = format!("{}{}", types[id].name, id);
+            graph.edges.push((sparent.clone(), sname.clone(), EdgeType::Subtype));
+        }
+        // Composition edges run from this type to every type it contains.
+        match &ty.data {
+            Data::Struct(fields) =>
+                for Field { type_, name: _, hide: _, id: _ } in fields {
+                    let sname2 = format!("{}{}", types[type_].name, type_);
+                    graph.edges.push((sname.clone(), sname2, EdgeType::Field));
+                },
+            Data::Primitive(Primitive::U32)
+            | Data::Primitive(Primitive::Bool)
+            | Data::Primitive(Primitive::U64)
+            | Data::Primitive(Primitive::String) => {}
+            Data::Primitive(Primitive::Sequence(t0))
+            | Data::Primitive(Primitive::Option(t0)) => graph.edges.push((
+                sname.clone(),
+                format!("{}{}", types[t0].name, t0),
+                EdgeType::Field,
+            )),
+            Data::Primitive(Primitive::Result(t0, t1)) => {
+                graph.edges.push((
+                    sname.clone(),
+                    format!("{}{}", types[t0].name, t0),
+                    EdgeType::Field,
+                ));
+                graph.edges.push((
+                    sname.clone(),
+                    format!("{}{}", types[t1].name, t1),
+                    EdgeType::Field,
+                ));
+            }
+        }
+    }
+    graph
+}
diff --git a/lib/rust/metamodel/src/meta/mod.rs b/lib/rust/metamodel/src/meta/mod.rs
new file mode 100644
index 0000000000..a0130ecaf1
--- /dev/null
+++ b/lib/rust/metamodel/src/meta/mod.rs
@@ -0,0 +1,410 @@
+//! 
A language-independent metamodel for representing data models. +//! +//! This is used as an intermediate representation in translation from Rust to Java to: +//! - Decouple the complexities of the source language from those of the target language. +//! - Provide a simple representation in which to apply transformations. +//! +//! It is also used for language-independent analysis of data models. + + + +#[cfg(feature = "graphviz")] +mod graphviz; +pub mod serialization; +pub mod transform; + +use crate::data_structures::VecMap; +use derive_more::Index; +use derive_more::IndexMut; +use std::collections::BTreeMap; +use std::collections::BTreeSet; + + + +// ============================== +// === Type Parameterizations === +// ============================== + +/// Globally unique, stable identifier for a `Field`. +pub type FieldId = crate::data_structures::Id; + +/// Identifies a type within a `TypeGraph`. +pub type TypeId = crate::data_structures::vecmap::Key; +/// Identifies an unbound type within a `TypeGraph`. +pub type UnboundTypeId = crate::data_structures::vecmap::UnboundKey; + + + +// ====================== +// === Datatype Types === +// ====================== + +/// A datatype. +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct Type { + /// The type's name. + pub name: TypeName, + /// The type's data content. + pub data: Data, + /// The parent type, if any. + pub parent: Option, + /// Types that this type inherits from that are not the parent. + pub mixins: Vec, + /// If true, this type cannot be instantiated. + pub abstract_: bool, + /// If true, this type is not open to extension by children outside those defined with it. + pub closed: bool, + /// When serializing/deserializing, indicates the index of the field in a `Type` before which a + /// child object's data will be placed/expected. + pub child_field: Option, + /// When serializing/deserializing, indicates the available concrete types and the values used + /// to identify them.
+ pub discriminants: BTreeMap, +} + +impl Type { + /// Create a new datatype, with defaults for most fields. + pub fn new(name: TypeName, data: Data) -> Self { + let parent = Default::default(); + let mixins = Default::default(); + let abstract_ = Default::default(); + let closed = Default::default(); + let child_field = Default::default(); + let discriminants = Default::default(); + Type { name, data, parent, mixins, abstract_, closed, child_field, discriminants } + } +} + +/// A datatype's data. +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub enum Data { + /// A type with fields. + Struct(Vec), + /// A builtin type. + Primitive(Primitive), +} + +/// Standard types. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] +pub enum Primitive { + /// A boolean value. + Bool, + /// An unsigned 32-bit integer. + U32, + /// An unsigned 64-bit integer. + U64, + /// An UTF-8-encoded string. + String, + /// Zero or more values of a type. + Sequence(TypeId), + /// Zero or one value of a type. + Option(TypeId), + /// A value that may be one type in a success case, or another type in a failure case. + Result(TypeId, TypeId), +} + +/// A data field of a `Type`. +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct Field { + /// The field's `Type`. + pub type_: TypeId, + /// The field's name. + pub name: FieldName, + /// Whether the field should be private in generated code. + pub hide: bool, + id: FieldId, +} + +impl Field { + /// Create a new named field. + pub fn named(name: FieldName, type_: TypeId) -> Self { + let hide = Default::default(); + let id = Default::default(); + Self { type_, name, hide, id } + } + + /// Create a new unnamed field. + pub fn unnamed(type_: TypeId) -> Self { + let name = Default::default(); + let hide = Default::default(); + let id = Default::default(); + Self { name, type_, hide, id } + } + + /// Get the field's `FieldId`. 
+    pub fn id(&self) -> FieldId {
+        self.id
+    }
+}
+
+
+
+// ===================
+// === Identifiers ===
+// ===================
+
+/// An identifier, in a naming convention-agnostic representation.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+pub struct Identifier {
+    segments: Vec<String>,
+}
+
+impl Identifier {
+    /// Build an identifier from its segments. Panics if any segment is empty.
+    fn new(segments: Vec<String>) -> Self {
+        for segment in &segments {
+            assert!(!segment.is_empty());
+        }
+        Self { segments }
+    }
+
+    /// Total length in bytes of all segments; used to presize the rendering buffers.
+    fn segments_len(&self) -> usize {
+        self.segments.iter().map(String::len).sum()
+    }
+
+    /// Render in PascalCase.
+    pub fn to_pascal_case(&self) -> String {
+        let mut pascal = String::with_capacity(self.segments_len() + self.segments.len());
+        for segment in &self.segments {
+            let mut chars = segment.chars();
+            pascal.push(chars.next().unwrap().to_ascii_uppercase());
+            pascal.extend(chars);
+        }
+        pascal
+    }
+
+    /// Render in camelCase. An identifier with no segments (e.g. the default name of an unnamed
+    /// field) renders as the empty string rather than panicking.
+    pub fn to_camel_case(&self) -> String {
+        let mut camel = String::with_capacity(self.segments_len());
+        if let Some((head, tail)) = self.segments.split_first() {
+            camel.push_str(head);
+            for segment in tail {
+                let mut chars = segment.chars();
+                camel.push(chars.next().unwrap().to_ascii_uppercase());
+                camel.extend(chars);
+            }
+        }
+        camel
+    }
+
+    /// Render in snake_case.
+    pub fn to_snake_case(&self) -> String {
+        self.segments.join("_")
+    }
+
+    /// Parse an identifier expected to be in snake_case.
+    pub fn from_snake_case(s: &str) -> Self {
+        let segments = s.split('_').map(|s| s.to_string()).collect();
+        Self::new(segments)
+    }
+
+    /// Parse an identifier expected to be in camelCase.
+    pub fn from_camel_case(s: &str) -> Self {
+        Self::from_pascal_case(s)
+    }
+
+    /// Parse an identifier expected to be in PascalCase.
+ pub fn from_pascal_case(s: &str) -> Self { + let mut segments = vec![]; + let mut current = String::new(); + for c in s.chars() { + if c.is_ascii_uppercase() && !current.is_empty() { + segments.push(std::mem::take(&mut current)); + } + current.push(c.to_ascii_lowercase()); + } + segments.push(current); + Self::new(segments) + } + + /// Append another `Identifier` to the end of `self`; when rendered, the boundary between the + /// old and new components will be indicated in a manner determined by the naming convention + /// chosen at rendering time. + pub fn append(&mut self, other: Self) { + self.segments.extend(other.segments) + } +} + + +// === Type Names === + +/// The name of a type, e.g. a `struct` in Rust or a `class` in Java. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TypeName(Identifier); + +impl std::fmt::Display for TypeName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0.to_pascal_case()) + } +} + +impl TypeName { + /// Parse from PascalCase. + pub fn from_pascal_case(s: &str) -> Self { + Self(Identifier::from_pascal_case(s)) + } + /// Render in PascalCase. + pub fn to_pascal_case(&self) -> String { + self.0.to_pascal_case() + } + /// Append another `TypeName` to the end of `self`. See `Identifier::append`. + pub fn append(&mut self, other: Self) { + self.0.append(other.0) + } +} + + +// === Field Names === + +/// The name of a field, e.g. the data members of a Rust struct or Java class. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct FieldName(Identifier); + +impl std::fmt::Display for FieldName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0.to_camel_case()) + } +} + +impl FieldName { + /// Parse from snake_case. + pub fn from_snake_case(s: &str) -> Self { + Self(Identifier::from_snake_case(s)) + } + /// Render in camelCase. 
+ pub fn to_camel_case(&self) -> Option { + match self.0.to_camel_case() { + ident if ident.is_empty() => None, + ident => Some(ident), + } + } + /// Append another `FieldName` to the end of `self`. See `Identifier::append`. + pub fn append(&mut self, other: Self) { + self.0.append(other.0) + } +} + + + +// =========================== +// === System of Datatypes === +// =========================== + +/// A collection of [`Type`]s. The [`TypeGraph`] owns its types; they do not refer to each other +/// directly, but through [`TypeId`]s, which must be looked up in the graph (its [`Index`] +/// implementation provides a convenient interface). +#[derive(Debug, Default, Clone, Index, IndexMut)] +pub struct TypeGraph { + #[index] + #[index_mut] + #[allow(missing_docs)] + pub types: VecMap, +} + +impl TypeGraph { + /// Create a new, empty [`TypeGraph`]. + pub fn new() -> Self { + Default::default() + } + + /// For every `(id0, id1)` pair in the input, replace all occurrences of `id0` in any type + /// definition with the corresponding `id1`. 
+ pub fn apply_aliases<'a>(&mut self, aliases: impl IntoIterator) { + let mut canonical = BTreeMap::new(); + for (from_, to_) in aliases.into_iter() { + canonical.insert(*from_, *to_); + } + let rewrite = |id: &mut TypeId| { + if let Some(id_) = canonical.get(id) { + *id = *id_; + } + }; + for ty in self.types.values_mut() { + if let Some(parent) = &mut ty.parent { + rewrite(parent); + } + for parent in &mut ty.mixins { + rewrite(parent); + } + match &mut ty.data { + Data::Struct(fields) => + for field in fields { + rewrite(&mut field.type_); + }, + Data::Primitive(Primitive::Sequence(t0)) + | Data::Primitive(Primitive::Option(t0)) => rewrite(t0), + Data::Primitive(Primitive::Result(t0, t1)) => { + rewrite(t0); + rewrite(t1); + } + Data::Primitive(Primitive::U32) + | Data::Primitive(Primitive::Bool) + | Data::Primitive(Primitive::U64) + | Data::Primitive(Primitive::String) => {} + } + } + } + + /// Eliminate types that are not in the referential transitive closure of the given collection + /// of roots. 
+ pub fn gc(&mut self, roots: impl IntoIterator) { + let mut visited = BTreeSet::new(); + let mut to_visit = BTreeSet::new(); + to_visit.extend(roots); + while let Some(id) = to_visit.pop_last() { + let Type { + name: _, + data, + parent, + mixins, + abstract_: _, + closed: _, + child_field: _, + discriminants, + } = &self.types[id]; + let already_visited = !visited.insert(id); + if already_visited { + continue; + } + if let Some(parent) = parent { + to_visit.insert(*parent); + } + to_visit.extend(mixins); + to_visit.extend(discriminants.values()); + match data { + Data::Struct(fields) => to_visit.extend(fields.iter().map(|field| field.type_)), + Data::Primitive(Primitive::Sequence(t0)) + | Data::Primitive(Primitive::Option(t0)) => { + to_visit.insert(*t0); + } + Data::Primitive(Primitive::Result(t0, t1)) => { + to_visit.insert(*t0); + to_visit.insert(*t1); + } + Data::Primitive(Primitive::U32) + | Data::Primitive(Primitive::Bool) + | Data::Primitive(Primitive::U64) + | Data::Primitive(Primitive::String) => {} + } + } + let live = |id: &TypeId| visited.contains(id); + let ids: Vec<_> = self.types.keys().collect(); + for id in ids { + if !live(&id) { + self.types.remove(id); + } + } + } +} + + +// === GraphViz support === + +#[cfg(feature = "graphviz")] +impl From<&'_ TypeGraph> for crate::graphviz::Graph { + fn from(graph: &'_ TypeGraph) -> Self { + graphviz::graph(graph) + } +} diff --git a/lib/rust/metamodel/src/meta/serialization.rs b/lib/rust/metamodel/src/meta/serialization.rs new file mode 100644 index 0000000000..4b3a7cff1e --- /dev/null +++ b/lib/rust/metamodel/src/meta/serialization.rs @@ -0,0 +1,649 @@ +//! Serialization analysis on meta representations. +//! +//! # Test Case Generation +//! +//! The [`testcases`] function supports generation of deserialization test cases that cover all +//! types reachable from some root type in a [`TypeGraph`]. +//! +//! The implementation is based on computing a test program built from a small set of operation, +//! 
and then interpreting the program to generate all the needed test cases. +//! +//! ## Test programs +//! +//! Abstractly, a test program can be considered to be equivalent to a tree, where each node has +//! three possibilities (the implementation is equivalent, but more efficient to execute): +//! - `Constant`: Evaluates to some constant data. The value affects the output but is irrelevant to +//! control flow. (In the implementation, this is [`Op::U8(_)`], [`Op::U32(_)`], etc.) +//! - `Concat(A, B)`: Evaluates to the concatenation of the evaluation of its two child nodes. (In +//! the implementation, this operator is implicit in program order.) +//! - `Amb(A, B)`: In every evaluation, this must evaluate to either the value of `A` or the value +//! of `B`. For completeness, there must be at least one evaluation of the whole program in which +//! this is evaluated to `A`, and at least one evaluation where it is evaluated to `B`. (In the +//! implementation, this is an n-ary operator expressed with [`Op::SwitchPush`] / +//! [`Op::SwitchPop`] / [`Op::Case(_)`].) +//! +//! ## Program generation +//! +//! The input typegraph may contain cycles. The first step of program generation is to select a +//! *basecase* for every sum type such that the type graph, when excluding non-basecase +//! possibilities from every sum type, does not contain any cycles. For details on this problem and +//! the algorithm solving it, see [`select_basecase`]. +//! +//! Once we have the information necessary to avoid trying to emit cyclic structures, program +//! generation is straightforward: For product types, we use the equivalent of the `Concat` +//! operation described above; for sum types, the `Amb` operation. Compound primitives like `Option` +//! and `Result` are treated as similar user-defined sum types would be. +//! +//! ## Program interpretation +//! +//! Program interpretation is better described in terms of the sequence of [`Op`]s than the more +//! 
abstract tree representation described above. The interpreter advances a program counter over +//! every [`Op`] once, in sequence. It maintains a stack of the [`Op::SwitchPop`] corresponding to +//! every [`Op::SwitchPush`]--that is, the join points for the n-ary `Amb` operators that the +//! program counter is currently within. For each [`Op::Case`] (i.e. one possibility of an `Amb`), a +//! test case is generated: The test case will consist of the basecase-mode evaluation of the whole +//! program up to the active [`Op::Case`] in each open switch (this value is maintained as execution +//! proceeds), the present case in each open switch, and then the output of basecase-mode execution +//! from the join point of the switch on top of stack to the end of the program--thus efficiently +//! producing one test case for every [`Op::Case`] in the input, with each case composed of the +//! output of the whole program, using basecase values for all switches not in the stack at the +//! point the case is reached. + +use crate::meta::*; + +use std::fmt::Write; + + + +const DEBUG: bool = false; + + + +// ============================ +// === Test Case Generation === +// ============================ + +/// A set of *accept* and *reject* tests for a serialization format. +#[derive(Debug, Clone)] +pub struct TestCases { + /// Inputs that a deserializer should accept. + pub accept: Vec>, + /// Inputs that a deserializer should reject. + pub reject: Vec>, + program: Vec, + debuginfo: BTreeMap, +} + +/// Generate test cases. +/// +/// Produces 100% coverage of valid structures (i.e. every variant of every enum occurs in some +/// `accept` case), and a representative set of `reject` cases. +pub fn testcases(graph: &TypeGraph, root: TypeId) -> TestCases { + let mut builder = ProgramBuilder::new(graph, root); + builder.type_(root, Default::default()); + let ProgramBuilder { program, debuginfo, .. 
} = builder; + if DEBUG { + eprintln!("{}", fmt_program(&program, &debuginfo)); + } + let (accept, reject) = Interpreter::run(&program); + TestCases { accept, reject, program, debuginfo } +} + +impl TestCases { + /// Produce a JSON representation of test case data. + pub fn to_json(&self) -> String { + let accept: Vec<_> = self.accept.iter().map(|case| format!("{:?}", case)).collect(); + let accept = accept.join(", \n\t"); + let reject: Vec<_> = self.reject.iter().map(|case| format!("{:?}", case)).collect(); + let reject = reject.join(", \n\t"); + let mut out = String::new(); + writeln!(out, "{{").unwrap(); + writeln!(out, "\"accept\": [").unwrap(); + writeln!(out, "\t{accept}").unwrap(); + writeln!(out, "],").unwrap(); + writeln!(out, "\"reject\": [").unwrap(); + writeln!(out, "\t{reject}").unwrap(); + writeln!(out, "]}}").unwrap(); + out + } + + /// Render a debug representation of the test program used to generate the cases. + pub fn program(&self) -> String { + fmt_program(&self.program, &self.debuginfo) + } +} + +/// Produce a debug representation of a program. 
+fn fmt_program(program: &[Op], debuginfo: &BTreeMap) -> String { + let mut out = String::new(); + let mut indent = 0; + let continuations = collect_continuations(program); + let mut accept = 0; + let mut reject = 0; + for (i, op) in program.iter().enumerate() { + if *op == Op::SwitchPop { + indent -= 1 + } + write!(out, "{:>4}: ", i).unwrap(); + for _ in 0..indent { + write!(out, " ").unwrap(); + } + write!(out, "{:?}", op).unwrap(); + if let Some(debuginfo) = debuginfo.get(&i) { + write!(out, " -- {}", debuginfo).unwrap(); + } + if let Some(continuation) = continuations.get(&i) { + write!(out, " [{}]", continuation).unwrap(); + } + if *op == Op::Case(Case::Accept) { + write!(out, " # accept{accept}").unwrap(); + accept += 1; + } + if *op == Op::Case(Case::Reject) { + write!(out, " # reject{reject}").unwrap(); + reject += 1; + } + if *op == Op::SwitchPush { + indent += 1 + } + writeln!(out).unwrap(); + } + out +} + + + +// ========================== +// === Program Operations === +// ========================== + +/// Operations for a test-case-generating program. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum Op { + U8(u8), + U32(u32), + U64(u64), + SwitchPush, + SwitchPop, + Case(Case), +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum Case { + Accept, + Reject, +} + + + +// ========================== +// === Program Generation === +// ========================== + +/// Generates test-case-generating program for a type graph. 
+#[derive(Debug)] +struct ProgramBuilder<'g> { + graph: &'g TypeGraph, + will_visit: BTreeSet, + visited: BTreeSet, + debuginfo: BTreeMap, + program: Vec, + basecase_discriminant: BTreeMap, +} + +impl<'g> ProgramBuilder<'g> { + fn new(graph: &'g TypeGraph, root: TypeId) -> Self { + let mut graph_ = graph.clone(); + graph_.gc(vec![root]); + let program = Default::default(); + let visited = Default::default(); + let mut will_visit = BTreeSet::new(); + let mut basecase_discriminant = BTreeMap::new(); + let mut sb_visited = BTreeSet::new(); + for (id, ty) in graph_.types.iter() { + if let Data::Struct(fields) = &ty.data { + will_visit.extend(fields.iter().map(|field| field.type_)); + will_visit.extend(ty.discriminants.values()); + } + select_basecase(graph, id, &mut basecase_discriminant, &mut sb_visited); + sb_visited.clear(); + } + let debuginfo = Default::default(); + Self { graph, program, visited, will_visit, debuginfo, basecase_discriminant } + } + + fn emit(&mut self, op: Op) { + self.program.push(op); + } + + fn debug_next(&mut self, debug: impl std::fmt::Display) { + let n = self.program.len(); + self.debuginfo.insert(n, debug.to_string()); + } + + fn debug_prev(&mut self, debug: impl std::fmt::Display) { + let n = self.program.len() - 1; + self.debuginfo.insert(n, debug.to_string()); + } + + fn basecase(&self, id: TypeId) -> bool { + self.visited.contains(&id) || self.will_visit.contains(&id) + } + + fn type_(&mut self, id: TypeId, basecase: bool) { + let basecase = basecase || !self.visited.insert(id); + let ty = &self.graph[id]; + match &ty.data { + Data::Struct(_) => self.object(id, basecase), + Data::Primitive(primitive) => self.primitive(*primitive, basecase, id), + } + } + + /// Emit [`Op`]s reflecting the data of a [`Primitive`]. + /// + /// # Simple primitives + /// + /// If the [`Primitive`] is scalar data, like an integer or bool, operations producing an + /// arbitrary example value will be emitted. 
+ /// + /// # Compound primitives + /// + /// For all compound primitives (primitives referring to other types), the `basecase` parameter + /// determines whether the output is minimal (as appropriate for previously-encountered types), + /// or exhaustive. + /// + /// If the input is an option: + /// - If `basecase` is `true`, only the `None` representation will be emitted. + /// - If `basecase` is `false`, an alternation of the `None` representation, the `Some` + /// representation, and a reject-case with an invalid discriminant will be emitted. + /// + /// If the input is a sequence: + /// - If `basecase` is `true`, a zero-length sequence will be emitted. + /// - If `basecase` is `false`, an alternation of an empty sequence and a 1-object sequence will + /// be emitted (this tests the correspondence between the encoded length and number of + /// elements). Although an `Option` also allows 0 or 1 objects, an `Option` is encoded with a + /// smaller (1-byte) length field, so they are encoded distinctly. + /// + /// If the input is a result: + /// - If `basecase` is `true`, a type that has been determined not to cause recursion will be + /// selected. + /// - If `basecase` is `false`, an alternation of the two possible types will be emitted, along + /// with a reject-case with an invalid discriminant. + fn primitive(&mut self, primitive: Primitive, basecase: bool, id: TypeId) { + match primitive { + // Value doesn't matter, but this will be recognizable in the output, and will tend not + // to encode compatibly with other types. + Primitive::U32 => self.emit(Op::U32(1234567890)), + // Value 1 chosen to detect errors better: 0 encodes the same way as Option::None. + Primitive::Bool => self.emit(Op::U8(1)), + // Value doesn't matter, but this will be recognizable in the output, and will tend not + // to encode compatibly with other types.
+ Primitive::U64 => self.emit(Op::U64(1234567890123456789)), + Primitive::String => self.emit(Op::U64("".len() as u64)), + Primitive::Sequence(_) if basecase => self.emit(Op::U64(0)), + Primitive::Sequence(t0) => { + self.emit(Op::SwitchPush); + self.emit(Op::U64(0)); + self.emit(Op::Case(Case::Accept)); + self.emit(Op::U64(1)); + self.type_(t0, basecase); + self.emit(Op::Case(Case::Accept)); + self.emit(Op::SwitchPop); + } + Primitive::Option(_) if basecase => self.emit(Op::U8(0)), + Primitive::Option(t0) => { + self.emit(Op::SwitchPush); + if self.basecase(t0) { + self.emit(Op::U8(0)); + } else { + self.emit(Op::U8(1)); + self.type_(t0, basecase); + } + self.emit(Op::Case(Case::Accept)); + self.emit(Op::U8(2)); + self.emit(Op::Case(Case::Reject)); + self.emit(Op::SwitchPop); + } + Primitive::Result(t0, t1) => { + let basecase_index = self.basecase_discriminant[&id]; + let types = [t0, t1]; + let t0 = types[basecase_index]; + let t1 = types[1 - basecase_index]; + let i0 = basecase_index as u32; + let i1 = 1 - basecase_index as u32; + if basecase { + self.emit(Op::U32(i0)); + self.type_(t0, basecase); + } else { + self.emit(Op::SwitchPush); + if !self.basecase(t0) || self.basecase(t1) { + self.emit(Op::U32(i0)); + self.type_(t0, basecase); + self.emit(Op::Case(Case::Accept)); + } + if !self.basecase(t1) { + self.emit(Op::U32(i1)); + self.type_(t1, basecase); + self.emit(Op::Case(Case::Accept)); + } + self.emit(Op::U32(2)); + self.emit(Op::Case(Case::Reject)); + self.emit(Op::SwitchPop); + } + } + } + } + + /// Emit [`Op`]s reflecting the data of a [`Type`], as identified by ID. + /// + /// If `basecase` is true: An example of the specified type will be created that is intended to + /// be no larger than necessary, and that avoids infinite recursion; this is appropriate when + /// emitting data for a type that has already been exercised with `basecase=false`, or for a + /// type that has been determined to occur unconditionally as a field of another type. 
+ /// + /// If `basecase` is false, if the type has child types, an alternation of all possible child + /// types will be emitted, along with a reject-case including a discriminant higher than the + /// highest valid discriminant, and reject-cases for any invalid discriminants lower than the + /// highest valid discriminant. + fn object(&mut self, id: TypeId, basecase: bool) { + let mut hierarchy = vec![id]; + let mut id = id; + while let Some(id_) = self.graph[id].parent { + id = id_; + hierarchy.push(id); + } + self.object_(&mut hierarchy, basecase); + assert_eq!(&hierarchy, &[]) + } + + /// Emit [`Op`]s reflecting the data of a [`Type`], as identified by a `Vec` `hierarchy` in + /// which: + /// - `hierarchy[0]` is a concrete [`Type`]. + /// - `hierarchy[i]` is the parent of `hierarchy[i-1]`. + /// - `hierarchy[hierarchy.len() - 1]` identifies a type that doesn't have any parent type. + /// + /// For a design description see the primary interface, [`Self::object`]. + fn object_(&mut self, hierarchy: &mut Vec, basecase: bool) { + let id = hierarchy.pop().unwrap(); + let ty = &self.graph[id]; + let fields = match &ty.data { + Data::Struct(fields) => fields, + _ => panic!(), + }; + for (i, field) in fields.iter().enumerate() { + if ty.child_field == Some(i) { + if hierarchy.is_empty() { + let basecase_discriminant = self.basecase_discriminant[&id]; + let discriminants = &ty.discriminants; + let basecase_ty = discriminants[&basecase_discriminant]; + hierarchy.push(basecase_ty); + if basecase { + self.emit(Op::U32(basecase_discriminant as u32)); + self.object_(hierarchy, basecase); + } else { + let (&max, _) = discriminants.last_key_value().unwrap(); + self.emit(Op::SwitchPush); + self.emit(Op::U32(basecase_discriminant as u32)); + self.debug_prev(&self.graph[basecase_ty].name); + self.object_(hierarchy, basecase); + self.emit(Op::Case(Case::Accept)); + for i in 0..=(max + 1) { + if i == basecase_discriminant { + continue; + } + self.emit(Op::U32(i as u32)); + match 
discriminants.get(&i) { + Some(id) => { + hierarchy.push(*id); + self.debug_prev(&self.graph[*id].name); + self.object_(hierarchy, basecase); + self.emit(Op::Case(Case::Accept)); + } + None => self.emit(Op::Case(Case::Reject)), + } + } + self.emit(Op::SwitchPop); + } + } else { + self.object_(hierarchy, basecase); + } + } + self.type_(field.type_, basecase); + self.debug_prev(format!(".{}", &field.name)); + } + } +} + +/// Choose a discriminant for the specified type, and if necessary for some other types reachable +/// from it in the composition graph, so that the composition graph for the type is non-recursive. +/// +/// If any child type doesn't have own any sum-types, we select it. Otherwise, selections are made +/// according to the following recursive algorithm: +/// - If we have a child that doesn't own any sum-type fields, choose it and return Ok. +/// - Otherwise, recurse into each child; if one returns Ok, choose it and return Ok. +/// - If no child returns Ok, we got here by recursing into a bad choice; return Err. +/// - If we reach a type we have already visited, this choice contains a cycle; return Err. (Because +/// we only visit each type once, the time complexity of this algorithm is linear in the number of +/// types we need to select discriminants for). +/// +/// The top-level call will always return Ok because: There must be a sum type in our descendants +/// that has a child that doesn't own any sum-type fields, or there would be a type in the input +/// that is only possible to instantiate with cyclic or infinite data. +fn select_basecase( + graph: &TypeGraph, + id: TypeId, + out: &mut BTreeMap, + visited: &mut BTreeSet, +) { + select_basecase_(graph, id, out, visited).unwrap() +} + +/// Implementation. See the documentation for [`select_basecase`]. 
+fn select_basecase_( + graph: &TypeGraph, + id: TypeId, + out: &mut BTreeMap, + visited: &mut BTreeSet, +) -> Result<(), ()> { + if out.contains_key(&id) { + return Ok(()); + } + if !visited.insert(id) { + return Err(()); + } + let mut result_discriminants = BTreeMap::new(); + let discriminants = match &graph[id].data { + Data::Primitive(Primitive::Result(t0, t1)) => { + result_discriminants.insert(0, *t0); + result_discriminants.insert(1, *t1); + &result_discriminants + } + _ => &graph[id].discriminants, + }; + if discriminants.is_empty() { + return Ok(()); + } + let mut descendants = BTreeMap::<_, Vec<_>>::new(); + let mut child_fields = BTreeSet::new(); + let mut child_sums = BTreeSet::new(); + for (&i, &child) in discriminants { + child_fields.clear(); + child_sums.clear(); + child_fields.insert(child); + while let Some(child_) = child_fields.pop_last() { + let ty = &graph[child_]; + if ty.child_field.is_some() { + child_sums.insert(child_); + } + match &ty.data { + Data::Struct(fields) => child_fields.extend(fields.iter().map(|field| field.type_)), + Data::Primitive(Primitive::Result(_, _)) => { + child_sums.insert(child_); + } + Data::Primitive(_) => (), + } + } + if child_sums.is_empty() { + out.insert(id, i); + return Ok(()); + } + descendants.insert(i, child_sums.iter().copied().collect()); + } + for (i, descendants) in descendants { + let is_ok = |id: &TypeId| select_basecase_(graph, *id, out, visited).is_ok(); + if descendants.iter().all(is_ok) { + out.insert(id, i); + return Ok(()); + } + } + Err(()) +} + + + +// ================= +// === Execution === +// ================= + +/// Runs a test-case-generating program. +#[derive(Debug, Default)] +struct Interpreter<'p> { + program: &'p [Op], + continuations: BTreeMap, +} + +/// A control-stack frame of the interpreted program. +#[derive(Debug, Default, PartialEq, Eq)] +struct Frame { + /// A return address, as an index into the sequence of [`Op`]s. + return_: usize, + /// A height of the data stack. 
+ prefix_len: usize, +} + +impl<'p> Interpreter<'p> { + /// Interpret a program, producing collections of accept-cases and reject-cases. + fn run(program: &'p [Op]) -> (Vec>, Vec>) { + let continuations = collect_continuations(program); + let self_ = Self { program, continuations }; + self_.run_() + } + + /// Interpret every instruction in the program, in order. For every case in each switch, emit an + /// (accept or reject) output consisting of the basecase interpretation of all data before the + /// given switch, the switch case's data, and then the basecase interpretation of all data after + /// the switch. + fn run_(&self) -> (Vec>, Vec>) { + let mut accept: Vec> = Default::default(); + let mut reject: Vec> = Default::default(); + let mut prefix: Vec = Default::default(); + let mut stack: Vec = Default::default(); + for (pc, op) in self.program.iter().enumerate() { + match op { + Op::SwitchPush => stack + .push(Frame { return_: self.continuations[&pc], prefix_len: prefix.len() }), + Op::SwitchPop => { + let Frame { prefix_len, .. } = stack.pop().unwrap(); + prefix.truncate(prefix_len); + let cont_stack = vec![self.continuations[&pc]]; + if DEBUG { + eprintln!("- delimited continuation: {pc} -> {cont_stack:?}"); + } + self.run_continuation(cont_stack, &mut prefix); + } + Op::U8(data) => prefix.push(*data), + Op::U32(data) => prefix.extend(&data.to_le_bytes()), + Op::U64(data) => prefix.extend(&data.to_le_bytes()), + Op::Case(case) => { + if DEBUG { + match case { + Case::Accept => eprint!("accept{}: ", accept.len()), + Case::Reject => eprint!("reject{}: ", reject.len()), + }; + } + let results = match case { + Case::Accept => &mut accept, + Case::Reject => &mut reject, + }; + let Frame { prefix_len, .. 
} = stack.last().unwrap(); + let stack = stack.iter().map(|frame| frame.return_).collect(); + let mut data = prefix.clone(); + if DEBUG { + eprintln!("{pc} -> {stack:?}"); + } + let final_pc = self.run_continuation(stack, &mut data); + let returned = "Returned from escape continuation"; + assert_eq!(final_pc, self.program.len(), "{returned} at {final_pc}."); + results.push(data); + prefix.truncate(*prefix_len); + } + } + } + assert_eq!(&stack, &[]); + (accept, reject) + } + + /// Given an initial return stack, run the program until the last stack frame is exited, + /// running only basecase cases of each switch encountered, emitting the data to the `Vec` + /// passed in the `out` parameter. + /// + /// The return value is the program counter when the last stack frame was exited. + /// + /// If the given stack is the full stack at a certain point in program execution, the + /// continuation is an escape continuation that will run the program until completion. + /// + /// If the given stack is a consecutive slice of the stack at a certain point in program + /// execution, the continuation is a delimited continuation. + fn run_continuation(&self, mut stack: Vec, out: &mut Vec) -> usize { + let mut pc = stack.pop().unwrap(); + while let Some(op) = self.program.get(pc) { + match op { + Op::SwitchPush => stack.push(self.continuations[&pc]), + Op::SwitchPop => panic!("Fell through a switch at {pc}."), + Op::U8(data) => out.push(*data), + Op::U32(data) => out.extend(&data.to_le_bytes()), + Op::U64(data) => out.extend(&data.to_le_bytes()), + Op::Case(Case::Accept) => { + if let Some(pc_) = stack.pop() { + if DEBUG { + eprintln!("- ret: {pc} -> {pc_}"); + } + pc = pc_; + continue; + } + return pc; + } + Op::Case(Case::Reject) => panic!("Rejected base case at {}.", pc), + } + pc += 1; + } + assert_eq!(&stack, &[]); + pc + } +} + +/// Analyze a program to calculate the index of the target of each [`Op`] that implicitly refers to +/// another location in the program. 
+fn collect_continuations(program: &[Op]) -> BTreeMap { + let mut continuations = BTreeMap::new(); + let mut switch_stack = vec![]; + for (pc, op) in program.iter().enumerate() { + match op { + Op::SwitchPush => switch_stack.push(pc), + Op::SwitchPop => { + let push_pc = switch_stack.pop().unwrap(); + let pop_pc = pc; + // A `SwitchPush` pushes its continuation onto the return stack; the return address + // for an `Ok`/`Fail` is after the switch. + continuations.insert(push_pc, pop_pc + 1); + // When we "fall through" a switch after executing all the `Ok`/`Fail` cases, we + // re-run the switch's first (delimited) continuation in basecase mode before + // proceeding. + continuations.insert(pop_pc, push_pc + 1); + } + _ => (), + } + } + assert_eq!(&switch_stack, &[]); + continuations +} diff --git a/lib/rust/metamodel/src/meta/transform.rs b/lib/rust/metamodel/src/meta/transform.rs new file mode 100644 index 0000000000..3e4513ff04 --- /dev/null +++ b/lib/rust/metamodel/src/meta/transform.rs @@ -0,0 +1,118 @@ +//! Transformations on the meta representation. + +use crate::meta::*; + +use derivative::Derivative; + + + +// =============== +// === Flatten === +// =============== + +/// `flatten` the specified fields into their containing structs, transitively. +/// +/// Each inserted field will have its name prepended with the name of its eliminated container. +/// If the `hide` property is set for the container, it will be inherited by its child fields. +/// +/// This implements the [`reflect(flatten)`](../enso_reflect_macros/#reflectflatten-field-attribute) +/// attribute of the `#[derive(Reflect)]` macro; see the `enso_reflect_macros` documentation for an +/// example of the usage and results of the transformation. 
+pub fn flatten(graph: &mut TypeGraph, ids: &mut BTreeSet) { + let order = toposort(graph.types.keys(), TypeGraphDependencyVisitor { graph, ids }); + for id in order { + flatten_(graph, ids, id); + } +} + +/// `flatten` the fields specified in `to_flatten` into the type identified by `outer`. +/// +/// For design notes, see [`flatten`]. +fn flatten_(graph: &mut TypeGraph, to_flatten: &mut BTreeSet, outer: TypeId) { + let outer_fields = match &mut graph[outer].data { + Data::Struct(ref mut fields) => std::mem::take(fields), + _ => return, + }; + let mut child_field = graph[outer].child_field; + let mut flattened = Vec::with_capacity(outer_fields.len()); + for (i, field) in outer_fields.into_iter().enumerate() { + let inner = field.type_; + if to_flatten.remove(&field.id) { + let inner_ty = &graph[inner]; + let inner_fields = match &inner_ty.data { + Data::Struct(fields) => fields, + Data::Primitive(_) => panic!("Cannot flatten a primitive field."), + }; + let flatten_field = |inner_: &Field| { + let mut name = field.name.clone(); + name.append(inner_.name.clone()); + let mut flat = Field::named(name, inner_.type_); + flat.hide = field.hide || inner_.hide; + flat + }; + flattened.extend(inner_fields.iter().map(flatten_field)); + } else { + flattened.push(field); + } + if child_field == Some(i + 1) { + child_field = Some(flattened.len()); + } + } + graph[outer].child_field = child_field; + match &mut graph[outer].data { + Data::Struct(fields) => *fields = flattened, + _ => unreachable!(), + }; +} + + +// === Topologic Sort === + +fn toposort(iter: impl IntoIterator, dependencies: V) -> Vec +where + T: Copy + Ord, + V: DependencyVisitor, { + let mut sort = TopoSort::default(); + for id in iter { + sort.visit(id, &dependencies); + } + sort.order +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +struct TopoSort { + visited: BTreeSet, + order: Vec, +} + +impl TopoSort { + fn visit(&mut self, t: T, visitor: &impl DependencyVisitor) + where T: Copy + Ord { + 
if self.visited.insert(t) { + visitor.visit(self, t); + self.order.push(t); + } + } +} + +trait DependencyVisitor { + fn visit(&self, sort: &mut TopoSort, t: T); +} + +struct TypeGraphDependencyVisitor<'g, 'i> { + graph: &'g TypeGraph, + ids: &'i BTreeSet, +} + +impl DependencyVisitor for TypeGraphDependencyVisitor<'_, '_> { + fn visit(&self, sort: &mut TopoSort, id: TypeId) { + if let Data::Struct(fields) = &self.graph[id].data { + for field in fields { + if self.ids.contains(&field.id) { + sort.visit(field.type_, self); + } + } + } + } +} diff --git a/lib/rust/metamodel/src/rust/graphviz.rs b/lib/rust/metamodel/src/rust/graphviz.rs new file mode 100644 index 0000000000..9e8dd0e319 --- /dev/null +++ b/lib/rust/metamodel/src/rust/graphviz.rs @@ -0,0 +1,73 @@ +//! Graphical representation of Rust type relationships with GraphViz. + +use super::*; + +use crate::graphviz::EdgeType; +use crate::graphviz::Graph; +use crate::graphviz::Node; +use crate::graphviz::NodeType; + +use std::collections::BTreeMap; + + + +// ============= +// === Graph === +// ============= + +/// Generate a graph of the given type's relationships with other types. 
+pub fn graph(root: LazyType) -> Graph { + let mut to_visit = vec![root]; + let mut types = BTreeMap::new(); + while let Some(type_) = to_visit.pop() { + let id = type_.id; + if types.contains_key(&id) { + continue; + } + let type_ = type_.evaluate(); + to_visit.extend(type_.referenced_types().into_iter()); + types.insert(id, type_); + } + let mut graph = Graph::default(); + let mut numbers = BTreeMap::new(); + let mut next_id = 0; + let mut number = |key: TypeId| { + *numbers.entry(key).or_insert_with(|| { + let id = next_id; + next_id += 1; + id + }) + }; + for type_ in types.values() { + let sname = format!("{}{}", type_.name, number(type_.id)); + let primitive = type_.is_primitive(); + let node_type = match type_.type_type() { + TypeType::Sum => NodeType::Enum, + TypeType::Product => NodeType::Struct, + }; + let label = type_.name.clone(); + graph.nodes.insert(sname.clone(), Node { primitive, node_type, label }); + if let Data::Enum(enum_) = &type_.data { + for variant in &enum_.variants { + let svariant = format!("{}_{}", sname, variant.ident); + let primitive = false; + let node_type = NodeType::Variant; + let label = variant.ident.clone(); + graph.nodes.insert(svariant.clone(), Node { primitive, node_type, label }); + graph.edges.push((sname.clone(), svariant.clone(), EdgeType::Variant)); + for ty in variant.fields.referenced_types() { + let ty = &types[&ty.id]; + let sname2 = format!("{}{}", ty.name, number(ty.id)); + graph.edges.push((svariant.clone(), sname2, EdgeType::Field)); + } + } + } else { + for ty in type_.referenced_types() { + let ty = &types[&ty.id]; + let sname2 = format!("{}{}", ty.name, number(ty.id)); + graph.edges.push((sname.clone(), sname2, EdgeType::Field)); + } + } + } + graph +} diff --git a/lib/rust/metamodel/src/rust/mod.rs b/lib/rust/metamodel/src/rust/mod.rs new file mode 100644 index 0000000000..b17576867e --- /dev/null +++ b/lib/rust/metamodel/src/rust/mod.rs @@ -0,0 +1,364 @@ +//! 
Representation of data models in the Rust typesystem. +//! +//! Unlike the other metamodels in this crate, the Rust model uses a lazy-evaluation representation +//! of type graphs. While this representation doesn't support analysis as easily as the +//! `crate::data_structures::VecMap` representation, it can be created by a context-free translation +//! from Rust syntax, so it can be built directly by a proc macro, like [`enso_reflect`]. + + +// ============== +// === Export === +// ============== + +pub use to_meta::to_meta; + + + +#[cfg(feature = "graphviz")] +mod graphviz; +mod to_meta; + + + +// ================== +// === Data model === +// ================== + +/// A type. +#[derive(Debug, Clone)] +pub struct TypeData { + /// A value uniquely identifying the type. + pub id: TypeId, + /// The Rust identifier of the type. + pub name: String, + /// The type's contents. + pub data: Data, + /// A value uniquely-identifying the type up to the type of a certain field. + pub subtype_erased: GenericTypeId, +} + +/// A type's data content. +#[derive(Debug, Clone)] +pub enum Data { + /// A `struct`. + Struct(Struct), + /// An `enum`. + Enum(Enum), + /// Builtins, including basic types and generics. + Primitive(Primitive), +} + +/// An `enum`. +#[derive(Debug, Clone)] +pub struct Enum { + /// The variants. + pub variants: Vec, +} + +/// A possible value of an `enum`. +#[derive(Debug, Clone)] +pub struct Variant { + /// The variant's name. + pub ident: String, + /// The variant's data. + pub fields: Fields, + /// If true, when abstracting to the `meta` representation, rather than generate a type for + /// this variant, its (sole) field will become a child of the parent enum. + pub inline: bool, +} + +/// A `struct`. +#[derive(Debug, Clone)] +pub struct Struct { + /// The fields. + pub fields: Fields, + /// If true, this field should be passed-through to its (sole) field when abstracting to the + /// `meta` representation.
+ pub transparent: bool, +} + +/// A field with a name. +#[derive(Debug, Clone)] +pub struct NamedField { + #[allow(missing_docs)] + pub name: String, + #[allow(missing_docs)] + pub type_: LazyType, + /// If true, this type should become the parent of the type in this field. + pub subtype: bool, + /// If true, the fields of this field should be inserted in place of it. + pub flatten: bool, + /// If true, this field should be hidden in generated code, in a target-language-dependent + /// manner. + pub hide: bool, +} + +/// A field in a tuple struct or tuple variant. +#[derive(Debug, Copy, Clone)] +pub struct UnnamedField { + #[allow(missing_docs)] + pub type_: LazyType, +} + +/// The data of a struct or variant. +#[derive(Debug, Clone)] +pub enum Fields { + #[allow(missing_docs)] + Named(Vec), + #[allow(missing_docs)] + Unnamed(Vec), + #[allow(missing_docs)] + Unit, +} + +/// Rust standard types. +#[derive(Debug, Clone, Copy)] +pub enum Primitive { + /// A `bool`. + Bool, + /// A `usize`. + Usize, + /// A `u32`. + U32, + /// A `String`. + String, + /// A `Vec<_>`. + Vec(LazyType), + /// An `Option<_>`. + Option(LazyType), + /// A `Result<_, _>`. + Result(LazyType, LazyType), +} + + + +// ======================= +// === Type references === +// ======================= + +/// Uniquely identifies a type. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct TypeId(std::any::TypeId); + +impl TypeId { + #[allow(missing_docs)] + pub fn new(id: std::any::TypeId) -> Self { + Self(id) + } +} + +/// Distinguishes a type, irrespective of any sole type parameter present in the field marked +/// `#[reflect(subtype)]`, if any. Used in the implementation of the `subtype` transform. 
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct GenericTypeId(std::any::TypeId); + +impl GenericTypeId { + #[allow(missing_docs)] + pub fn new(id: std::any::TypeId) -> Self { + Self(id) + } +} + +/// Identifies a type, and can be evaluated to obtain the type's definition. +/// +/// This is used for the fields of a type's `TypeData` because type graphs may contain cycles. +#[derive(Copy, Clone, Debug)] +pub struct LazyType { + #[allow(missing_docs)] + pub id: TypeId, + evaluate: Thunk, +} + +impl LazyType { + #[allow(missing_docs)] + pub fn new(id: TypeId, evaluate: Thunk) -> Self { + Self { id, evaluate } + } + + /// Obtain the type's definition. + pub fn evaluate(&self) -> TypeData { + (self.evaluate)() + } +} + +type Thunk = fn() -> T; + + +// === Reference Traversal === + +/// Type reference traversal. +pub trait ReferencedTypes { + /// Identify all the types this type contains references to. + fn referenced_types(&self) -> Vec; +} + +impl ReferencedTypes for Primitive { + fn referenced_types(&self) -> Vec { + match self { + Primitive::Bool | Primitive::Usize | Primitive::String | Primitive::U32 => vec![], + Primitive::Vec(ty) | Primitive::Option(ty) => vec![*ty], + Primitive::Result(ty0, ty1) => vec![*ty0, *ty1], + } + } +} + +impl ReferencedTypes for TypeData { + fn referenced_types(&self) -> Vec { + self.data.referenced_types() + } +} + +impl ReferencedTypes for Data { + fn referenced_types(&self) -> Vec { + match self { + Data::Struct(struct_) => struct_.referenced_types(), + Data::Enum(enum_) => enum_.referenced_types(), + Data::Primitive(primitive) => primitive.referenced_types(), + } + } +} + +impl ReferencedTypes for Enum { + fn referenced_types(&self) -> Vec { + let mut referenced = vec![]; + for variant in &self.variants { + referenced.extend(variant.referenced_types()); + } + referenced + } +} + +impl ReferencedTypes for Struct { + fn referenced_types(&self) -> Vec { + self.fields.referenced_types() + } +} + +impl 
ReferencedTypes for Variant { + fn referenced_types(&self) -> Vec { + self.fields.referenced_types() + } +} + +impl ReferencedTypes for Fields { + fn referenced_types(&self) -> Vec { + match self { + Fields::Named(fields) => fields.iter().map(|field| field.type_).collect(), + Fields::Unnamed(fields) => fields.iter().map(|field| field.type_).collect(), + Fields::Unit => vec![], + } + } +} + +impl ReferencedTypes for NamedField { + fn referenced_types(&self) -> Vec { + vec![self.type_] + } +} + +impl ReferencedTypes for UnnamedField { + fn referenced_types(&self) -> Vec { + vec![self.type_] + } +} + + + +// ==================================== +// === Abstractions over data model === +// ==================================== + +/// Categorizes types by the nature of their composition operators. +#[derive(Copy, Clone, Debug)] +pub enum TypeType { + /// A type like an `enum`, that only contains data for one of its constituent types. + Sum, + /// A type like a `struct` or tuple, that contains data for all of its constituent types. + Product, +} + +impl TypeData { + /// Get information about the composition operator relating the types this type is composed of. + pub fn type_type(&self) -> TypeType { + match &self.data { + Data::Struct(_) => TypeType::Product, + Data::Enum(_) => TypeType::Sum, + Data::Primitive(primitive) => primitive.type_type(), + } + } +} + +impl Primitive { + /// Get information about the composition operator relating the types this type is composed of. + pub fn type_type(&self) -> TypeType { + match &self { + Primitive::Bool + | Primitive::Usize + | Primitive::U32 + | Primitive::String + | Primitive::Vec(_) => TypeType::Product, + Primitive::Option(_) | Primitive::Result(_, _) => TypeType::Sum, + } + } +} + + + +// ================================ +// === Operations on data model === +// ================================ + +impl Fields { + /// Get the sole field this type contains, if it has exactly one. 
+ pub fn as_wrapped_type(&self) -> Option { + match self { + Fields::Named(fields) if fields.len() == 1 => Some(fields[0].type_), + Fields::Unnamed(fields) if fields.len() == 1 => Some(fields[0].type_), + _ => None, + } + } +} + +impl TypeData { + /// Return whether this type is a `Primitive`. + pub fn is_primitive(&self) -> bool { + matches!(&self.data, Data::Primitive(_)) + } +} + +impl NamedField { + #[allow(missing_docs)] + pub fn type_id(&self) -> TypeId { + self.type_.id + } + + #[allow(missing_docs)] + pub fn type_(&self) -> TypeData { + self.type_.evaluate() + } +} + +impl UnnamedField { + #[allow(missing_docs)] + pub fn type_id(&self) -> TypeId { + self.type_.id + } + + #[allow(missing_docs)] + pub fn type_(&self) -> TypeData { + self.type_.evaluate() + } +} + + + +// ======================== +// === GraphViz support === +// ======================== + +#[cfg(feature = "graphviz")] +impl From for crate::graphviz::Graph { + fn from(root: LazyType) -> Self { + graphviz::graph(root) + } +} diff --git a/lib/rust/metamodel/src/rust/to_meta.rs b/lib/rust/metamodel/src/rust/to_meta.rs new file mode 100644 index 0000000000..29c21778d5 --- /dev/null +++ b/lib/rust/metamodel/src/rust/to_meta.rs @@ -0,0 +1,287 @@ +//! Abstracting Rust data models to the `meta` representation. + +use crate::rust::*; + +use crate::meta; + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::mem::take; + + + +// ==================== +// === Rust to Meta === +// ==================== + +/// Abstract the data model to the meta representation.
+pub fn to_meta(ty: TypeData) -> (meta::TypeGraph, BTreeMap) { + let mut to_meta = ToMeta::new(); + let root_ = to_meta.run(ty); + to_meta.graph.gc(vec![root_]); + (to_meta.graph, to_meta.rust_to_meta) +} + +#[derive(Debug, Default)] +struct ToMeta { + // Outputs + rust_to_meta: BTreeMap, + graph: meta::TypeGraph, + // Intermediate state + interfaces: Vec<(meta::TypeId, meta::TypeId)>, + parent_types: BTreeMap, + subtypings: Vec<(GenericTypeId, TypeId, meta::UnboundTypeId)>, + flatten: BTreeSet, +} + +impl ToMeta { + fn new() -> Self { + Default::default() + } +} + +impl ToMeta { + fn named_struct<'f>( + &mut self, + id_: meta::UnboundTypeId, + name: &str, + fields: impl IntoIterator, + erased: Option, + ) { + let mut body = vec![]; + let mut child_field = None; + for (i, field) in fields.into_iter().enumerate() { + assert!(!(field.flatten && field.subtype)); + if field.subtype { + assert_eq!(child_field, None); + child_field = Some((i, field.type_.id)); + continue; + } + let type_ = self.rust_to_meta[&field.type_.id]; + let name = field_name(&field.name); + let mut field_ = meta::Field::named(name, type_); + if field.flatten { + self.flatten.insert(field_.id()); + } + field_.hide = field.hide; + body.push(field_); + } + let data = meta::Data::Struct(body); + let name = type_name(name); + if let Some((index, field)) = child_field { + let erased = erased.unwrap(); + self.parent_types.insert(erased, (name, data, index)); + self.subtypings.push((erased, field, id_)); + return; + } + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } + + fn unnamed_struct(&mut self, id_: meta::UnboundTypeId, name: &str, fields: &[UnnamedField]) { + let abstract_field = + |field: &UnnamedField| meta::Field::unnamed(self.rust_to_meta[&field.type_.id]); + let data = fields.iter().map(abstract_field).collect(); + let data = meta::Data::Struct(data); + let name = type_name(name); + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } + + fn 
struct_( + &mut self, + id_: meta::UnboundTypeId, + name: &str, + fields: &Fields, + erased: Option, + ) { + match fields { + Fields::Named(fields) => self.named_struct(id_, name, fields, erased), + Fields::Unnamed(fields) => self.unnamed_struct(id_, name, fields), + Fields::Unit => self.unit_struct(id_, name), + } + } + + fn unit_struct(&mut self, id_: meta::UnboundTypeId, name: &str) { + let data = meta::Data::Struct(vec![]); + let name = type_name(name); + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } + + fn enum_(&mut self, id_: meta::UnboundTypeId, name: &str, variants: &[Variant]) { + let name = type_name(name); + let children = variants.iter().map(|Variant { ident, fields, inline: transparent }| { + if *transparent { + let field = &fields.as_wrapped_type().unwrap().id; + let field_ = self.rust_to_meta[field]; + self.interfaces.push(((&id_).into(), field_)); + field_ + } else { + let promise = self.graph.types.unbound_key(); + let new_ = meta::TypeId::from(&promise); + self.struct_(promise, ident, fields, None); + self.graph[new_].parent = Some((&id_).into()); + new_ + } + }); + let data = meta::Data::Struct(vec![]); + let mut ty = meta::Type::new(name, data); + ty.abstract_ = true; + ty.closed = true; + ty.discriminants = children.enumerate().collect(); + self.graph.types.bind(id_, ty); + } + + fn primitive(&mut self, id_: meta::UnboundTypeId, name: &str, primitive: &Primitive) { + let primitive = match primitive { + Primitive::Bool => meta::Primitive::Bool, + Primitive::U32 => meta::Primitive::U32, + // In platform-independent formats, a `usize` is serialized as 64 bits. 
+ Primitive::Usize => meta::Primitive::U64, + Primitive::String => meta::Primitive::String, + Primitive::Vec(t0) => meta::Primitive::Sequence(self.rust_to_meta[&t0.id]), + Primitive::Option(t0) => meta::Primitive::Option(self.rust_to_meta[&t0.id]), + Primitive::Result(t0, t1) => + meta::Primitive::Result(self.rust_to_meta[&t0.id], self.rust_to_meta[&t1.id]), + }; + let data = meta::Data::Primitive(primitive); + let name = type_name(name); + let ty = meta::Type::new(name, data); + self.graph.types.bind(id_, ty); + } +} + +impl ToMeta { + fn remove_transparent( + &mut self, + types: &mut BTreeMap, + ) -> BTreeMap { + let mut alias = BTreeMap::new(); + types.retain(|id, TypeData { data, .. }| { + let sole_field = + "`#[reflect(transparent)]` can only be applied to types with exactly one field."; + let target = match data { + Data::Struct(Struct { fields, transparent }) if *transparent => + fields.as_wrapped_type().expect(sole_field).id, + _ => return true, + }; + alias.insert(*id, target); + false + }); + let entries: Vec<_> = alias.iter().map(|(k, v)| (*k, *v)).collect(); + for (key, mut value) in entries { + while let Some(value_) = alias.get(&value).copied() { + alias.insert(key, value_); + value = value_; + } + } + alias + } + + /// Perform the transformation for the reference-closure of the given type. 
+ pub fn run(&mut self, ty: TypeData) -> meta::TypeId { + let root_rust_id = ty.id; + let mut rust_types = collect_types(ty); + let aliases = self.remove_transparent(&mut rust_types); + let mut meta_promises: BTreeMap<_, _> = + rust_types.keys().map(|id| (*id, self.graph.types.unbound_key())).collect(); + self.rust_to_meta = + meta_promises.iter().map(|(k, v)| (*k, meta::TypeId::from(v))).collect(); + for (id, target) in aliases { + let target_ = self.rust_to_meta[&target]; + self.rust_to_meta.insert(id, target_); + } + for (&id, rust) in &rust_types { + let name = &rust.name; + let id_ = meta_promises.remove(&id).unwrap(); + let erased = Some(rust.subtype_erased); + match &rust.data { + Data::Struct(Struct { fields, transparent: _ }) => + self.struct_(id_, name, fields, erased), + Data::Enum(Enum { variants }) => self.enum_(id_, name, variants), + Data::Primitive(primitive) => self.primitive(id_, name, primitive), + }; + } + for (parent_, child_) in self.interfaces.drain(..) { + let old_parent = self.graph[child_].parent.replace(parent_); + assert_eq!(None, old_parent); + } + self.generate_subtypes(&rust_types); + meta::transform::flatten(&mut self.graph, &mut self.flatten); + self.rust_to_meta[&root_rust_id] + } + + fn generate_subtypes(&mut self, rust_types: &BTreeMap) { + let mut parent_ids = BTreeMap::new(); + let mut aliases = vec![]; + let subtypings = take(&mut self.subtypings); + let mut concrete_subtypes = vec![]; + for (erased, field, promise) in subtypings { + let id_ = meta::TypeId::from(&promise); + let field_ty = &rust_types[&field]; + match &field_ty.data { + Data::Enum(_) => { + let field_ = self.rust_to_meta[&field]; + let (name, wrapper_data, index) = self.parent_types.remove(&erased).unwrap(); + // Move the Enum: We're merging the wrapper data into it, so any reference + // to it that wasn't through the wrapper must be an error. + // Note: This approach won't allow types that are subsetted by multiple enums. 
+ let mut enum_ty_ = self.graph.types.remove(field_); + enum_ty_.name = name; + enum_ty_.data = wrapper_data; + enum_ty_.child_field = Some(index); + let children_: Vec<_> = enum_ty_.discriminants.values().copied().collect(); + self.graph.types.bind(promise, enum_ty_); + for child_ in children_ { + let old_parent = self.graph[child_].parent.replace(id_); + assert_eq!(old_parent, Some(field_)); + } + parent_ids.insert(erased, id_); + } + Data::Struct(_) => { + concrete_subtypes.push((erased, field, id_)); + continue; + } + Data::Primitive(_) => panic!("Cannot transform a builtin to a subtype."), + }; + } + for (_erased, field, id_) in concrete_subtypes { + let variants_only = "Applying `#[reflect(subtype)]` to a field that does not occur \ + in a variant of an enum used to instantiate the field is not supported."; + let id = *self.rust_to_meta.get(&field).expect(variants_only); + aliases.push((id_, id)); + } + self.graph.apply_aliases(&aliases); + } +} + +/// Gather the Rust type IDs and definitions for the given type and its closure in the type +/// graph. 
+fn collect_types(root: TypeData) -> BTreeMap { + let mut to_visit = BTreeMap::new(); + let mut new_types = BTreeMap::new(); + for lazy in root.referenced_types() { + to_visit.insert(lazy.id, lazy); + } + let root_id = root.id; + new_types.insert(root_id, root); + while let Some((id, lazy)) = to_visit.pop_last() { + new_types.entry(id).or_insert_with(|| { + let type_ = lazy.evaluate(); + debug_assert_eq!(id, type_.id); + let refs = type_.referenced_types().into_iter().map(|lazy: LazyType| (lazy.id, lazy)); + to_visit.extend(refs); + type_ + }); + } + new_types +} + +fn field_name(s: &str) -> meta::FieldName { + meta::FieldName::from_snake_case(s) +} + +fn type_name(s: &str) -> meta::TypeName { + meta::TypeName::from_pascal_case(s) +} diff --git a/lib/rust/parser/Cargo.toml b/lib/rust/parser/Cargo.toml index c5fd1eaff6..91f67619ea 100644 --- a/lib/rust/parser/Cargo.toml +++ b/lib/rust/parser/Cargo.toml @@ -10,9 +10,15 @@ repository = "https://github.com/enso-org/enso" license-file = "../../LICENSE" [dependencies] -enso-prelude = { path = "../prelude" } +enso-prelude = { path = "../prelude", features = ["serde"] } +enso-reflect = { path = "../reflect" } enso-data-structures = { path = "../data-structures" } -enso-types = { path = "../types" } +enso-types = { path = "../types", features = ["serde"] } enso-shapely-macros = { path = "../shapely/macros" } enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" } enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" } +serde = { version = "1.0", features = ["derive"] } +bincode = "1.3" + +[lib] +path = "src/main.rs" diff --git a/lib/rust/parser/generate-java/Cargo.toml b/lib/rust/parser/generate-java/Cargo.toml new file mode 100644 index 0000000000..53069e6e71 --- /dev/null +++ b/lib/rust/parser/generate-java/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "enso-parser-generate-java" +version = "0.1.0" +authors = ["Enso Team "] +edition = "2021" +description = "Generates Java bindings and 
deserialization for Enso Parser AST types." +readme = "README.md" +homepage = "https://github.com/enso-org/enso" +repository = "https://github.com/enso-org/enso" +license-file = "../../LICENSE" + +[dependencies] +enso-metamodel = { path = "../../metamodel", features = ["rust", "java"] } +enso-prelude = { path = "../../prelude" } +enso-parser = { path = ".." } +enso-reflect = { path = "../../reflect", features = ["graphviz"] } +derivative = "2.2" diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java new file mode 100644 index 0000000000..d8938af696 --- /dev/null +++ b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Either.java @@ -0,0 +1,16 @@ +package org.enso.syntax2.serialization; + +public class Either { + protected Left left; + protected Right right; + protected Either(Left leftIn, Right rightIn) { + left = leftIn; + right = rightIn; + } + public static final Either left(L left) { + return new Either(left, null); + } + public static final Either right(R right) { + return new Either(null, right); + } +} diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java new file mode 100644 index 0000000000..dfea92731f --- /dev/null +++ b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/FormatException.java @@ -0,0 +1,11 @@ +package org.enso.syntax2.serialization; + +public class FormatException + extends RuntimeException { + public FormatException(String errorMessage, Throwable err) { + super(errorMessage, err); + } + public FormatException(String errorMessage) { + super(errorMessage); + } +} diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java new file mode 100644 index 
0000000000..f0f29ec627 --- /dev/null +++ b/lib/rust/parser/generate-java/java/org/enso/syntax2/serialization/Message.java @@ -0,0 +1,53 @@ +package org.enso.syntax2.serialization; + +public final class Message { + private final java.nio.ByteBuffer buffer; + private final java.nio.ByteBuffer context; + private final long base; + + public Message(java.nio.ByteBuffer bufferIn, java.nio.ByteBuffer contextIn, long baseIn) { + buffer = bufferIn; + context = contextIn; + base = baseIn; + } + + public long get64() { + return buffer.getLong(); + } + + public int get32() { + return buffer.getInt(); + } + + public boolean getBoolean() { + switch (buffer.get()) { + case 0: return false; + case 1: return true; + default: throw new FormatException("Boolean out of range"); + } + } + + public String getString() { + int len = (int)get64(); + byte[] dst = new byte[len]; + buffer.get(dst); + try { + return new String(dst, "UTF-8"); + } catch (java.io.UnsupportedEncodingException e) { + throw new FormatException("Expected UTF-8", e); + } + } + + public java.nio.ByteBuffer context() { + return context; + } + + public int offset(int xLow32) { + // Given the low bits of `x`, the value of `base`, and the invariant `x >= base`, + // return `x - base`. 
+ long tmp = xLow32 - base; + if (tmp < 0) + tmp += 0x0000000100000000L; + return (int)tmp; + } +} diff --git a/lib/rust/parser/generate-java/run.sh b/lib/rust/parser/generate-java/run.sh new file mode 100755 index 0000000000..1c4809071e --- /dev/null +++ b/lib/rust/parser/generate-java/run.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +echo $0 | grep lib/rust || ( echo This tool must be run from the repo root, as lib/rust/parser/generate-java/run.sh; exit 1 ) + +BASE=target/generated_java +OUT=$BASE/org/enso/syntax2 +LIB=lib/rust/parser/generate-java/java +mkdir -p $OUT +cargo test -p enso-parser-generate-java +cargo run -p enso-parser-generate-java --bin enso-parser-generate-java -- $OUT +cargo run -p enso-parser-generate-java --bin java-tests > $BASE/GeneratedFormatTests.java +javac -classpath "$LIB:$BASE" -d $BASE $BASE/GeneratedFormatTests.java +java -classpath $BASE GeneratedFormatTests diff --git a/lib/rust/parser/generate-java/src/bin/graph-java.rs b/lib/rust/parser/generate-java/src/bin/graph-java.rs new file mode 100644 index 0000000000..462a515298 --- /dev/null +++ b/lib/rust/parser/generate-java/src/bin/graph-java.rs @@ -0,0 +1,30 @@ +//! Generate a GraphViz graph of parser datatype relationships in the Java type system. +//! +//! Usage: +//! ```console +//! graph-java > java.dot +//! dot -Tx11 java.dot +//! 
``` + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] + +use enso_metamodel::graphviz; +use enso_metamodel::java; +use enso_metamodel::rust; +use enso_reflect::Reflect; + + + +// =========================== +// === Graphing Java types === +// =========================== + +fn main() { + let (graph, _) = rust::to_meta(enso_parser::syntax::Tree::reflect()); + let (graph, _) = java::from_meta(&graph, enso_parser_generate_java::EITHER_TYPE); + let graph = java::transform::optional_to_null(graph); + let rendered = graphviz::Graph::from(&graph); + println!("{}", rendered); +} diff --git a/lib/rust/parser/generate-java/src/bin/graph-meta.rs b/lib/rust/parser/generate-java/src/bin/graph-meta.rs new file mode 100644 index 0000000000..a4d5b9611e --- /dev/null +++ b/lib/rust/parser/generate-java/src/bin/graph-meta.rs @@ -0,0 +1,25 @@ +//! Generate a GraphViz graph of parser datatype relationships in the `meta` metamodel. +//! +//! Usage: +//! ```console +//! graph-meta > meta.dot +//! dot -Tx11 meta.dot +//! ``` + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] + +use enso_reflect::Reflect; + + + +// ============================= +// === Graphing `meta` types === +// ============================= + +fn main() { + let (graph, _) = enso_metamodel::rust::to_meta(enso_parser::syntax::Tree::reflect()); + let rendered = enso_metamodel::graphviz::Graph::from(&graph); + println!("{}", rendered); +} diff --git a/lib/rust/parser/generate-java/src/bin/graph-rust.rs b/lib/rust/parser/generate-java/src/bin/graph-rust.rs new file mode 100644 index 0000000000..c925cf9124 --- /dev/null +++ b/lib/rust/parser/generate-java/src/bin/graph-rust.rs @@ -0,0 +1,22 @@ +//! Generate a GraphViz graph of parser datatype relationships in the Rust type system. +//! +//! Usage: +//! ```console +//! graph-rust > rust.dot +//! dot -Tx11 rust.dot +//! 
``` + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] + + + +// =========================== +// === Graphing Rust types === +// =========================== + +fn main() { + let rendered = enso_reflect::graph::(); + println!("{}", rendered); +} diff --git a/lib/rust/parser/generate-java/src/bin/java-tests.rs b/lib/rust/parser/generate-java/src/bin/java-tests.rs new file mode 100644 index 0000000000..ec930d1fa6 --- /dev/null +++ b/lib/rust/parser/generate-java/src/bin/java-tests.rs @@ -0,0 +1,81 @@ +//! Generates Java format tests. +//! +//! Usage: +//! ```console +//! java-tests > GeneratedFormatTests.java +//! javac -d generated-java/ GeneratedFormatTests.java && java GeneratedFormatTests +//! ``` + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] + + + +// ============================ +// === Java Test Generation === +// ============================ + +fn main() { + let cases = enso_parser_generate_java::generate_testcases(); + let fmt_cases = |cases: &[Vec]| { + let cases: Vec<_> = cases + .iter() + .map(|case| { + let case: Vec<_> = case.iter().map(|byte| (*byte as i8).to_string()).collect(); + format!("{{{}}}", case.join(", ")) + }) + .collect(); + cases.join(", ") + }; + let accept = fmt_cases(&cases.accept); + let reject = fmt_cases(&cases.reject); + let package = enso_parser_generate_java::PACKAGE; + let serialization = enso_parser_generate_java::SERIALIZATION_SUPPORT; + println!("import {package}.Tree;"); + println!("import {serialization}.Message;",); + println!("import java.nio.ByteBuffer;"); + println!("import java.nio.ByteOrder;"); + println!(); + println!("class GeneratedFormatTests {{"); + println!(" public static void main(String[] args) {{"); + println!(" byte[][] accept = {{{accept}}};"); + println!(" byte[][] reject = {{{reject}}};"); + println!(" int result = 0;"); + println!(" for (int i = 0; i < accept.length; i++) {{"); + println!(" ByteBuffer buffer = 
ByteBuffer.wrap(accept[i]);"); + println!(" buffer.order(ByteOrder.LITTLE_ENDIAN);"); + println!(" ByteBuffer context = ByteBuffer.allocate(0);"); + println!(" Message message = new Message(buffer, context, 0);"); + println!(" try {{"); + println!(" Tree tree = Tree.deserialize(message);"); + println!(" System.out.print(\"- pass: \");"); + println!(" System.out.println(tree.toString());"); + println!(" }} catch (RuntimeException e) {{"); + println!(" System.out.println(\"- fail:\");"); + println!(" e.printStackTrace();"); + println!(" result = 1;"); + println!(" }}"); + println!(" }}"); + println!(" for (int i = 0; i < reject.length; i++) {{"); + println!(" ByteBuffer buffer = ByteBuffer.wrap(reject[i]);"); + println!(" buffer.order(ByteOrder.LITTLE_ENDIAN);"); + println!(" ByteBuffer context = ByteBuffer.allocate(0);"); + println!(" Message message = new Message(buffer, context, 0);"); + println!(" try {{"); + println!(" Tree tree = Tree.deserialize(message);"); + println!(" System.out.print(\"- fail: accepted: \");"); + println!(" System.out.println(tree.toString());"); + println!(" result = 1;"); + println!(" }} catch ({serialization}.FormatException e) {{"); + println!(" System.out.println(\"- pass: (rejected)\");"); + println!(" }} catch (RuntimeException e) {{"); + println!(" System.out.println(\"- fail: wrong exception: \");"); + println!(" e.printStackTrace();"); + println!(" result = 1;"); + println!(" }}"); + println!(" }}"); + println!(" System.exit(result);"); + println!(" }}"); + println!("}}"); +} diff --git a/lib/rust/parser/generate-java/src/lib.rs b/lib/rust/parser/generate-java/src/lib.rs new file mode 100644 index 0000000000..3059d34abf --- /dev/null +++ b/lib/rust/parser/generate-java/src/lib.rs @@ -0,0 +1,83 @@ +//! Supports generation of Java types corresponding to `enso-parser`'s AST types, and testing and +//! debugging the translation process. 
+ +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +use enso_metamodel::meta; +use enso_reflect::Reflect; + + +// ============== +// === Export === +// ============== + +pub mod serialization; + + + +// ===================== +// === Configuration === +// ===================== + +/// The package for the generated code. +pub const PACKAGE: &str = "org.enso.syntax2"; +/// The package for the non-generated serialization support code. +pub const SERIALIZATION_SUPPORT: &str = "org.enso.syntax2.serialization"; +/// The fully-qualified name of an `Either` type. +pub const EITHER_TYPE: &str = "org.enso.syntax2.serialization.Either"; + + + +// ================== +// === Test Cases === +// ================== + +use enso_parser::syntax; + +/// Generate accept/reject test case set for the parser types rooted at `syntax::Tree`. +pub fn generate_testcases() -> meta::serialization::TestCases { + let root = syntax::Tree::reflect(); + let root_id = root.id; + let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(root); + let root = rust_to_meta[&root_id]; + meta::serialization::testcases(&graph, root) +} + + + +// =========================== +// === Rust Format Testing === +// =========================== + +#[cfg(test)] +mod test { + /// Check Rust deserialization against test cases. 
+ #[test] + fn test_format() { + let cases = super::generate_testcases(); + for (i, case) in cases.accept.iter().enumerate() { + if let Err(err) = enso_parser::serialization::deserialize_tree(case) { + panic!("accept{i} fail: {err:?}"); + } + } + for (i, case) in cases.reject.iter().enumerate() { + if let Ok(tree) = enso_parser::serialization::deserialize_tree(case) { + panic!("reject{i} fail: accepted: {tree:?}"); + } + } + } +} diff --git a/lib/rust/parser/generate-java/src/main.rs b/lib/rust/parser/generate-java/src/main.rs new file mode 100644 index 0000000000..5325f4697d --- /dev/null +++ b/lib/rust/parser/generate-java/src/main.rs @@ -0,0 +1,55 @@ +//! Generate the Java types corresponding to `enso-parser`'s AST types. +//! +//! # Usage +//! +//! Generated files will be placed in the directory given as an argument: +//! ```console +//! generate-java org/enso/syntax2/ +//! ``` + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +use enso_metamodel::java; +use enso_metamodel::rust; +use enso_parser_generate_java::serialization; +use enso_reflect::Reflect; + + + +// ======================= +// === Java Generation === +// ======================= + +fn main() { + let ast = enso_parser::syntax::Tree::reflect(); + let tree = enso_parser::syntax::Tree::reflect().id; + let token = enso_parser::syntax::Token::::reflect().id; + let (graph, rust_to_meta) = rust::to_meta(ast); + let (graph, meta_to_java) = java::from_meta(&graph, enso_parser_generate_java::EITHER_TYPE); + let mut graph = java::transform::optional_to_null(graph); + let 
rust_to_java = |id| meta_to_java[&rust_to_meta[&id]]; + let (tree, token) = (rust_to_java(tree), rust_to_java(token)); + serialization::derive(&mut graph, tree, token); + let graph = java::to_syntax(&graph, enso_parser_generate_java::PACKAGE); + let mut args = std::env::args(); + args.next().unwrap(); + let dir = args.next().expect("Usage: generate-java "); + for class in graph { + let code = class.to_string(); + std::fs::write(format!("{}/{}.java", &dir, &class.name), &code).unwrap(); + } +} diff --git a/lib/rust/parser/generate-java/src/serialization.rs b/lib/rust/parser/generate-java/src/serialization.rs new file mode 100644 index 0000000000..cdc4677b04 --- /dev/null +++ b/lib/rust/parser/generate-java/src/serialization.rs @@ -0,0 +1,103 @@ +//! Serialization overrides for the `enso_parser` types. + +use enso_metamodel::java::*; + +use enso_metamodel::java::bincode::MapperInput; +use enso_metamodel::java::bincode::MaterializerInput; + + + +// ============================== +// === Derive Deserialization === +// ============================== + +// FIXME: After we have implemented a transformation from the raw `Reflect` output to a +// `rust::TypeGraph`, at which time we can assign unique `FieldId`s: We should identify +// generated fields in Java classes by starting from a `str -> rust::FieldId` query on Rust +// type data, and mapping fields analogously to `rust_to_java` for types. +const CODE_GETTER: &str = "codeRepr"; +const TREE_BEGIN: &str = "spanLeftOffsetCodeReprBegin"; +const TREE_LEN: &str = "spanLeftOffsetCodeReprLen"; +const TOKEN_BEGIN: &str = "codeReprBegin"; +const TOKEN_LEN: &str = "codeReprLen"; +const TOKEN_OFFSET_BEGIN: &str = "leftOffsetCodeReprBegin"; +//const TOKEN_OFFSET_LEN: &str = "leftOffsetCodeReprLen"; + +/// Derive deserialization for all types in the typegraph. 
+pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) { + let source = "source"; + impl_deserialize(graph, tree, token, source); + graph[token].methods.push(impl_getter(CODE_GETTER, source, TOKEN_BEGIN, TOKEN_LEN)); + graph[tree].methods.push(impl_getter(CODE_GETTER, source, TREE_BEGIN, TREE_LEN)); +} + + +// === Deserialization Methods === + +fn impl_deserialize(graph: &mut TypeGraph, tree: ClassId, token: ClassId, source: &str) { + // Add source field to parent types. + let buffer = Class::builtin("java.nio.ByteBuffer", vec![]); + let buffer = graph.classes.insert(buffer); + let tree_source_ = Field::object(source, buffer, true); + let tree_source = tree_source_.id(); + graph[tree].fields.push(tree_source_); + let token_source_ = Field::object(source, buffer, true); + let token_source = token_source_.id(); + graph[token].fields.push(token_source_); + let ids: Vec<_> = graph.classes.keys().collect(); + for id in ids { + let class = &graph[id]; + let mut deserialization = + bincode::DeserializerBuilder::new(id, crate::SERIALIZATION_SUPPORT, crate::EITHER_TYPE); + match () { + // Base classes: Map the code repr fields. + _ if id == tree => { + let code_begin = class.find_field(TREE_BEGIN).unwrap().id(); + deserialization.map(code_begin, offset_mapper()); + } + _ if id == token => { + let code_begin = class.find_field(TOKEN_BEGIN).unwrap().id(); + let offset_begin = class.find_field(TOKEN_OFFSET_BEGIN).unwrap().id(); + deserialization.map(code_begin, offset_mapper()); + deserialization.map(offset_begin, offset_mapper()); + } + // Child classes: Pass context object from deserializer to parent. + _ if class.parent == Some(tree) => + deserialization.materialize(tree_source, context_materializer()), + _ if class.parent == Some(token) => + deserialization.materialize(token_source, context_materializer()), + // Everything else: Standard deserialization. 
+ _ => (), + } + let deserializer = deserialization.build(graph); + graph[id].methods.push(deserializer); + } +} + +fn context_materializer() -> impl for<'a> FnOnce(MaterializerInput<'a>) -> String + 'static { + |MaterializerInput { message }| format!("{message}.context()") +} +fn offset_mapper() -> impl for<'a, 'b> FnOnce(MapperInput<'a, 'b>) -> String + 'static { + |MapperInput { message, value }| format!("{message}.offset({value})") +} + + +// === Source Code Getters === + +fn impl_getter(name: &str, buffer: &str, begin: &str, len: &str) -> Method { + use std::fmt::Write; + let mut body = String::new(); + let serialization = crate::SERIALIZATION_SUPPORT; + let exception = format!("{serialization}.FormatException"); + writeln!(body, "byte[] dst = new byte[{len}];").unwrap(); + writeln!(body, "{buffer}.position({begin});").unwrap(); + writeln!(body, "{buffer}.get(dst);").unwrap(); + writeln!(body, "try {{").unwrap(); + writeln!(body, " return new String(dst, \"UTF-8\");").unwrap(); + writeln!(body, "}} catch (java.io.UnsupportedEncodingException e) {{").unwrap(); + writeln!(body, " throw new {exception}(\"Expected UTF-8\", e);").unwrap(); + writeln!(body, "}}").unwrap(); + let mut method = syntax::Method::new(name, syntax::Type::named("String")); + method.body = body; + Method::Raw(method) +} diff --git a/lib/rust/parser/src/main.rs b/lib/rust/parser/src/main.rs index cfd38dbf7c..825e2eea33 100644 --- a/lib/rust/parser/src/main.rs +++ b/lib/rust/parser/src/main.rs @@ -107,6 +107,7 @@ use crate::prelude::*; pub mod lexer; pub mod macros; +pub mod serialization; pub mod source; pub mod syntax; @@ -114,7 +115,10 @@ pub mod syntax; /// Popular utilities, imported by most modules of this crate. 
pub mod prelude { + pub use enso_prelude::serde_reexports::*; pub use enso_prelude::*; + pub use enso_reflect as reflect; + pub use enso_reflect::Reflect; pub use enso_types::traits::*; pub use enso_types::unit2::Bytes; } diff --git a/lib/rust/parser/src/serialization.rs b/lib/rust/parser/src/serialization.rs new file mode 100644 index 0000000000..c823646648 --- /dev/null +++ b/lib/rust/parser/src/serialization.rs @@ -0,0 +1,91 @@ +//! Serialization/deserialization support. +//! +//! Deserialization is used only for testing, but it is used by dependent crates, so it cannot be +//! gated with `#[cfg(test)]`. + +use crate::prelude::*; + + + +// ============ +// === Tree === +// ============ + +/// Deserialize a `Tree` from its binary representation. +pub fn deserialize_tree(data: &[u8]) -> Result { + use bincode::Options; + let options = bincode::DefaultOptions::new().with_fixint_encoding(); + options.deserialize(data) +} + + + +// ============ +// === Code === +// ============ + +/// Serialized representation of a source code `Cow`. +#[derive(Serialize, Reflect)] +pub(crate) struct Code { + #[reflect(hide)] + begin: u32, + #[reflect(hide)] + len: u32, +} + +/// Serde wrapper to serialize a `Cow` as the `Code` representation. +#[allow(clippy::ptr_arg)] // This is the signature required by serde. +pub(crate) fn serialize_cow(cow: &Cow<'_, str>, ser: S) -> Result +where S: serde::Serializer { + let s = match cow { + Cow::Borrowed(s) => *s, + Cow::Owned(_) => panic!(), + }; + let begin = s.as_ptr() as u32; + let len = s.len() as u32; + let serializable = Code { begin, len }; + serializable.serialize(ser) +} + +pub(crate) fn deserialize_cow<'c, 'de, D>(deserializer: D) -> Result, D::Error> +where D: serde::Deserializer<'de> { + let _ = deserializer.deserialize_u64(DeserializeU64); + Ok(Cow::Owned(String::new())) +} + + + +// ============= +// === Error === +// ============= + +/// Deserialization type for `crate::syntax::tree::Error`. 
+#[derive(Deserialize, Debug, Clone)] +pub(crate) struct Error(String); + +impl From for crate::syntax::tree::Error { + fn from(_: Error) -> Self { + crate::syntax::tree::Error { message: "" } + } +} + + + +// ================ +// === Visitors === +// ================ + +struct DeserializeU64; + +impl<'de> serde::de::Visitor<'de> for DeserializeU64 { + type Value = u64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "An unsigned 64-bit integer.") + } + + fn visit_u64(self, i: u64) -> Result + where E: serde::de::Error { + Ok(i) + } +} diff --git a/lib/rust/parser/src/source/code.rs b/lib/rust/parser/src/source/code.rs index b3530c7534..affbaad783 100644 --- a/lib/rust/parser/src/source/code.rs +++ b/lib/rust/parser/src/source/code.rs @@ -9,10 +9,13 @@ use crate::prelude::*; // ============ /// A code representation. It can either be a borrowed source code or a modified owned one. -#[derive(Clone, Default, Eq, PartialEq, From, Into, Shrinkwrap)] +#[derive(Clone, Default, Eq, PartialEq, From, Into, Shrinkwrap, Serialize, Reflect, Deserialize)] #[shrinkwrap(mutable)] #[allow(missing_docs)] pub struct Code<'s> { + #[serde(serialize_with = "crate::serialization::serialize_cow")] + #[serde(deserialize_with = "crate::serialization::deserialize_cow")] + #[reflect(as = "crate::serialization::Code", flatten)] pub repr: Cow<'s, str>, } diff --git a/lib/rust/parser/src/source/span.rs b/lib/rust/parser/src/source/span.rs index 5228b7aa4f..110d2e3305 100644 --- a/lib/rust/parser/src/source/span.rs +++ b/lib/rust/parser/src/source/span.rs @@ -23,11 +23,10 @@ pub mod traits { /// A strongly typed visible offset size. For example, a space character has value of 1, while the /// tab character has value of 4. For other space-like character sizes, refer to the lexer /// implementation. 
-#[derive( - Clone, Copy, Debug, Default, From, Into, Add, AddAssign, Sub, PartialEq, Eq, Hash, PartialOrd, - Ord -)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(From, Into, Add, AddAssign, Sub, Reflect, Serialize, Deserialize)] #[allow(missing_docs)] +#[reflect(transparent)] pub struct VisibleOffset { pub width_in_spaces: usize, } @@ -60,10 +59,11 @@ impl From<&str> for VisibleOffset { /// Offset information. In most cases it is used to express the left-hand-side whitespace offset /// for tokens and AST nodes. -#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Offset<'s> { pub visible: VisibleOffset, + #[reflect(flatten)] pub code: Code<'s>, } @@ -129,11 +129,13 @@ impl<'s> std::ops::AddAssign<&Offset<'s>> for Offset<'s> { /// element. This is done in order to not duplicate the data. For example, some AST nodes contain a /// lot of tokens. They need to remember their span, but they do not need to remember their code, /// because it is already stored in the tokens. -#[derive(Clone, Debug, Default, Eq, PartialEq)] +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Span<'s> { + #[reflect(flatten)] pub left_offset: Offset<'s>, /// The length of the code, excluding [`left_offset`]. + #[reflect(hide)] pub code_length: Bytes, } diff --git a/lib/rust/parser/src/syntax/token.rs b/lib/rust/parser/src/syntax/token.rs index 0dd073ecc3..b8c4e1784c 100644 --- a/lib/rust/parser/src/syntax/token.rs +++ b/lib/rust/parser/src/syntax/token.rs @@ -103,13 +103,16 @@ use enso_shapely_macros::tagged_enum; // ============= /// The lexical token definition. See the module docs to learn more about its usage scenarios. 
-#[derive(Clone, Deref, DerefMut, Eq, PartialEq)] +#[derive(Clone, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Token<'s, T = Variant> { #[deref] #[deref_mut] + #[reflect(subtype)] pub variant: T, + #[reflect(flatten, hide)] pub left_offset: Offset<'s>, + #[reflect(flatten, hide)] pub code: Code<'s>, } @@ -241,8 +244,10 @@ impl<'s, 'a, T: Debug> Debug for Ref<'s, 'a, T> { macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)* /// Elements that can be found in the source code. #[tagged_enum] - #[derive(Clone, Copy, PartialEq, Eq)] + #[derive(Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] + #[tagged_enum(apply_attributes_to = "variants")] + #[reflect(inline)] pub enum Variant { Newline, Symbol, diff --git a/lib/rust/parser/src/syntax/tree.rs b/lib/rust/parser/src/syntax/tree.rs index 22ef4c1b41..ca90c84187 100644 --- a/lib/rust/parser/src/syntax/tree.rs +++ b/lib/rust/parser/src/syntax/tree.rs @@ -16,12 +16,14 @@ use enso_shapely_macros::tagged_enum; // ============ /// The Abstract Syntax Tree of the language. -#[derive(Clone, Deref, DerefMut, Eq, PartialEq)] +#[derive(Clone, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct Tree<'s> { #[deref] #[deref_mut] + #[reflect(subtype)] pub variant: Box>, + #[reflect(flatten)] pub span: Span<'s>, } @@ -57,7 +59,9 @@ impl<'s> AsRef> for Tree<'s> { macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)* /// [`Tree`] variants definition. See its docs to learn more. #[tagged_enum] - #[derive(Clone, Eq, PartialEq, Visitor)] + #[derive(Clone, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] + #[tagged_enum(apply_attributes_to = "variants")] + #[reflect(inline)] pub enum Variant<'s> { /// Invalid [`Tree`] fragment with an attached [`Error`]. 
Invalid { @@ -146,9 +150,12 @@ with_ast_definition!(generate_ast_definition()); // === Invalid === /// Error of parsing attached to an [`Tree`] node. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] +#[reflect(transparent)] +#[serde(from = "crate::serialization::Error")] pub struct Error { + #[serde(skip_deserializing)] pub message: &'static str, } @@ -179,7 +186,7 @@ impl<'s> span::Builder<'s> for Error { pub type OperatorOrError<'s> = Result, MultipleOperatorError<'s>>; /// Error indicating multiple operators found next to each other, like `a + * b`. -#[derive(Clone, Debug, Eq, PartialEq, Visitor)] +#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct MultipleOperatorError<'s> { pub operators: NonEmptyVec>, @@ -195,7 +202,7 @@ impl<'s> span::Builder<'s> for MultipleOperatorError<'s> { // === MultiSegmentApp === /// A segment of [`MultiSegmentApp`], like `if cond` in the `if cond then ok else fail` expression. -#[derive(Clone, Debug, Eq, PartialEq, Visitor)] +#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct MultiSegmentAppSegment<'s> { pub header: Token<'s>, @@ -374,7 +381,10 @@ macro_rules! define_visitor_for_tokens { ( $(#$kind_meta:tt)* pub enum $kind:ident { - $( $variant:ident $({$($args:tt)*})? ),* $(,)? + $( + $(#$variant_meta:tt)* + $variant:ident $({$($args:tt)*})? + ),* $(,)? 
} ) => { impl<'s, 'a> TreeVisitable<'s, 'a> for token::$kind {} diff --git a/lib/rust/prelude/Cargo.toml b/lib/rust/prelude/Cargo.toml index 77ab0d5177..e1825d880f 100644 --- a/lib/rust/prelude/Cargo.toml +++ b/lib/rust/prelude/Cargo.toml @@ -16,6 +16,7 @@ publish = true crate-type = ["cdylib", "rlib"] [dependencies] +enso-reflect = { path = "../reflect" } enso-shapely = { version = "^0.2.0", path = "../shapely" } anyhow = "1.0.37" assert_approx_eq = { version = "1.1.0" } diff --git a/lib/rust/prelude/src/data/non_empty_vec.rs b/lib/rust/prelude/src/data/non_empty_vec.rs index d8f1449868..c24ca0eae6 100644 --- a/lib/rust/prelude/src/data/non_empty_vec.rs +++ b/lib/rust/prelude/src/data/non_empty_vec.rs @@ -14,7 +14,10 @@ use std::vec::Splice; /// A version of [`std::vec::Vec`] that can't be empty. #[allow(missing_docs)] -#[derive(Clone, Debug, Eq, PartialEq, Deref, DerefMut)] +#[derive(Clone, Debug, Eq, PartialEq, Deref, DerefMut, Reflect)] +#[reflect(transparent)] +#[cfg_attr(feature = "serde", derive(crate::serde_reexports::Serialize))] +#[cfg_attr(feature = "serde", derive(crate::serde_reexports::Deserialize))] pub struct NonEmptyVec { pub elems: Vec, } diff --git a/lib/rust/prelude/src/lib.rs b/lib/rust/prelude/src/lib.rs index f592adcdc5..8eee7dda6b 100644 --- a/lib/rust/prelude/src/lib.rs +++ b/lib/rust/prelude/src/lib.rs @@ -91,6 +91,8 @@ pub use std::collections::hash_map::DefaultHasher; pub use std::hash::Hash; pub use std::hash::Hasher; +pub use enso_reflect::prelude::*; + use std::cell::UnsafeCell; diff --git a/lib/rust/reflect/Cargo.toml b/lib/rust/reflect/Cargo.toml new file mode 100644 index 0000000000..65345a8e05 --- /dev/null +++ b/lib/rust/reflect/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "enso-reflect" +version = "0.1.0" +edition = "2021" +authors = ["Enso Team "] + +[dependencies] +enso-reflect-macros = { path = "macros" } +enso-metamodel = { path = "../metamodel", features = ["rust"] } +derivative = "2.2" + +[features] +graphviz = 
["enso-metamodel/graphviz"] diff --git a/lib/rust/reflect/macros/Cargo.toml b/lib/rust/reflect/macros/Cargo.toml new file mode 100644 index 0000000000..ab864a0efb --- /dev/null +++ b/lib/rust/reflect/macros/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "enso-reflect-macros" +version = "0.1.0" +edition = "2021" +authors = ["Enso Team "] + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +syn = { version = "1.0", features = [ + "full", + "extra-traits", + "printing", + "parsing" +] } diff --git a/lib/rust/reflect/macros/src/analyze.rs b/lib/rust/reflect/macros/src/analyze.rs new file mode 100644 index 0000000000..4a3115993c --- /dev/null +++ b/lib/rust/reflect/macros/src/analyze.rs @@ -0,0 +1,313 @@ +//! Parse syntax into (macro execution-time) representations of data structure definitions. + +use super::*; + +use quote::ToTokens; +use syn::punctuated::Punctuated; +use syn::DeriveInput; +use syn::GenericParam; +use syn::Token; + + + +// =============== +// === Analyze === +// =============== + +/// Parse `syn` syntax and produce type definitions. 
+pub(crate) fn analyze(input: TokenStream) -> Type { + let input = syn::parse2::(input).unwrap(); + let ident = input.ident; + let attrs: ContainerAttrs = input.attrs.iter().collect(); + let mut lifetimes: Punctuated<_, Token![,]> = Punctuated::new(); + let mut generic_params: Punctuated<_, Token![,]> = Punctuated::new(); + for param in input.generics.params { + match param { + GenericParam::Type(type_) => generic_params.push(type_.ident.to_token_stream()), + GenericParam::Lifetime(lifetime) => lifetimes.push(lifetime.lifetime.to_token_stream()), + GenericParam::Const(_) => unimplemented!("Reflect for const generics."), + } + } + let mut generics = lifetimes.clone(); + generics.extend(generic_params.clone()); + let data = match input.data { + syn::Data::Struct(struct_) => Data::Struct(parse_fields(struct_.fields)), + syn::Data::Enum(enum_) => + Data::Enum(enum_.variants.into_iter().map(Variant::from).collect()), + syn::Data::Union(_) => unimplemented!("Reflect for `union`s."), + }; + Type { ident, generics, lifetimes, generic_params, data, attrs } +} + + + +// =============== +// === Parsing === +// =============== + +fn parse_fields(fields: syn::Fields) -> Fields { + match fields { + syn::Fields::Named(syn_fields) => { + let mut fields = vec![]; + 'fields: for field in syn_fields.named { + let mut field_ = NamedField::new(field.ident.unwrap(), field.ty); + let mut annotations = Default::default(); + for attr in field.attrs { + parse_field_attrs(&attr, &mut annotations); + } + for annotation in annotations { + match annotation { + FieldAttr::Flatten => field_.flatten = true, + FieldAttr::Hide => field_.hide = true, + FieldAttr::Subtype => field_.subtype = true, + FieldAttr::As(ty) => field_.refer = Some(ty), + // NOTE: Implementing `skip` at analysis time makes our Rust information + // incomplete. 
For `reflect` to be used to generate Rust deserialization + // code, we'd need to emit a field with a type that is a marker type, + // which we'd filter out when abstracting. + FieldAttr::Skip => continue 'fields, + } + } + fields.push(field_); + } + Fields::Named { fields } + } + syn::Fields::Unnamed(fields) => + Fields::Unnamed(fields.unnamed.into_iter().map(UnnamedField::from).collect()), + syn::Fields::Unit => Fields::Unit, + } +} + +impl From for UnnamedField { + fn from(field: syn::Field) -> Self { + let type_ = field.ty; + UnnamedField { type_ } + } +} + +impl From for Variant { + fn from(variant: syn::Variant) -> Self { + if variant.discriminant.is_some() { + unimplemented!("Explicit discriminators."); + } + let fields = parse_fields(variant.fields); + let mut transparent = false; + let mut annotations = Default::default(); + for attr in &variant.attrs { + parse_variant_attrs(attr, &mut annotations); + } + for annotation in annotations { + match annotation { + VariantAttr::Inline => transparent = true, + } + } + let ident = variant.ident; + Variant { ident, fields, transparent } + } +} + + + +// ========================= +// === Helper attributes === +// ========================= + +/// Helper attribute identifier. Must match the value `attributes(_)` parameter in the +/// `proc_macro_derive` annotation on this crate's entry point. 
+const HELPER_ATTRIBUTE_PATH: &str = "reflect"; +const INVALID_HELPER_SYNTAX: &str = "Unknown helper attribute syntax."; +const UNKNOWN_HELPER: &str = "Unknown helper attribute."; + + +// === Field Attributes === + +#[derive(PartialEq, Eq)] +enum FieldAttr { + Flatten, + Hide, + Skip, + Subtype, + As(Box), +} + +fn parse_field_attrs(attr: &syn::Attribute, out: &mut Vec) { + if attr.style != syn::AttrStyle::Outer { + return; + } + match attr.path.get_ident() { + Some(ident) if ident == HELPER_ATTRIBUTE_PATH => (), + _ => return, + } + let meta = attr.parse_meta().expect(INVALID_HELPER_SYNTAX); + match meta { + syn::Meta::List(metalist) => + out.extend(metalist.nested.iter().map(|meta| parse_field_annotation(meta, attr))), + syn::Meta::Path(_) | syn::Meta::NameValue(_) => + panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.to_token_stream()), + } +} + +fn parse_field_annotation(meta: &syn::NestedMeta, attr: &syn::Attribute) -> FieldAttr { + let meta = match meta { + syn::NestedMeta::Meta(meta) => meta, + _ => panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.into_token_stream()), + }; + match meta { + syn::Meta::Path(path) => { + let ident = path.get_ident().expect(INVALID_HELPER_SYNTAX); + match ident.to_string().as_str() { + "flatten" => FieldAttr::Flatten, + "hide" => FieldAttr::Hide, + "skip" => FieldAttr::Skip, + "subtype" => FieldAttr::Subtype, + _ => panic!("{}: {}.", UNKNOWN_HELPER, ident.into_token_stream()), + } + } + syn::Meta::NameValue(syn::MetaNameValue { path, lit: syn::Lit::Str(lit), .. 
}) => { + let ident = path.get_ident().expect(INVALID_HELPER_SYNTAX); + match ident.to_string().as_str() { + "as" => FieldAttr::As(Box::new(lit.parse().expect(INVALID_HELPER_SYNTAX))), + _ => panic!("{}: {}.", UNKNOWN_HELPER, ident.into_token_stream()), + } + } + _ => panic!("{}: {}.", INVALID_HELPER_SYNTAX, attr.into_token_stream()), + } +} + + +// === Variant Attributes === + +#[derive(PartialEq, Eq)] +enum VariantAttr { + Inline, +} + +fn parse_variant_attrs(attr: &syn::Attribute, out: &mut Vec) { + if attr.style != syn::AttrStyle::Outer { + return; + } + match attr.path.get_ident() { + Some(ident) if ident == HELPER_ATTRIBUTE_PATH => (), + _ => return, + } + let meta = attr.parse_meta().expect(INVALID_HELPER_SYNTAX); + match meta { + syn::Meta::List(metalist) => { + let parse = |meta| match parse_meta_ident(meta).to_string().as_str() { + "inline" => VariantAttr::Inline, + _ => panic!("{}: {}.", UNKNOWN_HELPER, meta.into_token_stream()), + }; + out.extend(metalist.nested.iter().map(parse)); + } + syn::Meta::Path(_) | syn::Meta::NameValue(_) => + panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.into_token_stream()), + } +} + + +// === Container Attributes === + +#[derive(PartialEq, Eq)] +enum ContainerAttr { + Transparent, +} + +fn parse_container_attrs(attr: &syn::Attribute, out: &mut Vec) { + if attr.style != syn::AttrStyle::Outer { + return; + } + match attr.path.get_ident() { + Some(ident) if ident == HELPER_ATTRIBUTE_PATH => (), + _ => return, + } + let meta = attr.parse_meta().expect(INVALID_HELPER_SYNTAX); + match meta { + syn::Meta::List(metalist) => { + let parse = |meta| match parse_meta_ident(meta).to_string().as_str() { + "transparent" => ContainerAttr::Transparent, + _ => panic!("{}: {}.", UNKNOWN_HELPER, attr.into_token_stream()), + }; + out.extend(metalist.nested.iter().map(parse)); + } + syn::Meta::Path(_) | syn::Meta::NameValue(_) => + panic!("{}: {}.", INVALID_HELPER_SYNTAX, attr.into_token_stream()), + } +} + +impl<'a> FromIterator<&'a 
syn::Attribute> for ContainerAttrs { + fn from_iter>(iter: T) -> Self { + let mut transparent = false; + let mut annotations = Default::default(); + for attr in iter { + parse_container_attrs(attr, &mut annotations); + } + for annotation in annotations { + match annotation { + ContainerAttr::Transparent => transparent = true, + } + } + ContainerAttrs { transparent } + } +} + + +// === Helpers === + +fn parse_meta_ident(meta: &syn::NestedMeta) -> &syn::Ident { + let path = match meta { + syn::NestedMeta::Meta(syn::Meta::Path(path)) => path, + _ => panic!("{}: {}.", INVALID_HELPER_SYNTAX, meta.into_token_stream()), + }; + path.get_ident().expect(INVALID_HELPER_SYNTAX) +} + + + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +mod tests { + use super::analyze::analyze; + use quote::quote; + + #[test] + fn accept_inputs() { + let inputs = [ + quote! { + struct Foo; + }, + quote! { + struct Bar { + bar: u32, + baar: &'static str, + } + }, + quote! { + enum Baz { + Bar(Bar), + Baz, + } + }, + quote! { + struct Quux { + quux: T, + } + }, + quote! { + struct Quuux { + quux: Box, + } + }, + quote! { + struct Code<'s> { + repr: std::borrow::Cow<'s, str>, + } + }, + ]; + for input in inputs { + analyze(input); + } + } +} diff --git a/lib/rust/reflect/macros/src/lib.rs b/lib/rust/reflect/macros/src/lib.rs new file mode 100644 index 0000000000..fc53258d13 --- /dev/null +++ b/lib/rust/reflect/macros/src/lib.rs @@ -0,0 +1,267 @@ +//! # Rust reflection +//! +//! This crate implements a macro, `#[derive(Reflect)]`, which adds runtime reflection support to +//! datatype definitions. Its main motivation is type-driven code generation. +//! +//! ## General Attributes +//! +//! ### `#[reflect(skip)]` (field attribute) +//! The field will be excluded from reflection data. +//! When this attribute is present, the field's type does not need to implement `Reflect`. +//! +//! ### `#[reflect(as = "OtherType")]` (field attribute) +//! 
The field's type in the reflection data will be `OtherType` rather than the field's real type. +//! When this attribute is present, the field's real type does not need to implement `Reflect`. The +//! alternative type specified must implement `Reflect`. +//! +//! ## Attributes for Abstraction Customization +//! +//! Application of `#[derive(Reflect)]` to data types is enough to enable reflection over Rust +//! types. However, if the types will be abstracted with `enso_metamodel::meta` (i.e. for +//! transpilation to another language), some customization is likely: Direct translation into +//! another language would reproduce Rust patterns where they are likely not necessary (on top of +//! the target-language patterns introduced by the translation), resulting in an overly-complex +//! data model. In order to avert this (without using heuristics, which would result in +//! unpredictable output), this crate supports helper attributes to inform the abstractor about the +//! use of Rust patterns that can be erased in translation. +//! +//! ### `#[reflect(transparent)]` (struct attribute) +//! Only applicable to single-field `struct`s. The type will not appear in abstracted reflection +//! data; all references will appear as references to the contained type. +//! +//! ### `#[reflect(hide)]` (field attribute) +//! In target languages that support it, the field will be hidden from direct access. In the Java +//! target, this prevents the generation of accessors. +//! +//! ### `#[reflect(flatten)]` (field attribute) +//! In abstracted reflection data, the field will be replaced in this `struct` with the contents of +//! its type, which must be a `struct` type. +//! +//! To reduce the chance of name conflicts, the names of inserted fields will be created by +//! prepending the name of the flattened-away field to the names of the fields originating from the +//! inner type. Other field attributes such as [`hide`](#reflecthide-field-attribute) that were +//!
applied to the flattened field will be inherited by the inserted fields. +//! +//! #### Example: +//! This input code: +//! ```ignore +//! #[derive(Reflect)] +//! struct Outer { +//! first: u32, +//! #[reflect(flatten, hide)] +//! inner: Inner, +//! last: u32, +//! } +//! +//! #[derive(Reflect)] +//! struct Inner { +//! value0: u32, +//! value1: u32, +//! } +//! ``` +//! +//! Will be represented the same as this input: +//! ```ignore +//! #[derive(Reflect)] +//! struct Outer { +//! first: u32, +//! #[reflect(hide)] +//! inner_value0: u32, +//! #[reflect(hide)] +//! inner_value1: u32, +//! last: u32, +//! } +//! ``` +//! +//! ### `#[reflect(subtype)]` (field attribute) +//! In the abstracted representation, the containing type will be made the parent of the field's +//! type. There must be no references to the field's type except through the containing type. The +//! field's type must be an `enum`, or a generic parameter. +//! If the field's type is a generic parameter, the parameter must be instantiated with one `enum`, +//! and may be instantiated with any types that are members of the `enum` (this can only occur with +//! `#[reflect(inline)]`; see below). References to the type instantiated with the `enum` will +//! become references to the resulting parent type; references to `struct` instantiations will +//! become references to the resulting child types. +//! +//! ### `#[reflect(inline)]` (`enum` variant attribute) +//! In the abstracted representation, no type will be generated for the variant (which must be a +//! single-field variant); the contained type will instead be treated as a member of the `enum`. +//! +//! # Using `#[derive(Reflect)]` versus writing a proc macro +//! +//! Proc macros have some limitations. A proc macro should be: +//! - A pure function +//! - from syntax to syntax +//! - operating on each item in isolation. +//! +//! This crate doesn't have these limitations; it supports reasoning about the whole typegraph, and +//!
has no restrictions about side effects. However, a user of this crate must depend on its subject +//! code to obtain the reflection data at runtime; so automatic generation of Rust code during +//! compilation requires use of a build script to perform the reflection/code-generation step. + +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +use proc_macro2::TokenStream; +use quote::quote; +use quote::ToTokens; +use syn::punctuated::Punctuated; +use syn::Token; + + + +mod analyze; +mod runtime; + +use runtime::Quote; + + + +// ======================== +// === Type Definitions === +// ======================== + +/// Represents a type definition. 
+#[derive(Debug)] +pub(crate) struct Type { + ident: syn::Ident, + generics: Punctuated, + lifetimes: Punctuated, + generic_params: Punctuated, + data: Data, + attrs: ContainerAttrs, +} + +#[derive(Debug)] +enum Data { + Struct(Fields), + Enum(Vec), +} + +#[derive(Debug)] +struct NamedField { + name: syn::Ident, + type_: syn::Type, + subtype: bool, + refer: Option>, + flatten: bool, + hide: bool, +} + +impl NamedField { + pub fn new(name: syn::Ident, type_: syn::Type) -> Self { + let subtype = Default::default(); + let refer = Default::default(); + let flatten = Default::default(); + let hide = Default::default(); + Self { name, type_, subtype, refer, flatten, hide } + } +} + +#[derive(Debug)] +struct UnnamedField { + type_: syn::Type, +} + +#[derive(Debug)] +enum Fields { + Named { fields: Vec }, + Unnamed(Vec), + Unit, +} + +#[derive(Debug)] +struct Variant { + ident: syn::Ident, + fields: Fields, + transparent: bool, +} + +#[derive(Debug, Default)] +struct ContainerAttrs { + /// If true, the container must have exactly one field; the container will not appear in + /// reflection data; all references to it are treated as references to the contained type. + transparent: bool, +} + + + +// ====================== +// === Derive Reflect === +// ====================== + +/// Derive a function providing information at runtime about the type's definition. See [`crate`] +/// for detailed documentation. +#[proc_macro_derive(Reflect, attributes(reflect))] +pub fn derive_reflect(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let type_ = analyze::analyze(input.into()); + let ident = &type_.ident; + let generics = &type_.generics; + let mut generic_bounds = type_.lifetimes.clone(); + let with_bound = |param| (quote! 
{ #param: enso_reflect::Reflect }).into_token_stream(); + let type_bounds = type_.generic_params.iter().map(with_bound); + generic_bounds.extend(type_bounds); + let type_expr = type_.quote(); + let static_lifetimes: Vec<_> = type_.lifetimes.iter().map(|_| quote! { 'static }).collect(); + let to_static = + |param| (quote! { <#param as enso_reflect::Reflect>::Static }).into_token_stream(); + let static_types = type_.generic_params.iter().map(to_static); + let mut static_params = vec![]; + static_params.extend(static_lifetimes.iter().cloned()); + static_params.extend(static_types); + let mut subtype_erased = quote! { Self::Static }; + if let Some(ty) = subtype_field_type(&type_.data) { + let erased_types = type_.generic_params.iter().cloned().map(|param| { + let param_ty: syn::Type = syn::parse2(param.clone()).unwrap(); + if param_ty == ty { + quote! { () } + } else { + param + } + }); + let mut erased_params = vec![]; + erased_params.extend(static_lifetimes.iter().cloned()); + erased_params.extend(erased_types); + subtype_erased = quote! { #ident<#(#erased_params),*> }; + } + let impl_reflect = quote! { + impl<#generic_bounds> enso_reflect::Reflect for #ident<#generics> { + type Static = #ident<#(#static_params),*>; + type SubtypeErased = #subtype_erased; + fn reflect() -> enso_reflect::metamodel::rust::TypeData { + #type_expr + } + } + }; + impl_reflect.into() +} + +fn subtype_field_type(data: &Data) -> Option { + match data { + Data::Struct(Fields::Named { fields }) => { + let mut type_ = None; + for field in fields { + if field.subtype { + let err = "A struct cannot have more than one field with #[reflect(subtype)]."; + assert_eq!(type_, None, "{}", err); + type_ = Some(field.type_.clone()); + } + } + type_ + } + _ => None, + } +} diff --git a/lib/rust/reflect/macros/src/runtime.rs b/lib/rust/reflect/macros/src/runtime.rs new file mode 100644 index 0000000000..99222dfeed --- /dev/null +++ b/lib/rust/reflect/macros/src/runtime.rs @@ -0,0 +1,190 @@ +//! 
Convert compile-time type definitions to syntax that evaluates to corresponding runtime values. + +use super::*; + +use syn::punctuated::Punctuated; +use syn::Token; + + + +// ============= +// === Quote === +// ============= + +/// Convert a value to syntax that evaluates to an analogous value at runtime. +pub(crate) trait Quote { + fn quote(&self) -> TokenStream; +} + + +// === Implementations === + +impl Quote for Type { + fn quote(&self) -> TokenStream { + let ident = &self.ident; + let generics = &self.generics; + let data = self.data.quote(self.attrs.transparent); + let name = self.ident.to_string(); + quote! { + enso_reflect::metamodel::rust::TypeData { + id: enso_reflect::type_id::<#ident<#generics>>(), + name: #name.to_owned(), + data: #data, + subtype_erased: enso_reflect::generic_id::(), + } + } + } +} + +impl Data { + fn quote(&self, transparent: bool) -> TokenStream { + match self { + Data::Struct(fields) => { + let fields = fields.quote(); + quote! { + enso_reflect::metamodel::rust::Data::Struct(enso_reflect::metamodel::rust::Struct { + fields: #fields, + transparent: #transparent, + }) + } + } + Data::Enum(variants) => { + assert!(!transparent, "`#[reflect(transparent)]` is not applicable to `enum`s."); + let variants: Punctuated<_, Token![,]> = + variants.iter().map(Quote::quote).collect(); + quote! { + enso_reflect::metamodel::rust::Data::Enum(enso_reflect::metamodel::rust::Enum { + variants: vec![#variants], + }) + } + } + } + } +} + +impl Quote for Fields { + fn quote(&self) -> TokenStream { + match self { + Fields::Named { fields } => { + let fields: Punctuated<_, Token![,]> = fields.iter().map(Quote::quote).collect(); + quote! { enso_reflect::metamodel::rust::Fields::Named(vec![#fields]) } + } + Fields::Unnamed(fields) => { + let fields: Punctuated<_, Token![,]> = fields.iter().map(Quote::quote).collect(); + quote! { enso_reflect::metamodel::rust::Fields::Unnamed(vec![#fields]) } + } + Fields::Unit => quote! 
{ enso_reflect::metamodel::rust::Fields::Unit }, + } + } +} + +impl Quote for NamedField { + fn quote(&self) -> TokenStream { + let name = self.name.to_string(); + let typename = match &self.refer { + Some(ty) => ty, + None => &self.type_, + }; + let subtype = self.subtype; + let flatten = self.flatten; + let hide = self.hide; + quote! { + enso_reflect::metamodel::rust::NamedField { + name: #name.to_owned(), + type_: enso_reflect::reflect_lazy::<#typename>(), + subtype: #subtype, + flatten: #flatten, + hide: #hide, + } + } + } +} + +impl Quote for UnnamedField { + fn quote(&self) -> TokenStream { + let typename = &self.type_; + quote! { + enso_reflect::metamodel::rust::UnnamedField { + type_: enso_reflect::reflect_lazy::<#typename>(), + } + } + } +} + +impl Quote for Variant { + fn quote(&self) -> TokenStream { + let ident = self.ident.to_string(); + let fields = self.fields.quote(); + let inline = self.transparent; + let quoted = quote! { + enso_reflect::metamodel::rust::Variant { + ident: #ident.to_owned(), + fields: #fields, + inline: #inline, + } + }; + quoted + } +} + + + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +mod tests { + use super::analyze::analyze; + use crate::Quote; + use quote::quote; + + #[test] + fn accept_simple_inputs() { + let inputs = [ + quote! { + struct Foo; + }, + quote! { + struct Bar { + bar: Foo, + baar: &'static str, + } + }, + ]; + for input in inputs { + analyze(input).quote(); + } + } + + #[test] + fn accept_generics() { + let inputs = [ + quote! { + struct Quux { + quux: T, + } + }, + quote! { + struct Quuux { + quux: Box, + } + }, + ]; + for input in inputs { + analyze(input).quote(); + } + } + + #[test] + fn accept_generic_lifetimes() { + let inputs = [quote! 
{ + struct Code<'s> { + repr: std::borrow::Cow<'s, str>, + } + }]; + for input in inputs { + analyze(input).quote(); + } + } +} diff --git a/lib/rust/reflect/src/lib.rs b/lib/rust/reflect/src/lib.rs new file mode 100644 index 0000000000..d1e5bf7cad --- /dev/null +++ b/lib/rust/reflect/src/lib.rs @@ -0,0 +1,308 @@ +//! Runtime support crate for [`enso_reflect_macros`]. +//! +//! For data producers: See the docs of [`enso_reflect_macros`] for how to derive [`Reflect`]. It is +//! not recommended to explicitly `impl Reflect`; the derived implementation can be extensively +//! customized through attributes. The meanings of the associated types required by the trait are +//! rather obtuse, and the trait itself should not be considered a stable interface--expect that +//! new, even more obtuse associated types will be added to the trait in the future. +//! +//! For data consumers: The `Reflect` trait can be used to obtain reflection data; after that, the +//! [`enso_metamodel`] crate supports working with it--see the docs there. +//! +//! # Implementation +//! +//! The functionality exposed by the `Reflect` trait is to report information about type +//! relationships--what fields compose a `struct`, what variants compose an `enum`, etc. +//! +//! The chief design constraint of the `Reflect` trait is: It must be possible for a pure function +//! from Rust syntax to Rust syntax, operating on each data type in isolation (e.g. a proc macro) +//! to generate a `Reflect` implementation for any type. +//! +//! ## Producing a type graph from syntax +//! +//! Because Rust doesn't have value-level type information (i.e. it doesn't have a native reflection +//! mechanism), and a `Reflect` implementation must be generatable from syntax, when the `Reflect` +//! implementation needs to refer to another type, it does so by creating an invocation of the +//! `Reflect` method of the type being referred to. However, it cannot call these functions +//! 
directly--type graphs often contain cycles. To address this, the [`enso_metamodel::rust`] +//! representation is based on *lazy* graphs: A reference to a type contains a thunk that can be +//! evaluated to obtain type information. +//! +//! ## Associating unique identifiers with types +//! +//! This solves the problem of producing references between types, potentially cyclic, from syntax. +//! However, the consumer of the data needs more information not to be stymied by the cyclic nature +//! of type graphs; without attaching some notion of identity to the type references, it would be +//! impossible for a data consumer to tell whether they are following a cycle (repeatedly visiting +//! types they've encountered before), or encountering new, similarly-shaped types. +//! +//! Assigning IDs is not straightforward: How does a pure function from the syntax representing a +//! type name to the syntax representing an expression produce something that will evaluate to a +//! value uniquely identifying a type? +//! +//! Referring to the address of the type's `Reflect::reflect` function might seem like a solution, +//! but that isn't reliable--if two `reflect` function bodies compiled to the same code, LLVM might +//! implement them both with one function; conversely, one function in the source code can have +//! multiple addresses, for example if a generic type is instantiated with the same parameters in +//! different compilation units. +//! +//! Fortunately, there is an answer: `std::any::TypeId::of::<T>()` returns a value uniquely +//! identifying any type `T`. "Wait," you ask--"`std::any::TypeId::of::<T>` has a `T: 'static` +//! bound! How could we possibly use it to implement a trait for types that may be non-`'static`?" +//! And so, we have come to the motivation for the first obtuse associated type, `Reflect::Static`: +//! ``` +//! pub trait Reflect { +//! type Static: 'static; +//! // ... +//! } +//! ``` +//!
While a function operating on syntax has extremely limited ability to reason about types, one +//! thing it can do is tell a `'static` type from a non-`'static` type: A type is always `'static` +//! unless it is parameterized with some type parameter other than `'static`. Thus, a proc macro is +//! able to, for any type, name a type that is the same except with only `'static` lifetime +//! parameters, and therefore a `'static` type. [`enso_reflect_macros`] uses this life-extension +//! approach to provide `Reflect::Static` types, and `Reflect` uses the `std::any::TypeId` of its +//! associated `Static` to attach an identifier to a type, or to a lazy reference to a type. +//! +//! ## Adding a little parametric polymorphism +//! +//! Due to its syntax-transformation implementation, `reflect` sees types post-monomorphization: For +//! example, in the following type definition: +//! ```ignore +//! #[derive(Reflect)] +//! struct Foo<T> { +//! field: T, +//! } +//! ``` +//! Syntactically, the reflect implementation for `Foo` will refer to the type of `field` by the +//! name of its parameter `T`--but when its `reflect` function is run to collect the data, +//! monomorphization has already occurred; the resulting data will not be able to distinguish +//! between a field with a parametric type `T` that has been instantiated with, e.g. `u32`, and a +//! field with a concrete type that is always `u32`. +//! +//! However, to support a Rust pattern in [`enso_parser`], it was necessary for +//! [`enso_reflect_macros`] to provide the `#[reflect(subtype)]` attribute. If you refer to the +//! documentation for that field, you may notice that its implementation requires identifying when a +//! generic type with a field whose type is a type parameter is instantiated with different types +//! for that parameter. A certain amount of type-erasure is called for. +//! +//! And so, we come to the second obtuse associated type of `Reflect`: +//! ``` +//! pub trait Reflect { +//! // ... +//!
type SubtypeErased: 'static; +//! // ... +//! } +//! ``` +//! The `SubtypeErased` type is used to obtain a `TypeId` that does not depend on the parameter of +//! the type of the field annotated with `#[reflect(subtype)]`, if any. This is accomplished by a +//! similar approach to the way lifetimes are erased: Starting with the lifetime-erased type used +//! for `Reflect::Static`, identify the relevant parameter instantiation within the type, and +//! replace it with a constant type, to obtain a type that is invariant in one parameter, and +//! covariant in all the others. The implementation uses `()` for the constant type. (Thus, it is +//! not currently supported for types to apply the `subtype` transform to a field with a parameter +//! that has arbitrary bounds. While this could be achieved to some extent by using a `Box<dyn Trait>` +//! for the invariant parameter, that has other complications--we'd need to identify associated +//! types of the trait in question used in other fields--and [`enso_parser`] hasn't had a need for +//! it, and probably never will.) + +// === Features === +#![feature(map_first_last)] +#![feature(associated_type_defaults)] +#![feature(option_get_or_insert_default)] +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +use enso_metamodel::rust::*; + + +// ============== +// === Export === +// ============== + +pub use enso_metamodel as metamodel; + + + +/// Imports for crates that `#[derive(Reflect)]`.
+pub mod prelude { + pub use enso_reflect_macros::Reflect; +} + + + +// =============== +// === Reflect === +// =============== + +/// Supports obtaining descriptions of the definitions of types at runtime. +pub trait Reflect { + /// This must be a type that uniquely identifies `Self`, ignoring any lifetime parameters. + type Static: 'static; + /// This must be a type that uniquely identifies `Self`, ignoring any lifetime parameters, and + /// invariant to any one generic parameter that may occur in the definition of a field marked + /// `#[reflect(subtype)]`, if present. The type used for the erased parameter can be any type + /// that satisfies `Self`'s bounds. + type SubtypeErased: 'static; + /// Get information about the type's definition. + fn reflect() -> TypeData; + /// Get information about type, identified by a reference. + fn reflect_type(&self) -> TypeData { + Self::reflect() + } +} + + +// === Implementations for standard types === + +impl Reflect for std::borrow::Cow<'_, str> { + type Static = String; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + ::reflect() + } +} + +impl Reflect for std::rc::Rc +where T: Reflect +{ + type Static = T::Static; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + T::reflect() + } +} + +impl Reflect for Box +where T: Reflect +{ + type Static = T::Static; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + T::reflect() + } +} + +impl Reflect for Option +where T: Reflect +{ + type Static = Option; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::(); + let name = "Option".to_owned(); + let data = Data::Primitive(Primitive::Option(reflect_lazy::())); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } +} + +impl Reflect for Result +where + T: Reflect, + E: Reflect, +{ + type Static = Result; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::(); + let name = 
"Result".to_owned(); + let ok = reflect_lazy::(); + let err = reflect_lazy::(); + let data = Data::Primitive(Primitive::Result(ok, err)); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } +} + +impl Reflect for &'_ str { + type Static = String; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + ::reflect() + } +} + +impl Reflect for Vec +where T: Reflect +{ + type Static = Vec; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::>(); + let name = "Vec".to_owned(); + let data = Data::Primitive(Primitive::Vec(reflect_lazy::())); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } +} + +macro_rules! reflect_primitive { + ($ty: ty, $primitive: expr) => { + impl Reflect for $ty { + type Static = Self; + type SubtypeErased = Self::Static; + fn reflect() -> TypeData { + let id = type_id::<$ty>(); + let name = stringify!($ty).to_owned(); + let data = Data::Primitive($primitive); + let subtype_erased = generic_id::(); + TypeData { id, name, data, subtype_erased } + } + } + }; +} + +reflect_primitive!(bool, Primitive::Bool); +reflect_primitive!(usize, Primitive::Usize); +reflect_primitive!(u32, Primitive::U32); +reflect_primitive!(String, Primitive::String); + + + +// ================== +// === Reflectors === +// ================== + +/// Return a value that can be used to obtain type information. +pub fn reflect_lazy() -> LazyType { + let id = type_id::(); + let evaluate = ::reflect; + LazyType::new(id, evaluate) +} + +/// Get an identifier that uniquely identifies the type, up to the instantiation of the parameter +/// of any field marked with the attribute `#[reflect(subtype)]` +pub fn generic_id() -> GenericTypeId { + GenericTypeId::new(std::any::TypeId::of::()) +} + +/// Obtain a unique identifier for a type. 
+pub fn type_id() -> TypeId { + TypeId::new(std::any::TypeId::of::()) +} + + + +// ================ +// === GraphViz === +// ================ + +/// Generate a graph of the given type's relationships with other types. +#[cfg(feature = "graphviz")] +pub fn graph() -> metamodel::graphviz::Graph { + reflect_lazy::().into() +} diff --git a/lib/rust/reflect/tests/test.rs b/lib/rust/reflect/tests/test.rs new file mode 100644 index 0000000000..3ddf700b35 --- /dev/null +++ b/lib/rust/reflect/tests/test.rs @@ -0,0 +1,39 @@ +// The type definitions in this crate exercise `#[derive(Reflect)]`. + +// === Non-Standard Linter Configuration === +#![allow(dead_code)] + +use enso_reflect as reflect; +use enso_reflect_macros::Reflect; + + + +#[derive(Reflect)] +struct Foo; + +#[derive(Reflect)] +struct Bar { + bar: Foo, +} + +#[derive(Reflect)] +enum Baz { + Bar(Bar), + Baz, +} + +#[derive(Reflect)] +struct Quux { + _quux: T, +} + +#[derive(Reflect)] +pub struct Code<'s> { + pub _repr: std::borrow::Cow<'s, str>, +} + +#[test] +fn test() { + use reflect::Reflect; + let _type = Baz::reflect(); +} diff --git a/lib/rust/shapely/macros/src/tagged_enum.rs b/lib/rust/shapely/macros/src/tagged_enum.rs index b95f59f726..4293f43169 100644 --- a/lib/rust/shapely/macros/src/tagged_enum.rs +++ b/lib/rust/shapely/macros/src/tagged_enum.rs @@ -1,6 +1,8 @@ use crate::prelude::*; use inflector::cases::snakecase::to_snake_case; +use syn::AttrStyle; +use syn::Attribute; use syn::Data; use syn::DeriveInput; use syn::Fields; @@ -32,8 +34,22 @@ use syn::Fields; /// ``` /// /// # Attributes -/// All attributes defined before the `#[tagged_enum]` one will be applied to the enum only, while -/// all other attributes will be applied to both the enum and all the variant structs. 
+/// Attributes defined after `#[tagged_enum]` and not in a section (see below) will be applied to +/// the enum and also all the variant structs produced; this is the default because it is +/// appropriate for common attributes like `#[derive(..)]`. +/// +/// The attribute `#[tagged_enum(apply_attributes_to = "enum")]` starts an attribute section; any +/// attributes in the section will be applied only to the enum itself. +/// +/// The attribute `#[tagged_enum(apply_attributes_to = "variant-types")]` starts an attribute +/// section; any attributes in the section will be applied only to the variant structs produced. +/// +/// An attribute can be placed before the `#[tagged_enum]` if its proc macro needs to operate on +/// the enum before expanding `tagged_enum`; otherwise, to avoid confusion, attributes should not be +/// placed before `#[tagged_enum]`, as the results would differ for *active* or *inert* +/// attributes[1]. +/// [1]: https://doc.rust-lang.org/reference/attributes.html#active-and-inert-attributes + pub fn run( attr: proc_macro::TokenStream, input: proc_macro::TokenStream, @@ -45,7 +61,9 @@ pub fn run( } else if !attrs.is_empty() { panic!("Unsupported attributes: {:?}", attrs); } - let decl = syn::parse_macro_input!(input as DeriveInput); + let mut decl = syn::parse_macro_input!(input as DeriveInput); + let (enum_attrs, variant_types_attrs, variants_attrs) = + split_attr_sections(std::mem::take(&mut decl.attrs)); let (impl_generics, ty_generics, inherent_where_clause_opt) = &decl.generics.split_for_impl(); let mut where_clause = enso_macro_utils::new_where_clause(vec![]); for inherent_where_clause in inherent_where_clause_opt { @@ -80,7 +98,6 @@ pub fn run( // } let vis = &decl.vis; let enum_name = &decl.ident; - let enum_attrs = &decl.attrs; let variant_names: Vec<_> = data.variants.iter().map(|v| &v.ident).collect(); let variant_bodies = variant_names.iter().map(|v| { if is_boxed { @@ -89,11 +106,15 @@ pub fn run( quote!(#v #ty_generics) } }); + let
variants_attrs = quote! { #(#variants_attrs)* }; output.push(quote! { #(#enum_attrs)* #[allow(missing_docs)] #vis enum #enum_name #ty_generics #where_clause { - #(#variant_names(#variant_bodies)),* + #( + #variants_attrs + #variant_names(#variant_bodies) + ),* } impl #impl_generics Debug for #enum_name #ty_generics #where_clause { @@ -183,7 +204,7 @@ pub fn run( let fields = &variant.fields; let fields = if fields.is_empty() { quote!({}) } else { quote!(#fields) }; output.push(quote! { - #(#enum_attrs)* + #(#variant_types_attrs)* #(#variant_attrs)* #[derive(Debug)] #[allow(missing_docs)] @@ -318,3 +339,74 @@ pub fn run( output.into() } + + + +// ================== +// === Attributes === +// ================== + +/// The path used to identify helper-attributes that configure the macro. +/// E.g. `tagged_enum` in `#[tagged_enum(apply_attributes_to = "variants")]` +const HELPER_ATTRIBUTE_PATH: &str = "tagged_enum"; + +enum Attr { + ApplyAttributesTo(ApplyAttributesTo), +} + +enum ApplyAttributesTo { + Enum, + VariantTypes, + Variants, +} + +fn parse_attr(attr: &Attribute) -> Option { + if attr.style != AttrStyle::Outer { + return None; + } + if attr.path.get_ident()? != HELPER_ATTRIBUTE_PATH { + return None; + } + let name_value = "Parsing name-value argument"; + let syn::MetaNameValue { lit, path, .. 
} = attr.parse_args().expect(name_value); + match path.get_ident().expect("Unsupported helper-attribute name").to_string().as_str() { + "apply_attributes_to" => Some(Attr::ApplyAttributesTo({ + let value = match lit { + syn::Lit::Str(lit_str) => lit_str.value(), + _ => panic!("Expected a LitStr in argument to helper-attribute."), + }; + match value.as_str() { + "enum" => ApplyAttributesTo::Enum, + "variant-types" => ApplyAttributesTo::VariantTypes, + "variants" => ApplyAttributesTo::Variants, + _ => panic!("Unexpected value in string argument to helper-attribute."), + } + })), + _ => panic!("Unsupported helper-attribute name: {:?}.", path), + } +} + +fn split_attr_sections(attrs: Vec) -> (Vec, Vec, Vec) { + let mut enum_attrs = vec![]; + let mut variant_types_attrs = vec![]; + let mut variants_attrs = vec![]; + let mut attr_section = None; + for attr in attrs { + if let Some(attr) = parse_attr(&attr) { + match attr { + Attr::ApplyAttributesTo(apply_to) => attr_section = Some(apply_to), + } + continue; + } + match attr_section { + None => { + enum_attrs.push(attr.clone()); + variant_types_attrs.push(attr); + } + Some(ApplyAttributesTo::Enum) => enum_attrs.push(attr), + Some(ApplyAttributesTo::VariantTypes) => variant_types_attrs.push(attr), + Some(ApplyAttributesTo::Variants) => variants_attrs.push(attr), + } + } + (enum_attrs, variant_types_attrs, variants_attrs) +} diff --git a/lib/rust/types/Cargo.toml b/lib/rust/types/Cargo.toml index 5eec9c7b7c..fc7173c75c 100644 --- a/lib/rust/types/Cargo.toml +++ b/lib/rust/types/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" [lib] [dependencies] +enso-reflect = { path = "../reflect" } nalgebra = { version = "0.26.1" } num-traits = { version = "0.2" } paste = "1.0.7" +serde = { version = "1.0", features = ["derive"], optional = true } diff --git a/lib/rust/types/src/unit2.rs b/lib/rust/types/src/unit2.rs index 9c2f38e6b6..3a4465d4f7 100644 --- a/lib/rust/types/src/unit2.rs +++ b/lib/rust/types/src/unit2.rs @@ -6,6 +6,8 @@ //! 
[`Duration`] or a number, respectfully. You are allowed to define any combination of operators //! and rules of how the result inference should be performed. +use enso_reflect::prelude::*; + use paste::paste; use std::borrow::Cow; use std::marker::PhantomData; @@ -94,8 +96,12 @@ pub trait Variant { /// Internal representation of every unit. #[repr(transparent)] +#[derive(Reflect)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[reflect(transparent)] pub struct UnitData { repr: R, + #[reflect(skip)] variant: PhantomData, } @@ -513,7 +519,8 @@ macro_rules! define { $(#$meta)* pub type $name = $crate::unit2::Unit<[<$name:snake:upper>]>; $(#$meta)* - #[derive(Debug, Clone, Copy)] + #[derive(Debug, Clone, Copy, Reflect)] + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct [<$name:snake:upper>]; impl $crate::unit2::Variant for [<$name:snake:upper>] {