Simple assignments and function definitions (#3572)
Implement simple variable assignments and function definitions. This implements:
- https://www.pivotaltracker.com/story/show/182497122
- https://www.pivotaltracker.com/story/show/182497144 (the code blocks are not created yet, but the function declaration is recognized.)

# Important Notes
- Introduced S-expression-based tests, and pretty-printing-roundtrip testing.
- Started writing tests for TypeDef based on the examples in the issue. None of them parse successfully.
- Fixed Number tokenizing.
- Moved most contents of the parser's `main.rs` to `lib.rs` (fixes a warning).
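As a quick illustration (an editor's sketch, not part of the diff), the two new statement forms can be exercised through the public `Parser` API added in this change; `Parser::new()` and `run` are taken from the new `lib/rust/parser/src/lib.rs` shown below.

    // Sketch only: assumes the `enso_parser` crate from this repository as a dependency.
    fn main() {
        let parser = enso_parser::Parser::new();
        // A simple variable assignment and an inline function definition --
        // the statement forms this commit teaches the parser to recognize.
        let assignment = parser.run("foo = 23");
        let function = parser.run("foo a b = 23");
        println!("{:#?}", assignment);
        println!("{:#?}", function);
    }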
This commit is contained in:
parent d8dddf40c6
commit 100eeda673

40 Cargo.lock generated
@@ -2035,6 +2035,18 @@ dependencies = [
 "derive_more",
]

[[package]]
name = "enso-metamodel-lexpr"
version = "0.1.0"
dependencies = [
 "bincode",
 "derivative",
 "enso-metamodel",
 "enso-reflect",
 "lexpr",
 "serde",
]

[[package]]
name = "enso-optics"
version = "0.2.0"

@@ -2048,12 +2060,15 @@ version = "0.1.0"
dependencies = [
 "bincode",
 "enso-data-structures",
 "enso-metamodel",
 "enso-metamodel-lexpr",
 "enso-parser-syntax-tree-builder",
 "enso-parser-syntax-tree-visitor",
 "enso-prelude",
 "enso-reflect",
 "enso-shapely-macros",
 "enso-types",
 "lexpr",
 "serde",
]

@@ -3650,7 +3665,7 @@ dependencies = [
 "base64 0.13.0",
 "bytes 1.1.0",
 "http",
 "httpdate 0.3.2",
 "httpdate 1.0.2",
 "language-tags 0.3.2",
 "mime 0.3.16",
 "percent-encoding 2.1.0",

@@ -4075,6 +4090,29 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "lexpr"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ceee0b80e0043f17bf81130471e1b0975179af75fe657af45577d80e2698fe3b"
dependencies = [
 "itoa 0.4.8",
 "lexpr-macros",
 "proc-macro-hack",
 "ryu",
]

[[package]]
name = "lexpr-macros"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd627fb38e19c00d8d068618259205f7a91c91aeade5c15bc35dbca037bb1c35"
dependencies = [
 "proc-macro-hack",
 "proc-macro2",
 "quote",
]

[[package]]
name = "libc"
version = "0.2.126"
@@ -12,6 +12,7 @@ derive_more = "0.99"
bincode = "1.3"

[features]
default = ["graphviz", "java", "rust"]
graphviz = []
java = []
rust = []
16 lib/rust/metamodel/lexpr/Cargo.toml Normal file
@@ -0,0 +1,16 @@
[package]
name = "enso-metamodel-lexpr"
version = "0.1.0"
edition = "2021"
authors = ["Enso Team <contact@enso.org>"]

[dependencies]
lexpr = "0.2.6"
bincode = "1.3"
serde = "1"
enso-metamodel = { path = "../" }
derivative = "2.2"

[dev-dependencies]
serde = { version = "1", features = ["derive"] }
enso-reflect = { path = "../../reflect" }
277 lib/rust/metamodel/lexpr/src/lib.rs Normal file
@@ -0,0 +1,277 @@
//! Producing S-expression representations of data based on reflection information.
//!
//! The chosen output format is compact--more so than the pretty-printing supported by `lexpr`
//! (which is what is used by `serde_lexpr` to derive an S-expression "format" for data).
//!
//! - A struct is represented as a list of its fields.
//! - No type names are emitted. For variant types, the discriminant is included before the fields.
//! - Named fields are represented with the structure used for Lisp's `alist`s: `(name . value)`.
//! - Field names are prefixed with ':'.
//! - Sequence types like Rust's `Vec<_>` are represented with `lexpr` `Vector`s: `#(element element)`
//! - An option prints the same way as its contained value in the `Some` case, or as an empty list
//!   `()` in the `None` case.
//!
//! # Example
//!
//! ```
//! # use enso_reflect::prelude::*;
//! # use serde::Serialize;
//! use enso_reflect::Reflect;
//! use lexpr::Value;
//!
//! // A type, and a value. We'd like to see the S-expr representation of the value.
//! #[derive(Serialize, Reflect)]
//! struct A {
//!     value: u32,
//! }
//! let value = A { value: 36 };
//! // Get `meta` type info for the type.
//! let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
//! let value_ty = rust_to_meta[&value.reflect_type().id];
//! // Use the type info to get a representation of an instance's data.
//! let s_expr = enso_metamodel_lexpr::ToSExpr::new(&graph).value(value_ty, &value);
//! let field_expr = Value::cons(Value::symbol(":value"), Value::Number(36.into()));
//! assert_eq!(s_expr, Value::cons(field_expr, Value::Null));
//! ```
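For orientation (an editor's illustrative sketch, not part of this file), the sequence and option conventions listed above can also be written directly as `lexpr` values; only the `lexpr` dependency declared in the Cargo.toml above is assumed.

    // Sketch only: the shapes the conventions above describe, built by hand.
    fn main() {
        use lexpr::sexp;
        let seq = sexp!(#(1 2 3)); // a Vec<_> renders as an lexpr vector
        let none = sexp!(());      // Option::None renders as the empty list
        println!("{} {}", seq, none); // prints: #(1 2 3) ()
    }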

// === Features ===
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]

use derivative::Derivative;
use enso_metamodel::meta::*;
use lexpr::Value;
use std::collections::BTreeMap;


// =============================
// === Meta to S-expressions ===
// =============================

/// Render data to an S-expression representation based on its `meta` model.
#[derive(Derivative)]
#[derivative(Debug)]
pub struct ToSExpr<'g> {
    graph:   &'g TypeGraph,
    #[derivative(Debug = "ignore")]
    mappers: BTreeMap<TypeId, Box<dyn Fn(Value) -> Value>>,
}

impl<'g> ToSExpr<'g> {
    #[allow(missing_docs)]
    pub fn new(graph: &'g TypeGraph) -> Self {
        let mappers = Default::default();
        Self { graph, mappers }
    }

    /// Set a transformation to be applied to a type after translating to an S-expression.
    pub fn mapper(&mut self, id: TypeId, f: impl Fn(Value) -> Value + 'static) {
        self.mappers.insert(id, Box::new(f));
    }

    /// Given a bincode-serialized input, use its `meta` type info to transcribe it to an
    /// S-expression.
    pub fn value<T: serde::Serialize>(&self, id: TypeId, input: &T) -> Value {
        use bincode::Options;
        let bincoder = bincode::DefaultOptions::new().with_fixint_encoding();
        let data = bincoder.serialize(input).unwrap();
        let mut data = &data[..];
        let value = self.value_(id, &mut data);
        assert_eq!(data, &[0; 0], "{}", value);
        value
    }
}

// === Implementation ===

impl<'g> ToSExpr<'g> {
    fn value_(&self, id: TypeId, data: &mut &[u8]) -> Value {
        match &self.graph[id].data {
            Data::Struct(_) => self.struct_(id, data),
            Data::Primitive(primitive) => self.primitive(*primitive, data),
        }
    }

    fn struct_(&self, id: TypeId, data: &mut &[u8]) -> Value {
        let mut hierarchy = vec![];
        let mut child = None;
        let discriminants = &self.graph[id].discriminants;
        if !discriminants.is_empty() {
            let discriminant_index = read_u32(data);
            let child_ = discriminants[&(discriminant_index as usize)];
            hierarchy.push(child_);
            child = Some(child_);
        }
        hierarchy.push(id);
        let mut id_ = id;
        while let Some(parent) = self.graph[id_].parent {
            hierarchy.push(parent);
            id_ = parent;
        }
        let mut out = vec![];
        self.fields(&mut hierarchy, data, &mut out);
        assert_eq!(hierarchy, &[]);
        let mut value = Value::list(out);
        if let Some(id) = child {
            if let Some(mapper) = self.mappers.get(&id) {
                value = (mapper)(value);
                if !value.is_cons() {
                    value = Value::cons(value, Value::Null);
                }
            };
            let discriminant = self.graph[id].name.to_pascal_case().into_boxed_str();
            let discriminant = Value::Symbol(discriminant);
            value = Value::cons(discriminant, value);
        }
        if let Some(mapper) = self.mappers.get(&id) {
            value = (mapper)(value);
        }
        value
    }

    fn fields(&self, hierarchy: &mut Vec<TypeId>, data: &mut &[u8], out: &mut Vec<Value>) {
        let id = match hierarchy.pop() {
            Some(id) => id,
            None => return,
        };
        let fields = match &self.graph[id].data {
            Data::Struct(fields) => fields,
            Data::Primitive(_) => panic!(),
        };
        if self.graph[id].child_field == Some(0) || fields.is_empty() {
            self.fields(hierarchy, data, out);
        }
        for (i, field) in fields.iter().enumerate() {
            if !field.name.is_empty() {
                let car = Value::Symbol(format!(":{}", field.name).into_boxed_str());
                let cdr = self.value_(field.type_, data);
                out.push(Value::cons(car, cdr));
            } else {
                out.push(self.value_(field.type_, data));
            }
            if self.graph[id].child_field == Some(i + 1) {
                self.fields(hierarchy, data, out);
            }
        }
    }
    fn primitive(&self, primitive: Primitive, data: &mut &[u8]) -> Value {
        match primitive {
            Primitive::U32 => Value::Number(read_u32(data).into()),
            Primitive::U64 => Value::Number(read_u64(data).into()),
            Primitive::Bool => {
                let value = read_u8(data);
                let value = match value {
                    0 => false,
                    1 => true,
                    _ => panic!(),
                };
                Value::Bool(value)
            }
            Primitive::String => Value::String(read_string(data).into()),
            Primitive::Sequence(t0) => {
                let len = read_u64(data);
                Value::vector((0..len).map(|_| self.value_(t0, data)))
            }
            Primitive::Option(t0) => match read_u8(data) {
                0 => Value::Null,
                1 => self.value_(t0, data),
                _ => panic!(),
            },
            Primitive::Result(t0, t1) => {
                let mut values = vec![];
                match read_u32(data) {
                    0 => {
                        values.push(Value::Symbol("Ok".to_owned().into_boxed_str()));
                        values.push(self.value_(t0, data));
                    }
                    1 => {
                        values.push(Value::Symbol("Err".to_owned().into_boxed_str()));
                        values.push(self.value_(t1, data));
                    }
                    _ => panic!(),
                }
                Value::list(values)
            }
        }
    }
}


// === Primitive Deserializers ===

fn read_u8(buffer: &mut &[u8]) -> u8 {
    let (bytes, rest) = buffer.split_at(1);
    *buffer = rest;
    bytes[0]
}

fn read_u32(buffer: &mut &[u8]) -> u32 {
    let (bytes, rest) = buffer.split_at(4);
    *buffer = rest;
    let mut data = [0; 4];
    data.copy_from_slice(bytes);
    u32::from_le_bytes(data)
}

fn read_u64(buffer: &mut &[u8]) -> u64 {
    let (bytes, rest) = buffer.split_at(8);
    *buffer = rest;
    let mut data = [0; 8];
    data.copy_from_slice(bytes);
    u64::from_le_bytes(data)
}

fn read_string(buffer: &mut &[u8]) -> String {
    let len = read_u64(buffer);
    let (bytes, rest) = buffer.split_at(len as usize);
    *buffer = rest;
    String::from_utf8(bytes.to_owned()).unwrap()
}
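The readers above assume the byte layout produced by `ToSExpr::value`: `bincode`'s fixint encoding with little-endian integers and u64 length prefixes. A small self-contained check (an editor's sketch, assuming only the `bincode = "1.3"` dependency declared above):

    // Sketch only: confirm the layout the primitive readers expect.
    fn main() {
        use bincode::Options;
        let opts = bincode::DefaultOptions::new().with_fixint_encoding();
        // A u32 is 4 little-endian bytes, matching `read_u32`.
        assert_eq!(opts.serialize(&36u32).unwrap(), vec![36, 0, 0, 0]);
        // A string is a u64 length prefix followed by UTF-8 bytes, matching `read_string`.
        assert_eq!(opts.serialize(&"hi").unwrap(), vec![2, 0, 0, 0, 0, 0, 0, 0, b'h', b'i']);
    }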

// =============
// === Tests ===
// =============

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn unit_test() {
        #[derive(serde::Serialize)]
        struct A {
            value: u32,
        }
        let mut graph = TypeGraph::new();
        let int_name = TypeName::from_pascal_case("U32");
        let int = Type::new(int_name, Data::Primitive(Primitive::U32));
        let int = graph.types.insert(int);
        let a_name = TypeName::from_pascal_case("A");
        let a_field_name = FieldName::from_snake_case("value");
        let a_field = Field::named(a_field_name, int);
        let a = Type::new(a_name, Data::Struct(vec![a_field]));
        let a = graph.types.insert(a);
        let a_value = A { value: 36 };
        let s_expr = ToSExpr::new(&graph).value(a, &a_value);
        let field_expr = Value::cons(Value::symbol(":value"), Value::Number(36.into()));
        assert_eq!(s_expr, Value::cons(field_expr, Value::Null));
    }
}
@@ -45,7 +45,6 @@

// === Features ===
#![feature(map_first_last)]
#![feature(associated_type_defaults)]
#![feature(option_get_or_insert_default)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
@@ -179,12 +179,13 @@ impl Identifier {
    /// Render in camelCase.
    pub fn to_camel_case(&self) -> String {
        let mut camel = String::with_capacity(self.segments_len());
        let (head, tail) = self.segments.split_first().unwrap();
        camel.push_str(head);
        for segment in tail {
            let mut chars = segment.chars();
            camel.push(chars.next().unwrap().to_ascii_uppercase());
            camel.extend(chars);
        if let Some((head, tail)) = self.segments.split_first() {
            camel.push_str(head);
            for segment in tail {
                let mut chars = segment.chars();
                camel.push(chars.next().unwrap().to_ascii_uppercase());
                camel.extend(chars);
            }
        }
        camel
    }

@@ -225,6 +226,11 @@ impl Identifier {
    pub fn append(&mut self, other: Self) {
        self.segments.extend(other.segments)
    }

    /// Return whether this identifier is zero-length.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
}

@@ -284,6 +290,10 @@ impl FieldName {
    pub fn append(&mut self, other: Self) {
        self.0.append(other.0)
    }
    /// Return whether this identifier is zero-length.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}
@@ -18,8 +18,9 @@ use std::mem::take;
pub fn to_meta(ty: TypeData) -> (meta::TypeGraph, BTreeMap<TypeId, meta::TypeId>) {
    let mut to_meta = ToMeta::new();
    let root_ = to_meta.run(ty);
    to_meta.graph.gc(vec![root_]);
    (to_meta.graph, to_meta.rust_to_meta)
    let (mut graph, rust_to_meta) = to_meta.finish();
    graph.gc(vec![root_]);
    (graph, rust_to_meta)
}

#[derive(Debug, Default)]

@@ -212,6 +213,11 @@ impl ToMeta {
        self.rust_to_meta[&root_rust_id]
    }

    /// Return results.
    pub fn finish(self) -> (meta::TypeGraph, BTreeMap<TypeId, meta::TypeId>) {
        (self.graph, self.rust_to_meta)
    }

    fn generate_subtypes(&mut self, rust_types: &BTreeMap<TypeId, TypeData>) {
        let mut parent_ids = BTreeMap::new();
        let mut aliases = vec![];
@@ -20,5 +20,7 @@ enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" }
serde = { version = "1.0", features = ["derive"] }
bincode = "1.3"

[lib]
path = "src/main.rs"
[dev-dependencies]
enso-metamodel = { path = "../metamodel", features = ["rust"] }
enso-metamodel-lexpr = { path = "../metamodel/lexpr" }
lexpr = "0.2.6"
@@ -721,12 +721,12 @@ impl<'s> Lexer<'s> {
/// 2. Some parsers could consume input even if it should be qualified as something else. Thus, some
///    parsers should be run first in order to make the token consuming process correct.
const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[
    |t| t.number(),
    |t| t.ident(),
    |t| t.operator(),
    |t| t.newline(),
    |t| t.symbol(),
    |t| t.comment(),
    |t| t.number(),
    |t| t.text(),
];

@@ -752,7 +752,7 @@ impl<'s> Lexer<'s> {
            }
        }
        if self.current_char != None {
            panic!("Internal error. Lexer did not consume all input.");
            panic!("Internal error. Lexer did not consume all input. State: {self:?}");
        }
        while self.end_block().is_some() {
            let block_end = self.marker_token(token::Variant::block_end());

@@ -902,6 +902,11 @@ mod tests {
        ]))
    }

    #[test]
    fn test_numeric_literal() {
        test_lexer("10", vec![number_("", "10")]);
    }

    #[test]
    fn test_case_idents() {
        test_lexer_many(vec![
265 lib/rust/parser/src/lib.rs Normal file
@@ -0,0 +1,265 @@
//! The Enso parser. Parsing is a multi-stage process:
//!
//! # Lexing.
//! First, the source code is fed to [`lexer::Lexer`], which consumes it and outputs a stream of
//! [`Token`]. Tokens are chunks of the input with a generic description attached, like "operator",
//! or "identifier".
//!
//! # Building macro registry.
//! Macros in Enso are a very powerful mechanism and are used to transform groups of tokens into
//! almost any statement. First, macros need to be discovered and registered. Currently, there is no
//! real macro discovery process, as there is no support for user-defined macros. Instead, there is
//! a set of hardcoded macros defined in the compiler.
//!
//! Each macro defines one or more segments. Every segment starts with a predefined token and can
//! contain any number of other tokens. For example, the macro `if ... then ... else ...` contains
//! three segments. Macros can also accept prefix tokens, a set of tokens on the left of the first
//! segment. A good example is the lambda macro `... -> ...`.
//!
//! In this step, a [`MacroMatchTree`] is built. Basically, it is a map from the possible next
//! segment name to information about which other segments are required and which macro definition
//! applies in case these segments are found. For example, let's consider two macros:
//! `if ... then ...`, and `if ... then ... else ...`. In such a case, the macro registry will
//! contain only one entry, "if", and two sets of possible resolution paths: ["then"], and
//! ["then", "else"], each associated with the corresponding macro definition.
//!
//! # Splitting the token stream by the macro segments.
//! The input token stream is iterated and split based on the segments of the registered macros.
//! For example, for the input `if a b then c d else e f`, the token stream will be split into
//! three segments, `a b`, `c d`, and `e f`, which will be associated with the
//! `if ... then ... else ...` macro definition.
//!
//! The splitting process is hierarchical. It means that a new macro can start being resolved during
//! resolution of a parent macro. For example, `if if a then b then c else d` is a correct
//! expression. After finding the first `if` token, the token stream will be split. The next `if`
//! token starts a new token stream splitting. The first `then` token belongs to the nested macro;
//! however, as soon as the resolver sees the second `then` token, it will consider the nested macro
//! to be finished, and will come back to parent macro resolution.
//!
//! # Resolving right-hand-side patterns of macro segments.
//! In the next steps, each macro is analyzed, starting from the most nested ones. For each macro,
//! the [`Pattern`] of the last segment is run to check which tokens belong to that macro, and which
//! tokens should be transferred to the parent macro definition. For example, consider the
//! following code `process (read file) content-> print content`. The `(...)` is a macro with two
//! sections `(` and `)`. Let's mark the token splitting with `[` and `]` characters. The previous
//! macro resolution steps would produce the following split of the token stream:
//! `process [(read file][) content[-> print content]]`. In this step, the innermost macro will be
//! analyzed first. The pattern of the last segment of the inner macro (`->`) defines that it
//! consumes all tokens, so all the tokens `print content` are left as they are. Now, the resolution
//! moves to the parent macro. Its last segment starts with the `)` token, whose pattern defines
//! that it does not consume any tokens, so all of its current tokens (`content[-> print content]]`)
//! are popped to the parent definition, forming `process [(read file][)] content[-> print content]`.
//!
//! Please note that the root of the expression is considered a special macro as well. This is done
//! to unify the algorithm.
//!
//! # Resolving left-hand-side patterns of macro segments.
//! In this step, each macro is analyzed, starting from the most nested ones. For each macro,
//! the [`Pattern`] of the macro prefix is run to check which tokens belong to the prefix of
//! the macro (in case the macro defines a prefix). In the example above, the macro `->` defines
//! complex prefix rules: if the token on the left of the arrow used no space, then only a single
//! token will be consumed. As a result of this step, the following token split will occur:
//! `[process [(read file][)] [content-> print content]`, which is exactly what we wanted.
//!
//! # Resolving patterns of macro segments.
//! In this step, all macro segment patterns are resolved, and errors are reported in case this
//! was not possible. If tokens in a segment match the segment pattern, they are sent to the
//! operator precedence resolver for final transformation.
//!
//! # Operator precedence resolution.
//! Each token stream sent to the operator resolver is processed by a modified Shunting Yard
//! algorithm, which handles such situations as multiple operators placed next to each other,
//! multiple identifiers placed next to each other, and also takes spacing into consideration in
//! order to implement spacing-aware precedence rules. After all segments are resolved, the macro
//! is treated as a single token in one of the segments of the parent macro, and is processed by
//! the operator precedence resolver as well. In the end, a single [`syntax::Tree`] is produced,
//! containing the parsed expression.
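To make the registry structure described above concrete (an editor's toy model, not the real `MacroMatchTree` type), the "if"/"then"/"else" example can be written as a plain map from the first segment's token to the possible follow-up segment sequences:

    // Sketch only: a toy stand-in for the macro registry described above.
    use std::collections::HashMap;

    fn main() {
        let mut registry: HashMap<&str, Vec<Vec<&str>>> = HashMap::new();
        // One entry, "if", with two resolution paths, as in the docs above.
        registry.insert("if", vec![vec!["then"], vec!["then", "else"]]);
        // Resolving `if a then b else c` follows the ["then", "else"] path.
        for path in &registry["if"] {
            println!("if ... {}", path.join(" ... "));
        }
    }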

#![recursion_limit = "256"]
// === Features ===
#![allow(incomplete_features)]
#![feature(allocator_api)]
#![feature(test)]
#![feature(specialization)]
#![feature(let_chains)]
#![feature(if_let_guard)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]

use crate::prelude::*;


// ==============
// === Export ===
// ==============

pub mod lexer;
pub mod macros;
pub mod serialization;
pub mod source;
pub mod syntax;


/// Popular utilities, imported by most modules of this crate.
pub mod prelude {
    pub use enso_prelude::serde_reexports::*;
    pub use enso_prelude::*;
    pub use enso_reflect as reflect;
    pub use enso_reflect::Reflect;
    pub use enso_types::traits::*;
    pub use enso_types::unit2::Bytes;
}


// ==============
// === Parser ===
// ==============

/// Enso parser. See the module documentation to learn more about how it works.
#[allow(missing_docs)]
#[derive(Debug)]
pub struct Parser {
    pub macros: macros::resolver::SegmentMap<'static>,
}
impl Parser {
    /// Constructor.
    pub fn new() -> Self {
        let macros = macros::built_in::all();
        Self { macros }
    }

    /// Main entry point.
    pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
        let tokens = lexer::run(code);
        let mut statements = vec![];
        let mut tokens = tokens.into_iter().peekable();
        while tokens.peek().is_some() {
            let resolver = macros::resolver::Resolver::new_root();
            let tree = resolver.run(&self.macros, &mut tokens);
            let tree = expression_to_statement(tree);
            statements.push(tree);
        }
        syntax::Tree::block(statements)
    }
}

impl Default for Parser {
    fn default() -> Self {
        Self::new()
    }
}


// == Parsing helpers ==

/// Reinterpret an expression in a statement context (i.e. as a top level member of a block).
///
/// In statement context, an expression that has an assignment operator at its top level is
/// interpreted as a variable assignment or method definition.
fn expression_to_statement(tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
    use syntax::tree::*;
    let tree_ = match &*tree.variant {
        Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast,
        _ => &tree,
    };
    let mut replacement = None;
    if let Variant::OprApp(opr_app) = &*tree_.variant {
        replacement = expression_to_binding(opr_app);
    }
    match replacement {
        Some(modified) => modified,
        None => tree,
    }
}
/// If the input is an "=" expression, try to interpret it as either a variable assignment or method
/// definition.
fn expression_to_binding<'a>(app: &syntax::tree::OprApp<'a>) -> Option<syntax::Tree<'a>> {
    use syntax::tree::*;
    match app {
        OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } if opr.code == "=" => {
            let mut lhs = lhs;
            let mut args = vec![];
            while let Variant::App(App { func, arg }) = &*lhs.variant {
                lhs = func;
                args.push(arg.clone());
            }
            args.reverse();
            if let Some(rhs) = rhs && args.is_empty() {
                Some(Tree::assignment(lhs.clone(), opr.clone(), rhs.clone()))
            } else if let Variant::Ident(Ident { token }) = &*lhs.variant {
                Some(Tree::function(token.clone(), args, opr.clone(), rhs.clone()))
            } else {
                None
            }
        }
        _ => None,
    }
}
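As a side note (an editor's toy walkthrough, not the parser's real types), the loop above peels arguments off a left-nested application, so `foo a b = 23` yields the name `foo` and the argument list `[a, b]`:

    // Sketch only: unrolling App(App(foo, a), b) into a name plus arguments.
    #[derive(Debug)]
    enum Expr {
        Ident(&'static str),
        App(Box<Expr>, &'static str),
    }

    fn unroll(mut expr: Expr) -> (Expr, Vec<&'static str>) {
        let mut args = vec![];
        loop {
            match expr {
                Expr::App(func, arg) => {
                    args.push(arg);
                    expr = *func;
                }
                name => {
                    args.reverse();
                    return (name, args);
                }
            }
        }
    }

    fn main() {
        // `foo a b` parses as App(App(foo, a), b).
        let lhs = Expr::App(Box::new(Expr::App(Box::new(Expr::Ident("foo")), "a")), "b");
        let (name, args) = unroll(lhs);
        println!("name = {:?}, args = {:?}", name, args); // Ident("foo"), ["a", "b"]
    }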

// =============
// === Tests ===
// =============

#[cfg(test)]
mod tests {
    use super::*;
    use enso_parser_syntax_tree_builder::ast_builder;

    macro_rules! test_parse {
        ($input:tt = {$($def:tt)*}) => {
            assert_eq!(
                Parser::new().run($input),
                ast_builder! { $($def)* }
            )
        };
    }

    #[test]
    fn test_expressions() {
        test_parse! {"a" = {a}};
        test_parse! {"a b" = {a b}};
        test_parse! {"a b c" = {[a b] c}};
    }
}


// ==================
// === Benchmarks ===
// ==================

#[cfg(test)]
mod benches {
    use super::*;
    extern crate test;
    use test::Bencher;

    #[bench]
    fn bench_parsing_type_defs(bencher: &mut Bencher) {
        let reps = 1_000;
        let str = "type Option a b c\n".repeat(reps);
        let parser = Parser::new();
        bencher.iter(move || {
            parser.run(&str);
        });
    }
}
@@ -71,7 +71,8 @@ fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree
    // println!("{:#?}", name);
    // println!("\n\n------------- 2");

    let params = v.nested().query("param").unwrap();
    let no_params = vec![];
    let params = v.nested().query("param").unwrap_or(&no_params);
    // println!("{:#?}", params);
    // println!("\n\n------------- 3");
@@ -1,79 +1,4 @@
[The module documentation removed from `main.rs` in this hunk is identical to the documentation added to `lib/rust/parser/src/lib.rs` above.]
//! Tests for [`enso_parser`].

#![recursion_limit = "256"]
// === Features ===
@@ -82,6 +7,7 @@
#![feature(test)]
#![feature(specialization)]
#![feature(let_chains)]
#![feature(if_let_guard)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
@@ -98,70 +24,7 @@
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]

use crate::prelude::*;


// ==============
// === Export ===
// ==============

pub mod lexer;
pub mod macros;
pub mod serialization;
pub mod source;
pub mod syntax;


/// Popular utilities, imported by most modules of this crate.
pub mod prelude {
    pub use enso_prelude::serde_reexports::*;
    pub use enso_prelude::*;
    pub use enso_reflect as reflect;
    pub use enso_reflect::Reflect;
    pub use enso_types::traits::*;
    pub use enso_types::unit2::Bytes;
}


// ==============
// === Parser ===
// ==============

/// Enso parser. See the module documentation to learn more about how it works.
#[allow(missing_docs)]
#[derive(Debug)]
pub struct Parser {
    pub macros: macros::resolver::SegmentMap<'static>,
}

impl Parser {
    /// Constructor.
    pub fn new() -> Self {
        let macros = macros::built_in::all();
        Self { macros }
    }

    /// Main entry point.
    pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
        let tokens = lexer::run(code);
        let mut statements = vec![];
        let mut tokens = tokens.into_iter().peekable();
        while tokens.peek().is_some() {
            let resolver = macros::resolver::Resolver::new_root();
            let tree = resolver.run(&self.macros, &mut tokens);
            statements.push(tree);
        }
        syntax::Tree::module(statements)
    }
}

impl Default for Parser {
    fn default() -> Self {
        Self::new()
    }
}
use enso_parser::prelude::*;
@@ -171,52 +34,7 @@ impl Default for Parser {

fn main() {
    init_tracing(TRACE);
    let ast = Parser::new().run("type Option (a) b c");
    let ast = enso_parser::Parser::new().run("type Option (a) b c");
    println!("\n\n==================\n\n");
    println!("{:#?}", ast);
}

#[cfg(test)]
mod tests {
    use super::*;
    use enso_parser_syntax_tree_builder::ast_builder;

    macro_rules! test_parse {
        ($input:tt = {$($def:tt)*}) => {
            assert_eq!(
                Parser::new().run($input),
                ast_builder! { $($def)* }
            )
        };
    }

    #[test]
    fn test_expressions() {
        test_parse! {"a" = {a}};
        test_parse! {"a b" = {a b}};
        test_parse! {"a b c" = {[a b] c}};
    }
}


// ==================
// === Benchmarks ===
// ==================

#[cfg(test)]
mod benches {
    use super::*;
    extern crate test;
    use test::Bencher;

    #[bench]
    fn bench_parsing_type_defs(bencher: &mut Bencher) {
        let reps = 1_000;
        let str = "type Option a b c\n".repeat(reps);
        let parser = Parser::new();
        bencher.iter(move || {
            parser.run(&str);
        });
    }
}
@@ -39,6 +39,7 @@ pub(crate) fn serialize_cow<S>(cow: &Cow<'_, str>, ser: S) -> Result<S::Ok, S::E
where S: serde::Serializer {
    let s = match cow {
        Cow::Borrowed(s) => *s,
        Cow::Owned(s) if s.is_empty() => "",
        Cow::Owned(_) => panic!(),
    };
    let begin = s.as_ptr() as u32;
@@ -15,7 +15,7 @@ use crate::prelude::*;
pub struct Code<'s> {
    #[serde(serialize_with = "crate::serialization::serialize_cow")]
    #[serde(deserialize_with = "crate::serialization::deserialize_cow")]
    #[reflect(as = "crate::serialization::Code", flatten)]
    #[reflect(as = "crate::serialization::Code")]
    pub repr: Cow<'s, str>,
}
@@ -62,8 +62,9 @@ impl From<&str> for VisibleOffset {
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Reflect, Deserialize)]
#[allow(missing_docs)]
pub struct Offset<'s> {
    #[reflect(hide)]
    pub visible: VisibleOffset,
    #[reflect(flatten)]
    #[reflect(flatten, hide)]
    pub code: Code<'s>,
}
@@ -46,6 +46,7 @@ impl<'s> Item<'s> {
        match self {
            Item::Token(token) => match token.variant {
                token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
                token::Variant::Number(number) => Tree::number(token.with_variant(number)),
                _ => todo!(),
            },
            Item::Tree(ast) => ast,
@@ -16,6 +16,7 @@ use crate::syntax::token::Token;
// computations for any operator (according to the spec)
fn precedence_of(operator: &str) -> usize {
    match operator {
        "=" => 1,
        "+" => 3,
        "-" => 3,
        "*" => 7,

@@ -68,13 +68,17 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
        pub error: Error,
        pub ast: Tree<'s>,
    },
    Module {
    Block {
        pub statements: Vec<Tree<'s>>,
    },
    /// A simple identifier, like `foo` or `bar`.
    Ident {
        pub token: token::Ident<'s>,
    },
    /// A numeric literal, like `10`.
    Number {
        pub token: token::Number<'s>,
    },
    /// A simple application, like `print "hello"`.
    App {
        pub func: Tree<'s>,

@@ -106,12 +110,22 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
    MultiSegmentApp {
        pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
    },

    TypeDef {
        pub keyword: Token<'s>,
        pub name: Tree<'s>,
        pub params: Vec<Tree<'s>>,
    }
    },
    Assignment {
        pub pattern: Tree<'s>,
        pub equals: token::Operator<'s>,
        pub expr: Tree<'s>,
    },
    Function {
        pub name: token::Ident<'s>,
        pub args: Vec<Tree<'s>>,
        pub equals: token::Operator<'s>,
        pub body: Option<Tree<'s>>,
    },
}
}};}

@@ -39,7 +39,7 @@ use std::mem;

#[proc_macro]
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
    let output = expr(tokens, None);
    let output = quote!(crate::syntax::Tree::module(vec![#output]));
    let output = quote!(crate::syntax::Tree::block(vec![#output]));
    output.into()
}
206 lib/rust/parser/tests/parse.rs Normal file
@@ -0,0 +1,206 @@
//! Parse expressions and compare their results to expected values.

// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]

use lexpr::sexp;


// ===========================
// === Test support macros ===
// ===========================

/// Parses input as a sequence of S-expressions, and wraps it in a `Block`.
macro_rules! block {
    ( $statements:tt ) => {
        sexp![(Block #($statements))]
    }
}


// =============
// === Tests ===
// =============

#[test]
fn application() {
    test("a b c", block![(App (App (Ident a) (Ident b)) (Ident c))]);
}

#[test]
fn type_definition_bool() {
    test("type Bool", block![(TypeDef (Ident type) (Ident Bool) #())]);
}

#[test]
fn type_definition_option() {
    test("type Option a", block![(TypeDef (Ident type) (Ident Option) #((Ident a)))]);
}

#[test]
fn assignment_simple() {
    test("foo = 23", block![(Assignment (Ident foo) "=" (Number 23))]);
}

#[test]
fn function_inline_simple_args() {
    test("foo a = 23", block![(Function foo #((Ident a)) "=" (Number 23))]);
    test("foo a b = 23", block![(Function foo #((Ident a) (Ident b)) "=" (Number 23))]);
    test("foo a b c = 23", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" (Number 23))]);
}

#[test]
fn function_block_noargs() {
    test("foo =", block![(Function foo #() "=" ())]);
}

#[test]
fn function_block_simple_args() {
    test("foo a =", block![(Function foo #((Ident a)) "=" ())]);
    test("foo a b =", block![(Function foo #((Ident a) (Ident b)) "=" ())]);
    test("foo a b c =", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" ())]);
}

// ====================
// === Test Support ===
// ====================

use enso_metamodel_lexpr::ToSExpr;
use enso_reflect::Reflect;
use std::collections::HashSet;

/// Given a block of input Enso code, test that:
/// - The given code parses to the AST represented by the given S-expression.
/// - The AST pretty-prints back to the original code.
///
/// The S-expression format is as documented for [`enso_metamodel_lexpr`], with some
/// postprocessing:
/// - For concision, field names are stripped (as if all structs were tuple structs).
/// - Most token types are represented as their contents, rather than as a token struct. For
///   example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look
///   like `sexp![foo]`.
fn test(code: &str, expect: lexpr::Value) {
    let ast = enso_parser::Parser::new().run(code);
    let ast_s_expr = to_s_expr(&ast, code);
    assert_eq!(ast_s_expr.to_string(), expect.to_string());
    assert_eq!(ast.code(), code);
}


// =====================
// === S-expressions ===
// =====================

/// Produce an S-expression representation of the input AST type.
pub fn to_s_expr<T>(value: &T, code: &str) -> lexpr::Value
where T: serde::Serialize + Reflect {
    let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
    let ast_ty = rust_to_meta[&value.reflect_type().id];
    let base = code.as_bytes().as_ptr() as usize;
    let code: Box<str> = Box::from(code);
    let mut to_s_expr = ToSExpr::new(&graph);
    to_s_expr.mapper(ast_ty, strip_hidden_fields);
    let ident_token = rust_to_meta[&enso_parser::syntax::token::variant::Ident::reflect().id];
    let operator_token = rust_to_meta[&enso_parser::syntax::token::variant::Operator::reflect().id];
    let number_token = rust_to_meta[&enso_parser::syntax::token::variant::Number::reflect().id];
    let token_to_str = move |token: lexpr::Value| {
        let range = token_code_range(&token, base);
        code[range].to_owned().into_boxed_str()
    };
    let token_to_str_ = token_to_str.clone();
    to_s_expr.mapper(ident_token, move |token| lexpr::Value::symbol(token_to_str_(token)));
    let token_to_str_ = token_to_str.clone();
    to_s_expr.mapper(operator_token, move |token| lexpr::Value::string(token_to_str_(token)));
    let token_to_str_ = token_to_str;
    to_s_expr.mapper(number_token, move |token| {
        lexpr::Value::Number(token_to_str_(token).parse::<u64>().unwrap().into())
    });
    tuplify(to_s_expr.value(ast_ty, &value))
}

/// Strip certain fields that should be excluded from output.
fn strip_hidden_fields(tree: lexpr::Value) -> lexpr::Value {
    let hidden_tree_fields =
        [":spanLeftOffsetVisible", ":spanLeftOffsetCodeRepr", ":spanCodeLength"];
    let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
    lexpr::Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
        lexpr::Value::Cons(cons) => match cons.car() {
            lexpr::Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()),
            _ => panic!(),
        },
        _ => true,
    }))
}

/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s,
/// return the range of the input code the token references.
fn token_code_range(token: &lexpr::Value, base: usize) -> std::ops::Range<usize> {
    let code_repr = fields(token).find(|(name, _)| *name == ":codeRepr").unwrap().1;
    let mut begin = None;
    let mut len = None;
    for (name, value) in fields(code_repr) {
        match name {
            ":begin" => begin = Some(value.as_u64().unwrap() as u32),
            ":len" => len = Some(value.as_u64().unwrap() as u32),
            _ => (),
        }
    }
    let begin = begin.unwrap();
    let begin = (begin as u64) | (base as u64 & !0xFFFF_FFFF);
    let begin = if begin < (base as u64) { begin + 0x1_0000_0000 } else { begin };
    let begin = begin as usize - base;
    let len = len.unwrap() as usize;
    begin..(begin + len)
}

/// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields.
fn fields(value: &'_ lexpr::Value) -> impl Iterator<Item = (&'_ str, &'_ lexpr::Value)> {
    value.list_iter().unwrap().filter_map(|value| match value {
        lexpr::Value::Cons(cons) => match cons.car() {
            lexpr::Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())),
            _ => None,
        },
        _ => None,
    })
}

/// Strip field names from struct representations, so that they are printed more concisely, as if
/// they were tuple-structs.
fn tuplify(value: lexpr::Value) -> lexpr::Value {
    let (car, cdr) = match value {
        lexpr::Value::Cons(cons) => cons.into_pair(),
        lexpr::Value::Vector(mut vector) => {
            for value in vector.iter_mut() {
                let original = std::mem::replace(value, lexpr::Value::Nil);
                *value = tuplify(original);
            }
            return lexpr::Value::Vector(vector);
        }
        value => return value,
    };
    if let lexpr::Value::Symbol(symbol) = &car {
        if let Some(':') = symbol.chars().next() {
            return tuplify(cdr);
        }
    }
    let car = tuplify(car);
    let cdr = tuplify(cdr);
    lexpr::Value::Cons(lexpr::Cons::new(car, cdr))
}
@@ -10,4 +10,5 @@ enso-metamodel = { path = "../metamodel", features = ["rust"] }
derivative = "2.2"

[features]
default = ["graphviz"]
graphviz = ["enso-metamodel/graphviz"]