Code blocks (#3585)

This commit is contained in:
Kaz Wesley 2022-07-20 07:53:20 -07:00 committed by GitHub
parent f61849ce04
commit 3b99e18f94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 1108 additions and 426 deletions

21
Cargo.lock generated
View File

@ -1734,7 +1734,7 @@ dependencies = [
[[package]] [[package]]
name = "enso-build" name = "enso-build"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/enso-org/ci-build?branch=develop#5a55bf5241f55bd314ba04498b34d048dae93a34" source = "git+https://github.com/enso-org/ci-build?branch=develop#acc5a7dacc223ad69ebfc7651c5ed0e3c0f1c9e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-compression", "async-compression",
@ -1808,7 +1808,7 @@ dependencies = [
[[package]] [[package]]
name = "enso-build-cli" name = "enso-build-cli"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/enso-org/ci-build?branch=develop#5a55bf5241f55bd314ba04498b34d048dae93a34" source = "git+https://github.com/enso-org/ci-build?branch=develop#acc5a7dacc223ad69ebfc7651c5ed0e3c0f1c9e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"byte-unit", "byte-unit",
@ -2064,13 +2064,14 @@ dependencies = [
"enso-data-structures", "enso-data-structures",
"enso-metamodel", "enso-metamodel",
"enso-metamodel-lexpr", "enso-metamodel-lexpr",
"enso-parser-syntax-tree-builder",
"enso-parser-syntax-tree-visitor", "enso-parser-syntax-tree-visitor",
"enso-prelude", "enso-prelude",
"enso-reflect", "enso-reflect",
"enso-shapely-macros", "enso-shapely-macros",
"enso-types", "enso-types",
"lexpr", "lexpr",
"rand 0.8.5",
"rand_chacha 0.3.1",
"serde", "serde",
] ]
@ -2085,16 +2086,6 @@ dependencies = [
"enso-reflect", "enso-reflect",
] ]
[[package]]
name = "enso-parser-syntax-tree-builder"
version = "0.1.0"
dependencies = [
"enso-macro-utils",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "enso-parser-syntax-tree-visitor" name = "enso-parser-syntax-tree-visitor"
version = "0.1.0" version = "0.1.0"
@ -3702,7 +3693,7 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5617e92fc2f2501c3e2bc6ce547cad841adba2bae5b921c7e52510beca6d084c" checksum = "5617e92fc2f2501c3e2bc6ce547cad841adba2bae5b921c7e52510beca6d084c"
dependencies = [ dependencies = [
"base64 0.13.0", "base64 0.11.0",
"bytes 1.1.0", "bytes 1.1.0",
"http", "http",
"httpdate 1.0.2", "httpdate 1.0.2",
@ -3715,7 +3706,7 @@ dependencies = [
[[package]] [[package]]
name = "ide-ci" name = "ide-ci"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/enso-org/ci-build?branch=develop#5a55bf5241f55bd314ba04498b34d048dae93a34" source = "git+https://github.com/enso-org/ci-build?branch=develop#acc5a7dacc223ad69ebfc7651c5ed0e3c0f1c9e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-compression", "async-compression",

View File

@ -10,7 +10,6 @@ members = [
"build/rust-scripts", "build/rust-scripts",
"lib/rust/*", "lib/rust/*",
"lib/rust/parser/src/syntax/tree/visitor", "lib/rust/parser/src/syntax/tree/visitor",
"lib/rust/parser/src/syntax/tree/builder",
"lib/rust/parser/generate-java", "lib/rust/parser/generate-java",
"lib/rust/profiler/data", "lib/rust/profiler/data",
"integration-test" "integration-test"

View File

@ -55,6 +55,7 @@ use enso_metamodel::meta::*;
use derivative::Derivative; use derivative::Derivative;
use lexpr::Value; use lexpr::Value;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::BTreeSet;
@ -69,13 +70,15 @@ pub struct ToSExpr<'g> {
graph: &'g TypeGraph, graph: &'g TypeGraph,
#[derivative(Debug = "ignore")] #[derivative(Debug = "ignore")]
mappers: BTreeMap<TypeId, Box<dyn Fn(Value) -> Value>>, mappers: BTreeMap<TypeId, Box<dyn Fn(Value) -> Value>>,
skip: BTreeSet<TypeId>,
} }
impl<'g> ToSExpr<'g> { impl<'g> ToSExpr<'g> {
#[allow(missing_docs)] #[allow(missing_docs)]
pub fn new(graph: &'g TypeGraph) -> Self { pub fn new(graph: &'g TypeGraph) -> Self {
let mappers = Default::default(); let mappers = Default::default();
Self { graph, mappers } let skip = Default::default();
Self { graph, mappers, skip }
} }
/// Set a transformation to be applied to a type after translating to an S-expression. /// Set a transformation to be applied to a type after translating to an S-expression.
@ -83,6 +86,14 @@ impl<'g> ToSExpr<'g> {
self.mappers.insert(id, Box::new(f)); self.mappers.insert(id, Box::new(f));
} }
/// Exclude the type identified by `id` from the generated output, everywhere it appears. A
/// struct field of this type is dropped from the output of the struct containing it. When the
/// type appears as an enum variant, or is the top-level type given to [`Self::value`], it is
/// rendered as though it had no fields.
pub fn skip(&mut self, id: TypeId) {
    // Whether the ID was already registered is irrelevant; skipping is idempotent.
    let _ = self.skip.insert(id);
}
/// Given a bincode-serialized input, use its `meta` type info to transcribe it to an /// Given a bincode-serialized input, use its `meta` type info to transcribe it to an
/// S-expression. /// S-expression.
pub fn value<T: serde::Serialize>(&self, id: TypeId, input: &T) -> Value { pub fn value<T: serde::Serialize>(&self, id: TypeId, input: &T) -> Value {
@ -126,7 +137,10 @@ impl<'g> ToSExpr<'g> {
let mut out = vec![]; let mut out = vec![];
self.fields(&mut hierarchy, data, &mut out); self.fields(&mut hierarchy, data, &mut out);
assert_eq!(hierarchy, &[]); assert_eq!(hierarchy, &[]);
let mut value = Value::list(out); let mut value = match self.skip.contains(&id) {
true => Value::Null,
false => Value::list(out),
};
if let Some(id) = child { if let Some(id) = child {
if let Some(mapper) = self.mappers.get(&id) { if let Some(mapper) = self.mappers.get(&id) {
value = (mapper)(value); value = (mapper)(value);
@ -157,11 +171,14 @@ impl<'g> ToSExpr<'g> {
self.fields(hierarchy, data, out); self.fields(hierarchy, data, out);
} }
for (i, field) in fields.iter().enumerate() { for (i, field) in fields.iter().enumerate() {
let skip = self.skip.contains(&field.type_);
if !field.name.is_empty() { if !field.name.is_empty() {
let car = Value::Symbol(format!(":{}", field.name).into_boxed_str()); let car = Value::Symbol(format!(":{}", field.name).into_boxed_str());
let cdr = self.value_(field.type_, data); let cdr = self.value_(field.type_, data);
out.push(Value::cons(car, cdr)); if !skip {
} else { out.push(Value::cons(car, cdr));
}
} else if !skip {
out.push(self.value_(field.type_, data)); out.push(self.value_(field.type_, data));
} }
if self.graph[id].child_field == Some(i + 1) { if self.graph[id].child_field == Some(i + 1) {

View File

@ -16,7 +16,6 @@ enso-data-structures = { path = "../data-structures" }
enso-types = { path = "../types", features = ["serde"] } enso-types = { path = "../types", features = ["serde"] }
enso-shapely-macros = { path = "../shapely/macros" } enso-shapely-macros = { path = "../shapely/macros" }
enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" } enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" }
enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
bincode = "1.3" bincode = "1.3"
@ -24,3 +23,5 @@ bincode = "1.3"
enso-metamodel = { path = "../metamodel", features = ["rust"] } enso-metamodel = { path = "../metamodel", features = ["rust"] }
enso-metamodel-lexpr = { path = "../metamodel/lexpr" } enso-metamodel-lexpr = { path = "../metamodel/lexpr" }
lexpr = "0.2.6" lexpr = "0.2.6"
rand = "0.8.5"
rand_chacha = "0.3.1"

View File

@ -1,15 +0,0 @@
#!/bin/sh
set -e
echo $0 | grep lib/rust || ( echo This tool must be run from the repo root, as lib/rust/parser/generate-java/run.sh; exit 1 )
BASE=target/generated_java
OUT=$BASE/org/enso/syntax2
LIB=lib/rust/parser/generate-java/java
mkdir -p $OUT
cargo test -p enso-parser-generate-java
cargo run -p enso-parser-generate-java --bin enso-parser-generate-java -- $OUT
cargo run -p enso-parser-generate-java --bin java-tests > $BASE/GeneratedFormatTests.java
javac -classpath "$LIB:$BASE" -d $BASE $BASE/GeneratedFormatTests.java
java -classpath $BASE GeneratedFormatTests

View File

@ -78,10 +78,12 @@ pattern_impl_for_char_slice!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
pub struct Lexer<'s> { pub struct Lexer<'s> {
#[deref] #[deref]
#[deref_mut] #[deref_mut]
pub state: LexerState, pub state: LexerState,
pub input: &'s str, pub input: &'s str,
pub iterator: str::CharIndices<'s>, pub iterator: str::CharIndices<'s>,
pub output: Vec<Token<'s>>, pub output: Vec<Token<'s>>,
/// Memory for storing tokens, reused as an optimization.
pub token_storage: VecAllocation<Token<'s>>,
} }
/// Internal state of the [`Lexer`]. /// Internal state of the [`Lexer`].
@ -101,9 +103,10 @@ impl<'s> Lexer<'s> {
pub fn new(input: &'s str) -> Self { pub fn new(input: &'s str) -> Self {
let iterator = input.char_indices(); let iterator = input.char_indices();
let capacity = input.len() / AVERAGE_TOKEN_LEN; let capacity = input.len() / AVERAGE_TOKEN_LEN;
let output = Vec::with_capacity(capacity * mem::size_of::<Token<'s>>()); let output = Vec::with_capacity(capacity);
let state = default(); let state = default();
Self { input, iterator, output, state }.init() let token_storage = default();
Self { input, iterator, output, state, token_storage }.init()
} }
fn init(mut self) -> Self { fn init(mut self) -> Self {
@ -677,35 +680,31 @@ impl<'s> Lexer<'s> {
fn newline(&mut self) { fn newline(&mut self) {
if let Some(token) = self.line_break() { if let Some(token) = self.line_break() {
let mut newlines = vec![token.with_variant(token::Variant::newline())]; let mut newlines = self.token_storage.take();
while let Some(token) = self.line_break() { while let Some(token) = self.line_break() {
newlines.push(token.with_variant(token::Variant::newline())); newlines.push(token.with_variant(token::Variant::newline()));
} }
let block_indent = self.last_spaces_visible_offset; let block_indent = self.last_spaces_visible_offset;
if block_indent > self.current_block_indent { if block_indent > self.current_block_indent {
let block_start = self.marker_token(token::Variant::block_start()); let block_start = self.marker_token(token::Variant::block_start());
self.submit_token(block_start); self.submit_token(block_start);
self.start_block(block_indent); self.start_block(block_indent);
} else { }
while block_indent < self.current_block_indent { while block_indent < self.current_block_indent {
let err = "Lexer internal error. Inconsistent code block hierarchy."; let previous_indent = self.block_indent_stack.last().copied().unwrap_or_default();
let parent_block_indent = self.end_block().expect(err); if block_indent > previous_indent {
if block_indent > self.current_block_indent { // The new line indent is smaller than current block but bigger than the
// The new line indent is smaller than current block but bigger than the // previous one. We are treating the line as belonging to the
// // previous one. We are treating the line as belonging to the // block. The warning should be reported by parser.
// block. The warning should be reported by parser. break;
self.start_block(parent_block_indent);
break;
} else {
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
}
} }
self.end_block();
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
} }
for newline in newlines { self.submit_token(token.with_variant(token::Variant::newline()));
self.submit_token(newline); newlines.drain(..).for_each(|token| self.submit_token(token));
} self.token_storage.set_from(newlines);
} }
} }
} }
@ -876,12 +875,45 @@ mod tests {
ident_(" ", "bar"), ident_(" ", "bar"),
block_end_("", ""), block_end_("", ""),
]), ]),
("foo\n +", vec![
ident_("", "foo"),
block_start_("", ""),
newline_("", "\n"),
operator_(" ", "+"),
block_end_("", ""),
]),
]); ]);
} }
#[test] #[test]
fn test_case_empty() { fn test_case_block_bad_indents() {
test_lexer("", vec![]); #[rustfmt::skip]
test_lexer_many(vec![
("\n foo\n bar\nbaz", vec![
block_start_("", ""),
newline_("", "\n"), ident_(" ", "foo"),
newline_("", "\n"), ident_(" ", "bar"),
block_end_("", ""),
newline_("", "\n"), ident_("", "baz"),
]),
("\n foo\n bar\n baz", vec![
block_start_("", ""),
newline_("", "\n"), ident_(" ", "foo"),
newline_("", "\n"), ident_(" ", "bar"),
newline_("", "\n"), ident_(" ", "baz"),
block_end_("", ""),
]),
]);
}
#[test]
fn test_case_whitespace_only_line() {
test_lexer_many(vec![("foo\n \nbar", vec![
ident_("", "foo"),
newline_("", "\n"),
newline_(" ", "\n"),
ident_("", "bar"),
])]);
} }
#[test] #[test]

View File

@ -79,10 +79,12 @@
// === Features === // === Features ===
#![allow(incomplete_features)] #![allow(incomplete_features)]
#![feature(allocator_api)] #![feature(allocator_api)]
#![feature(exact_size_is_empty)]
#![feature(test)] #![feature(test)]
#![feature(specialization)] #![feature(specialization)]
#![feature(let_chains)] #![feature(let_chains)]
#![feature(if_let_guard)] #![feature(if_let_guard)]
#![feature(box_patterns)]
// === Standard Linter Configuration === // === Standard Linter Configuration ===
#![deny(non_ascii_idents)] #![deny(non_ascii_idents)]
#![warn(unsafe_code)] #![warn(unsafe_code)]
@ -147,15 +149,8 @@ impl Parser {
/// Main entry point. /// Main entry point.
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> { pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
let tokens = lexer::run(code); let tokens = lexer::run(code);
let mut statements = vec![]; let resolver = macros::resolver::Resolver::new_root();
let mut tokens = tokens.into_iter().peekable(); resolver.run(&self.macros, tokens)
while tokens.peek().is_some() {
let resolver = macros::resolver::Resolver::new_root();
let tree = resolver.run(&self.macros, &mut tokens);
let tree = expression_to_statement(tree);
statements.push(tree);
}
syntax::Tree::block(statements)
} }
} }
@ -172,73 +167,41 @@ impl Default for Parser {
/// ///
/// In statement context, an expression that has an assignment operator at its top level is /// In statement context, an expression that has an assignment operator at its top level is
/// interpreted as a variable assignment or method definition. /// interpreted as a variable assignment or method definition.
fn expression_to_statement(tree: syntax::Tree<'_>) -> syntax::Tree<'_> { fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*; use syntax::tree::*;
let tree_ = match &*tree.variant { let tree_ = match &mut *tree.variant {
Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast, Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast,
_ => &tree, _ => &mut tree,
}; };
let mut replacement = None; let opr_app = match &mut *tree_.variant {
if let Variant::OprApp(opr_app) = &*tree_.variant { Variant::OprApp(opr_app) => opr_app,
replacement = expression_to_binding(opr_app); _ => return tree,
} };
match replacement { if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.code == "=" {
Some(modified) => modified, let mut args = vec![];
None => tree, let mut lhs = lhs;
} while let Tree { variant: box Variant::App(App { func, arg }), .. } = lhs {
} lhs = func;
args.push(arg.clone());
/// If the input is an "=" expression, try to interpret it as either a variable assignment or method }
/// definition. args.reverse();
fn expression_to_binding<'a>(app: &syntax::tree::OprApp<'a>) -> Option<syntax::Tree<'a>> { if args.is_empty() && let Some(rhs) = rhs && !is_body_block(rhs) {
use syntax::tree::*; // If the LHS has no arguments, and there is a RHS, and the RHS is not a body block,
match app { // this is a variable assignment.
OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } if opr.code == "=" => { return Tree::assignment(mem::take(lhs), mem::take(opr), mem::take(rhs))
let mut lhs = lhs; }
let mut args = vec![]; if let Variant::Ident(Ident { token }) = &mut *lhs.variant {
while let Variant::App(App { func, arg }) = &*lhs.variant { // If this is not a variable assignment, and the leftmost leaf of the `App` tree is
lhs = func; // an identifier, this is a function definition.
args.push(arg.clone()); return Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs))
}
args.reverse();
if let Some(rhs) = rhs && args.is_empty() {
Some(Tree::assignment(lhs.clone(), opr.clone(), rhs.clone()))
} else if let Variant::Ident(Ident { token }) = &*lhs.variant {
Some(Tree::function(token.clone(), args, opr.clone(), rhs.clone()))
} else {
None
}
} }
_ => None,
} }
tree
} }
/// Return whether the expression is a body block.
fn is_body_block(expression: &syntax::tree::Tree<'_>) -> bool {
// ============= matches!(&*expression.variant, syntax::tree::Variant::BodyBlock { .. })
// === Tests ===
// =============
#[cfg(test)]
mod tests {
use super::*;
use enso_parser_syntax_tree_builder::ast_builder;
macro_rules! test_parse {
($input:tt = {$($def:tt)*}) => {
assert_eq!(
Parser::new().run($input),
ast_builder! { $($def)* }
)
};
}
#[test]
fn test_expressions() {
test_parse! {"a" = {a}};
test_parse! {"a b" = {a b}};
test_parse! {"a b c" = {[a b] c}};
}
} }
@ -262,4 +225,44 @@ mod benches {
parser.run(&str); parser.run(&str);
}); });
} }
/// Benchmark parsing a pseudo-random program with varying indentation, to exercise block
/// handling. The generator is seeded with a constant, so every run parses the same input.
#[bench]
fn bench_blocks(bencher: &mut Bencher) {
    use rand::prelude::*;
    use rand_chacha::ChaCha8Rng;
    const LINE_COUNT: usize = 10_000;
    let mut rng = ChaCha8Rng::seed_from_u64(0);
    let mut source = String::new();
    let mut depth = 0u32;
    for _ in 0..LINE_COUNT {
        // Indentation relative to the previous line:
        // - 1/8 chance of decreasing (never below zero).
        // - 1/8 chance of increasing.
        // - 3/4 chance of staying the same.
        let roll: u32 = rng.gen_range(0..8);
        if roll == 0 {
            depth = depth.saturating_sub(1);
        } else if roll == 1 {
            depth += 1;
        }
        source.extend(std::iter::repeat(' ').take(depth as usize));
        // 1/4 chance of operator-block line syntax.
        let operator_line = rng.gen_range(0..4) == 0u32;
        if operator_line {
            source.push_str("* ");
        }
        source.push('x');
        // Equal chance of the next line being interpreted as a body block or argument block
        // line, if it is indented and doesn't match the operator-block syntax.
        // The `=` operator is chosen to exercise the expression-to-statement conversion path.
        let ends_with_assignment: bool = rng.gen();
        if ends_with_assignment {
            source.push_str(" =");
        }
        source.push('\n');
    }
    let parser = Parser::new();
    bencher.iter(move || {
        parser.run(&source);
    });
}
} }

View File

@ -106,8 +106,7 @@ fn matched_segments_into_multi_segment_app(
let segments = matched_segments.mapped(|segment| { let segments = matched_segments.mapped(|segment| {
let header = segment.header; let header = segment.header;
let tokens = segment.result.tokens(); let tokens = segment.result.tokens();
let body = (!tokens.is_empty()) let body = syntax::operator::resolve_operator_precedence_if_non_empty(tokens);
.as_some_from(|| syntax::operator::resolve_operator_precedence(tokens));
syntax::tree::MultiSegmentAppSegment { header, body } syntax::tree::MultiSegmentAppSegment { header, body }
}); });
syntax::Tree::multi_segment_app(segments) syntax::Tree::multi_segment_app(segments)

View File

@ -67,7 +67,7 @@ fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree
let mut v = match_tree.view(); let mut v = match_tree.view();
let name = &v.query("name").unwrap()[0]; let name = &v.query("name").unwrap()[0];
let name = operator::resolve_operator_precedence(name.clone()); let name = operator::resolve_operator_precedence_if_non_empty(name.clone()).unwrap();
// println!("{:#?}", name); // println!("{:#?}", name);
// println!("\n\n------------- 2"); // println!("\n\n------------- 2");
@ -78,7 +78,7 @@ fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree
let params = params let params = params
.iter() .iter()
.map(|tokens| operator::resolve_operator_precedence(tokens.clone())) .map(|tokens| operator::resolve_operator_precedence_if_non_empty(tokens.clone()).unwrap())
.collect_vec(); .collect_vec();
// println!("{:#?}", params); // println!("{:#?}", params);
syntax::Tree::type_def(segment.header, name, params) syntax::Tree::type_def(segment.header, name, params)

View File

@ -1,4 +1,26 @@
//! Macro resolver implementation. Refer to the docs of the main parser module to learn more. //! Macro resolver implementation. Refer to the docs of the main parser module to learn more.
//!
//! # Blocks
//!
//! Macro resolution is informed by block structure.
//!
//! Macros can explicitly manipulate blocks: A macro can use [`pattern`]s to match depending on the
//! contents of a child block, and a macro can create any arbitrary block structure in its output.
//!
//! However, there is one rule that makes block structure more primitive than macros: Each of a
//! macro's segments must begin in the top level of the same block.
//!
//! For some invalid inputs, this rule affects how errors are reported. For example:
//! ```Enso
//! if foo
//!     then bar
//! ```
//! This will be parsed as an `if` macro whose condition is an argument block application applying
//! `foo` to `then bar`; the reported error will be an incomplete application of the `if` macro.
//!
//! This is implemented by starting a new macro resolution [`Scope`] at the beginning of every
//! block; the new scope is initialized with only the root macro. Within a scope the state of all
//! macros defined in parent scopes will never be advanced.
use crate::prelude::*; use crate::prelude::*;
@ -95,11 +117,16 @@ impl<'a> PartiallyMatchedMacro<'a> {
body: Rc::new(|v| { body: Rc::new(|v| {
// Taking the first segment, hardcoded above. // Taking the first segment, hardcoded above.
let body = v.pop().0.result; let body = v.pop().0.result;
syntax::operator::resolve_operator_precedence(body.tokens()) syntax::operator::resolve_operator_precedence_if_non_empty(body.tokens()).unwrap()
}), }),
})); }));
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def } Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
} }
/// Add `item` (a plain item or a partially-matched macro) to the end of the body of the segment
/// currently being matched.
fn push(&mut self, item: impl Into<ItemOrPartiallyMatchedMacro<'a>>) {
    let entry: ItemOrPartiallyMatchedMacro<'a> = item.into();
    self.current_segment.body.push(entry);
}
} }
@ -174,8 +201,12 @@ impl<'s> TryAsRef<PartiallyMatchedMacro<'s>> for ItemOrPartiallyMatchedMacro<'s>
/// to learn more about the macro resolution steps. /// to learn more about the macro resolution steps.
#[derive(Debug)] #[derive(Debug)]
pub struct Resolver<'s> { pub struct Resolver<'s> {
current_macro: PartiallyMatchedMacro<'s>, current_macro: PartiallyMatchedMacro<'s>,
macro_stack: Vec<PartiallyMatchedMacro<'s>>, macro_stack: Vec<PartiallyMatchedMacro<'s>>,
scopes: Vec<Scope<'s>>,
lines: Vec<syntax::tree::block::Line<'s>>,
newline: Option<token::Newline<'s>>,
line_contains_items: bool,
} }
/// Result of the macro resolution step. /// Result of the macro resolution step.
@ -186,26 +217,50 @@ enum Step<'s> {
MacroStackPop(syntax::Item<'s>), MacroStackPop(syntax::Item<'s>),
} }
/// Information about macro resolution state that is stored while processing a deeper indentation
/// level.
///
/// A `Scope` is pushed when the resolver enters a block and popped when the block's tokens are
/// exhausted; the saved values are then used to resume processing of the enclosing block.
///
/// See the module docs ([`self`]) for more about the interaction between blocks and macros.
#[derive(Debug)]
struct Scope<'s> {
/// The token iterator that was being consumed when the block was entered; processing resumes
/// from it when the scope is exited.
parent_tokens: std::vec::IntoIter<syntax::Item<'s>>,
/// Length of the resolver's macro stack when the block was entered. Macros below this index
/// belong to enclosing scopes.
macros_start: usize,
/// Length of the resolver's list of output lines when the block was entered. Lines from this
/// index onward belong to this block.
outputs_start: usize,
/// The resolver's pending newline at the time the block was entered, if any; restored on exit.
prev_newline: Option<token::Newline<'s>>,
/// The macro that was current when the block was entered; restored on exit, and the finished
/// block is then pushed to it.
prev_macro: PartiallyMatchedMacro<'s>,
}
impl<'s> Resolver<'s> { impl<'s> Resolver<'s> {
/// New resolver with a special "root" segment definition allowing parsing arbitrary /// New resolver with a special "root" segment definition allowing parsing arbitrary
/// expressions. /// expressions.
pub fn new_root() -> Self { pub fn new_root() -> Self {
let current_macro = PartiallyMatchedMacro::new_root(); let current_macro = PartiallyMatchedMacro::new_root();
let macro_stack = default(); let macro_stack = default();
Self { current_macro, macro_stack } let scopes = default();
let lines = default();
let newline = Some(token::newline("", ""));
let line_contains_items = default();
Self { current_macro, macro_stack, scopes, lines, newline, line_contains_items }
} }
fn replace_current_with_parent_macro(&mut self, mut parent_macro: PartiallyMatchedMacro<'s>) { fn replace_current_with_parent_macro(&mut self, parent_macro: PartiallyMatchedMacro<'s>) {
mem::swap(&mut parent_macro, &mut self.current_macro); let child_macro = mem::replace(&mut self.current_macro, parent_macro);
let child_macro = parent_macro; self.current_macro.push(child_macro);
self.current_macro.current_segment.body.push(child_macro.into()); }
/// Returns the index of the first element in `self.macro_stack` that is active in the current
/// scope. Any macros before that index are active in some block that contains the current
/// block, so they will not match tokens within this block.
fn macro_scope_start(&self) -> usize {
self.scopes.last().map(|scope| scope.macros_start).unwrap_or_default()
} }
/// Pop the macro stack if the current token is reserved. For example, when matching the /// Pop the macro stack if the current token is reserved. For example, when matching the
/// `if a if b then c then d` expression, the token `then` after the token `c` will be /// `if a if b then c then d` expression, the token `then` after the token `c` will be
/// considered reserved and the macro resolution of `if b then c` will be popped from the stack. /// considered reserved and the macro resolution of `if b then c` will be popped from the stack.
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<PartiallyMatchedMacro<'s>> { fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<PartiallyMatchedMacro<'s>> {
let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr)); let macros = &self.macro_stack[self.macro_scope_start()..];
let reserved = macros.iter().any(|p| p.possible_next_segments.contains_key(repr));
reserved.and_option_from(|| self.macro_stack.pop()) reserved.and_option_from(|| self.macro_stack.pop())
} }
@ -213,16 +268,15 @@ impl<'s> Resolver<'s> {
pub fn run( pub fn run(
mut self, mut self,
root_macro_map: &SegmentMap<'s>, root_macro_map: &SegmentMap<'s>,
tokens: &mut iter::Peekable<std::vec::IntoIter<syntax::Item<'s>>>, tokens: Vec<syntax::Item<'s>>,
) -> syntax::Tree<'s> { ) -> syntax::Tree<'s> {
let mut tokens = tokens.into_iter();
event!(TRACE, "Running macro resolver. Registered macros:\n{:#?}", root_macro_map); event!(TRACE, "Running macro resolver. Registered macros:\n{:#?}", root_macro_map);
let mut opt_item: Option<syntax::Item<'s>>; let mut opt_item: Option<syntax::Item<'s>>;
macro_rules! next_token { macro_rules! next_token {
() => {{ () => {{
opt_item = tokens.next(); opt_item = tokens.next();
if let Some(token) = opt_item.as_ref() { event!(TRACE, "Next token {:#?}", &opt_item);
event!(TRACE, "New token {:#?}", token);
}
}}; }};
} }
macro_rules! trace_state { macro_rules! trace_state {
@ -232,9 +286,61 @@ impl<'s> Resolver<'s> {
}; };
} }
next_token!(); next_token!();
while let Some(token) = opt_item && !token.is_newline() { loop {
while opt_item.is_none() {
if let Some(newline) = self.newline.take() {
let expression = self.line_contains_items.as_some_from(|| self.unwind_stack());
self.lines.push(syntax::tree::block::Line { newline, expression });
}
if let Some(parent_tokens) = self.exit_current_scope() {
tokens = parent_tokens;
next_token!();
continue;
}
break;
}
let token = match opt_item {
Some(token) => token,
None => break,
};
if let syntax::Item::Token(Token {
variant: token::Variant::Newline(_),
left_offset,
code,
}) = token
{
let new_newline = token::newline(left_offset, code);
let newline = mem::replace(&mut self.newline, Some(new_newline));
if let Some(newline) = newline {
let expression = self.line_contains_items.as_some_from(|| self.unwind_stack());
self.lines.push(syntax::tree::block::Line { newline, expression });
}
next_token!();
self.line_contains_items = false;
continue;
}
self.line_contains_items = true;
let step_result = match token { let step_result = match token {
syntax::Item::Token(token) => self.process_token(root_macro_map, token), syntax::Item::Token(token) => self.process_token(root_macro_map, token),
syntax::Item::Block(tokens_) => {
let parent_tokens = mem::replace(&mut tokens, tokens_.into_iter());
let new_root = PartiallyMatchedMacro::new_root();
let prev_macro = mem::replace(&mut self.current_macro, new_root);
let macros_start = self.macro_stack.len();
let outputs_start = self.lines.len();
let prev_newline = self.newline.take();
let scope = Scope {
parent_tokens,
macros_start,
outputs_start,
prev_newline,
prev_macro,
};
self.scopes.push(scope);
next_token!();
self.line_contains_items = false;
continue;
}
_ => Step::NormalToken(token), _ => Step::NormalToken(token),
}; };
match step_result { match step_result {
@ -247,20 +353,63 @@ impl<'s> Resolver<'s> {
next_token!() next_token!()
} }
Step::NormalToken(item) => { Step::NormalToken(item) => {
self.current_macro.current_segment.body.push(item.into()); self.current_macro.push(item);
trace_state!(); trace_state!();
next_token!(); next_token!();
} }
} }
} }
syntax::tree::block::body_from_lines(self.lines)
}
event!(TRACE, "Finishing resolution. Popping the macro stack."); /// Finish processing the current block and close its macro scope, unless this is the top-level
while let Some(parent_macro) = self.macro_stack.pop() { /// block, which is indicated by returning `None`.
self.replace_current_with_parent_macro(parent_macro); ///
/// This builds a [`syntax::Item::Block`] from the outputs of the current scope, restores the
/// state to resume processing the parent scope, and submits the built block as a token to the
/// newly-current macro (which would have been the macro active when the block began).
///
/// Returns the remaining tokens of the parent block.
fn exit_current_scope(&mut self) -> Option<std::vec::IntoIter<syntax::Item<'s>>> {
let scope = self.scopes.pop()?;
let Scope { parent_tokens, macros_start, outputs_start, prev_newline, prev_macro } = scope;
debug_assert_eq!(macros_start, self.macro_stack.len());
self.current_macro = prev_macro;
let lines = self.lines.drain(outputs_start..);
let mut out = Vec::with_capacity(lines.len() * 2);
for line in lines {
let syntax::tree::block::Line { newline, expression } = line;
let newline = syntax::Token::from(newline);
let newline = syntax::Item::from(newline);
out.push(newline);
if let Some(expression) = expression {
let expression = syntax::Item::from(expression);
out.push(expression);
}
} }
let block = syntax::Item::Block(out);
self.current_macro.push(block);
self.line_contains_items = true;
self.newline = prev_newline;
Some(parent_tokens)
}
fn unwind_stack(&mut self) -> syntax::Tree<'s> {
macro_rules! trace_state {
() => {
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
};
}
event!(TRACE, "Finishing resolution. Popping the macro stack.");
let macros = self.macro_stack.drain(self.macro_scope_start()..).rev();
for parent_macro in macros {
let child_macro = mem::replace(&mut self.current_macro, parent_macro);
self.current_macro.push(child_macro);
}
trace_state!(); trace_state!();
let (tree, rest) = Self::resolve(self.current_macro); let macro_ = mem::replace(&mut self.current_macro, PartiallyMatchedMacro::new_root());
let (tree, rest) = Self::resolve(macro_);
if !rest.is_empty() { if !rest.is_empty() {
panic!( panic!(
"Internal error. Not all tokens were consumed by the macro resolver:\n{:#?}", "Internal error. Not all tokens were consumed by the macro resolver:\n{:#?}",

View File

@ -34,7 +34,7 @@ use enso_parser::prelude::*;
fn main() { fn main() {
init_tracing(TRACE); init_tracing(TRACE);
let ast = enso_parser::Parser::new().run("type Option (a) b c"); let ast = enso_parser::Parser::new().run("foo = 23");
println!("\n\n==================\n\n"); println!("\n\n==================\n\n");
println!("{:#?}", ast); println!("{:#?}", ast);
} }

View File

@ -15,7 +15,7 @@ use crate::prelude::*;
pub struct Code<'s> { pub struct Code<'s> {
#[serde(serialize_with = "crate::serialization::serialize_cow")] #[serde(serialize_with = "crate::serialization::serialize_cow")]
#[serde(deserialize_with = "crate::serialization::deserialize_cow")] #[serde(deserialize_with = "crate::serialization::deserialize_cow")]
#[reflect(as = "crate::serialization::Code")] #[reflect(as = "crate::serialization::Code", flatten)]
pub repr: Cow<'s, str>, pub repr: Cow<'s, str>,
} }

View File

@ -327,7 +327,10 @@ where T: Builder<'s>
{ {
#[inline(always)] #[inline(always)]
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.as_mut().map(|t| Builder::add_to_span(t, span)).unwrap_or_default() match self {
Some(t) => Builder::add_to_span(t, span),
None => span,
}
} }
} }

View File

@ -14,7 +14,7 @@ use crate::syntax::*;
/// Abstraction for [`Token`] and [`Tree`]. Some functions, such as macro resolver need to /// Abstraction for [`Token`] and [`Tree`]. Some functions, such as macro resolver need to
/// distinguish between two cases and need to handle both incoming tokens and already constructed /// distinguish between two cases and need to handle both incoming tokens and already constructed
/// [`Tree`] nodes. This structure provides handy utilities to work with such cases. /// [`Tree`] nodes. This structure provides handy utilities to work with such cases.
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq, Eq)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub enum Item<'s> { pub enum Item<'s> {
Token(Token<'s>), Token(Token<'s>),
@ -47,10 +47,20 @@ impl<'s> Item<'s> {
Item::Token(token) => match token.variant { Item::Token(token) => match token.variant {
token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)), token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
token::Variant::Number(number) => Tree::number(token.with_variant(number)), token::Variant::Number(number) => Tree::number(token.with_variant(number)),
_ => todo!(), _ => todo!("{token:?}"),
}, },
Item::Tree(ast) => ast, Item::Tree(ast) => ast,
Item::Block(_) => todo!(), Item::Block(items) => build_block(items),
}
}
/// If this item is an [`Item::Tree`], apply the given function to the contained [`Tree`] and
/// return the result.
pub fn map_tree<'t: 's, F>(self, f: F) -> Self
where F: FnOnce(Tree<'s>) -> Tree<'t> {
match self {
Item::Tree(tree) => Item::Tree(f(tree)),
_ => self,
} }
} }
} }
@ -73,6 +83,32 @@ impl<'s> TryAsRef<Item<'s>> for Item<'s> {
} }
} }
/// Assemble the [`Item`]s of a single block into an AST block node.
///
/// The items are split into lines at every newline token. The content of each line is passed to
/// [`operator::resolve_operator_precedence_if_non_empty`], and the resulting lines are fed to a
/// [`tree::block::Builder`], which chooses the type of block node from the syntax of the lines.
fn build_block<'s>(items: impl IntoIterator<Item = Item<'s>>) -> Tree<'s> {
    let mut builder = tree::block::Builder::new();
    // The newline token that opened the line currently being collected, once one has been seen.
    let mut open_newline = None;
    // The items collected for the current line.
    let mut pending = vec![];
    for item in items {
        let (left_offset, code) = match item {
            Item::Token(Token { variant: token::Variant::Newline(_), left_offset, code }) =>
                (left_offset, code),
            other => {
                pending.push(other);
                continue;
            }
        };
        // A newline closes the line opened by the previous newline (if any) and opens the next.
        if let Some(finished) = open_newline.replace(token::newline(left_offset, code)) {
            let content = mem::take(&mut pending);
            let expression = operator::resolve_operator_precedence_if_non_empty(content);
            builder.push(finished, expression);
        }
    }
    // The last line is not followed by a newline token, so it is flushed here.
    if let Some(finished) = open_newline {
        let expression = operator::resolve_operator_precedence_if_non_empty(pending);
        builder.push(finished, expression);
    }
    builder.build()
}
// =========== // ===========

View File

@ -13,7 +13,7 @@ use crate::syntax::token::Token;
// ================== // ==================
// FIXME: The current implementation hard-codes precedence values and does not support precedence // FIXME: The current implementation hard-codes precedence values and does not support precedence
// computations for any operator (according to the spec) // computations for any operator (according to the spec)
fn precedence_of(operator: &str) -> usize { fn precedence_of(operator: &str) -> usize {
match operator { match operator {
"=" => 1, "=" => 1,
@ -45,77 +45,93 @@ impl<T> WithPrecedence<T> {
/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion /// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion
/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is /// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is
/// not needed. For example, `(.x)` is parsed as `(\t -> t.x)`, which is understandable. /// not needed. For example, `(.x)` is parsed as `(\t -> t.x)`, which is understandable.
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> { fn annotate_tokens_that_need_spacing(item: syntax::Item) -> syntax::Item {
// TODO: It should be possible to make it faster by iterating over mut vec. To be checked. use syntax::tree::Variant::*;
items item.map_tree(|ast| match &*ast.variant {
.into_iter() MultiSegmentApp(data) if !data.segments.first().header.is_symbol() =>
.map(|item| match item { ast.with_error("This expression cannot be used in a non-spaced equation."),
syntax::Item::Block(_) => item, _ => ast,
syntax::Item::Token(_) => item, })
syntax::Item::Tree(ast) => syntax::Item::Tree(match &*ast.variant {
syntax::tree::Variant::MultiSegmentApp(data)
if !data.segments.first().header.is_symbol() =>
ast.with_error("This expression cannot be used in a non-spaced equation."),
_ => ast,
}),
})
.collect()
} }
/// Take [`Item`] stream, resolve operators precedence and return the final AST. The precedence /// If the input sequence is non-empty, return the result of applying
/// resolution algorithm bases on the [Shunting yard algorithm](https://en.wikipedia.org/wiki/Shunting_yard_algorithm). /// [`resolve_operator_precedence`] to it.
/// It is extended to handle operator sections. pub fn resolve_operator_precedence_if_non_empty(
#[inline(always)] items: Vec<syntax::Item<'_>>,
pub fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> { ) -> Option<syntax::Tree<'_>> {
match NonEmptyVec::try_from(items) {
Ok(items) => Some(resolve_operator_precedence(items)),
_ => None,
}
}
/// Take [`Item`] stream, resolve operator precedence and return the final AST.
///
/// The precedence resolution algorithm is based on the Shunting yard algorithm[1], extended to
/// handle operator sections.
/// [1]: https://en.wikipedia.org/wiki/Shunting_yard_algorithm
pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) -> syntax::Tree<'s> {
type Tokens<'s> = Vec<syntax::Item<'s>>; type Tokens<'s> = Vec<syntax::Item<'s>>;
let mut flattened: Tokens<'s> = default(); let mut flattened: Tokens<'s> = default();
let mut no_space_group: Tokens<'s> = default(); let mut no_space_group: Tokens<'s> = default();
let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| { let process_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
let tokens = mem::take(no_space_group); let tokens = no_space_group.drain(..);
if tokens.len() == 1 { if tokens.len() < 2 {
flattened.extend(tokens); flattened.extend(tokens);
} else { } else {
let tokens = annotate_tokens_that_need_spacing(tokens); let tokens = tokens.map(annotate_tokens_that_need_spacing);
let ast = resolve_operator_precedence_internal(tokens); let ast = resolve_operator_precedence_internal(tokens);
flattened.push(ast.into()); flattened.push(ast.into());
} }
}; };
for item in items { // Returns `true` for an item if that item should not follow any other item in a no-space group
if item.left_visible_offset().width_in_spaces == 0 || no_space_group.is_empty() { // (i.e. the item has "space" before it).
no_space_group.push(item) let starts_new_no_space_group = |item: &syntax::item::Item| {
} else if !no_space_group.is_empty() { if item.left_visible_offset().width_in_spaces != 0 {
processs_no_space_group(&mut flattened, &mut no_space_group); return true;
no_space_group.push(item);
} else {
// FIXME: this is unreachable.
flattened.push(item);
} }
if let syntax::item::Item::Block(_) = item {
return true;
}
false
};
for item in items {
if starts_new_no_space_group(&item) {
process_no_space_group(&mut flattened, &mut no_space_group);
}
no_space_group.push(item);
} }
if !no_space_group.is_empty() { process_no_space_group(&mut flattened, &mut no_space_group);
processs_no_space_group(&mut flattened, &mut no_space_group);
}
resolve_operator_precedence_internal(flattened) resolve_operator_precedence_internal(flattened)
} }
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> { fn resolve_operator_precedence_internal<'s>(
items: impl IntoIterator<Item = syntax::Item<'s>>,
) -> syntax::Tree<'s> {
// Reverse-polish notation encoding. // Reverse-polish notation encoding.
/// Classify an item as an operator-token, or other data; we track this state information
/// because whenever consecutive operators or consecutive non-operators occur, we merge them
/// into one node.
#[derive(PartialEq, Eq)]
enum ItemType {
Ast,
Opr,
}
use ItemType::*;
let mut was_section_used = false; let mut was_section_used = false;
let mut output: Vec<syntax::Item> = default(); let mut output: Vec<syntax::Item> = default();
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default(); let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
let mut last_token_was_ast = false; let mut prev_type = None;
let mut last_token_was_opr = false;
for item in items { for item in items {
if let syntax::Item::Token(token) = item.clone() if let syntax::Item::Token(
&& let token::Variant::Operator(opr) = token.variant { Token { variant: token::Variant::Operator(opr), left_offset, code }) = item {
// Item is an operator. // Item is an operator.
let last_token_was_opr_copy = last_token_was_opr; let prev_type = mem::replace(&mut prev_type, Some(Opr));
last_token_was_ast = false;
last_token_was_opr = true;
let prec = precedence_of(&token.code); let prec = precedence_of(&code);
let opr = Token(token.left_offset, token.code, opr); let opr = Token(left_offset, code, opr);
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() { if prev_type == Some(Opr) && let Some(prev_opr) = operator_stack.last_mut() {
// Error. Multiple operators next to each other. // Error. Multiple operators next to each other.
match &mut prev_opr.elem { match &mut prev_opr.elem {
Err(err) => err.operators.push(opr), Err(err) => err.operators.push(opr),
@ -133,37 +149,38 @@ fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax:
// Prev operator in the [`operator_stack`] has a higher precedence. // Prev operator in the [`operator_stack`] has a higher precedence.
let lhs = output.pop().map(|t| t.to_ast()); let lhs = output.pop().map(|t| t.to_ast());
if lhs.is_none() { was_section_used = true; } if lhs.is_none() { was_section_used = true; }
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(rhs.to_ast())); let ast = syntax::tree::apply_operator(lhs, prev_opr.elem, Some(rhs.to_ast()));
output.push(ast.into()); output.push(ast.into());
} }
operator_stack.push(WithPrecedence::new(prec, Ok(opr))); operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
} }
} else if last_token_was_ast && let Some(lhs) = output.pop() { } else if prev_type == Some(Ast) && let Some(lhs) = output.pop() {
// Multiple non-operators next to each other. // Multiple non-operators next to each other.
let lhs = lhs.to_ast(); let lhs = lhs.to_ast();
let rhs = item.to_ast(); let rhs = item.to_ast();
let ast = syntax::Tree::app(lhs, rhs); let ast = syntax::tree::apply(lhs, rhs);
output.push(ast.into()); output.push(ast.into());
} else { } else {
// Non-operator that follows previously consumed operator. // Non-operator that follows previously consumed operator.
last_token_was_ast = true; prev_type = Some(Ast);
last_token_was_opr = false;
output.push(item); output.push(item);
} }
} }
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(|t| t.to_ast())); let mut opt_rhs = (prev_type == Some(Ast)).and_option_from(|| output.pop().map(|t| t.to_ast()));
while let Some(opr) = operator_stack.pop() { while let Some(opr) = operator_stack.pop() {
let opt_lhs = output.pop().map(|t| t.to_ast()); let opt_lhs = output.pop().map(|t| t.to_ast());
if opt_lhs.is_none() || opt_rhs.is_none() { if opt_lhs.is_none() || opt_rhs.is_none() {
was_section_used = true; was_section_used = true;
} }
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs)); opt_rhs = Some(syntax::tree::apply_operator(opt_lhs, opr.elem, opt_rhs));
} }
if !output.is_empty() { if !output.is_empty() {
panic!("Internal error. Not all tokens were consumed while constructing the expression."); panic!("Internal error. Not all tokens were consumed while constructing the expression.");
} }
// FIXME // This unwrap is safe because:
// - resolve_operator_precedence only calls this function with non-empty sequences as inputs.
// - Given a non-empty input, we will always have at least one output.
let out = opt_rhs.unwrap(); let out = opt_rhs.unwrap();
if was_section_used { if was_section_used {
syntax::Tree::opr_section_boundary(out) syntax::Tree::opr_section_boundary(out)

View File

@ -103,7 +103,7 @@ use enso_shapely_macros::tagged_enum;
// ============= // =============
/// The lexical token definition. See the module docs to learn more about its usage scenarios. /// The lexical token definition. See the module docs to learn more about its usage scenarios.
#[derive(Clone, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[derive(Clone, Default, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub struct Token<'s, T = Variant> { pub struct Token<'s, T = Variant> {
#[deref] #[deref]
@ -248,6 +248,8 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
#[allow(missing_docs)] #[allow(missing_docs)]
#[tagged_enum(apply_attributes_to = "variants")] #[tagged_enum(apply_attributes_to = "variants")]
#[reflect(inline)] #[reflect(inline)]
#[tagged_enum(apply_attributes_to = "variant-types")]
#[derive(Default)]
pub enum Variant { pub enum Variant {
Newline, Newline,
Symbol, Symbol,
@ -272,6 +274,12 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
} }
}}} }}}
impl Default for Variant {
    /// The default token variant is [`Variant::Newline`].
    fn default() -> Self {
        let newline = variant::Newline {};
        Self::Newline(newline)
    }
}
macro_rules! generate_token_aliases { macro_rules! generate_token_aliases {
( (
$(#$enum_meta:tt)* $(#$enum_meta:tt)*

View File

@ -9,6 +9,8 @@ use crate::span_builder;
use enso_parser_syntax_tree_visitor::Visitor; use enso_parser_syntax_tree_visitor::Visitor;
use enso_shapely_macros::tagged_enum; use enso_shapely_macros::tagged_enum;
pub mod block;
// ============ // ============
@ -53,6 +55,15 @@ impl<'s> AsRef<Span<'s>> for Tree<'s> {
} }
} }
impl<'s> Default for Tree<'s> {
    /// The default tree is an [`Ident`] node holding a default token, with a default span.
    fn default() -> Self {
        let token = Default::default();
        let variant = Box::new(Variant::Ident(Ident { token }));
        let span = Default::default();
        Self { variant, span }
    }
}
/// Macro providing [`Tree`] type definition. It is used to both define the ast [`Variant`], and to /// Macro providing [`Tree`] type definition. It is used to both define the ast [`Variant`], and to
/// define impls for every token type in other modules. /// define impls for every token type in other modules.
#[macro_export] #[macro_export]
@ -68,8 +79,28 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub error: Error, pub error: Error,
pub ast: Tree<'s>, pub ast: Tree<'s>,
}, },
Block { /// A sequence of lines introduced by a line ending in an operator.
pub statements: Vec<Tree<'s>>, BodyBlock {
/// The lines of the block.
pub statements: Vec<block::Line<'s>>,
},
/// A sequence of lines comprising the arguments of a function call.
ArgumentBlockApplication {
/// The expression for the value to which the arguments are to be applied.
pub lhs: Option<Tree<'s>>,
/// The lines of the block.
pub arguments: Vec<block::Line<'s>>,
},
/// A sequence of lines comprising a tree of operator expressions.
OperatorBlockApplication {
/// The expression preceding the block; this will be the leftmost-leaf of the binary
/// tree.
pub lhs: Option<Tree<'s>>,
/// The lines of the block.
pub expressions: Vec<block::OperatorLine<'s>>,
/// Lines that appear lexically within the block, but are not syntactically consistent
/// with an operator block.
pub excess: Vec<block::Line<'s>>,
}, },
/// A simple identifier, like `foo` or `bar`. /// A simple identifier, like `foo` or `bar`.
Ident { Ident {
@ -115,15 +146,25 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub name: Tree<'s>, pub name: Tree<'s>,
pub params: Vec<Tree<'s>>, pub params: Vec<Tree<'s>>,
}, },
/// A variable assignment, like `foo = bar 23`.
Assignment { Assignment {
/// The pattern which should be unified with the expression.
pub pattern: Tree<'s>, pub pattern: Tree<'s>,
/// The `=` token.
pub equals: token::Operator<'s>, pub equals: token::Operator<'s>,
/// The expression initializing the value(s) in the pattern.
pub expr: Tree<'s>, pub expr: Tree<'s>,
}, },
/// A function definition, like `add x y = x + y`.
Function { Function {
/// The identifier to which the function should be bound.
pub name: token::Ident<'s>, pub name: token::Ident<'s>,
/// The argument patterns.
pub args: Vec<Tree<'s>>, pub args: Vec<Tree<'s>>,
/// The `=` token.
pub equals: token::Operator<'s>, pub equals: token::Operator<'s>,
/// The body, which will typically be an inline expression or a `BodyBlock` expression.
/// It is an error for this to be empty.
pub body: Option<Tree<'s>>, pub body: Option<Tree<'s>>,
}, },
} }
@ -135,7 +176,7 @@ macro_rules! generate_variant_constructors {
pub enum $enum:ident<'s> { pub enum $enum:ident<'s> {
$( $(
$(#$variant_meta:tt)* $(#$variant_meta:tt)*
$variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })? $variant:ident $({$($(#$field_meta:tt)* pub $field:ident : $field_ty:ty),* $(,)? })?
),* $(,)? ),* $(,)?
} }
) => { paste! { ) => { paste! {
@ -212,6 +253,29 @@ impl<'s> span::Builder<'s> for MultipleOperatorError<'s> {
} }
} }
/// A sequence of one or more operators.
///
/// The accessors return the operator directly rather than an `Option`: implementors must always
/// hold at least one operator.
pub trait NonEmptyOperatorSequence<'s> {
/// Return a reference to the first operator.
fn first_operator(&self) -> &token::Operator<'s>;
/// Return a mutable reference to the first operator.
fn first_operator_mut(&mut self) -> &mut token::Operator<'s>;
}
impl<'s> NonEmptyOperatorSequence<'s> for OperatorOrError<'s> {
    /// A single valid operator is its own first operator; for an error holding several
    /// operators, the first of them is returned.
    fn first_operator(&self) -> &token::Operator<'s> {
        self.as_ref().unwrap_or_else(|multiple| multiple.operators.first())
    }
    /// Mutable counterpart of [`Self::first_operator`].
    fn first_operator_mut(&mut self) -> &mut token::Operator<'s> {
        self.as_mut().unwrap_or_else(|multiple| multiple.operators.first_mut())
    }
}
// === MultiSegmentApp === // === MultiSegmentApp ===
@ -231,6 +295,53 @@ impl<'s> span::Builder<'s> for MultiSegmentAppSegment<'s> {
// ====================================
// === Tree-construction operations ===
// ====================================
/// Apply `func` to `arg`, producing the node appropriate for the type of `arg`.
///
/// When `arg` is an `ArgumentBlockApplication` or an `OperatorBlockApplication` that has no `lhs`
/// yet, `func` becomes the `lhs` of that block and the block itself is returned. In every other
/// case an ordinary `App` node is constructed.
pub fn apply<'s>(func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
    // Locate the `lhs` slot of `arg`, if `arg` is one of the block types that has one.
    let lhs_slot = match &mut *arg.variant {
        Variant::ArgumentBlockApplication(block) => Some(&mut block.lhs),
        Variant::OperatorBlockApplication(block) => Some(&mut block.lhs),
        _ => None,
    };
    // Only a slot that is still empty may receive `func`.
    match lhs_slot.filter(|lhs| lhs.is_none()) {
        Some(vacant) => {
            *vacant = Some(func);
            arg
        }
        None => Tree::app(func, arg),
    }
}
/// Combine two optional operands with an operator, in a way appropriate for their types.
///
/// The result is normally a plain `OprApp`. The exception is a right-hand side that is an
/// `ArgumentBlockApplication` without an `lhs`: its lines are rebuilt as a `BodyBlock` (see
/// [`block::body_from_lines`]), and that body block is used as the right operand instead.
pub fn apply_operator<'s>(
    lhs: Option<Tree<'s>>,
    opr: OperatorOrError<'s>,
    mut rhs: Option<Tree<'s>>,
) -> Tree<'s> {
    // If the RHS is a bare argument block, take its lines and reinterpret them as a body block.
    let body = match &mut rhs {
        Some(tree) => match &mut *tree.variant {
            Variant::ArgumentBlockApplication(block) if block.lhs.is_none() =>
                Some(block::body_from_lines(mem::take(&mut block.arguments))),
            _ => None,
        },
        None => None,
    };
    Tree::opr_app(lhs, opr, body.or(rhs))
}
// ================ // ================
// === Visitors === // === Visitors ===
// ================ // ================

View File

@ -0,0 +1,252 @@
//! Code blocks.
use crate::syntax::tree::*;
// =============
// === Lines ===
// =============
/// A line of code within a block: a newline token, followed by the (optional) content of the
/// line.
#[derive(Debug, Clone, PartialEq, Eq, Visitor, Reflect, Serialize, Deserialize)]
pub struct Line<'s> {
/// The newline token that ends the previous line and introduces this one.
pub newline: token::Newline<'s>,
/// The content of the line; `None` if the line is empty.
pub expression: Option<Tree<'s>>,
}
impl<'s> Line<'s> {
    /// Apply `f` to the expression on this line, if there is one, and return the resulting line.
    /// The newline token is carried over unchanged, and an empty line stays empty.
    pub fn map_expression(self, f: impl FnOnce(Tree<'s>) -> Tree<'s>) -> Self {
        Self { expression: self.expression.map(f), ..self }
    }
}
impl<'s> From<token::Newline<'s>> for Line<'s> {
    /// Create an empty line (one without an expression) introduced by the given newline token.
    fn from(newline: token::Newline<'s>) -> Self {
        let expression = None;
        Self { newline, expression }
    }
}
impl<'s> span::Builder<'s> for Line<'s> {
    /// Extend `span` with the newline token and then with the expression of this line.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        let Self { newline, expression } = self;
        let span = span.add(newline);
        span.add(expression)
    }
}
// ==================
// === Body Block ===
// ==================
/// Create a `BodyBlock` node from a sequence of lines.
///
/// The expression of every non-empty line is reinterpreted in statement context by
/// `expression_to_statement` (i.e. top-level expressions of the block that involve the `=`
/// operator are reinterpreted as function/variable bindings).
pub fn body_from_lines<'s>(expressions: impl IntoIterator<Item = Line<'s>>) -> Tree<'s> {
    use crate::expression_to_statement;
    let to_statement = |line: Line<'s>| line.map_expression(expression_to_statement);
    Tree::body_block(expressions.into_iter().map(to_statement).collect())
}
// ======================
// === Operator Block ===
// ======================
/// The content of a line in an operator block: a leading operator followed by the expression it
/// is applied to.
#[derive(Debug, Clone, PartialEq, Eq, Visitor, Reflect, Serialize, Deserialize)]
pub struct OperatorBlockExpression<'s> {
/// The operator at the beginning of the line (or the error recorded for it).
pub operator: OperatorOrError<'s>,
/// The rest of the expression.
pub expression: Tree<'s>,
}
/// Try to interpret the given expression as an `OperatorBlockExpression`.
///
/// This succeeds only for an operator section boundary wrapping an operator application that has
/// no left operand and whose right operand is preceded by at least one space. In that case the
/// left offset of the whole expression is moved onto the first operator. Otherwise the input
/// expression is handed back unchanged in `Err`.
fn to_operator_block_expression(
    expression_: Tree<'_>,
) -> Result<OperatorBlockExpression<'_>, Tree<'_>> {
    // Copy the operator and its operand out of the tree if it has the expected shape.
    let parts = match &*expression_.variant {
        Variant::OprSectionBoundary(OprSectionBoundary { ast }) => match &*ast.variant {
            Variant::OprApp(OprApp { lhs: None, opr, rhs: Some(operand) })
                if operand.span.left_offset.visible.width_in_spaces >= 1 =>
                Some((opr.clone(), operand.clone())),
            _ => None,
        },
        _ => None,
    };
    match parts {
        Some((mut operator, expression)) => {
            operator.first_operator_mut().left_offset = expression_.span.left_offset;
            Ok(OperatorBlockExpression { operator, expression })
        }
        None => Err(expression_),
    }
}
impl<'s> span::Builder<'s> for OperatorBlockExpression<'s> {
    /// Extend `span` with the operator and then with the expression that follows it.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        let Self { operator, expression } = self;
        let span = span.add(operator);
        span.add(expression)
    }
}
// === Operator block lines ====
/// A line in an operator block: a newline token, followed by an (optional) operator-expression.
#[derive(Debug, Clone, PartialEq, Eq, Visitor, Reflect, Serialize, Deserialize)]
pub struct OperatorLine<'s> {
/// The newline token that ends the previous line and introduces this one.
pub newline: token::Newline<'s>,
/// The operator-expression; `None` if the line is empty.
pub expression: Option<OperatorBlockExpression<'s>>,
}
impl<'s> From<token::Newline<'s>> for OperatorLine<'s> {
    /// Create an empty operator-block line introduced by the given newline token.
    fn from(newline: token::Newline<'s>) -> Self {
        let expression = None;
        Self { newline, expression }
    }
}
impl<'s> span::Builder<'s> for OperatorLine<'s> {
    /// Extend `span` with the newline token and then with the operator-expression of this line.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        let Self { newline, expression } = self;
        let span = span.add(newline);
        span.add(expression)
    }
}
// =====================
// === Block Builder ===
// =====================
/// Builds an AST block type from a sequence of lines.
///
/// Lines are supplied one at a time with [`Builder::push`]; [`Builder::build`] then produces the
/// block node.
///
/// Note that the block type is not fully determined at this stage: We apply context information
/// later (see `apply_operator`) to distinguish the two non-operator block types, `BodyBlock` and
/// `ArgumentBlockApplication`. Here we treat every non-operator block as an argument block,
/// because creating a body block involves re-interpreting the expressions in statement context.
///
/// The implementation is a state machine. The only top-level transitions are:
/// - `Indeterminate` -> `Operator`
/// - `Indeterminate` -> `NonOperator`
///
/// The `Operator` state has two substates, and one possible transition:
/// - `body_lines is empty` -> `body_lines is not empty`
#[derive(Debug)]
pub enum Builder<'s> {
/// The builder is in an indeterminate state until a non-empty line has been encountered, which
/// would distinguish an operator-block from a non-operator block.
Indeterminate {
/// The `Newline` token introducing the block, and `Newline` tokens for any empty lines
/// that have been encountered.
empty_lines: Vec<token::Newline<'s>>,
},
/// Building an operator block. If any line doesn't fit the operator-block syntax, that line
/// and all following will be placed in `body_lines`.
Operator {
/// Valid operator-block expressions.
operator_lines: Vec<OperatorLine<'s>>,
/// Any lines violating the expected operator-block syntax.
body_lines: Vec<Line<'s>>,
},
/// Building a non-operator block (either a body block or an argument block).
NonOperator {
/// The block content.
body_lines: Vec<Line<'s>>,
},
}
impl<'s> Builder<'s> {
/// Create a new instance, in initial state.
pub fn new() -> Self {
Self::Indeterminate { empty_lines: default() }
}
/// Create a new instance, in a state appropriate for the given expression.
fn new_with_expression(
empty_lines: impl IntoIterator<Item = token::Newline<'s>>,
newline: token::Newline<'s>,
expression: Tree<'s>,
) -> Self {
let empty_lines = empty_lines.into_iter();
let new_lines = 1;
match to_operator_block_expression(expression) {
Ok(expression) => {
let expression = Some(expression);
let mut operator_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines);
operator_lines.extend(empty_lines.map(block::OperatorLine::from));
operator_lines.push(OperatorLine { newline, expression });
Self::Operator { operator_lines, body_lines: default() }
}
Err(expression) => {
let expression = Some(expression);
let mut body_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines);
body_lines.extend(empty_lines.map(block::Line::from));
body_lines.push(Line { newline, expression });
Self::NonOperator { body_lines }
}
}
}
/// Apply a new line to the state.
pub fn push(&mut self, newline: token::Newline<'s>, expression: Option<Tree<'s>>) {
match self {
Builder::Indeterminate { empty_lines } => match expression {
Some(expression) =>
*self = Self::new_with_expression(empty_lines.drain(..), newline, expression),
None => empty_lines.push(newline),
},
Builder::NonOperator { body_lines, .. } =>
body_lines.push(Line { newline, expression }),
Builder::Operator { body_lines, .. } if !body_lines.is_empty() => {
body_lines.push(Line { newline, expression });
}
Builder::Operator { operator_lines, body_lines, .. }
if let Some(expression) = expression => {
match to_operator_block_expression(expression) {
Ok(expression) => {
let expression = Some(expression);
operator_lines.push(OperatorLine { newline, expression });
}
Err(expression) => {
let expression = Some(expression);
body_lines.push(Line { newline, expression })
},
}
}
Builder::Operator { operator_lines, .. } => operator_lines.push(newline.into()),
}
}
/// Produce an AST node from the state.
pub fn build(self) -> Tree<'s> {
match self {
Builder::Indeterminate { empty_lines } => {
let empty_lines = empty_lines.into_iter();
let lines = empty_lines.map(Line::from).collect();
Tree::argument_block_application(None, lines)
}
Builder::Operator { operator_lines, body_lines } =>
Tree::operator_block_application(None, operator_lines, body_lines),
Builder::NonOperator { body_lines } =>
Tree::argument_block_application(None, body_lines),
}
}
}
impl<'s> Default for Builder<'s> {
fn default() -> Self {
Self::new()
}
}

View File

@ -1,22 +0,0 @@
[package]
name = "enso-parser-syntax-tree-builder"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "Enso Parser AST Builder."
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0"
enso-macro-utils = { path = "../../../../../macro-utils" }
quote = "1.0"
[dependencies.syn]
version = "1.0"
features = ['extra-traits', 'visit', 'full']

View File

@ -1,136 +0,0 @@
//! Definition of a macro allowing building mock AST structures, mostly useful for testing.
// === Features ===
#![feature(proc_macro_span)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
use proc_macro2::TokenStream;
use quote::quote;
use std::mem;
/// Build a mock AST structure from a compact notation; mostly useful for testing.
///
/// The generated expression wraps the described AST in `crate::syntax::Tree::block`.
///
/// Currently supported syntax:
///
/// - `a b c` Application of arguments. Arguments are applied in-order, from left to right, so
///   this expression is the same as `[[a b] c]`.
///
/// - `a [b c] d` Grouping syntax that does not produce an AST group expression. Here, `b c` is
///   just the first argument passed to `a`.
///
/// - `{if} a {then} b {else} c` Multi-segment application. Every segment header must be enclosed
///   in curly braces. Headers may also be placed in quotes, like `{"("} a {")"}`.
#[proc_macro]
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
    let body = expr(tokens, None);
    quote!(crate::syntax::Tree::block(vec![#body])).into()
}
/// A segment of a multi-segment application that is being assembled by `expr`.
struct Segment {
// Generated code for the token that introduces the segment.
header: TokenStream,
// Generated code for the expression following the header; empty if the segment has no body.
body: TokenStream,
}
impl Segment {
    /// Create a segment with the given header and an empty body.
    fn new(header: TokenStream) -> Self {
        Self { header, body: quote!() }
    }
}
// Translate the tokens of the mock-AST notation into code that constructs the AST.
//
// `parent_spacing` is the spacing inherited from an enclosing `[...]` group; it is added to the
// spacing of the first token only. The result is either a chain of `syntax::Tree::app` calls, or,
// if any `{...}` segment header was encountered, a `syntax::Tree::multi_segment_app` call.
//
// Panics on any token other than an identifier, a `{...}` group or a `[...]` group.
fn expr(tokens: proc_macro::TokenStream, parent_spacing: Option<usize>) -> TokenStream {
use proc_macro::TokenTree::*;
// Code for the expression accumulated so far (within the current segment, if any).
let mut output = quote! {};
// Code for the expression preceding the first segment header, if any.
let mut prefix: Option<TokenStream> = None;
// Completed segments, and the segment whose body is currently being accumulated.
let mut segments: Vec<Segment> = vec![];
let mut current_segment: Option<Segment> = None;
// End column of the previous token; used to compute the spacing before the next one.
let mut last_column: Option<usize> = None;
// Append `tok` to `output` as a further argument (or as the function, if `output` is empty).
let app_to_output = |output: &mut TokenStream, tok| {
if output.is_empty() {
*output = tok;
} else {
*output = quote! {syntax::Tree::app(#output,#tok)};
}
};
let mut inherited_spacing = parent_spacing.unwrap_or(0);
for token in tokens {
// NOTE(review): this subtraction is unchecked; it looks like it assumes every token starts at
// or after the end column of the previous one (i.e. single-line input) -- confirm.
let spacing = last_column.map(|t| token.span().start().column - t).unwrap_or(0);
let spacing = spacing + inherited_spacing;
inherited_spacing = 0;
last_column = Some(token.span().end().column);
match &token {
// a b c ...
Ident(ident) => {
let ident = ident.to_string();
let spacing = " ".repeat(spacing);
app_to_output(
&mut output,
quote! {crate::syntax::Tree::ident(crate::syntax::Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident)))},
);
}
// {if} a {then} b {else} c
// {"("} a {")"}
Group(group) if group.delimiter() == proc_macro::Delimiter::Brace => {
// A new header closes the segment in progress; if there is none, whatever has been
// accumulated so far becomes the prefix of the multi-segment application.
if let Some(mut current_segment) = mem::take(&mut current_segment) {
current_segment.body = mem::take(&mut output);
segments.push(current_segment);
} else if !output.is_empty() {
prefix = Some(mem::take(&mut output));
}
let ident = group.stream().to_string();
let spacing = " ".repeat(spacing);
current_segment = Some(Segment::new(
quote! { Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident).into())},
)); // Token::symbol
}
// a [b c] d
Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => {
// The group is translated recursively; the spacing before `[` is passed on to its first token.
app_to_output(&mut output, expr(group.stream(), Some(spacing)));
}
_ => panic!("Unsupported token {:?}", token),
}
}
// If any segment header was seen, close the last segment and emit a multi-segment application.
if let Some(mut current_segment) = current_segment {
current_segment.body = mem::take(&mut output);
segments.push(current_segment);
let segments: Vec<TokenStream> = segments
.into_iter()
.map(|t| {
let header = t.header;
let body = t.body;
let body = if !body.is_empty() {
quote!(Some(syntax::Tree::opr_section_boundary(#body)))
} else {
quote!(None)
};
quote! { syntax::tree::MultiSegmentAppSegment { header: #header, body: #body } }
})
.collect();
let pfx = prefix
.map(|t| quote! {Some(Box::new(syntax::Tree::opr_section_boundary(#t)))})
.unwrap_or_else(|| quote! {None});
let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()};
output = quote! {
syntax::Tree::multi_segment_app (#pfx, #segments)
}
}
output
}

View File

@ -16,6 +16,7 @@
#![warn(unused_qualifications)] #![warn(unused_qualifications)]
use lexpr::sexp; use lexpr::sexp;
use lexpr::Value;
@ -23,10 +24,10 @@ use lexpr::sexp;
// === Test support macros === // === Test support macros ===
// =========================== // ===========================
/// Parses input as a sequence of S-expressions, and wraps it in a `Block`. /// Parses input as a sequence of S-expressions, and wraps it in a `BodyBlock`.
macro_rules! block { macro_rules! block {
( $statements:tt ) => { ( $($statements:tt)* ) => {
sexp![(Block #($statements))] sexp![(BodyBlock #( $( $statements )* ) )]
} }
} }
@ -36,18 +37,48 @@ macro_rules! block {
// === Tests === // === Tests ===
// ============= // =============
/// Empty input is expected to produce a `BodyBlock` whose only element is `()`.
#[test]
fn nothing() {
    let expected = block![()];
    test("", expected);
}
#[test] #[test]
fn application() { fn application() {
test("a b c", block![(App (App (Ident a) (Ident b)) (Ident c))]); test("a b c", block![(App (App (Ident a) (Ident b)) (Ident c))]);
} }
#[test] #[test]
fn type_definition_bool() { fn parentheses_simple() {
test("type Bool", block![(TypeDef (Ident type) (Ident Bool) #())]); let expected = block![
(MultiSegmentApp #(((Symbol "(") (App (Ident a) (Ident b))) ((Symbol ")") ())))
];
test("(a b)", expected);
} }
#[test] #[test]
fn type_definition_option() { fn section_simple() {
let expected_lhs = block![(OprSectionBoundary (OprApp () (Ok "+") (Ident a)))];
test("+ a", expected_lhs);
let expected_rhs = block![(OprSectionBoundary (OprApp (Ident a) (Ok "+") ()))];
test("a +", expected_rhs);
}
/// A parenthesized application used as the function of another parenthesized application.
#[test]
fn parentheses_nested() {
    let code = "((a b) c)";
    #[rustfmt::skip]
    let expected = block![
        (MultiSegmentApp #(
         ((Symbol "(")
          (App (MultiSegmentApp #(((Symbol "(") (App (Ident a) (Ident b))) ((Symbol ")") ())))
               (Ident c)))
         ((Symbol ")") ())))
    ];
    test(code, expected);
}
#[test]
fn type_definition() {
test("type Bool", block![(TypeDef (Ident type) (Ident Bool) #())]);
test("type Option a", block![(TypeDef (Ident type) (Ident Option) #((Ident a)))]); test("type Option a", block![(TypeDef (Ident type) (Ident Option) #((Ident a)))]);
} }
@ -75,6 +106,118 @@ fn function_block_simple_args() {
test("foo a b c =", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" ())]); test("foo a b c =", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" ())]);
} }
/// Function bodies given as indented lines after `main =`.
#[test]
fn code_block_body() {
    // A body consisting of a single expression line.
    let single_line = ["main =", " 4"].join("\n");
    test(&single_line, block![(Function main #() "=" (BodyBlock #((Number 4))))]);

    // A line without content before the expression; every variant has the same expected tree.
    // NOTE(review): the first three variants read identically here; presumably they are meant to
    // differ in the whitespace of the middle line — confirm.
    let with_blank_line = [
        ["main =", " ", " 4"],
        ["main =", " ", " 4"],
        ["main =", " ", " 4"],
        ["main =", "", " 4"],
    ];
    for code in with_blank_line {
        test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Number 4))))]);
    }

    // A body with two lines, the first of which is an operator section.
    #[rustfmt::skip]
    let two_lines = [
        "main =",
        " +4",
        " print 23",
    ].join("\n");
    #[rustfmt::skip]
    let expected = block![
        (Function main #() "=" (BodyBlock #(
         (OprSectionBoundary (OprApp () (Ok "+") (Number 4)))
         (App (Ident print) (Number 23)))))
    ];
    test(&two_lines, expected);
}
/// Lines that begin with an operator are expected to attach to the preceding expression as an
/// `OperatorBlockApplication`.
#[test]
fn code_block_operator() {
    let source = ["value = nums", " * each random", " + constant"].join("\n");
    let expected = block![
        (Assignment (Ident value) "="
         (OperatorBlockApplication (Ident nums)
          #(((Ok "*") (App (Ident each) (Ident random)))
            ((Ok "+") (Ident constant)))
          #()))
    ];
    test(&source, expected);
}
/// Lines following an expression are expected to be applied to it as an
/// `ArgumentBlockApplication`.
#[test]
fn code_block_argument_list() {
    // One argument line.
    #[rustfmt::skip]
    let single_argument = [
        "value = foo",
        " bar",
    ].join("\n");
    let expected = block![
        (Assignment (Ident value) "=" (ArgumentBlockApplication (Ident foo) #((Ident bar))))
    ];
    test(&single_argument, expected);

    // Two argument lines; the first is an operator section.
    #[rustfmt::skip]
    let two_arguments = [
        "value = foo",
        " +1",
        " bar",
    ].join("\n");
    #[rustfmt::skip]
    let expected = block![
        (Assignment (Ident value) "="
         (ArgumentBlockApplication (Ident foo) #(
          (OprSectionBoundary (OprApp () (Ok "+") (Number 1)))
          (Ident bar))))
    ];
    test(&two_arguments, expected);
}
/// Inputs in which `foo =` is not followed by any indented, non-empty line.
#[test]
fn code_block_empty() {
    // Here the first line should parse as a function without a body expression (an error).
    // There is no input that parses as an empty `ArgumentBlock` or `OperatorBlock`: what sets
    // those types apart from a body continuation is the presence of non-empty indented lines.
    let without_body = ["foo =", "bar"].join("\n");
    test(&without_body, block![(Function foo #() "=" ()) (Ident bar)]);

    // Parses much like the case above: a line holding no non-whitespace content does not open a
    // code block.
    let whitespace_only_line = ["foo =", " ", "bar"].join("\n");
    test(&whitespace_only_line, block![(Function foo #() "=" ()) () (Ident bar)]);
}
/// All three lines following `main =` are expected to end up in a single `BodyBlock`.
#[test]
fn code_block_bad_indents1() {
    let source = ["main =", " foo", " bar", " baz"].join("\n");
    let expected = block![
        (Function main #() "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz))))
    ];
    test(&source, expected);
}
/// `foo` and `bar` are expected inside the function's `BodyBlock`; the unindented `baz` is
/// expected as a separate statement after the function.
#[test]
fn code_block_bad_indents2() {
    let source = ["main =", " foo", " bar", "baz"].join("\n");
    let expected = block![
        (Function main #() "=" (BodyBlock #((Ident foo) (Ident bar))))
        (Ident baz)
    ];
    test(&source, expected);
}
/// An unindented line after a function body is expected as its own statement, outside the
/// function's `BodyBlock`.
#[test]
fn code_block_with_following_statement() {
    let source = ["main =", " foo", "bar"].join("\n");
    let expected = block![
        (Function main #() "=" (BodyBlock #((Ident foo))))
        (Ident bar)
    ];
    test(&source, expected);
}
// ==================== // ====================
@ -95,11 +238,11 @@ use std::collections::HashSet;
/// - Most token types are represented as their contents, rather than as a token struct. For /// - Most token types are represented as their contents, rather than as a token struct. For
/// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look /// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look
/// like `sexp![foo]`. /// like `sexp![foo]`.
fn test(code: &str, expect: lexpr::Value) { fn test(code: &str, expect: Value) {
let ast = enso_parser::Parser::new().run(code); let ast = enso_parser::Parser::new().run(code);
let ast_s_expr = to_s_expr(&ast, code); let ast_s_expr = to_s_expr(&ast, code);
assert_eq!(ast_s_expr.to_string(), expect.to_string()); assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast);
assert_eq!(ast.code(), code); assert_eq!(ast.code(), code, "{:?}", &ast);
} }
@ -109,40 +252,61 @@ fn test(code: &str, expect: lexpr::Value) {
// ===================== // =====================
/// Produce an S-expression representation of the input AST type. /// Produce an S-expression representation of the input AST type.
pub fn to_s_expr<T>(value: &T, code: &str) -> lexpr::Value pub fn to_s_expr<T>(value: &T, code: &str) -> Value
where T: serde::Serialize + Reflect { where T: serde::Serialize + Reflect {
use enso_parser::syntax::token;
use enso_parser::syntax::tree;
let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type()); let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
let ast_ty = rust_to_meta[&value.reflect_type().id]; let ast_ty = rust_to_meta[&value.reflect_type().id];
let base = code.as_bytes().as_ptr() as usize; let base = code.as_bytes().as_ptr() as usize;
let code: Box<str> = Box::from(code); let code: Box<str> = Box::from(code);
let mut to_s_expr = ToSExpr::new(&graph); let mut to_s_expr = ToSExpr::new(&graph);
to_s_expr.mapper(ast_ty, strip_hidden_fields); to_s_expr.mapper(ast_ty, strip_hidden_fields);
let ident_token = rust_to_meta[&enso_parser::syntax::token::variant::Ident::reflect().id]; let ident_token = rust_to_meta[&token::variant::Ident::reflect().id];
let operator_token = rust_to_meta[&enso_parser::syntax::token::variant::Operator::reflect().id]; let operator_token = rust_to_meta[&token::variant::Operator::reflect().id];
let number_token = rust_to_meta[&enso_parser::syntax::token::variant::Number::reflect().id]; let symbol_token = rust_to_meta[&token::variant::Symbol::reflect().id];
let token_to_str = move |token: lexpr::Value| { let number_token = rust_to_meta[&token::variant::Number::reflect().id];
let newline_token = rust_to_meta[&token::variant::Newline::reflect().id];
// TODO: Implement `#[reflect(flag = "enso::concrete")]`, which just attaches user data to the
// type info; then filter by flag here instead of hard-coding these simplifications.
let line = rust_to_meta[&tree::block::Line::reflect().id];
let operator_line = rust_to_meta[&tree::block::OperatorLine::reflect().id];
let token_to_str = move |token: Value| {
let range = token_code_range(&token, base); let range = token_code_range(&token, base);
code[range].to_owned().into_boxed_str() code[range].to_owned().into_boxed_str()
}; };
let token_to_str_ = token_to_str.clone(); let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(ident_token, move |token| lexpr::Value::symbol(token_to_str_(token))); to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
let token_to_str_ = token_to_str.clone(); let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(operator_token, move |token| lexpr::Value::string(token_to_str_(token))); to_s_expr.mapper(operator_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(symbol_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str; let token_to_str_ = token_to_str;
to_s_expr.mapper(number_token, move |token| { to_s_expr.mapper(number_token, move |token| {
lexpr::Value::Number(token_to_str_(token).parse::<u64>().unwrap().into()) Value::Number(token_to_str_(token).parse::<u64>().unwrap().into())
}); });
let into_car = |cons| match cons {
Value::Cons(cons) => cons.into_pair().0,
_ => panic!(),
};
to_s_expr.mapper(line, into_car);
to_s_expr.mapper(operator_line, into_car);
to_s_expr.skip(newline_token);
tuplify(to_s_expr.value(ast_ty, &value)) tuplify(to_s_expr.value(ast_ty, &value))
} }
/// Strip certain fields that should be excluded from output. /// Strip certain fields that should be excluded from output.
fn strip_hidden_fields(tree: lexpr::Value) -> lexpr::Value { fn strip_hidden_fields(tree: Value) -> Value {
let hidden_tree_fields = let hidden_tree_fields = [
[":spanLeftOffsetVisible", ":spanLeftOffsetCodeRepr", ":spanCodeLength"]; ":spanLeftOffsetVisible",
":spanLeftOffsetCodeReprBegin",
":spanLeftOffsetCodeReprLen",
":spanCodeLength",
];
let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect(); let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
lexpr::Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val { Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
lexpr::Value::Cons(cons) => match cons.car() { Value::Cons(cons) => match cons.car() {
lexpr::Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()), Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()),
_ => panic!(), _ => panic!(),
}, },
_ => true, _ => true,
@ -151,30 +315,23 @@ fn strip_hidden_fields(tree: lexpr::Value) -> lexpr::Value {
/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s, /// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s,
/// return the range of the input code the token references. /// return the range of the input code the token references.
fn token_code_range(token: &lexpr::Value, base: usize) -> std::ops::Range<usize> { fn token_code_range(token: &Value, base: usize) -> std::ops::Range<usize> {
let code_repr = fields(token).find(|(name, _)| *name == ":codeRepr").unwrap().1; let get_u32 =
let mut begin = None; |field| fields(token).find(|(name, _)| *name == field).unwrap().1.as_u64().unwrap() as u32;
let mut len = None; let begin = get_u32(":codeReprBegin");
for (name, value) in fields(code_repr) { let len = get_u32(":codeReprLen");
match name {
":begin" => begin = Some(value.as_u64().unwrap() as u32),
":len" => len = Some(value.as_u64().unwrap() as u32),
_ => (),
}
}
let begin = begin.unwrap();
let begin = (begin as u64) | (base as u64 & !0xFFFF_FFFF); let begin = (begin as u64) | (base as u64 & !0xFFFF_FFFF);
let begin = if begin < (base as u64) { begin + 0x1_0000_0000 } else { begin }; let begin = if begin < (base as u64) { begin + 0x1_0000_0000 } else { begin };
let begin = begin as usize - base; let begin = begin as usize - base;
let len = len.unwrap() as usize; let len = len as usize;
begin..(begin + len) begin..(begin + len)
} }
/// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields. /// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields.
fn fields(value: &'_ lexpr::Value) -> impl Iterator<Item = (&'_ str, &'_ lexpr::Value)> { fn fields(value: &'_ Value) -> impl Iterator<Item = (&'_ str, &'_ Value)> {
value.list_iter().unwrap().filter_map(|value| match value { value.list_iter().unwrap().filter_map(|value| match value {
lexpr::Value::Cons(cons) => match cons.car() { Value::Cons(cons) => match cons.car() {
lexpr::Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())), Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())),
_ => None, _ => None,
}, },
_ => None, _ => None,
@ -183,24 +340,24 @@ fn fields(value: &'_ lexpr::Value) -> impl Iterator<Item = (&'_ str, &'_ lexpr::
/// Strip field names from struct representations, so that they are printed more concisely, as if /// Strip field names from struct representations, so that they are printed more concisely, as if
/// they were tuple-structs. /// they were tuple-structs.
fn tuplify(value: lexpr::Value) -> lexpr::Value { fn tuplify(value: Value) -> Value {
let (car, cdr) = match value { let (car, cdr) = match value {
lexpr::Value::Cons(cons) => cons.into_pair(), Value::Cons(cons) => cons.into_pair(),
lexpr::Value::Vector(mut vector) => { Value::Vector(mut vector) => {
for value in vector.iter_mut() { for value in vector.iter_mut() {
let original = std::mem::replace(value, lexpr::Value::Nil); let original = std::mem::replace(value, Value::Nil);
*value = tuplify(original); *value = tuplify(original);
} }
return lexpr::Value::Vector(vector); return Value::Vector(vector);
} }
value => return value, value => return value,
}; };
if let lexpr::Value::Symbol(symbol) = &car { if let Value::Symbol(symbol) = &car {
if let Some(':') = symbol.chars().next() { if let Some(':') = symbol.chars().next() {
return tuplify(cdr); return tuplify(cdr);
} }
} }
let car = tuplify(car); let car = tuplify(car);
let cdr = tuplify(cdr); let cdr = tuplify(cdr);
lexpr::Value::Cons(lexpr::Cons::new(car, cdr)) Value::Cons(lexpr::Cons::new(car, cdr))
} }

View File

@ -33,7 +33,8 @@ impl<T> NonEmptyVec<T> {
/// let mut vec: NonEmptyVec<usize> = NonEmptyVec::new(0, vec![]); /// let mut vec: NonEmptyVec<usize> = NonEmptyVec::new(0, vec![]);
/// ``` /// ```
pub fn new(first: T, rest: Vec<T>) -> NonEmptyVec<T> { pub fn new(first: T, rest: Vec<T>) -> NonEmptyVec<T> {
let mut elems = vec![first]; let mut elems = Vec::with_capacity(1 + rest.len());
elems.push(first);
elems.extend(rest); elems.extend(rest);
NonEmptyVec { elems } NonEmptyVec { elems }
} }

View File

@ -1,5 +1,6 @@
//! This module defines utilities for working with the [`std::vec::Vec`] type. //! This module defines utilities for working with the [`std::vec::Vec`] type.
use derivative::Derivative;
use failure::_core::hint::unreachable_unchecked; use failure::_core::hint::unreachable_unchecked;
@ -84,6 +85,84 @@ pub trait VecOps<T>: AsMut<Vec<T>> + Sized {
impl<T> VecOps<T> for Vec<T> {} impl<T> VecOps<T> for Vec<T> {}
// =====================
// === VecAllocation ===
// =====================
/// Owns a storage allocation for a [`std::vec::Vec`], but no elements.
///
/// # Usage
///
/// This data structure implements an optimization when creating temporary vectors. The use case
/// occurs when:
/// - Within some scope, a `Vec` is created, added to, and discarded.
/// - The scope may be entered multiple times.
///
/// The optimization is to reuse an allocation between entries to the scope. This is sometimes done
/// by storing and reusing the `Vec`, but that pattern is misleading; owning a `Vec` suggests that
/// values may be retained between entries to the scope. This type explicitly has only one logical
/// state (empty).
///
/// ```
/// # use enso_prelude::*;
/// #[derive(Default)]
/// struct NumberAdder {
/// // In a more complex struct it would be important to be able to tell what state the object
/// // retains from its fields.
/// temporary_nums: VecAllocation<f64>,
/// }
///
/// impl NumberAdder {
/// /// Add some numbers, with better precision than simply adding `f32` values in a loop.
/// /// (For the sake of example, ignore that this is not a fast or accurate approach.)
/// ///
/// /// Because we reuse an allocation, if this method is called repeatedly it will only have to
/// /// allocate enough space to accommodate the largest single input it processes. Thus, rather
/// /// than performing a number of reallocations that scales linearly in the number of batches
/// /// of input (assuming batch size has some constant geometric mean), it performs a number of
/// /// allocations that scales with the log of the size of the largest batch; the worst case of
/// /// this implementation has the same performance as the best case of an implementation that
/// /// doesn't reuse its allocation.
/// pub fn add_nums(&mut self, inputs: impl IntoIterator<Item = f32>) -> f32 {
/// let mut extended_precision = self.temporary_nums.take();
/// extended_precision.extend(inputs.into_iter().map(f64::from));
/// let result = extended_precision.drain(..).fold(0.0, f64::add);
/// self.temporary_nums.set_from(extended_precision);
/// result as f32
/// }
/// }
/// ```
#[derive(Clone, Debug, Derivative, Eq, PartialEq)]
#[derivative(Default(bound = ""))]
pub struct VecAllocation<T> {
    // Holds no elements between calls: `set_from` clears the vector before storing it, and
    // `take` leaves a default (empty) vector behind. Only its allocated capacity is of interest.
    data: Vec<T>,
}
impl<T> VecAllocation<T> {
    /// Construct an allocation that does not yet own any storage.
    pub fn new() -> Self {
        Self { data: Vec::new() }
    }

    /// Store the memory of `data` for later reuse. Any elements still in `data` are dropped;
    /// only its allocated storage is kept, and can be obtained again through `take`.
    pub fn set_from(&mut self, mut data: Vec<T>) {
        data.clear();
        self.data = data;
    }

    /// Hand out an empty `Vec` backed by the storage given to the latest `set_from` call,
    /// provided `take` has not already been called since. Until `set_from` is called again,
    /// further calls return a freshly created `Vec` without any allocated memory.
    pub fn take(&mut self) -> Vec<T> {
        let mut storage = Vec::new();
        std::mem::swap(&mut storage, &mut self.data);
        storage
    }
}
// ============= // =============
// === Tests === // === Tests ===
// ============= // =============