mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 03:21:44 +03:00
Code blocks (#3585)
This commit is contained in:
parent
f61849ce04
commit
3b99e18f94
21
Cargo.lock
generated
21
Cargo.lock
generated
@ -1734,7 +1734,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "enso-build"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/enso-org/ci-build?branch=develop#5a55bf5241f55bd314ba04498b34d048dae93a34"
|
||||
source = "git+https://github.com/enso-org/ci-build?branch=develop#acc5a7dacc223ad69ebfc7651c5ed0e3c0f1c9e5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-compression",
|
||||
@ -1808,7 +1808,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "enso-build-cli"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/enso-org/ci-build?branch=develop#5a55bf5241f55bd314ba04498b34d048dae93a34"
|
||||
source = "git+https://github.com/enso-org/ci-build?branch=develop#acc5a7dacc223ad69ebfc7651c5ed0e3c0f1c9e5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"byte-unit",
|
||||
@ -2064,13 +2064,14 @@ dependencies = [
|
||||
"enso-data-structures",
|
||||
"enso-metamodel",
|
||||
"enso-metamodel-lexpr",
|
||||
"enso-parser-syntax-tree-builder",
|
||||
"enso-parser-syntax-tree-visitor",
|
||||
"enso-prelude",
|
||||
"enso-reflect",
|
||||
"enso-shapely-macros",
|
||||
"enso-types",
|
||||
"lexpr",
|
||||
"rand 0.8.5",
|
||||
"rand_chacha 0.3.1",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@ -2085,16 +2086,6 @@ dependencies = [
|
||||
"enso-reflect",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "enso-parser-syntax-tree-builder"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"enso-macro-utils",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "enso-parser-syntax-tree-visitor"
|
||||
version = "0.1.0"
|
||||
@ -3702,7 +3693,7 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5617e92fc2f2501c3e2bc6ce547cad841adba2bae5b921c7e52510beca6d084c"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"base64 0.11.0",
|
||||
"bytes 1.1.0",
|
||||
"http",
|
||||
"httpdate 1.0.2",
|
||||
@ -3715,7 +3706,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "ide-ci"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/enso-org/ci-build?branch=develop#5a55bf5241f55bd314ba04498b34d048dae93a34"
|
||||
source = "git+https://github.com/enso-org/ci-build?branch=develop#acc5a7dacc223ad69ebfc7651c5ed0e3c0f1c9e5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-compression",
|
||||
|
@ -10,7 +10,6 @@ members = [
|
||||
"build/rust-scripts",
|
||||
"lib/rust/*",
|
||||
"lib/rust/parser/src/syntax/tree/visitor",
|
||||
"lib/rust/parser/src/syntax/tree/builder",
|
||||
"lib/rust/parser/generate-java",
|
||||
"lib/rust/profiler/data",
|
||||
"integration-test"
|
||||
|
@ -55,6 +55,7 @@ use enso_metamodel::meta::*;
|
||||
use derivative::Derivative;
|
||||
use lexpr::Value;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
|
||||
|
||||
@ -69,13 +70,15 @@ pub struct ToSExpr<'g> {
|
||||
graph: &'g TypeGraph,
|
||||
#[derivative(Debug = "ignore")]
|
||||
mappers: BTreeMap<TypeId, Box<dyn Fn(Value) -> Value>>,
|
||||
skip: BTreeSet<TypeId>,
|
||||
}
|
||||
|
||||
impl<'g> ToSExpr<'g> {
|
||||
#[allow(missing_docs)]
|
||||
pub fn new(graph: &'g TypeGraph) -> Self {
|
||||
let mappers = Default::default();
|
||||
Self { graph, mappers }
|
||||
let skip = Default::default();
|
||||
Self { graph, mappers, skip }
|
||||
}
|
||||
|
||||
/// Set a transformation to be applied to a type after translating to an S-expression.
|
||||
@ -83,6 +86,14 @@ impl<'g> ToSExpr<'g> {
|
||||
self.mappers.insert(id, Box::new(f));
|
||||
}
|
||||
|
||||
/// Omit a type, specified by ID, from the output, wherever it occurs. If it occurs as a field
|
||||
/// in another struct, that field will be omitted. If the type occurs as a variant of an enum,
|
||||
/// or as the top-level type passed to [`Self::value`], it will be represented as if it had no
|
||||
/// fields.
|
||||
pub fn skip(&mut self, id: TypeId) {
|
||||
self.skip.insert(id);
|
||||
}
|
||||
|
||||
/// Given a bincode-serialized input, use its `meta` type info to transcribe it to an
|
||||
/// S-expression.
|
||||
pub fn value<T: serde::Serialize>(&self, id: TypeId, input: &T) -> Value {
|
||||
@ -126,7 +137,10 @@ impl<'g> ToSExpr<'g> {
|
||||
let mut out = vec![];
|
||||
self.fields(&mut hierarchy, data, &mut out);
|
||||
assert_eq!(hierarchy, &[]);
|
||||
let mut value = Value::list(out);
|
||||
let mut value = match self.skip.contains(&id) {
|
||||
true => Value::Null,
|
||||
false => Value::list(out),
|
||||
};
|
||||
if let Some(id) = child {
|
||||
if let Some(mapper) = self.mappers.get(&id) {
|
||||
value = (mapper)(value);
|
||||
@ -157,11 +171,14 @@ impl<'g> ToSExpr<'g> {
|
||||
self.fields(hierarchy, data, out);
|
||||
}
|
||||
for (i, field) in fields.iter().enumerate() {
|
||||
let skip = self.skip.contains(&field.type_);
|
||||
if !field.name.is_empty() {
|
||||
let car = Value::Symbol(format!(":{}", field.name).into_boxed_str());
|
||||
let cdr = self.value_(field.type_, data);
|
||||
out.push(Value::cons(car, cdr));
|
||||
} else {
|
||||
if !skip {
|
||||
out.push(Value::cons(car, cdr));
|
||||
}
|
||||
} else if !skip {
|
||||
out.push(self.value_(field.type_, data));
|
||||
}
|
||||
if self.graph[id].child_field == Some(i + 1) {
|
||||
|
@ -16,7 +16,6 @@ enso-data-structures = { path = "../data-structures" }
|
||||
enso-types = { path = "../types", features = ["serde"] }
|
||||
enso-shapely-macros = { path = "../shapely/macros" }
|
||||
enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" }
|
||||
enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
bincode = "1.3"
|
||||
|
||||
@ -24,3 +23,5 @@ bincode = "1.3"
|
||||
enso-metamodel = { path = "../metamodel", features = ["rust"] }
|
||||
enso-metamodel-lexpr = { path = "../metamodel/lexpr" }
|
||||
lexpr = "0.2.6"
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
|
@ -1,15 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
echo $0 | grep lib/rust || ( echo This tool must be run from the repo root, as lib/rust/parser/generate-java/run.sh; exit 1 )
|
||||
|
||||
BASE=target/generated_java
|
||||
OUT=$BASE/org/enso/syntax2
|
||||
LIB=lib/rust/parser/generate-java/java
|
||||
mkdir -p $OUT
|
||||
cargo test -p enso-parser-generate-java
|
||||
cargo run -p enso-parser-generate-java --bin enso-parser-generate-java -- $OUT
|
||||
cargo run -p enso-parser-generate-java --bin java-tests > $BASE/GeneratedFormatTests.java
|
||||
javac -classpath "$LIB:$BASE" -d $BASE $BASE/GeneratedFormatTests.java
|
||||
java -classpath $BASE GeneratedFormatTests
|
@ -78,10 +78,12 @@ pattern_impl_for_char_slice!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
|
||||
pub struct Lexer<'s> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
pub state: LexerState,
|
||||
pub input: &'s str,
|
||||
pub iterator: str::CharIndices<'s>,
|
||||
pub output: Vec<Token<'s>>,
|
||||
pub state: LexerState,
|
||||
pub input: &'s str,
|
||||
pub iterator: str::CharIndices<'s>,
|
||||
pub output: Vec<Token<'s>>,
|
||||
/// Memory for storing tokens, reused as an optimization.
|
||||
pub token_storage: VecAllocation<Token<'s>>,
|
||||
}
|
||||
|
||||
/// Internal state of the [`Lexer`].
|
||||
@ -101,9 +103,10 @@ impl<'s> Lexer<'s> {
|
||||
pub fn new(input: &'s str) -> Self {
|
||||
let iterator = input.char_indices();
|
||||
let capacity = input.len() / AVERAGE_TOKEN_LEN;
|
||||
let output = Vec::with_capacity(capacity * mem::size_of::<Token<'s>>());
|
||||
let output = Vec::with_capacity(capacity);
|
||||
let state = default();
|
||||
Self { input, iterator, output, state }.init()
|
||||
let token_storage = default();
|
||||
Self { input, iterator, output, state, token_storage }.init()
|
||||
}
|
||||
|
||||
fn init(mut self) -> Self {
|
||||
@ -677,35 +680,31 @@ impl<'s> Lexer<'s> {
|
||||
|
||||
fn newline(&mut self) {
|
||||
if let Some(token) = self.line_break() {
|
||||
let mut newlines = vec![token.with_variant(token::Variant::newline())];
|
||||
let mut newlines = self.token_storage.take();
|
||||
while let Some(token) = self.line_break() {
|
||||
newlines.push(token.with_variant(token::Variant::newline()));
|
||||
}
|
||||
let block_indent = self.last_spaces_visible_offset;
|
||||
|
||||
if block_indent > self.current_block_indent {
|
||||
let block_start = self.marker_token(token::Variant::block_start());
|
||||
self.submit_token(block_start);
|
||||
self.start_block(block_indent);
|
||||
} else {
|
||||
while block_indent < self.current_block_indent {
|
||||
let err = "Lexer internal error. Inconsistent code block hierarchy.";
|
||||
let parent_block_indent = self.end_block().expect(err);
|
||||
if block_indent > self.current_block_indent {
|
||||
// The new line indent is smaller than current block but bigger than the
|
||||
// // previous one. We are treating the line as belonging to the
|
||||
// block. The warning should be reported by parser.
|
||||
self.start_block(parent_block_indent);
|
||||
break;
|
||||
} else {
|
||||
let block_end = self.marker_token(token::Variant::block_end());
|
||||
self.submit_token(block_end);
|
||||
}
|
||||
}
|
||||
while block_indent < self.current_block_indent {
|
||||
let previous_indent = self.block_indent_stack.last().copied().unwrap_or_default();
|
||||
if block_indent > previous_indent {
|
||||
// The new line indent is smaller than current block but bigger than the
|
||||
// previous one. We are treating the line as belonging to the
|
||||
// block. The warning should be reported by parser.
|
||||
break;
|
||||
}
|
||||
self.end_block();
|
||||
let block_end = self.marker_token(token::Variant::block_end());
|
||||
self.submit_token(block_end);
|
||||
}
|
||||
for newline in newlines {
|
||||
self.submit_token(newline);
|
||||
}
|
||||
self.submit_token(token.with_variant(token::Variant::newline()));
|
||||
newlines.drain(..).for_each(|token| self.submit_token(token));
|
||||
self.token_storage.set_from(newlines);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -876,12 +875,45 @@ mod tests {
|
||||
ident_(" ", "bar"),
|
||||
block_end_("", ""),
|
||||
]),
|
||||
("foo\n +", vec![
|
||||
ident_("", "foo"),
|
||||
block_start_("", ""),
|
||||
newline_("", "\n"),
|
||||
operator_(" ", "+"),
|
||||
block_end_("", ""),
|
||||
]),
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_case_empty() {
|
||||
test_lexer("", vec![]);
|
||||
fn test_case_block_bad_indents() {
|
||||
#[rustfmt::skip]
|
||||
test_lexer_many(vec![
|
||||
("\n foo\n bar\nbaz", vec![
|
||||
block_start_("", ""),
|
||||
newline_("", "\n"), ident_(" ", "foo"),
|
||||
newline_("", "\n"), ident_(" ", "bar"),
|
||||
block_end_("", ""),
|
||||
newline_("", "\n"), ident_("", "baz"),
|
||||
]),
|
||||
("\n foo\n bar\n baz", vec![
|
||||
block_start_("", ""),
|
||||
newline_("", "\n"), ident_(" ", "foo"),
|
||||
newline_("", "\n"), ident_(" ", "bar"),
|
||||
newline_("", "\n"), ident_(" ", "baz"),
|
||||
block_end_("", ""),
|
||||
]),
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_case_whitespace_only_line() {
|
||||
test_lexer_many(vec![("foo\n \nbar", vec![
|
||||
ident_("", "foo"),
|
||||
newline_("", "\n"),
|
||||
newline_(" ", "\n"),
|
||||
ident_("", "bar"),
|
||||
])]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -79,10 +79,12 @@
|
||||
// === Features ===
|
||||
#![allow(incomplete_features)]
|
||||
#![feature(allocator_api)]
|
||||
#![feature(exact_size_is_empty)]
|
||||
#![feature(test)]
|
||||
#![feature(specialization)]
|
||||
#![feature(let_chains)]
|
||||
#![feature(if_let_guard)]
|
||||
#![feature(box_patterns)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
@ -147,15 +149,8 @@ impl Parser {
|
||||
/// Main entry point.
|
||||
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
|
||||
let tokens = lexer::run(code);
|
||||
let mut statements = vec![];
|
||||
let mut tokens = tokens.into_iter().peekable();
|
||||
while tokens.peek().is_some() {
|
||||
let resolver = macros::resolver::Resolver::new_root();
|
||||
let tree = resolver.run(&self.macros, &mut tokens);
|
||||
let tree = expression_to_statement(tree);
|
||||
statements.push(tree);
|
||||
}
|
||||
syntax::Tree::block(statements)
|
||||
let resolver = macros::resolver::Resolver::new_root();
|
||||
resolver.run(&self.macros, tokens)
|
||||
}
|
||||
}
|
||||
|
||||
@ -172,73 +167,41 @@ impl Default for Parser {
|
||||
///
|
||||
/// In statement context, an expression that has an assignment operator at its top level is
|
||||
/// interpreted as a variable assignment or method definition.
|
||||
fn expression_to_statement(tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
|
||||
fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
|
||||
use syntax::tree::*;
|
||||
let tree_ = match &*tree.variant {
|
||||
let tree_ = match &mut *tree.variant {
|
||||
Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast,
|
||||
_ => &tree,
|
||||
_ => &mut tree,
|
||||
};
|
||||
let mut replacement = None;
|
||||
if let Variant::OprApp(opr_app) = &*tree_.variant {
|
||||
replacement = expression_to_binding(opr_app);
|
||||
}
|
||||
match replacement {
|
||||
Some(modified) => modified,
|
||||
None => tree,
|
||||
}
|
||||
}
|
||||
|
||||
/// If the input is an "=" expression, try to interpret it as either a variable assignment or method
|
||||
/// definition.
|
||||
fn expression_to_binding<'a>(app: &syntax::tree::OprApp<'a>) -> Option<syntax::Tree<'a>> {
|
||||
use syntax::tree::*;
|
||||
match app {
|
||||
OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } if opr.code == "=" => {
|
||||
let mut lhs = lhs;
|
||||
let mut args = vec![];
|
||||
while let Variant::App(App { func, arg }) = &*lhs.variant {
|
||||
lhs = func;
|
||||
args.push(arg.clone());
|
||||
}
|
||||
args.reverse();
|
||||
if let Some(rhs) = rhs && args.is_empty() {
|
||||
Some(Tree::assignment(lhs.clone(), opr.clone(), rhs.clone()))
|
||||
} else if let Variant::Ident(Ident { token }) = &*lhs.variant {
|
||||
Some(Tree::function(token.clone(), args, opr.clone(), rhs.clone()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let opr_app = match &mut *tree_.variant {
|
||||
Variant::OprApp(opr_app) => opr_app,
|
||||
_ => return tree,
|
||||
};
|
||||
if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.code == "=" {
|
||||
let mut args = vec![];
|
||||
let mut lhs = lhs;
|
||||
while let Tree { variant: box Variant::App(App { func, arg }), .. } = lhs {
|
||||
lhs = func;
|
||||
args.push(arg.clone());
|
||||
}
|
||||
args.reverse();
|
||||
if args.is_empty() && let Some(rhs) = rhs && !is_body_block(rhs) {
|
||||
// If the LHS has no arguments, and there is a RHS, and the RHS is not a body block,
|
||||
// this is a variable assignment.
|
||||
return Tree::assignment(mem::take(lhs), mem::take(opr), mem::take(rhs))
|
||||
}
|
||||
if let Variant::Ident(Ident { token }) = &mut *lhs.variant {
|
||||
// If this is not a variable assignment, and the leftmost leaf of the `App` tree is
|
||||
// an identifier, this is a function definition.
|
||||
return Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
tree
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use enso_parser_syntax_tree_builder::ast_builder;
|
||||
|
||||
macro_rules! test_parse {
|
||||
($input:tt = {$($def:tt)*}) => {
|
||||
assert_eq!(
|
||||
Parser::new().run($input),
|
||||
ast_builder! { $($def)* }
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expressions() {
|
||||
test_parse! {"a" = {a}};
|
||||
test_parse! {"a b" = {a b}};
|
||||
test_parse! {"a b c" = {[a b] c}};
|
||||
}
|
||||
/// Return whether the expression is a body block.
|
||||
fn is_body_block(expression: &syntax::tree::Tree<'_>) -> bool {
|
||||
matches!(&*expression.variant, syntax::tree::Variant::BodyBlock { .. })
|
||||
}
|
||||
|
||||
|
||||
@ -262,4 +225,44 @@ mod benches {
|
||||
parser.run(&str);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_blocks(bencher: &mut Bencher) {
|
||||
use rand::prelude::*;
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
let lines = 10_000;
|
||||
let mut str = String::new();
|
||||
let mut rng = ChaCha8Rng::seed_from_u64(0);
|
||||
let mut indent = 0u32;
|
||||
for _ in 0..lines {
|
||||
// Indent:
|
||||
// 1/8 chance of increasing.
|
||||
// 1/8 chance of decreasing.
|
||||
// 3/4 chance of leaving unchanged.
|
||||
match rng.gen_range(0..8) {
|
||||
0u32 => indent = indent.saturating_sub(1),
|
||||
1 => indent += 1,
|
||||
_ => (),
|
||||
}
|
||||
for _ in 0..indent {
|
||||
str.push(' ');
|
||||
}
|
||||
// 1/4 chance of operator-block line syntax.
|
||||
if rng.gen_range(0..4) == 0u32 {
|
||||
str.push_str("* ");
|
||||
}
|
||||
str.push('x');
|
||||
// Equal chance of the next line being interpreted as a body block or argument block
|
||||
// line, if it is indented and doesn't match the operator-block syntax.
|
||||
// The `=` operator is chosen to exercise the expression-to-statement conversion path.
|
||||
if rng.gen() {
|
||||
str.push_str(" =");
|
||||
}
|
||||
str.push('\n');
|
||||
}
|
||||
let parser = Parser::new();
|
||||
bencher.iter(move || {
|
||||
parser.run(&str);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -106,8 +106,7 @@ fn matched_segments_into_multi_segment_app(
|
||||
let segments = matched_segments.mapped(|segment| {
|
||||
let header = segment.header;
|
||||
let tokens = segment.result.tokens();
|
||||
let body = (!tokens.is_empty())
|
||||
.as_some_from(|| syntax::operator::resolve_operator_precedence(tokens));
|
||||
let body = syntax::operator::resolve_operator_precedence_if_non_empty(tokens);
|
||||
syntax::tree::MultiSegmentAppSegment { header, body }
|
||||
});
|
||||
syntax::Tree::multi_segment_app(segments)
|
||||
|
@ -67,7 +67,7 @@ fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree
|
||||
|
||||
let mut v = match_tree.view();
|
||||
let name = &v.query("name").unwrap()[0];
|
||||
let name = operator::resolve_operator_precedence(name.clone());
|
||||
let name = operator::resolve_operator_precedence_if_non_empty(name.clone()).unwrap();
|
||||
// println!("{:#?}", name);
|
||||
// println!("\n\n------------- 2");
|
||||
|
||||
@ -78,7 +78,7 @@ fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree
|
||||
|
||||
let params = params
|
||||
.iter()
|
||||
.map(|tokens| operator::resolve_operator_precedence(tokens.clone()))
|
||||
.map(|tokens| operator::resolve_operator_precedence_if_non_empty(tokens.clone()).unwrap())
|
||||
.collect_vec();
|
||||
// println!("{:#?}", params);
|
||||
syntax::Tree::type_def(segment.header, name, params)
|
||||
|
@ -1,4 +1,26 @@
|
||||
//! Macro resolver implementation. Refer to the docs of the main parser module to learn more.
|
||||
//!
|
||||
//! # Blocks
|
||||
//!
|
||||
//! Macro resolution is informed by block structure.
|
||||
//!
|
||||
//! Macros can explicitly manipulate blocks: A macro can use [`pattern`]s to match depending on the
|
||||
//! contents of a child block, and a macro can create any arbitrary block structure in its output.
|
||||
//!
|
||||
//! However, there is one rule that makes block structure more primitive than macros: Each of a
|
||||
//! macro's segments must begin in the top level of the same block.
|
||||
//!
|
||||
//! For some invalid inputs, this rule affects how errors are reported. For example:
|
||||
//! ```Enso
|
||||
//! if foo
|
||||
//! then bar
|
||||
//! ```
|
||||
//! This will be parsed as an `if` macro whose condition is an argument block application applying
|
||||
//! `foo` to `then bar`; the reported error will be an incomplete application of the `if` macro.
|
||||
//!
|
||||
//! This is implemented by starting a new macro resolution [`Scope`] at the beginning of every
|
||||
//! block; the new scope is initialized with only the root macro. Within a scope the state of all
|
||||
//! macros defined in parent scopes will never be advanced.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
@ -95,11 +117,16 @@ impl<'a> PartiallyMatchedMacro<'a> {
|
||||
body: Rc::new(|v| {
|
||||
// Taking the first segment, hardcoded above.
|
||||
let body = v.pop().0.result;
|
||||
syntax::operator::resolve_operator_precedence(body.tokens())
|
||||
syntax::operator::resolve_operator_precedence_if_non_empty(body.tokens()).unwrap()
|
||||
}),
|
||||
}));
|
||||
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
|
||||
}
|
||||
|
||||
/// Append an item or partially-matched macro to the current segment.
|
||||
fn push(&mut self, item: impl Into<ItemOrPartiallyMatchedMacro<'a>>) {
|
||||
self.current_segment.body.push(item.into());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -174,8 +201,12 @@ impl<'s> TryAsRef<PartiallyMatchedMacro<'s>> for ItemOrPartiallyMatchedMacro<'s>
|
||||
/// to learn more about the macro resolution steps.
|
||||
#[derive(Debug)]
|
||||
pub struct Resolver<'s> {
|
||||
current_macro: PartiallyMatchedMacro<'s>,
|
||||
macro_stack: Vec<PartiallyMatchedMacro<'s>>,
|
||||
current_macro: PartiallyMatchedMacro<'s>,
|
||||
macro_stack: Vec<PartiallyMatchedMacro<'s>>,
|
||||
scopes: Vec<Scope<'s>>,
|
||||
lines: Vec<syntax::tree::block::Line<'s>>,
|
||||
newline: Option<token::Newline<'s>>,
|
||||
line_contains_items: bool,
|
||||
}
|
||||
|
||||
/// Result of the macro resolution step.
|
||||
@ -186,26 +217,50 @@ enum Step<'s> {
|
||||
MacroStackPop(syntax::Item<'s>),
|
||||
}
|
||||
|
||||
/// Information about macro resolution state that is stored while processing a deeper indentation
|
||||
/// level.
|
||||
///
|
||||
/// See the module docs ([`self`]) for about the interaction between blocks and macros.
|
||||
#[derive(Debug)]
|
||||
struct Scope<'s> {
|
||||
parent_tokens: std::vec::IntoIter<syntax::Item<'s>>,
|
||||
macros_start: usize,
|
||||
outputs_start: usize,
|
||||
prev_newline: Option<token::Newline<'s>>,
|
||||
prev_macro: PartiallyMatchedMacro<'s>,
|
||||
}
|
||||
|
||||
impl<'s> Resolver<'s> {
|
||||
/// New resolver with a special "root" segment definition allowing parsing arbitrary
|
||||
/// expressions.
|
||||
pub fn new_root() -> Self {
|
||||
let current_macro = PartiallyMatchedMacro::new_root();
|
||||
let macro_stack = default();
|
||||
Self { current_macro, macro_stack }
|
||||
let scopes = default();
|
||||
let lines = default();
|
||||
let newline = Some(token::newline("", ""));
|
||||
let line_contains_items = default();
|
||||
Self { current_macro, macro_stack, scopes, lines, newline, line_contains_items }
|
||||
}
|
||||
|
||||
fn replace_current_with_parent_macro(&mut self, mut parent_macro: PartiallyMatchedMacro<'s>) {
|
||||
mem::swap(&mut parent_macro, &mut self.current_macro);
|
||||
let child_macro = parent_macro;
|
||||
self.current_macro.current_segment.body.push(child_macro.into());
|
||||
fn replace_current_with_parent_macro(&mut self, parent_macro: PartiallyMatchedMacro<'s>) {
|
||||
let child_macro = mem::replace(&mut self.current_macro, parent_macro);
|
||||
self.current_macro.push(child_macro);
|
||||
}
|
||||
|
||||
/// Returns the index of the first element in `self.macro_stack` that is active in the current
|
||||
/// scope. Any macros before that index are active in some block that contains the current
|
||||
/// block, so they will not match tokens within this block.
|
||||
fn macro_scope_start(&self) -> usize {
|
||||
self.scopes.last().map(|scope| scope.macros_start).unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Pop the macro stack if the current token is reserved. For example, when matching the
|
||||
/// `if a if b then c then d` expression, the token `then` after the token `c` will be
|
||||
/// considered reserved and the macro resolution of `if b then c` will be popped from the stack.
|
||||
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<PartiallyMatchedMacro<'s>> {
|
||||
let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr));
|
||||
let macros = &self.macro_stack[self.macro_scope_start()..];
|
||||
let reserved = macros.iter().any(|p| p.possible_next_segments.contains_key(repr));
|
||||
reserved.and_option_from(|| self.macro_stack.pop())
|
||||
}
|
||||
|
||||
@ -213,16 +268,15 @@ impl<'s> Resolver<'s> {
|
||||
pub fn run(
|
||||
mut self,
|
||||
root_macro_map: &SegmentMap<'s>,
|
||||
tokens: &mut iter::Peekable<std::vec::IntoIter<syntax::Item<'s>>>,
|
||||
tokens: Vec<syntax::Item<'s>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
let mut tokens = tokens.into_iter();
|
||||
event!(TRACE, "Running macro resolver. Registered macros:\n{:#?}", root_macro_map);
|
||||
let mut opt_item: Option<syntax::Item<'s>>;
|
||||
macro_rules! next_token {
|
||||
() => {{
|
||||
opt_item = tokens.next();
|
||||
if let Some(token) = opt_item.as_ref() {
|
||||
event!(TRACE, "New token {:#?}", token);
|
||||
}
|
||||
event!(TRACE, "Next token {:#?}", &opt_item);
|
||||
}};
|
||||
}
|
||||
macro_rules! trace_state {
|
||||
@ -232,9 +286,61 @@ impl<'s> Resolver<'s> {
|
||||
};
|
||||
}
|
||||
next_token!();
|
||||
while let Some(token) = opt_item && !token.is_newline() {
|
||||
loop {
|
||||
while opt_item.is_none() {
|
||||
if let Some(newline) = self.newline.take() {
|
||||
let expression = self.line_contains_items.as_some_from(|| self.unwind_stack());
|
||||
self.lines.push(syntax::tree::block::Line { newline, expression });
|
||||
}
|
||||
if let Some(parent_tokens) = self.exit_current_scope() {
|
||||
tokens = parent_tokens;
|
||||
next_token!();
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
let token = match opt_item {
|
||||
Some(token) => token,
|
||||
None => break,
|
||||
};
|
||||
if let syntax::Item::Token(Token {
|
||||
variant: token::Variant::Newline(_),
|
||||
left_offset,
|
||||
code,
|
||||
}) = token
|
||||
{
|
||||
let new_newline = token::newline(left_offset, code);
|
||||
let newline = mem::replace(&mut self.newline, Some(new_newline));
|
||||
if let Some(newline) = newline {
|
||||
let expression = self.line_contains_items.as_some_from(|| self.unwind_stack());
|
||||
self.lines.push(syntax::tree::block::Line { newline, expression });
|
||||
}
|
||||
next_token!();
|
||||
self.line_contains_items = false;
|
||||
continue;
|
||||
}
|
||||
self.line_contains_items = true;
|
||||
let step_result = match token {
|
||||
syntax::Item::Token(token) => self.process_token(root_macro_map, token),
|
||||
syntax::Item::Block(tokens_) => {
|
||||
let parent_tokens = mem::replace(&mut tokens, tokens_.into_iter());
|
||||
let new_root = PartiallyMatchedMacro::new_root();
|
||||
let prev_macro = mem::replace(&mut self.current_macro, new_root);
|
||||
let macros_start = self.macro_stack.len();
|
||||
let outputs_start = self.lines.len();
|
||||
let prev_newline = self.newline.take();
|
||||
let scope = Scope {
|
||||
parent_tokens,
|
||||
macros_start,
|
||||
outputs_start,
|
||||
prev_newline,
|
||||
prev_macro,
|
||||
};
|
||||
self.scopes.push(scope);
|
||||
next_token!();
|
||||
self.line_contains_items = false;
|
||||
continue;
|
||||
}
|
||||
_ => Step::NormalToken(token),
|
||||
};
|
||||
match step_result {
|
||||
@ -247,20 +353,63 @@ impl<'s> Resolver<'s> {
|
||||
next_token!()
|
||||
}
|
||||
Step::NormalToken(item) => {
|
||||
self.current_macro.current_segment.body.push(item.into());
|
||||
self.current_macro.push(item);
|
||||
trace_state!();
|
||||
next_token!();
|
||||
}
|
||||
}
|
||||
}
|
||||
syntax::tree::block::body_from_lines(self.lines)
|
||||
}
|
||||
|
||||
event!(TRACE, "Finishing resolution. Popping the macro stack.");
|
||||
while let Some(parent_macro) = self.macro_stack.pop() {
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
/// Finish processing the current block and close its macro scope, unless this is the top-level
|
||||
/// block, which is indicated by returning `None`.
|
||||
///
|
||||
/// This builds a [`syntax::Item::Block`] from the outputs of the current scope, restores the
|
||||
/// state to resume processing the parent scope, and submits the built block as a token to the
|
||||
/// newly-current macro (which would have been the macro active when the block began).
|
||||
///
|
||||
/// Returns the remaining tokens of the parent block.
|
||||
fn exit_current_scope(&mut self) -> Option<std::vec::IntoIter<syntax::Item<'s>>> {
|
||||
let scope = self.scopes.pop()?;
|
||||
let Scope { parent_tokens, macros_start, outputs_start, prev_newline, prev_macro } = scope;
|
||||
debug_assert_eq!(macros_start, self.macro_stack.len());
|
||||
self.current_macro = prev_macro;
|
||||
let lines = self.lines.drain(outputs_start..);
|
||||
let mut out = Vec::with_capacity(lines.len() * 2);
|
||||
for line in lines {
|
||||
let syntax::tree::block::Line { newline, expression } = line;
|
||||
let newline = syntax::Token::from(newline);
|
||||
let newline = syntax::Item::from(newline);
|
||||
out.push(newline);
|
||||
if let Some(expression) = expression {
|
||||
let expression = syntax::Item::from(expression);
|
||||
out.push(expression);
|
||||
}
|
||||
}
|
||||
let block = syntax::Item::Block(out);
|
||||
self.current_macro.push(block);
|
||||
self.line_contains_items = true;
|
||||
self.newline = prev_newline;
|
||||
Some(parent_tokens)
|
||||
}
|
||||
|
||||
fn unwind_stack(&mut self) -> syntax::Tree<'s> {
|
||||
macro_rules! trace_state {
|
||||
() => {
|
||||
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
|
||||
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
|
||||
};
|
||||
}
|
||||
event!(TRACE, "Finishing resolution. Popping the macro stack.");
|
||||
let macros = self.macro_stack.drain(self.macro_scope_start()..).rev();
|
||||
for parent_macro in macros {
|
||||
let child_macro = mem::replace(&mut self.current_macro, parent_macro);
|
||||
self.current_macro.push(child_macro);
|
||||
}
|
||||
trace_state!();
|
||||
let (tree, rest) = Self::resolve(self.current_macro);
|
||||
let macro_ = mem::replace(&mut self.current_macro, PartiallyMatchedMacro::new_root());
|
||||
let (tree, rest) = Self::resolve(macro_);
|
||||
if !rest.is_empty() {
|
||||
panic!(
|
||||
"Internal error. Not all tokens were consumed by the macro resolver:\n{:#?}",
|
||||
|
@ -34,7 +34,7 @@ use enso_parser::prelude::*;
|
||||
|
||||
fn main() {
|
||||
init_tracing(TRACE);
|
||||
let ast = enso_parser::Parser::new().run("type Option (a) b c");
|
||||
let ast = enso_parser::Parser::new().run("foo = 23");
|
||||
println!("\n\n==================\n\n");
|
||||
println!("{:#?}", ast);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ use crate::prelude::*;
|
||||
pub struct Code<'s> {
|
||||
#[serde(serialize_with = "crate::serialization::serialize_cow")]
|
||||
#[serde(deserialize_with = "crate::serialization::deserialize_cow")]
|
||||
#[reflect(as = "crate::serialization::Code")]
|
||||
#[reflect(as = "crate::serialization::Code", flatten)]
|
||||
pub repr: Cow<'s, str>,
|
||||
}
|
||||
|
||||
|
@ -327,7 +327,10 @@ where T: Builder<'s>
|
||||
{
|
||||
#[inline(always)]
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
self.as_mut().map(|t| Builder::add_to_span(t, span)).unwrap_or_default()
|
||||
match self {
|
||||
Some(t) => Builder::add_to_span(t, span),
|
||||
None => span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@ use crate::syntax::*;
|
||||
/// Abstraction for [`Token`] and [`Tree`]. Some functions, such as macro resolver need to
|
||||
/// distinguish between two cases and need to handle both incoming tokens and already constructed
|
||||
/// [`Tree`] nodes. This structure provides handy utilities to work with such cases.
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Item<'s> {
|
||||
Token(Token<'s>),
|
||||
@ -47,10 +47,20 @@ impl<'s> Item<'s> {
|
||||
Item::Token(token) => match token.variant {
|
||||
token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
|
||||
token::Variant::Number(number) => Tree::number(token.with_variant(number)),
|
||||
_ => todo!(),
|
||||
_ => todo!("{token:?}"),
|
||||
},
|
||||
Item::Tree(ast) => ast,
|
||||
Item::Block(_) => todo!(),
|
||||
Item::Block(items) => build_block(items),
|
||||
}
|
||||
}
|
||||
|
||||
/// If this item is an [`Item::Tree`], apply the given function to the contained [`Tree`] and
|
||||
/// return the result.
|
||||
pub fn map_tree<'t: 's, F>(self, f: F) -> Self
|
||||
where F: FnOnce(Tree<'s>) -> Tree<'t> {
|
||||
match self {
|
||||
Item::Tree(tree) => Item::Tree(f(tree)),
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -73,6 +83,32 @@ impl<'s> TryAsRef<Item<'s>> for Item<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a sequence of [`Item`]s belonging to one block, create an AST block node, of a type
|
||||
/// determined by the syntax of the lines in the block.
|
||||
fn build_block<'s>(items: impl IntoIterator<Item = Item<'s>>) -> Tree<'s> {
|
||||
let mut line = vec![];
|
||||
let mut block_builder = tree::block::Builder::new();
|
||||
let mut newline = None;
|
||||
for item in items {
|
||||
match item {
|
||||
Item::Token(Token { variant: token::Variant::Newline(_), left_offset, code }) => {
|
||||
let newline = mem::replace(&mut newline, Some(token::newline(left_offset, code)));
|
||||
if let Some(newline) = newline {
|
||||
let line: Vec<_> = line.drain(..).collect();
|
||||
let expression = operator::resolve_operator_precedence_if_non_empty(line);
|
||||
block_builder.push(newline, expression);
|
||||
}
|
||||
}
|
||||
_ => line.push(item),
|
||||
}
|
||||
}
|
||||
if let Some(newline) = newline {
|
||||
let expression = operator::resolve_operator_precedence_if_non_empty(line);
|
||||
block_builder.push(newline, expression);
|
||||
}
|
||||
block_builder.build()
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
|
@ -13,7 +13,7 @@ use crate::syntax::token::Token;
|
||||
// ==================
|
||||
|
||||
// FIXME: The current implementation hard-codes precedence values and does not support precedence
|
||||
// computations for any operator (according to the spec)
|
||||
// computations for any operator (according to the spec)
|
||||
fn precedence_of(operator: &str) -> usize {
|
||||
match operator {
|
||||
"=" => 1,
|
||||
@ -45,77 +45,93 @@ impl<T> WithPrecedence<T> {
|
||||
/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion
|
||||
/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is
|
||||
/// not needed. For example, `(.x)` is parsed as `(\t -> t.x)`, which is understandable.
|
||||
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
|
||||
// TODO: It should be possible to make it faster by iterating over mut vec. To be checked.
|
||||
items
|
||||
.into_iter()
|
||||
.map(|item| match item {
|
||||
syntax::Item::Block(_) => item,
|
||||
syntax::Item::Token(_) => item,
|
||||
syntax::Item::Tree(ast) => syntax::Item::Tree(match &*ast.variant {
|
||||
syntax::tree::Variant::MultiSegmentApp(data)
|
||||
if !data.segments.first().header.is_symbol() =>
|
||||
ast.with_error("This expression cannot be used in a non-spaced equation."),
|
||||
_ => ast,
|
||||
}),
|
||||
})
|
||||
.collect()
|
||||
fn annotate_tokens_that_need_spacing(item: syntax::Item) -> syntax::Item {
|
||||
use syntax::tree::Variant::*;
|
||||
item.map_tree(|ast| match &*ast.variant {
|
||||
MultiSegmentApp(data) if !data.segments.first().header.is_symbol() =>
|
||||
ast.with_error("This expression cannot be used in a non-spaced equation."),
|
||||
_ => ast,
|
||||
})
|
||||
}
|
||||
|
||||
/// Take [`Item`] stream, resolve operators precedence and return the final AST. The precedence
|
||||
/// resolution algorithm bases on the [Shunting yard algorithm](https://en.wikipedia.org/wiki/Shunting_yard_algorithm).
|
||||
/// It is extended to handle operator sections.
|
||||
#[inline(always)]
|
||||
pub fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
|
||||
/// If the input sequence is non-empty, return the result of applying
|
||||
/// [`resolve_operator_precedence`] to it.
|
||||
pub fn resolve_operator_precedence_if_non_empty(
|
||||
items: Vec<syntax::Item<'_>>,
|
||||
) -> Option<syntax::Tree<'_>> {
|
||||
match NonEmptyVec::try_from(items) {
|
||||
Ok(items) => Some(resolve_operator_precedence(items)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Take [`Item`] stream, resolve operator precedence and return the final AST.
|
||||
///
|
||||
/// The precedence resolution algorithm is based on the Shunting yard algorithm[1], extended to
/// handle operator sections.
///
/// [1]: https://en.wikipedia.org/wiki/Shunting_yard_algorithm
|
||||
pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) -> syntax::Tree<'s> {
|
||||
type Tokens<'s> = Vec<syntax::Item<'s>>;
|
||||
let mut flattened: Tokens<'s> = default();
|
||||
let mut no_space_group: Tokens<'s> = default();
|
||||
let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
|
||||
let tokens = mem::take(no_space_group);
|
||||
if tokens.len() == 1 {
|
||||
let process_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
|
||||
let tokens = no_space_group.drain(..);
|
||||
if tokens.len() < 2 {
|
||||
flattened.extend(tokens);
|
||||
} else {
|
||||
let tokens = annotate_tokens_that_need_spacing(tokens);
|
||||
let tokens = tokens.map(annotate_tokens_that_need_spacing);
|
||||
let ast = resolve_operator_precedence_internal(tokens);
|
||||
flattened.push(ast.into());
|
||||
}
|
||||
};
|
||||
for item in items {
|
||||
if item.left_visible_offset().width_in_spaces == 0 || no_space_group.is_empty() {
|
||||
no_space_group.push(item)
|
||||
} else if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
no_space_group.push(item);
|
||||
} else {
|
||||
// FIXME: this is unreachable.
|
||||
flattened.push(item);
|
||||
// Returns `true` for an item if that item should not follow any other item in a no-space group
|
||||
// (i.e. the item has "space" before it).
|
||||
let starts_new_no_space_group = |item: &syntax::item::Item| {
|
||||
if item.left_visible_offset().width_in_spaces != 0 {
|
||||
return true;
|
||||
}
|
||||
if let syntax::item::Item::Block(_) = item {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
};
|
||||
for item in items {
|
||||
if starts_new_no_space_group(&item) {
|
||||
process_no_space_group(&mut flattened, &mut no_space_group);
|
||||
}
|
||||
no_space_group.push(item);
|
||||
}
|
||||
if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
}
|
||||
process_no_space_group(&mut flattened, &mut no_space_group);
|
||||
resolve_operator_precedence_internal(flattened)
|
||||
}
|
||||
|
||||
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
|
||||
fn resolve_operator_precedence_internal<'s>(
|
||||
items: impl IntoIterator<Item = syntax::Item<'s>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
// Reverse-polish notation encoding.
|
||||
/// Classify an item as an operator-token, or other data; we track this state information
|
||||
/// because whenever consecutive operators or consecutive non-operators occur, we merge them
|
||||
/// into one node.
|
||||
#[derive(PartialEq, Eq)]
|
||||
enum ItemType {
|
||||
Ast,
|
||||
Opr,
|
||||
}
|
||||
use ItemType::*;
|
||||
let mut was_section_used = false;
|
||||
let mut output: Vec<syntax::Item> = default();
|
||||
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
|
||||
let mut last_token_was_ast = false;
|
||||
let mut last_token_was_opr = false;
|
||||
let mut prev_type = None;
|
||||
for item in items {
|
||||
if let syntax::Item::Token(token) = item.clone()
|
||||
&& let token::Variant::Operator(opr) = token.variant {
|
||||
if let syntax::Item::Token(
|
||||
Token { variant: token::Variant::Operator(opr), left_offset, code }) = item {
|
||||
// Item is an operator.
|
||||
let last_token_was_opr_copy = last_token_was_opr;
|
||||
last_token_was_ast = false;
|
||||
last_token_was_opr = true;
|
||||
let prev_type = mem::replace(&mut prev_type, Some(Opr));
|
||||
|
||||
let prec = precedence_of(&token.code);
|
||||
let opr = Token(token.left_offset, token.code, opr);
|
||||
let prec = precedence_of(&code);
|
||||
let opr = Token(left_offset, code, opr);
|
||||
|
||||
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
|
||||
if prev_type == Some(Opr) && let Some(prev_opr) = operator_stack.last_mut() {
|
||||
// Error. Multiple operators next to each other.
|
||||
match &mut prev_opr.elem {
|
||||
Err(err) => err.operators.push(opr),
|
||||
@ -133,37 +149,38 @@ fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax:
|
||||
// Prev operator in the [`operator_stack`] has a higher precedence.
|
||||
let lhs = output.pop().map(|t| t.to_ast());
|
||||
if lhs.is_none() { was_section_used = true; }
|
||||
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(rhs.to_ast()));
|
||||
let ast = syntax::tree::apply_operator(lhs, prev_opr.elem, Some(rhs.to_ast()));
|
||||
output.push(ast.into());
|
||||
}
|
||||
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
|
||||
}
|
||||
} else if last_token_was_ast && let Some(lhs) = output.pop() {
|
||||
} else if prev_type == Some(Ast) && let Some(lhs) = output.pop() {
|
||||
// Multiple non-operators next to each other.
|
||||
let lhs = lhs.to_ast();
|
||||
let rhs = item.to_ast();
|
||||
let ast = syntax::Tree::app(lhs, rhs);
|
||||
let ast = syntax::tree::apply(lhs, rhs);
|
||||
output.push(ast.into());
|
||||
} else {
|
||||
// Non-operator that follows previously consumed operator.
|
||||
last_token_was_ast = true;
|
||||
last_token_was_opr = false;
|
||||
prev_type = Some(Ast);
|
||||
output.push(item);
|
||||
}
|
||||
}
|
||||
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(|t| t.to_ast()));
|
||||
let mut opt_rhs = (prev_type == Some(Ast)).and_option_from(|| output.pop().map(|t| t.to_ast()));
|
||||
while let Some(opr) = operator_stack.pop() {
|
||||
let opt_lhs = output.pop().map(|t| t.to_ast());
|
||||
if opt_lhs.is_none() || opt_rhs.is_none() {
|
||||
was_section_used = true;
|
||||
}
|
||||
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
|
||||
opt_rhs = Some(syntax::tree::apply_operator(opt_lhs, opr.elem, opt_rhs));
|
||||
}
|
||||
if !output.is_empty() {
|
||||
panic!("Internal error. Not all tokens were consumed while constructing the expression.");
|
||||
}
|
||||
|
||||
// FIXME
|
||||
// This unwrap is safe because:
|
||||
// - resolve_operator_precedence only calls this function with non-empty sequences as inputs.
|
||||
// - Given a non-empty input, we will always have at least one output.
|
||||
let out = opt_rhs.unwrap();
|
||||
if was_section_used {
|
||||
syntax::Tree::opr_section_boundary(out)
|
||||
|
@ -103,7 +103,7 @@ use enso_shapely_macros::tagged_enum;
|
||||
// =============
|
||||
|
||||
/// The lexical token definition. See the module docs to learn more about its usage scenarios.
|
||||
#[derive(Clone, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)]
|
||||
#[derive(Clone, Default, Deref, DerefMut, Eq, PartialEq, Serialize, Reflect, Deserialize)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Token<'s, T = Variant> {
|
||||
#[deref]
|
||||
@ -248,6 +248,8 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
|
||||
#[allow(missing_docs)]
|
||||
#[tagged_enum(apply_attributes_to = "variants")]
|
||||
#[reflect(inline)]
|
||||
#[tagged_enum(apply_attributes_to = "variant-types")]
|
||||
#[derive(Default)]
|
||||
pub enum Variant {
|
||||
Newline,
|
||||
Symbol,
|
||||
@ -272,6 +274,12 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
|
||||
}
|
||||
}}}
|
||||
|
||||
impl Default for Variant {
|
||||
fn default() -> Self {
|
||||
Self::Newline(variant::Newline {})
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! generate_token_aliases {
|
||||
(
|
||||
$(#$enum_meta:tt)*
|
||||
|
@ -9,6 +9,8 @@ use crate::span_builder;
|
||||
use enso_parser_syntax_tree_visitor::Visitor;
|
||||
use enso_shapely_macros::tagged_enum;
|
||||
|
||||
pub mod block;
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
@ -53,6 +55,15 @@ impl<'s> AsRef<Span<'s>> for Tree<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Default for Tree<'s> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
variant: Box::new(Variant::Ident(Ident { token: Default::default() })),
|
||||
span: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Macro providing [`Tree`] type definition. It is used to both define the ast [`Variant`], and to
|
||||
/// define impls for every token type in other modules.
|
||||
#[macro_export]
|
||||
@ -68,8 +79,28 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub error: Error,
|
||||
pub ast: Tree<'s>,
|
||||
},
|
||||
Block {
|
||||
pub statements: Vec<Tree<'s>>,
|
||||
/// A sequence of lines introduced by a line ending in an operator.
|
||||
BodyBlock {
|
||||
/// The lines of the block.
|
||||
pub statements: Vec<block::Line<'s>>,
|
||||
},
|
||||
/// A sequence of lines comprising the arguments of a function call.
|
||||
ArgumentBlockApplication {
|
||||
/// The expression for the value to which the arguments are to be applied.
|
||||
pub lhs: Option<Tree<'s>>,
|
||||
/// The lines of the block.
|
||||
pub arguments: Vec<block::Line<'s>>,
|
||||
},
|
||||
/// A sequence of lines comprising a tree of operator expressions.
|
||||
OperatorBlockApplication {
|
||||
/// The expression preceding the block; this will be the leftmost-leaf of the binary
|
||||
/// tree.
|
||||
pub lhs: Option<Tree<'s>>,
|
||||
/// The lines of the block.
|
||||
pub expressions: Vec<block::OperatorLine<'s>>,
|
||||
/// Lines that appear lexically within the block, but are not syntactically consistent
|
||||
/// with an operator block.
|
||||
pub excess: Vec<block::Line<'s>>,
|
||||
},
|
||||
/// A simple identifier, like `foo` or `bar`.
|
||||
Ident {
|
||||
@ -115,15 +146,25 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub name: Tree<'s>,
|
||||
pub params: Vec<Tree<'s>>,
|
||||
},
|
||||
/// A variable assignment, like `foo = bar 23`.
|
||||
Assignment {
|
||||
/// The pattern which should be unified with the expression.
|
||||
pub pattern: Tree<'s>,
|
||||
/// The `=` token.
|
||||
pub equals: token::Operator<'s>,
|
||||
/// The expression initializing the value(s) in the pattern.
|
||||
pub expr: Tree<'s>,
|
||||
},
|
||||
/// A function definition, like `add x y = x + y`.
|
||||
Function {
|
||||
/// The identifier to which the function should be bound.
|
||||
pub name: token::Ident<'s>,
|
||||
/// The argument patterns.
|
||||
pub args: Vec<Tree<'s>>,
|
||||
/// The `=` token.
|
||||
pub equals: token::Operator<'s>,
|
||||
/// The body, which will typically be an inline expression or a `BodyBlock` expression.
|
||||
/// It is an error for this to be empty.
|
||||
pub body: Option<Tree<'s>>,
|
||||
},
|
||||
}
|
||||
@ -135,7 +176,7 @@ macro_rules! generate_variant_constructors {
|
||||
pub enum $enum:ident<'s> {
|
||||
$(
|
||||
$(#$variant_meta:tt)*
|
||||
$variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })?
|
||||
$variant:ident $({$($(#$field_meta:tt)* pub $field:ident : $field_ty:ty),* $(,)? })?
|
||||
),* $(,)?
|
||||
}
|
||||
) => { paste! {
|
||||
@ -212,6 +253,29 @@ impl<'s> span::Builder<'s> for MultipleOperatorError<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A sequence of one or more operators.
|
||||
pub trait NonEmptyOperatorSequence<'s> {
|
||||
/// Return a reference to the first operator.
|
||||
fn first_operator(&self) -> &token::Operator<'s>;
|
||||
/// Return a mutable reference to the first operator.
|
||||
fn first_operator_mut(&mut self) -> &mut token::Operator<'s>;
|
||||
}
|
||||
|
||||
impl<'s> NonEmptyOperatorSequence<'s> for OperatorOrError<'s> {
|
||||
fn first_operator(&self) -> &token::Operator<'s> {
|
||||
match self {
|
||||
Ok(opr) => opr,
|
||||
Err(oprs) => oprs.operators.first(),
|
||||
}
|
||||
}
|
||||
fn first_operator_mut(&mut self) -> &mut token::Operator<'s> {
|
||||
match self {
|
||||
Ok(opr) => opr,
|
||||
Err(oprs) => oprs.operators.first_mut(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === MultiSegmentApp ===
|
||||
|
||||
@ -231,6 +295,53 @@ impl<'s> span::Builder<'s> for MultiSegmentAppSegment<'s> {
|
||||
|
||||
|
||||
|
||||
// ====================================
|
||||
// === Tree-construction operations ===
|
||||
// ====================================
|
||||
|
||||
/// Join two nodes with a new node appropriate for their types.
|
||||
///
|
||||
/// For most input types, this simply constructs an `App`; however, for some block type operands
|
||||
/// application has special semantics.
|
||||
pub fn apply<'s>(func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
|
||||
match &mut *arg.variant {
|
||||
Variant::ArgumentBlockApplication(block) if block.lhs.is_none() => {
|
||||
block.lhs = Some(func);
|
||||
arg
|
||||
}
|
||||
Variant::OperatorBlockApplication(block) if block.lhs.is_none() => {
|
||||
block.lhs = Some(func);
|
||||
arg
|
||||
}
|
||||
_ => Tree::app(func, arg),
|
||||
}
|
||||
}
|
||||
|
||||
/// Join two nodes with an operator, in a way appropriate for their types.
|
||||
///
|
||||
/// For most operands this will simply construct an `OprApp`; however, a non-operator block (i.e. an
|
||||
/// `ArgumentBlock`) is reinterpreted as a `BodyBlock` when it appears in the RHS of an operator
|
||||
/// expression.
|
||||
pub fn apply_operator<'s>(
|
||||
lhs: Option<Tree<'s>>,
|
||||
opr: OperatorOrError<'s>,
|
||||
mut rhs: Option<Tree<'s>>,
|
||||
) -> Tree<'s> {
|
||||
if let Some(rhs_) = rhs.as_mut() {
|
||||
if let Variant::ArgumentBlockApplication(block) = &mut *rhs_.variant {
|
||||
if block.lhs.is_none() {
|
||||
let ArgumentBlockApplication { lhs: _, arguments } = block;
|
||||
let arguments = mem::take(arguments);
|
||||
let rhs_ = block::body_from_lines(arguments);
|
||||
rhs = Some(rhs_);
|
||||
}
|
||||
}
|
||||
}
|
||||
Tree::opr_app(lhs, opr, rhs)
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Visitors ===
|
||||
// ================
|
||||
|
252
lib/rust/parser/src/syntax/tree/block.rs
Normal file
252
lib/rust/parser/src/syntax/tree/block.rs
Normal file
@ -0,0 +1,252 @@
|
||||
//! Code blocks.
|
||||
|
||||
|
||||
|
||||
use crate::syntax::tree::*;
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Lines ===
|
||||
// =============
|
||||
|
||||
/// A line of code.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visitor, Reflect, Serialize, Deserialize)]
|
||||
pub struct Line<'s> {
|
||||
/// Token ending the previous line, if any.
|
||||
pub newline: token::Newline<'s>,
|
||||
/// The content of the line, if any.
|
||||
pub expression: Option<Tree<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> Line<'s> {
|
||||
/// Transform the content of the line with the provided function, if any is present; return the
|
||||
/// result.
|
||||
pub fn map_expression(self, f: impl FnOnce(Tree<'s>) -> Tree<'s>) -> Self {
|
||||
let Self { newline, expression } = self;
|
||||
let expression = expression.map(f);
|
||||
Self { newline, expression }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<token::Newline<'s>> for Line<'s> {
|
||||
fn from(newline: token::Newline<'s>) -> Self {
|
||||
Self { newline, expression: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> span::Builder<'s> for Line<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span.add(&mut self.newline).add(&mut self.expression)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Body Block ===
|
||||
// ==================
|
||||
|
||||
/// Build a body block from a sequence of lines; this involves reinterpreting the input expressions
|
||||
/// in statement context (i.e. expressions at the top-level of the block that involve the `=`
|
||||
/// operator will be reinterpreted as function/variable bindings).
|
||||
pub fn body_from_lines<'s>(expressions: impl IntoIterator<Item = Line<'s>>) -> Tree<'s> {
|
||||
use crate::expression_to_statement;
|
||||
let expressions = expressions.into_iter();
|
||||
let statements = expressions.map(|line| line.map_expression(expression_to_statement));
|
||||
let statements = statements.collect();
|
||||
Tree::body_block(statements)
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === Operator Block ===
|
||||
// ======================
|
||||
|
||||
/// The content of a line in an operator block.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visitor, Reflect, Serialize, Deserialize)]
|
||||
pub struct OperatorBlockExpression<'s> {
|
||||
/// The operator at the beginning of the line.
|
||||
pub operator: OperatorOrError<'s>,
|
||||
/// The rest of the expression.
|
||||
pub expression: Tree<'s>,
|
||||
}
|
||||
|
||||
/// Interpret the given expression as an `OperatorBlockExpression`, if it fits the correct pattern.
|
||||
fn to_operator_block_expression(
|
||||
expression_: Tree<'_>,
|
||||
) -> Result<OperatorBlockExpression<'_>, Tree<'_>> {
|
||||
let tree_ = match &*expression_.variant {
|
||||
Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast,
|
||||
_ => return Err(expression_),
|
||||
};
|
||||
if let Variant::OprApp(OprApp { lhs: None, opr, rhs: Some(expression) }) = &*tree_.variant {
|
||||
if expression.span.left_offset.visible.width_in_spaces < 1 {
|
||||
return Err(expression_);
|
||||
}
|
||||
let mut operator = opr.clone();
|
||||
operator.first_operator_mut().left_offset = expression_.span.left_offset;
|
||||
let expression = expression.clone();
|
||||
Ok(OperatorBlockExpression { operator, expression })
|
||||
} else {
|
||||
Err(expression_)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> span::Builder<'s> for OperatorBlockExpression<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span.add(&mut self.operator).add(&mut self.expression)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Operator block lines ====
|
||||
|
||||
/// A line in an operator block.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visitor, Reflect, Serialize, Deserialize)]
|
||||
pub struct OperatorLine<'s> {
|
||||
/// Token ending the previous line, if any.
|
||||
pub newline: token::Newline<'s>,
|
||||
/// The operator-expression, if any.
|
||||
pub expression: Option<OperatorBlockExpression<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> From<token::Newline<'s>> for OperatorLine<'s> {
|
||||
fn from(newline: token::Newline<'s>) -> Self {
|
||||
Self { newline, expression: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> span::Builder<'s> for OperatorLine<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span.add(&mut self.newline).add(&mut self.expression)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =====================
|
||||
// === Block Builder ===
|
||||
// =====================
|
||||
|
||||
/// Builds an AST block type from a sequence of lines.
|
||||
///
|
||||
/// Note that the block type is not fully determined at this stage: We apply context information
|
||||
/// later (see `apply_operator`) to distinguish the two non-operator block types, `BodyBlock` and
|
||||
/// `ArgumentBlockApplication`. Here we treat every non-operator block as an argument block,
|
||||
/// because creating a body block involves re-interpreting the expressions in statement context.
|
||||
///
|
||||
/// The implementation is a state machine. The only top-level transitions are:
|
||||
/// - `Indeterminate` -> `Operator`
|
||||
/// - `Indeterminate` -> `NonOperator`
|
||||
///
|
||||
/// The `Operator` state has two substates, and one possible transition:
|
||||
/// - `body_lines is empty` -> `body_lines is not empty`
|
||||
#[derive(Debug)]
|
||||
pub enum Builder<'s> {
|
||||
/// The builder is in an indeterminate state until a non-empty line has been encountered, which
|
||||
/// would distinguish an operator-block from a non-operator block.
|
||||
Indeterminate {
|
||||
/// The `Newline` token introducing the block, and `Newline` tokens for any empty lines
|
||||
/// that have been encountered.
|
||||
empty_lines: Vec<token::Newline<'s>>,
|
||||
},
|
||||
/// Building an operator block. If any line doesn't fit the operator-block syntax, that line
|
||||
/// and all following will be placed in `body_lines`.
|
||||
Operator {
|
||||
/// Valid operator-block expressions.
|
||||
operator_lines: Vec<OperatorLine<'s>>,
|
||||
/// Any lines violating the expected operator-block syntax.
|
||||
body_lines: Vec<Line<'s>>,
|
||||
},
|
||||
/// Building a non-operator block (either a body block or an argument block).
|
||||
NonOperator {
|
||||
/// The block content.
|
||||
body_lines: Vec<Line<'s>>,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'s> Builder<'s> {
|
||||
/// Create a new instance, in initial state.
|
||||
pub fn new() -> Self {
|
||||
Self::Indeterminate { empty_lines: default() }
|
||||
}
|
||||
|
||||
/// Create a new instance, in a state appropriate for the given expression.
|
||||
fn new_with_expression(
|
||||
empty_lines: impl IntoIterator<Item = token::Newline<'s>>,
|
||||
newline: token::Newline<'s>,
|
||||
expression: Tree<'s>,
|
||||
) -> Self {
|
||||
let empty_lines = empty_lines.into_iter();
|
||||
let new_lines = 1;
|
||||
match to_operator_block_expression(expression) {
|
||||
Ok(expression) => {
|
||||
let expression = Some(expression);
|
||||
let mut operator_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines);
|
||||
operator_lines.extend(empty_lines.map(block::OperatorLine::from));
|
||||
operator_lines.push(OperatorLine { newline, expression });
|
||||
Self::Operator { operator_lines, body_lines: default() }
|
||||
}
|
||||
Err(expression) => {
|
||||
let expression = Some(expression);
|
||||
let mut body_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines);
|
||||
body_lines.extend(empty_lines.map(block::Line::from));
|
||||
body_lines.push(Line { newline, expression });
|
||||
Self::NonOperator { body_lines }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a new line to the state.
|
||||
pub fn push(&mut self, newline: token::Newline<'s>, expression: Option<Tree<'s>>) {
|
||||
match self {
|
||||
Builder::Indeterminate { empty_lines } => match expression {
|
||||
Some(expression) =>
|
||||
*self = Self::new_with_expression(empty_lines.drain(..), newline, expression),
|
||||
None => empty_lines.push(newline),
|
||||
},
|
||||
Builder::NonOperator { body_lines, .. } =>
|
||||
body_lines.push(Line { newline, expression }),
|
||||
Builder::Operator { body_lines, .. } if !body_lines.is_empty() => {
|
||||
body_lines.push(Line { newline, expression });
|
||||
}
|
||||
Builder::Operator { operator_lines, body_lines, .. }
|
||||
if let Some(expression) = expression => {
|
||||
match to_operator_block_expression(expression) {
|
||||
Ok(expression) => {
|
||||
let expression = Some(expression);
|
||||
operator_lines.push(OperatorLine { newline, expression });
|
||||
}
|
||||
Err(expression) => {
|
||||
let expression = Some(expression);
|
||||
body_lines.push(Line { newline, expression })
|
||||
},
|
||||
}
|
||||
}
|
||||
Builder::Operator { operator_lines, .. } => operator_lines.push(newline.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Produce an AST node from the state.
|
||||
pub fn build(self) -> Tree<'s> {
|
||||
match self {
|
||||
Builder::Indeterminate { empty_lines } => {
|
||||
let empty_lines = empty_lines.into_iter();
|
||||
let lines = empty_lines.map(Line::from).collect();
|
||||
Tree::argument_block_application(None, lines)
|
||||
}
|
||||
Builder::Operator { operator_lines, body_lines } =>
|
||||
Tree::operator_block_application(None, operator_lines, body_lines),
|
||||
Builder::NonOperator { body_lines } =>
|
||||
Tree::argument_block_application(None, body_lines),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Default for Builder<'s> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
[package]
|
||||
name = "enso-parser-syntax-tree-builder"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
description = "Enso Parser AST Builder."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/enso"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../LICENSE"
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = "1.0"
|
||||
enso-macro-utils = { path = "../../../../../macro-utils" }
|
||||
quote = "1.0"
|
||||
|
||||
[dependencies.syn]
|
||||
version = "1.0"
|
||||
features = ['extra-traits', 'visit', 'full']
|
@ -1,136 +0,0 @@
|
||||
//! Definition of a macro allowing building mock AST structures, mostly useful for testing.
|
||||
|
||||
// === Features ===
|
||||
#![feature(proc_macro_span)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(clippy::option_map_unit_fn)]
|
||||
#![allow(clippy::precedence)]
|
||||
#![allow(dead_code)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
#![warn(unused_qualifications)]
|
||||
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::quote;
|
||||
use std::mem;
|
||||
|
||||
|
||||
|
||||
/// A macro allowing building mock AST structures, mostly useful for testing.
|
||||
///
|
||||
/// Currently supported syntax:
|
||||
///
|
||||
/// - `a b c` Application of arguments. Arguments are applied in-order, from left to right. Here,
|
||||
/// this expression would be the same as `[[a b] c]`.
|
||||
///
|
||||
/// - `a [b c] d` Grouping syntax that does not produce AST group expression. Here, `b c` is just
|
||||
/// the first argument passed to `a`.
|
||||
///
|
||||
/// - `{if} a {then} b {else} c` Multi-segment application. All segments should be enclosed in curly
|
||||
/// braces. You can also place segments in quotes, like `{"("} a {")"}`.
|
||||
#[proc_macro]
|
||||
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||
let output = expr(tokens, None);
|
||||
let output = quote!(crate::syntax::Tree::block(vec![#output]));
|
||||
output.into()
|
||||
}
|
||||
|
||||
|
||||
struct Segment {
|
||||
header: TokenStream,
|
||||
body: TokenStream,
|
||||
}
|
||||
|
||||
impl Segment {
|
||||
fn new(header: TokenStream) -> Self {
|
||||
let body = quote!();
|
||||
Self { header, body }
|
||||
}
|
||||
}
|
||||
|
||||
fn expr(tokens: proc_macro::TokenStream, parent_spacing: Option<usize>) -> TokenStream {
|
||||
use proc_macro::TokenTree::*;
|
||||
let mut output = quote! {};
|
||||
let mut prefix: Option<TokenStream> = None;
|
||||
let mut segments: Vec<Segment> = vec![];
|
||||
let mut current_segment: Option<Segment> = None;
|
||||
let mut last_column: Option<usize> = None;
|
||||
let app_to_output = |output: &mut TokenStream, tok| {
|
||||
if output.is_empty() {
|
||||
*output = tok;
|
||||
} else {
|
||||
*output = quote! {syntax::Tree::app(#output,#tok)};
|
||||
}
|
||||
};
|
||||
let mut inherited_spacing = parent_spacing.unwrap_or(0);
|
||||
for token in tokens {
|
||||
let spacing = last_column.map(|t| token.span().start().column - t).unwrap_or(0);
|
||||
let spacing = spacing + inherited_spacing;
|
||||
inherited_spacing = 0;
|
||||
last_column = Some(token.span().end().column);
|
||||
match &token {
|
||||
// a b c ...
|
||||
Ident(ident) => {
|
||||
let ident = ident.to_string();
|
||||
let spacing = " ".repeat(spacing);
|
||||
app_to_output(
|
||||
&mut output,
|
||||
quote! {crate::syntax::Tree::ident(crate::syntax::Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident)))},
|
||||
);
|
||||
}
|
||||
// {if} a {then} b {else} c
|
||||
// {"("} a {")"}
|
||||
Group(group) if group.delimiter() == proc_macro::Delimiter::Brace => {
|
||||
if let Some(mut current_segment) = mem::take(&mut current_segment) {
|
||||
current_segment.body = mem::take(&mut output);
|
||||
segments.push(current_segment);
|
||||
} else if !output.is_empty() {
|
||||
prefix = Some(mem::take(&mut output));
|
||||
}
|
||||
let ident = group.stream().to_string();
|
||||
let spacing = " ".repeat(spacing);
|
||||
current_segment = Some(Segment::new(
|
||||
quote! { Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident).into())},
|
||||
)); // Token::symbol
|
||||
}
|
||||
// a [b c] d
|
||||
Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => {
|
||||
app_to_output(&mut output, expr(group.stream(), Some(spacing)));
|
||||
}
|
||||
_ => panic!("Unsupported token {:?}", token),
|
||||
}
|
||||
}
|
||||
if let Some(mut current_segment) = current_segment {
|
||||
current_segment.body = mem::take(&mut output);
|
||||
segments.push(current_segment);
|
||||
let segments: Vec<TokenStream> = segments
|
||||
.into_iter()
|
||||
.map(|t| {
|
||||
let header = t.header;
|
||||
let body = t.body;
|
||||
let body = if !body.is_empty() {
|
||||
quote!(Some(syntax::Tree::opr_section_boundary(#body)))
|
||||
} else {
|
||||
quote!(None)
|
||||
};
|
||||
quote! { syntax::tree::MultiSegmentAppSegment { header: #header, body: #body } }
|
||||
})
|
||||
.collect();
|
||||
let pfx = prefix
|
||||
.map(|t| quote! {Some(Box::new(syntax::Tree::opr_section_boundary(#t)))})
|
||||
.unwrap_or_else(|| quote! {None});
|
||||
let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()};
|
||||
output = quote! {
|
||||
syntax::Tree::multi_segment_app (#pfx, #segments)
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
@ -16,6 +16,7 @@
|
||||
#![warn(unused_qualifications)]
|
||||
|
||||
use lexpr::sexp;
|
||||
use lexpr::Value;
|
||||
|
||||
|
||||
|
||||
@ -23,10 +24,10 @@ use lexpr::sexp;
|
||||
// === Test support macros ===
|
||||
// ===========================
|
||||
|
||||
/// Parses input as a sequence of S-expressions, and wraps it in a `BodyBlock`.
///
/// Every token tree passed to the macro becomes one element of the vector that follows the
/// `BodyBlock` symbol, so `block![(Ident a) (Ident b)]` describes a block with two statements.
macro_rules! block {
    ( $($statements:tt)* ) => {
        sexp![(BodyBlock #( $( $statements )* ) )]
    }
}
|
||||
|
||||
@ -36,18 +37,48 @@ macro_rules! block {
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
/// Empty input is expected to produce a block whose only statement is `()`.
#[test]
fn nothing() {
    let expected = block![()];
    test("", expected);
}
|
||||
|
||||
/// Juxtaposed identifiers are expected to nest as left-associated `App` nodes.
#[test]
fn application() {
    let expected = block![(App (App (Ident a) (Ident b)) (Ident c))];
    test("a b c", expected);
}
|
||||
|
||||
/// A parenthesized application is expected to be a `MultiSegmentApp` with a `(` segment holding
/// the inner expression and a `)` segment with an empty body.
#[test]
fn parentheses_simple() {
    let expected = block![
        (MultiSegmentApp #(((Symbol "(") (App (Ident a) (Ident b))) ((Symbol ")") ())))
    ];
    test("(a b)", expected);
}
|
||||
|
||||
/// An operator missing its left or right operand is expected to become an `OprApp` with `()` in
/// the missing position, wrapped in an `OprSectionBoundary`.
#[test]
fn section_simple() {
    let expected_lhs = block![(OprSectionBoundary (OprApp () (Ok "+") (Ident a)))];
    test("+ a", expected_lhs);
    let expected_rhs = block![(OprSectionBoundary (OprApp (Ident a) (Ok "+") ()))];
    test("a +", expected_rhs);
}
|
||||
|
||||
/// Nested parentheses are expected to produce a `MultiSegmentApp` whose first segment body
/// contains another `MultiSegmentApp`.
#[test]
fn parentheses_nested() {
    let source = "((a b) c)";
    #[rustfmt::skip]
    let expected_tree = block![
        (MultiSegmentApp #(
         ((Symbol "(")
          (App (MultiSegmentApp #(((Symbol "(") (App (Ident a) (Ident b))) ((Symbol ")") ())))
               (Ident c)))
         ((Symbol ")") ())))
    ];
    test(source, expected_tree);
}
|
||||
|
||||
/// Type definitions without and with a type parameter.
#[test]
fn type_definition() {
    let without_params = block![(TypeDef (Ident type) (Ident Bool) #())];
    test("type Bool", without_params);
    let with_param = block![(TypeDef (Ident type) (Ident Option) #((Ident a)))];
    test("type Option a", with_param);
}
|
||||
|
||||
@ -75,6 +106,118 @@ fn function_block_simple_args() {
|
||||
test("foo a b c =", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" ())]);
|
||||
}
|
||||
|
||||
/// Function bodies given as indented blocks, including blocks that start with a blank line.
#[test]
fn code_block_body() {
    let source = ["main =", " 4"].join("\n");
    test(&source, block![(Function main #() "=" (BodyBlock #((Number 4))))]);

    // A blank line before the body is expected to appear as an empty `()` statement.
    // NOTE(review): the whitespace-only lines below are identical as shown here; presumably they
    // differed in indentation upstream — verify against the original file.
    for blank_line in [" ", " ", " ", ""] {
        let source = ["main =", blank_line, " 4"].join("\n");
        test(&source, block![(Function main #() "=" (BodyBlock #(() (Number 4))))]);
    }

    #[rustfmt::skip]
    let lines = [
        "main =",
        " +4",
        " print 23",
    ];
    #[rustfmt::skip]
    let expected = block![
        (Function main #() "=" (BodyBlock #(
         (OprSectionBoundary (OprApp () (Ok "+") (Number 4)))
         (App (Ident print) (Number 23)))))
    ];
    test(&lines.join("\n"), expected);
}
|
||||
|
||||
/// Indented lines that start with an operator are expected to form an
/// `OperatorBlockApplication` on the preceding expression.
#[test]
fn code_block_operator() {
    let source = ["value = nums", " * each random", " + constant"].join("\n");
    let expected = block![
        (Assignment (Ident value) "="
         (OperatorBlockApplication (Ident nums)
          #(((Ok "*") (App (Ident each) (Ident random)))
            ((Ok "+") (Ident constant)))
          #()))
    ];
    test(&source, expected);
}
|
||||
|
||||
/// Indented lines following an expression are expected to form an `ArgumentBlockApplication`.
#[test]
fn code_block_argument_list() {
    #[rustfmt::skip]
    let single_argument = [
        "value = foo",
        " bar",
    ];
    let expected_single = block![
        (Assignment (Ident value) "=" (ArgumentBlockApplication (Ident foo) #((Ident bar))))
    ];
    test(&single_argument.join("\n"), expected_single);


    #[rustfmt::skip]
    let section_then_argument = [
        "value = foo",
        " +1",
        " bar",
    ];
    #[rustfmt::skip]
    let expected_pair = block![
        (Assignment (Ident value) "="
         (ArgumentBlockApplication (Ident foo) #(
          (OprSectionBoundary (OprApp () (Ok "+") (Number 1)))
          (Ident bar))))
    ];
    test(&section_then_argument.join("\n"), expected_pair);
}
|
||||
|
||||
/// A function header that is not followed by an indented, non-empty line has no body block.
#[test]
fn code_block_empty() {
    // The first line should parse as a function with no body expression (which is an error).
    // No input parses as an empty `ArgumentBlock` or `OperatorBlock`: those are told apart from
    // a body continuation by the presence of non-empty indented lines.
    let source = ["foo =", "bar"].join("\n");
    test(&source, block![(Function foo #() "=" ()) (Ident bar)]);
    // Same outcome as above; a line without non-whitespace content does not open a code block.
    let source = ["foo =", " ", "bar"].join("\n");
    test(&source, block![(Function foo #() "=" ()) () (Ident bar)]);
}
|
||||
|
||||
/// All three indented lines are expected to end up in the function's body block.
// NOTE(review): the test name suggests the lines had differing indentation upstream; as shown
// here they are indented identically — verify against the original file.
#[test]
fn code_block_bad_indents1() {
    let source = ["main =", " foo", " bar", " baz"].join("\n");
    let expected_tree = block![
        (Function main #() "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz))))
    ];
    test(&source, expected_tree);
}
|
||||
|
||||
/// The indented lines are expected to form the body block; the unindented `baz` is expected to
/// be a separate top-level statement.
#[test]
fn code_block_bad_indents2() {
    let source = ["main =", " foo", " bar", "baz"].join("\n");
    let expected_tree = block![
        (Function main #() "=" (BodyBlock #((Ident foo) (Ident bar))))
        (Ident baz)
    ];
    test(&source, expected_tree);
}
|
||||
|
||||
/// A statement at the original indentation is expected to end the body block and follow the
/// function as its own top-level statement.
#[test]
fn code_block_with_following_statement() {
    let source = ["main =", " foo", "bar"].join("\n");
    let expected_tree = block![
        (Function main #() "=" (BodyBlock #((Ident foo))))
        (Ident bar)
    ];
    test(&source, expected_tree);
}
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
@ -95,11 +238,11 @@ use std::collections::HashSet;
|
||||
/// - Most token types are represented as their contents, rather than as a token struct. For
|
||||
/// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look
|
||||
/// like `sexp![foo]`.
|
||||
fn test(code: &str, expect: lexpr::Value) {
|
||||
fn test(code: &str, expect: Value) {
|
||||
let ast = enso_parser::Parser::new().run(code);
|
||||
let ast_s_expr = to_s_expr(&ast, code);
|
||||
assert_eq!(ast_s_expr.to_string(), expect.to_string());
|
||||
assert_eq!(ast.code(), code);
|
||||
assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast);
|
||||
assert_eq!(ast.code(), code, "{:?}", &ast);
|
||||
}
|
||||
|
||||
|
||||
@ -109,40 +252,61 @@ fn test(code: &str, expect: lexpr::Value) {
|
||||
// =====================
|
||||
|
||||
/// Produce an S-expression representation of the input AST type.
|
||||
pub fn to_s_expr<T>(value: &T, code: &str) -> lexpr::Value
|
||||
pub fn to_s_expr<T>(value: &T, code: &str) -> Value
|
||||
where T: serde::Serialize + Reflect {
|
||||
use enso_parser::syntax::token;
|
||||
use enso_parser::syntax::tree;
|
||||
let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
|
||||
let ast_ty = rust_to_meta[&value.reflect_type().id];
|
||||
let base = code.as_bytes().as_ptr() as usize;
|
||||
let code: Box<str> = Box::from(code);
|
||||
let mut to_s_expr = ToSExpr::new(&graph);
|
||||
to_s_expr.mapper(ast_ty, strip_hidden_fields);
|
||||
let ident_token = rust_to_meta[&enso_parser::syntax::token::variant::Ident::reflect().id];
|
||||
let operator_token = rust_to_meta[&enso_parser::syntax::token::variant::Operator::reflect().id];
|
||||
let number_token = rust_to_meta[&enso_parser::syntax::token::variant::Number::reflect().id];
|
||||
let token_to_str = move |token: lexpr::Value| {
|
||||
let ident_token = rust_to_meta[&token::variant::Ident::reflect().id];
|
||||
let operator_token = rust_to_meta[&token::variant::Operator::reflect().id];
|
||||
let symbol_token = rust_to_meta[&token::variant::Symbol::reflect().id];
|
||||
let number_token = rust_to_meta[&token::variant::Number::reflect().id];
|
||||
let newline_token = rust_to_meta[&token::variant::Newline::reflect().id];
|
||||
// TODO: Implement `#[reflect(flag = "enso::concrete")]`, which just attaches user data to the
|
||||
// type info; then filter by flag here instead of hard-coding these simplifications.
|
||||
let line = rust_to_meta[&tree::block::Line::reflect().id];
|
||||
let operator_line = rust_to_meta[&tree::block::OperatorLine::reflect().id];
|
||||
let token_to_str = move |token: Value| {
|
||||
let range = token_code_range(&token, base);
|
||||
code[range].to_owned().into_boxed_str()
|
||||
};
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(ident_token, move |token| lexpr::Value::symbol(token_to_str_(token)));
|
||||
to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(operator_token, move |token| lexpr::Value::string(token_to_str_(token)));
|
||||
to_s_expr.mapper(operator_token, move |token| Value::string(token_to_str_(token)));
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(symbol_token, move |token| Value::string(token_to_str_(token)));
|
||||
let token_to_str_ = token_to_str;
|
||||
to_s_expr.mapper(number_token, move |token| {
|
||||
lexpr::Value::Number(token_to_str_(token).parse::<u64>().unwrap().into())
|
||||
Value::Number(token_to_str_(token).parse::<u64>().unwrap().into())
|
||||
});
|
||||
let into_car = |cons| match cons {
|
||||
Value::Cons(cons) => cons.into_pair().0,
|
||||
_ => panic!(),
|
||||
};
|
||||
to_s_expr.mapper(line, into_car);
|
||||
to_s_expr.mapper(operator_line, into_car);
|
||||
to_s_expr.skip(newline_token);
|
||||
tuplify(to_s_expr.value(ast_ty, &value))
|
||||
}
|
||||
|
||||
/// Strip certain fields that should be excluded from output.
|
||||
fn strip_hidden_fields(tree: lexpr::Value) -> lexpr::Value {
|
||||
let hidden_tree_fields =
|
||||
[":spanLeftOffsetVisible", ":spanLeftOffsetCodeRepr", ":spanCodeLength"];
|
||||
fn strip_hidden_fields(tree: Value) -> Value {
|
||||
let hidden_tree_fields = [
|
||||
":spanLeftOffsetVisible",
|
||||
":spanLeftOffsetCodeReprBegin",
|
||||
":spanLeftOffsetCodeReprLen",
|
||||
":spanCodeLength",
|
||||
];
|
||||
let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
|
||||
lexpr::Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
|
||||
lexpr::Value::Cons(cons) => match cons.car() {
|
||||
lexpr::Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()),
|
||||
Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
|
||||
Value::Cons(cons) => match cons.car() {
|
||||
Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()),
|
||||
_ => panic!(),
|
||||
},
|
||||
_ => true,
|
||||
@ -151,30 +315,23 @@ fn strip_hidden_fields(tree: lexpr::Value) -> lexpr::Value {
|
||||
|
||||
/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s,
|
||||
/// return the range of the input code the token references.
|
||||
fn token_code_range(token: &lexpr::Value, base: usize) -> std::ops::Range<usize> {
|
||||
let code_repr = fields(token).find(|(name, _)| *name == ":codeRepr").unwrap().1;
|
||||
let mut begin = None;
|
||||
let mut len = None;
|
||||
for (name, value) in fields(code_repr) {
|
||||
match name {
|
||||
":begin" => begin = Some(value.as_u64().unwrap() as u32),
|
||||
":len" => len = Some(value.as_u64().unwrap() as u32),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
let begin = begin.unwrap();
|
||||
fn token_code_range(token: &Value, base: usize) -> std::ops::Range<usize> {
|
||||
let get_u32 =
|
||||
|field| fields(token).find(|(name, _)| *name == field).unwrap().1.as_u64().unwrap() as u32;
|
||||
let begin = get_u32(":codeReprBegin");
|
||||
let len = get_u32(":codeReprLen");
|
||||
let begin = (begin as u64) | (base as u64 & !0xFFFF_FFFF);
|
||||
let begin = if begin < (base as u64) { begin + 0x1_0000_0000 } else { begin };
|
||||
let begin = begin as usize - base;
|
||||
let len = len.unwrap() as usize;
|
||||
let len = len as usize;
|
||||
begin..(begin + len)
|
||||
}
|
||||
|
||||
/// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields.
|
||||
fn fields(value: &'_ lexpr::Value) -> impl Iterator<Item = (&'_ str, &'_ lexpr::Value)> {
|
||||
fn fields(value: &'_ Value) -> impl Iterator<Item = (&'_ str, &'_ Value)> {
|
||||
value.list_iter().unwrap().filter_map(|value| match value {
|
||||
lexpr::Value::Cons(cons) => match cons.car() {
|
||||
lexpr::Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())),
|
||||
Value::Cons(cons) => match cons.car() {
|
||||
Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
@ -183,24 +340,24 @@ fn fields(value: &'_ lexpr::Value) -> impl Iterator<Item = (&'_ str, &'_ lexpr::
|
||||
|
||||
/// Strip field names from struct representations, so that they are printed more concisely, as if
|
||||
/// they were tuple-structs.
|
||||
fn tuplify(value: lexpr::Value) -> lexpr::Value {
|
||||
fn tuplify(value: Value) -> Value {
|
||||
let (car, cdr) = match value {
|
||||
lexpr::Value::Cons(cons) => cons.into_pair(),
|
||||
lexpr::Value::Vector(mut vector) => {
|
||||
Value::Cons(cons) => cons.into_pair(),
|
||||
Value::Vector(mut vector) => {
|
||||
for value in vector.iter_mut() {
|
||||
let original = std::mem::replace(value, lexpr::Value::Nil);
|
||||
let original = std::mem::replace(value, Value::Nil);
|
||||
*value = tuplify(original);
|
||||
}
|
||||
return lexpr::Value::Vector(vector);
|
||||
return Value::Vector(vector);
|
||||
}
|
||||
value => return value,
|
||||
};
|
||||
if let lexpr::Value::Symbol(symbol) = &car {
|
||||
if let Value::Symbol(symbol) = &car {
|
||||
if let Some(':') = symbol.chars().next() {
|
||||
return tuplify(cdr);
|
||||
}
|
||||
}
|
||||
let car = tuplify(car);
|
||||
let cdr = tuplify(cdr);
|
||||
lexpr::Value::Cons(lexpr::Cons::new(car, cdr))
|
||||
Value::Cons(lexpr::Cons::new(car, cdr))
|
||||
}
|
||||
|
@ -33,7 +33,8 @@ impl<T> NonEmptyVec<T> {
|
||||
/// let mut vec: NonEmptyVec<usize> = NonEmptyVec::new(0, vec![]);
|
||||
/// ```
|
||||
pub fn new(first: T, rest: Vec<T>) -> NonEmptyVec<T> {
|
||||
let mut elems = vec![first];
|
||||
let mut elems = Vec::with_capacity(1 + rest.len());
|
||||
elems.push(first);
|
||||
elems.extend(rest);
|
||||
NonEmptyVec { elems }
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
//! This module defines utilities for working with the [`std::vec::Vec`] type.
|
||||
|
||||
use derivative::Derivative;
|
||||
use failure::_core::hint::unreachable_unchecked;
|
||||
|
||||
|
||||
@ -84,6 +85,84 @@ pub trait VecOps<T>: AsMut<Vec<T>> + Sized {
|
||||
|
||||
impl<T> VecOps<T> for Vec<T> {}
|
||||
|
||||
|
||||
|
||||
// =====================
|
||||
// === VecAllocation ===
|
||||
// =====================
|
||||
|
||||
/// Owns a storage allocation for a [`std::vec::Vec`], but no elements.
|
||||
///
|
||||
/// # Usage
|
||||
///
|
||||
/// This data structure implements an optimization when creating temporary vectors. The use case
|
||||
/// occurs when:
|
||||
/// - Within some scope, a `Vec` is created, added to, and discarded.
|
||||
/// - The scope may be entered multiple times.
|
||||
///
|
||||
/// The optimization is to reuse an allocation between entries to the scope. This is sometimes done
|
||||
/// by storing and reusing the `Vec`, but that pattern is misleading; owning a `Vec` suggests that
|
||||
/// values may be retained between entries to the scope. This type explicitly has only one logical
|
||||
/// state (empty).
|
||||
///
|
||||
/// ```
|
||||
/// # use enso_prelude::*;
|
||||
/// #[derive(Default)]
|
||||
/// struct NumberAdder {
|
||||
/// // In a more complex struct it would be important to be able to tell what state the object
|
||||
/// // retains from its fields.
|
||||
/// temporary_nums: VecAllocation<f64>,
|
||||
/// }
|
||||
///
|
||||
/// impl NumberAdder {
|
||||
/// /// Add some numbers, with better precision than simply adding `f32` values in a loop.
|
||||
/// /// (For the sake of example, ignore that this is not a fast or accurate approach.)
|
||||
/// ///
|
||||
/// /// Because we reuse an allocation, if this method is called repeatedly it will only have to
|
||||
/// /// allocate enough space to accommodate the largest single input it processes. Thus, rather
|
||||
/// /// than performing a number of reallocations that scales linearly in the number of batches
|
||||
/// /// of input (assuming batch size has some constant geometric mean), it performs a number of
|
||||
/// /// allocations that scales with the log of the size of the largest batch; the worst case of
|
||||
/// /// this implementation has the same performance as the best case of an implementation that
|
||||
/// /// doesn't reuse its allocation.
|
||||
/// pub fn add_nums(&mut self, inputs: impl IntoIterator<Item = f32>) -> f32 {
|
||||
/// let mut extended_precision = self.temporary_nums.take();
|
||||
/// extended_precision.extend(inputs.into_iter().map(f64::from));
|
||||
/// let result = extended_precision.drain(..).fold(0.0, f64::add);
|
||||
/// self.temporary_nums.set_from(extended_precision);
|
||||
/// result as f32
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Clone, Debug, Derivative, Eq, PartialEq)]
|
||||
#[derivative(Default(bound = ""))]
|
||||
pub struct VecAllocation<T> {
|
||||
data: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T> VecAllocation<T> {
|
||||
/// Create a new, empty allocation.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Drop any elements from the given `Vec`, keeping its allocated memory. It can be retrieved
|
||||
/// later with `take`.
|
||||
pub fn set_from(&mut self, mut data: Vec<T>) {
|
||||
data.clear();
|
||||
self.data = data;
|
||||
}
|
||||
|
||||
/// Return a `Vec` containing no elements, whose allocated storage comes from the most recent
|
||||
/// call to `set_from`, unless `take` has been called since then. Any subsequent call before the
|
||||
/// next `set_from` would return a newly-created `Vec` with no allocated memory.
|
||||
pub fn take(&mut self) -> Vec<T> {
|
||||
std::mem::take(&mut self.data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
Loading…
Reference in New Issue
Block a user