From 7c0330290f42560d8cb90d9f23d1cbd663e5c9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Dani=C5=82o?= Date: Fri, 1 Jul 2022 05:42:29 +0200 Subject: [PATCH] Parser improvements (#3549) --- lib/rust/data-structures/src/im_list.rs | 2 +- lib/rust/parser/src/lexer.rs | 72 +- lib/rust/parser/src/macros.rs | 52 +- lib/rust/parser/src/macros/built_in.rs | 84 ++ lib/rust/parser/src/macros/expand.rs | 356 +++++++++ lib/rust/parser/src/macros/pattern.rs | 414 ++++++++-- lib/rust/parser/src/macros/resolver.rs | 387 +++++++++ lib/rust/parser/src/main.rs | 739 ++---------------- lib/rust/parser/src/source/code.rs | 14 + lib/rust/parser/src/source/span.rs | 328 +++----- lib/rust/parser/src/syntax.rs | 1 + lib/rust/parser/src/syntax/item.rs | 58 +- lib/rust/parser/src/syntax/operator.rs | 172 ++++ lib/rust/parser/src/syntax/tree.rs | 35 +- .../parser/src/syntax/tree/builder/src/lib.rs | 36 +- lib/rust/prelude/src/data/non_empty_vec.rs | 14 +- lib/rust/prelude/src/data/semigroup.rs | 2 +- 17 files changed, 1709 insertions(+), 1057 deletions(-) create mode 100644 lib/rust/parser/src/macros/built_in.rs create mode 100644 lib/rust/parser/src/macros/expand.rs create mode 100644 lib/rust/parser/src/macros/resolver.rs create mode 100644 lib/rust/parser/src/syntax/operator.rs diff --git a/lib/rust/data-structures/src/im_list.rs b/lib/rust/data-structures/src/im_list.rs index 164817d2be..860bec0c94 100644 --- a/lib/rust/data-structures/src/im_list.rs +++ b/lib/rust/data-structures/src/im_list.rs @@ -81,7 +81,7 @@ impl NonEmpty { } /// Convert this list to a vector. 
- fn to_vec(&self) -> Vec<&T> { + pub fn to_vec(&self) -> Vec<&T> { let mut out = vec![&self.head]; let mut list = self.tail(); loop { diff --git a/lib/rust/parser/src/lexer.rs b/lib/rust/parser/src/lexer.rs index d38e669950..ef4a68f050 100644 --- a/lib/rust/parser/src/lexer.rs +++ b/lib/rust/parser/src/lexer.rs @@ -731,8 +731,15 @@ const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[ ]; impl<'s> Lexer<'s> { - /// Run the lexer. Returns [`true`] if the process succeeded. - pub fn run(&mut self) -> bool { + /// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a + /// hierarchy). + pub fn run(self) -> Vec> { + build_block_hierarchy(self.run_flat()) + } + + /// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented + /// as start and end tokens). + pub fn run_flat(mut self) -> Vec> { self.spaces_after_lexeme(); let mut any_parser_matched = true; while any_parser_matched { @@ -744,10 +751,53 @@ impl<'s> Lexer<'s> { } } } - self.current_char == None + if self.current_char != None { + panic!("Internal error. Lexer did not consume all input."); + } + while self.end_block().is_some() { + let block_end = self.marker_token(token::Variant::block_end()); + self.submit_token(block_end); + } + let tokens = self.output; + event!(TRACE, "Tokens:\n{:#?}", tokens); + tokens } } +/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented +/// as start and end tokens). +pub fn run_flat(input: &'_ str) -> Vec> { + Lexer::new(input).run_flat() +} + +/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a +/// hierarchy). +pub fn run(input: &'_ str) -> Vec> { + Lexer::new(input).run() +} + +/// Convert the flat token stream into hierarchical one. The token variants [`BlockStart`] and +/// [`BlockEnd`] will be replaced with [`Item::Group`]. 
+pub fn build_block_hierarchy(tokens: Vec>) -> Vec> { + let mut stack = vec![]; + let mut out: Vec> = vec![]; + for token in tokens { + match token.variant { + token::Variant::BlockStart(_) => stack.push(mem::take(&mut out)), + token::Variant::BlockEnd(_) => { + let new_out = stack.pop().unwrap(); + let block = mem::replace(&mut out, new_out); + out.push(Item::Block(block)); + } + _ => out.push(token.into()), + } + } + if !stack.is_empty() { + panic!("Internal error. Block start token not paired with block end token."); + } + out +} + // ============= @@ -756,9 +806,7 @@ impl<'s> Lexer<'s> { /// Lexer main function used for ad-hoc testing during development. pub fn main() { - let mut lexer = Lexer::new("\n foo\n bar"); - println!("{:?}", lexer.run()); - println!("{:#?}", lexer.output.iter().collect_vec()); + println!("{:#?}", run_flat("\n foo\n bar")); } /// Test utils for fast mock tokens creation. @@ -797,9 +845,7 @@ mod tests { } fn test_lexer<'s>(input: &'s str, expected: Vec>) { - let mut lexer = Lexer::new(input); - assert!(lexer.run()); - assert_eq!(lexer.output.iter().collect_vec(), expected); + assert_eq!(run_flat(input), expected); } fn lexer_case_idents<'s>(idents: &[&'s str]) -> Vec<(&'s str, Vec>)> { @@ -828,7 +874,7 @@ mod tests { ident_(" ", "foo"), newline_("", "\n"), ident_(" ", "bar"), - // FIXME: here should be block end + block_end_("", ""), ]), ]); } @@ -1081,10 +1127,8 @@ mod benches { let str = "test ".repeat(reps); b.iter(move || { - let mut lexer = Lexer::new(&str); - let ok = lexer.run(); - assert!(ok); - assert_eq!(lexer.output.len(), reps); + let lexer = Lexer::new(&str); + assert_eq!(lexer.run().len(), reps); }); } } diff --git a/lib/rust/parser/src/macros.rs b/lib/rust/parser/src/macros.rs index ba5c6e26c1..b38c7706f5 100644 --- a/lib/rust/parser/src/macros.rs +++ b/lib/rust/parser/src/macros.rs @@ -3,22 +3,23 @@ //! utilities allowing macros management. //! 
Read the docs of the main module of this crate to learn more about the parsing process. -// - use crate::prelude::*; use crate::syntax; -use crate::syntax::token::Token; use enso_data_structures::im_list; -use pattern::Pattern; // ============== // === Export === // ============== +pub mod built_in; +pub mod expand; pub mod pattern; +pub mod resolver; + +pub use pattern::Pattern; @@ -38,21 +39,13 @@ pub mod pattern; #[derivative(Debug)] #[allow(missing_docs)] pub struct Definition<'a> { - /// The pattern in this field will be matched from right to left, unlike patterns in segments. - pub rev_prefix_pattern: Option, - pub segments: im_list::NonEmpty>, + pub segments: im_list::NonEmpty>, #[derivative(Debug = "ignore")] - pub body: Rc, + pub body: Rc, } -/// All the tokens matched as prefix of the resolved macro. -pub type PrefixTokens<'s> = Option>>; - -/// All the sections of the resolved macro. -pub type MatchedSections<'s> = NonEmptyVec<(Token<'s>, Vec>)>; - /// A function that transforms matched macro tokens into [`syntax::Tree`]. -pub type Body = dyn for<'s> Fn(PrefixTokens<'s>, MatchedSections<'s>) -> syntax::Tree<'s>; +pub type DefinitionBody = dyn for<'s> Fn(pattern::MatchedSegments<'s>) -> syntax::Tree<'s>; @@ -93,18 +86,29 @@ impl<'a> SegmentDefinition<'a> { /// ``` #[macro_export] macro_rules! macro_definition { - ( ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => { - $crate::macro_definition!{[None] ($($section, $pattern),*) $body} + ($def:tt) => { + $crate::macro_definition!{$def $crate::macros::matched_segments_into_multi_segment_app} }; - ( ($prefix:expr, $($section:literal, $pattern:expr),* $(,)?) $body:expr ) => { - $crate::macro_definition!{[Some($prefix)] ($($section, $pattern),*) $body} - }; - ( [$prefix:expr] ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => { - macros::Definition { - rev_prefix_pattern: $prefix, + (($($section:literal, $pattern:expr),* $(,)?) 
$body:expr) => { + $crate::macros::Definition { segments: im_list::NonEmpty::try_from(vec![ - $(macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(), + $($crate::macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(), body: Rc::new($body), } }; } + + + +fn matched_segments_into_multi_segment_app( + matched_segments: NonEmptyVec>, +) -> syntax::Tree<'_> { + let segments = matched_segments.mapped(|segment| { + let header = segment.header; + let tokens = segment.result.tokens(); + let body = (!tokens.is_empty()) + .as_some_from(|| syntax::operator::resolve_operator_precedence(tokens)); + syntax::tree::MultiSegmentAppSegment { header, body } + }); + syntax::Tree::multi_segment_app(segments) +} diff --git a/lib/rust/parser/src/macros/built_in.rs b/lib/rust/parser/src/macros/built_in.rs new file mode 100644 index 0000000000..e68848c0d7 --- /dev/null +++ b/lib/rust/parser/src/macros/built_in.rs @@ -0,0 +1,84 @@ +//! Built-in macro definitions. + +use crate::macros::pattern::*; +use crate::macros::*; + +use crate::syntax::operator; + + + +// ======================= +// === Built-in macros === +// ======================= + +/// All built-in macro definitions. +pub fn all() -> resolver::SegmentMap<'static> { + let mut macro_map = resolver::SegmentMap::default(); + // macro_map.register(if_then()); + // macro_map.register(if_then_else()); + macro_map.register(group()); + macro_map.register(type_def()); + macro_map +} + +/// If-then-else macro definition. +pub fn if_then_else<'s>() -> Definition<'s> { + crate::macro_definition! {("if", everything(), "then", everything(), "else", everything())} +} + +/// If-then macro definition. +pub fn if_then<'s>() -> Definition<'s> { + crate::macro_definition! {("if", everything(), "then", everything())} +} + +/// Group macro definition. +pub fn group<'s>() -> Definition<'s> { + crate::macro_definition! {("(", everything(), ")", nothing())} +} + +/// New type definition macro definition. 
+pub fn type_def<'s>() -> Definition<'s> { + use pattern::*; + #[rustfmt::skip] + let pattern = + identifier() / "name" % "type name" >> + many(identifier() % "type parameter" / "param") % "type parameters" >> + block( + many(identifier() / "constructor") % "type constructors" >> + everything() + ) % "type definition body"; + // let pattern2 = Everything; + crate::macro_definition! { + ("type", pattern) + type_def_body + } +} + +// TODO: The comments in the code were left in order to allow easy debugging of this struct. They +// should be removed in the future. +fn type_def_body(matched_segments: NonEmptyVec) -> syntax::Tree { + let segment = matched_segments.to_vec().pop().unwrap(); + // println!(">>>"); + // println!("{:#?}", segment); + // println!(">>>"); + let match_tree = segment.result.into_var_map(); + // println!("{:#?}", match_tree); + // println!("\n\n------------- 1"); + + let mut v = match_tree.view(); + let name = &v.query("name").unwrap()[0]; + let name = operator::resolve_operator_precedence(name.clone()); + // println!("{:#?}", name); + // println!("\n\n------------- 2"); + + let params = v.nested().query("param").unwrap(); + // println!("{:#?}", params); + // println!("\n\n------------- 3"); + + let params = params + .iter() + .map(|tokens| operator::resolve_operator_precedence(tokens.clone())) + .collect_vec(); + // println!("{:#?}", params); + syntax::Tree::type_def(segment.header, name, params) +} diff --git a/lib/rust/parser/src/macros/expand.rs b/lib/rust/parser/src/macros/expand.rs new file mode 100644 index 0000000000..e9e98065c4 --- /dev/null +++ b/lib/rust/parser/src/macros/expand.rs @@ -0,0 +1,356 @@ +//! Macro expansion utilities. Allow expanding macro variables in the same as Rust macro rules do. + +use crate::macros::pattern::*; +use crate::prelude::*; + +use crate::syntax; + + + +// ============== +// === VarMap === +// ============== + +/// A nested map of pattern variables (elements using the [`Pattern::Named`] variant). 
The validator +/// should be instantiated either with the [`EnabledValidator`] in case of user-defined +/// macros or with the [`DisabledValidator`] in case of built-in macros. The latter is +/// faster but does not provide nice error messages and allows for illegal code expansion, like +/// using two variables that have the same repetition depth, but have different parents (e.g. the +/// variables `$b` and `$e` from the example below). +/// +/// To better understand how it works, let's consider the following pattern definition (using the +/// Rust macro rules syntax for simplicity): +/// +/// ```text +/// $x:tt +/// $( +/// $a:tt +/// $( +/// $b:tt +/// $c:tt +/// )* +/// +/// $d:tt +/// $( +/// $e:tt +/// $f:tt +/// )* +/// )* +/// ``` +/// +/// The following [`VarMap`] will be generated (some fields simplified for clarity): +/// +/// ```text +/// VarMap { +/// map: [ +/// ("x", VarMapEntry { +/// tokens: ["x"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["x"], parent: None +/// }} +/// }), +/// ], +/// nested: Some(VarMap { +/// map: [ +/// ("a", VarMapEntry { +/// tokens: ["a"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["a","d"], parent: Some (VarScope { +/// locals: ["x"], parent: None +/// }) +/// }} +/// }), +/// ("e", VarMapEntry { +/// tokens: ["e"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["a","d"], parent: Some (VarScope { +/// locals: ["x"], parent: None +/// }) +/// }} +/// }), +/// ], +/// nested: Some(VarMap { +/// map: [ +/// ("b", VarMapEntry { +/// tokens: ["b"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["b","c"], parent: Some (VarScope { +/// locals: ["a","d"], parent: Some (VarScope { +/// locals: ["x"], parent: None +/// }) +/// }) +/// }} +/// }), +/// ("c", VarMapEntry { +/// tokens: ["c"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["b","c"], parent: Some (VarScope { +/// locals: ["a","d"], parent: Some (VarScope { +/// 
locals: ["x"], parent: None +/// }) +/// }) +/// }} +/// }), +/// ("e", VarMapEntry { +/// tokens: ["e"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["e","f"], parent: Some (VarScope { +/// locals: ["a","d"], parent: Some (VarScope { +/// locals: ["x"], parent: None +/// }) +/// }) +/// }} +/// }), +/// ("f", VarMapEntry { +/// tokens: ["f"], +/// validator: EnabledValidator { scope: VarScope { +/// locals: ["e","f"], parent: Some (VarScope { +/// locals: ["a","d"], parent: Some (VarScope { +/// locals: ["x"], parent: None +/// }) +/// }) +/// }} +/// }), +/// ], +/// }) +/// }) +/// } +/// ``` +/// +/// Validators can be queried during code expansion to check whether these variables belong to +/// the same repetition scope. +#[derive(Clone, Debug, Default)] +pub struct VarMap<'s, V> { + nested: Option>>, + map: HashMap>, +} + +/// Entry of the [`VarMap`] map. +#[derive(Clone, Debug, Default)] +struct VarMapEntry<'s, V> { + pub tokens: Vec>>, + pub validator: V, +} + +impl<'s, V> VarMapEntry<'s, V> { + /// Constructor. + pub fn new(validator: V, tokens: Vec>>) -> Self { + Self { validator, tokens } + } +} + +impl<'s> Match<'s> { + /// Convert the match into checked [`VarMap`]. + pub fn into_var_map(self) -> VarMap<'s, EnabledValidator> { + let mut tree = VarMap::default(); + self.build_var_map(&mut tree, &default()); + tree + } + + /// Convert the match into unchecked [`VarMap`]. The unchecked version has better performance, + /// but does not provide nice user error messages and allows for illegal code expansion. Read + /// the docs of [`VarMap`] to learn more. 
+ pub fn into_unchecked_var_map(self) -> VarMap<'s, DisabledValidator> { + let mut tree = VarMap::default(); + self.build_var_map(&mut tree, &default()); + tree + } + + fn build_var_map(self, tree: &mut VarMap<'s, V>, validator: &V) { + match self { + Self::Everything(_) => {} + Self::Nothing => {} + Self::Identifier(_) => {} + Self::Expected(_, _) => {} + Self::NotBlock(_) => {} + Self::Or(t) => match *t { + OrMatch::First(first) => first.build_var_map(tree, validator), + OrMatch::Second(second) => second.build_var_map(tree, validator), + }, + Self::Seq(first, second) => { + first.build_var_map(tree, validator); + second.build_var_map(tree, validator); + } + Self::Many(matches) => { + if tree.nested.is_none() { + let nested = VarMap::<'s, V>::default(); + tree.nested = Some(Box::new(nested)); + } + let nested_validator = V::default(); + nested_validator.set_parent(validator); + let nested = tree.nested.as_mut().unwrap(); + for m in matches { + m.build_var_map(nested, &nested_validator); + } + } + Self::Named(name, t) => { + validator.insert_local_var(&name); + tree.map + .entry(name) + .or_insert_with(|| VarMapEntry::new(validator.clone_ref(), default())) + .tokens + .push(t.tokens()); + } + } + } +} + + + +// ================= +// === Validator === +// ================= + +/// Validator used to check if the macro generation correct. See the definition of [`VarMap`] to +/// learn more. +#[allow(missing_docs)] +pub trait Validator: PartialEq + Default + CloneRef { + fn check(&self, name: &str) -> bool; + fn parent(&self) -> Option; + fn set_parent(&self, parent: &Self); + fn insert_local_var(&self, var: &str); +} + +/// Disabled validator. See the docs of [`VarMap`] to learn more. +#[derive(Copy, Clone, CloneRef, Debug, Default, PartialEq)] +pub struct DisabledValidator; + +/// Enabled validator. See the docs of [`VarMap`] to learn more. 
+#[derive(Clone, CloneRef, Debug, Default)] +#[allow(missing_docs)] +pub struct EnabledValidator { + scope: Rc>, +} + +#[derive(Clone, Debug, Default)] +struct VarScope { + locals: HashSet, + parent: Option, +} + +impl PartialEq for EnabledValidator { + fn eq(&self, other: &EnabledValidator) -> bool { + Rc::ptr_eq(&self.scope, &other.scope) + } +} + +impl Validator for EnabledValidator { + #[inline(always)] + fn check(&self, name: &str) -> bool { + self.scope.borrow().locals.contains(name) + } + + #[inline(always)] + fn parent(&self) -> Option { + self.scope.borrow().parent.as_ref().map(|t| t.clone_ref()) + } + + #[inline(always)] + fn set_parent(&self, parent: &Self) { + self.scope.borrow_mut().parent = Some(parent.clone_ref()); + } + + #[inline(always)] + fn insert_local_var(&self, var: &str) { + self.scope.borrow_mut().locals.insert(var.to_string()); + } +} + +impl Validator for DisabledValidator { + #[inline(always)] + fn check(&self, _name: &str) -> bool { + true + } + + #[inline(always)] + fn parent(&self) -> Option { + None + } + + #[inline(always)] + fn set_parent(&self, _parent: &Self) {} + + #[inline(always)] + fn insert_local_var(&self, _var: &str) {} +} + + + +// ================== +// === VarMapView === +// ================== + +/// A view for a [`VarMap`]. It allows focusing on a specific repetition scope and querying for +/// variables there. See the docs of [`VarMap`] to learn more. +#[derive(Clone, Debug, Default)] +pub struct VarMapView<'t, 's, V> { + tree: Option<&'t VarMap<'s, V>>, + resolved_validator: Option, + parent_validator_to_check: Option, +} + +impl<'t, 's, V> VarMapView<'t, 's, V> { + /// Constructor. + pub fn new(tree: &'t VarMap<'s, V>) -> Self { + let resolved_validator = default(); + let parent_validator_to_check = default(); + Self { tree: Some(tree), resolved_validator, parent_validator_to_check } + } +} + +impl<'t, 's, V: Validator> VarMapView<'t, 's, V> { + /// Get the view for the nested repetition scope. 
+ pub fn nested(&self) -> Self { + let tree = self.tree.and_then(|t| t.nested.as_ref().map(|n| n.as_ref())); + let resolved_validator = None; + let parent_validator_to_check = self.resolved_validator.as_ref().map(|t| t.clone_ref()); + Self { tree, resolved_validator, parent_validator_to_check } + } +} + +impl<'t, 's, V: Validator> VarMapView<'t, 's, V> { + /// Query for a variable. + pub fn query(&mut self, name: &str) -> Option<&'t Vec>>> { + self.tree.and_then(|t| { + t.map.get(name).map(|entry| { + match &self.resolved_validator { + Some(validator) => + if !validator.check(name) { + todo!("Report nice error that the name does not belong to the scope.") + }, + None => { + let resolved_validator = entry.validator.clone_ref(); + if let Some(parent_validator_to_check) = &self.parent_validator_to_check { + let mut ok = false; + let mut validator = resolved_validator.clone(); + loop { + if &validator == parent_validator_to_check { + ok = true; + break; + } else { + match validator.parent() { + Some(p) => validator = p, + None => break, + } + } + } + if !ok { + todo!("Report nice error that the name does not belong to the same scope as previous variables.") + } + self.parent_validator_to_check = None; + } + self.resolved_validator = Some(resolved_validator); + } + } + &entry.tokens + }) + }) + } +} + +impl<'s, V> VarMap<'s, V> { + /// Create a new view for this var map. + pub fn view<'t>(&'t self) -> VarMapView<'t, 's, V> { + VarMapView::new(self) + } +} diff --git a/lib/rust/parser/src/macros/pattern.rs b/lib/rust/parser/src/macros/pattern.rs index 5ee7d86490..977d981541 100644 --- a/lib/rust/parser/src/macros/pattern.rs +++ b/lib/rust/parser/src/macros/pattern.rs @@ -5,121 +5,365 @@ use crate::prelude::*; use crate::syntax; +use std::collections::VecDeque; + // =============== // === Pattern === // =============== -/// Pattern used to validate incoming token stream against expected macro input. 
+/// Patterns are used to validate incoming token stream against expected macro input. /// -/// The idea is similar to patterns used in `macro_rules` definitions in Rust. There are a few -/// differences though: -/// 1. This pattern implementation exposes different matchers and operations. -/// 2. This macro implementation never attaches types to tokens, which means that every defined +/// The idea is similar to patterns used in macro rules in Rust with a few differences: +/// 1. These patterns allow for other constructs than macro rules. +/// 2. The macro resolution never reifies tokens as given types, which means that every defined /// pattern behaves like a TT-muncher in Rust. +#[derive(Clone, Debug, Deref)] +#[allow(missing_docs)] +pub struct Pattern { + #[deref] + pub data: Rc, + pub matches_empty_input: bool, +} + +impl Pattern { + /// Constructor. + pub fn new(data: PatternData, matches_empty_input: bool) -> Self { + Self { data: Rc::new(data), matches_empty_input } + } +} + +/// Variants of [`Pattern`]. #[derive(Clone, Debug)] #[allow(missing_docs)] -pub enum Pattern { +pub enum PatternData { /// Consume all items, till the end of the token stream. Everything, /// Consume nothing. Nothing, /// Consume items matching the first pattern. If the match was unsuccessful, the second match /// will be tried. - Or(Box, Box), - /// Consume a single item if it matches the configuration. - Item(Item), + Or(Pattern, Pattern), + Seq(Pattern, Pattern), + /// Consume many times (zero or more) the given pattern. If the given pattern succeeds on empty + /// input, it will be repeated as long as it consumes any input. + Many(Pattern), + /// Consume an identifier. + Identifier, + /// Consume a block and run the provided pattern in its body. + Block(Pattern), + /// Indicator of an error. The provided pattern is used to consume input when an error occurs. 
+ /// For example, if you want to consume an identifier, but the identifier is not found, you can + /// use this pattern to consume any token instead and mark it as invalid. + Expected(String, Pattern), + /// Named pattern. Mainly used for splicing the code in the macro definition body. + Named(String, Pattern), + /// Anything that is not a block. + NotBlock, } -/// Item pattern configuration. -#[derive(Clone, Copy, Debug)] -#[allow(missing_docs)] -pub struct Item { - /// Check whether the token has spaces on right-hand-side. The [`None`] value means that the - /// condition would not be checked. - pub has_rhs_spacing: Option, +/// Constructor. +pub fn everything() -> Pattern { + Pattern::new(PatternData::Everything, true) } - - -// ======================= -// === ResolutionError === -// ======================= - -/// Pattern resolution error. -#[derive(Debug)] -#[allow(missing_docs)] -pub struct ResolutionError { - /// All the incoming tokens. The resolver consumes vector of tokens and returns it back in case - /// an error happened. - pub tokens: Vec, - pub message: String, +/// Constructor. +pub fn identifier() -> Pattern { + Pattern::new(PatternData::Identifier, false) } -impl ResolutionError { - /// Constructor. - pub fn new(tokens: Vec, message: impl Into) -> Self { - let message = message.into(); - Self { tokens, message } - } +/// Constructor. +pub fn not_block() -> Pattern { + Pattern::new(PatternData::NotBlock, false) } - - -/// ================== -/// === Resolution === -/// ================== - -/// Successful pattern match result. -#[derive(Debug, Clone)] -#[allow(missing_docs)] -pub struct Match { - /// All the matched tokens. - pub matched: Vec, - /// The rest of the token stream that was not needed for the successful pattern match. - pub rest: Vec, +/// Constructor. +pub fn nothing() -> Pattern { + Pattern::new(PatternData::Nothing, true) } -impl Match { - /// Constructor. 
- pub fn new(matched: Vec, rest: Vec) -> Self { - Self { matched, rest } - } +/// Constructor. +pub fn or(fst: Pattern, snd: Pattern) -> Pattern { + let matches_empty_input = fst.matches_empty_input || snd.matches_empty_input; + Pattern::new(PatternData::Or(fst, snd), matches_empty_input) +} + +/// Constructor. +pub fn seq(fst: Pattern, snd: Pattern) -> Pattern { + let matches_empty_input = fst.matches_empty_input && snd.matches_empty_input; + Pattern::new(PatternData::Seq(fst, snd), matches_empty_input) +} + +/// Constructor. +pub fn many(item: Pattern) -> Pattern { + Pattern::new(PatternData::Many(item), true) +} + +/// Constructor. +pub fn block(body: Pattern) -> Pattern { + Pattern::new(PatternData::Block(body), false) +} + +/// Constructor. +pub fn expected(message: impl Into, item: Pattern) -> Pattern { + let matches_empty_input = item.matches_empty_input; + Pattern::new(PatternData::Expected(message.into(), item), matches_empty_input) +} + +/// Constructor. +pub fn named(message: impl Into, item: Pattern) -> Pattern { + let matches_empty_input = item.matches_empty_input; + Pattern::new(PatternData::Named(message.into(), item), matches_empty_input) } impl Pattern { - /// Match the token stream with this pattern. - pub fn resolve<'s, T: TryAsRef>>( - &self, - mut input: Vec, - has_spacing_at_end: bool, - right_to_left_mode: bool, - ) -> Result, ResolutionError> { + /// Repeat the current pattern multiple times. + pub fn many(self) -> Self { + many(self) + } + + /// Match self or consume any token that is not a block and mark it as invalid. + pub fn expect(self, message: impl Into) -> Self { + self | expected(message, not_block() | nothing()) + } + + /// Match self or consume any token that is not a block and mark it as invalid. + pub fn named(self, label: impl Into) -> Self { + named(label, self) + } +} + +/// The syntax `pattern1 >> pattern2` is a shortcut for `seq(pattern1, pattern2)`. 
+impl std::ops::Shr for Pattern { + type Output = Pattern; + fn shr(self, rhs: Pattern) -> Self::Output { + seq(self, rhs) + } +} + +/// The syntax `pattern1 | pattern2` is a shortcut for `or(pattern1, pattern2)`. +impl std::ops::BitOr for Pattern { + type Output = Pattern; + fn bitor(self, rhs: Pattern) -> Self::Output { + or(self, rhs) + } +} + +/// The syntax `pattern % "message"` is a shortcut for `pattern.expect("message")`. +impl> std::ops::Rem for Pattern { + type Output = Pattern; + fn rem(self, message: T) -> Self::Output { + self.expect(message) + } +} + +/// The syntax `pattern / "label"` is a shortcut for `pattern.named("label")`. +impl> Div for Pattern { + type Output = Pattern; + fn div(self, message: T) -> Self::Output { + named(message, self) + } +} + + + +// ============= +// === Match === +// ============= + +/// The result of applying [`Pattern`] to a token stream. After a successful match, a variant of the +/// [`Pattern`] is transformed to variant of [`Match`] of the same name. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub enum Match<'s> { + Everything(VecDeque>), + Nothing, + Or(Box>), + Seq(Box>, Box>), + Many(Vec>), + Identifier(syntax::Item<'s>), + Expected(String, Box>), + Named(String, Box>), + NotBlock(syntax::Item<'s>), +} + +/// The result of the [`Pattern::Or`] resolution. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub enum OrMatch<'s> { + First(Match<'s>), + Second(Match<'s>), +} + +impl<'s> Match<'s> { + /// Constructor. + pub fn or(m: OrMatch<'s>) -> Self { + Self::Or(Box::new(m)) + } + + /// Constructor. + pub fn seq(first: Match<'s>, second: Match<'s>) -> Self { + Self::Seq(Box::new(first), Box::new(second)) + } + + /// Constructor. + pub fn expected(expected: impl Into, second: Match<'s>) -> Self { + Self::Expected(expected.into(), Box::new(second)) + } + + /// Constructor. 
+ pub fn named(label: impl Into, second: Match<'s>) -> Self { + Self::Named(label.into(), Box::new(second)) + } + + /// Get all tokens of the match. + pub fn tokens(self) -> Vec> { match self { - Self::Everything => Ok(Match::new(input, default())), - Self::Nothing => Ok(Match::new(default(), input)), - Self::Or(fst, snd) => fst - .resolve(input, has_spacing_at_end, right_to_left_mode) - .or_else(|err| snd.resolve(err.tokens, has_spacing_at_end, right_to_left_mode)), - Self::Item(item) => match input.first() { - None => Err(ResolutionError::new(input, "Expected an item.")), - Some(first) => match first.try_as_ref() { - None => Err(ResolutionError::new(input, "Expected an item.")), - Some(_) => match item.has_rhs_spacing { - Some(spacing) => - if right_to_left_mode { - if spacing == has_spacing_at_end { - Ok(Match::new(vec![input.pop_front().unwrap()], input)) - } else { - Err(ResolutionError::new(input, "Expected an item.")) - } - } else { - todo!() - }, - None => Ok(Match::new(vec![input.pop_front().unwrap()], input)), - }, - }, + Self::Everything(tokens) => tokens.into(), + Self::Nothing => default(), + Self::Seq(fst, snd) => fst.tokens().extended(snd.tokens()), + Self::Many(t) => t.into_iter().flat_map(|s| s.tokens()).collect(), + Self::Identifier(ident) => vec![ident], + Self::Expected(_, item) => item.tokens(), + Self::Named(_, item) => item.tokens(), + Self::NotBlock(item) => vec![item], + Self::Or(t) => match *t { + OrMatch::First(fst) => fst.tokens(), + OrMatch::Second(snd) => snd.tokens(), + }, + } + } +} + + + +// =================== +// === MatchResult === +// =================== + +/// Result of a successful pattern resolution. It contains a match and the remaining token stream. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct MatchResult<'s> { + pub matched: Match<'s>, + pub rest: VecDeque>, +} + +impl<'s> MatchResult<'s> { + /// Constructor. 
+ pub fn new(matched: Match<'s>, rest: VecDeque>) -> Self { + Self { matched, rest } + } + + /// Map the match with the provided function. + pub fn map(mut self, f: impl FnOnce(Match<'s>) -> Match<'s>) -> Self { + self.matched = f(self.matched); + self + } +} + + + +// ====================== +// === MatchedSegment === +// ====================== + +/// List of matched segments. +pub type MatchedSegments<'s> = NonEmptyVec>; + +/// A matched segment. See the [`macros::resolver::Segment`] to learn more. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub struct MatchedSegment<'s> { + pub header: syntax::Token<'s>, + pub result: Match<'s>, +} + +impl<'s> MatchedSegment<'s> { + /// Constructor. + pub fn new(header: syntax::Token<'s>, result: Match<'s>) -> Self { + Self { header, result } + } +} + + + +// ========================== +// === Pattern Resolution === +// ========================== + +impl Pattern { + /// Resolve the pattern. Return [`MatchResult`] if the pattern is matched, otherwise all the + /// input tokens. 
+ pub fn resolve<'s>( + &self, + mut input: VecDeque>, + ) -> Result, VecDeque>> { + match &*self.data { + PatternData::Expected(msg, item) => + item.resolve(input).map(|t| t.map(|s| Match::expected(msg, s))), + PatternData::Named(msg, item) => + item.resolve(input).map(|t| t.map(|s| Match::named(msg, s))), + PatternData::Everything => Ok(MatchResult::new(Match::Everything(input), default())), + PatternData::Nothing => Ok(MatchResult::new(Match::Nothing, input)), + PatternData::Or(fst, snd) => fst + .resolve(input) + .map(|t| t.map(|s| Match::or(OrMatch::First(s)))) + .or_else(|t| snd.resolve(t).map(|t| t.map(|s| Match::or(OrMatch::Second(s))))), + PatternData::Seq(fst, snd) => fst + .resolve(input) + .and_then(|t| snd.resolve(t.rest).map(|s| s.map(|x| Match::seq(t.matched, x)))), + PatternData::Many(pat) => { + let mut out = vec![]; + let mut input_len = input.len(); + loop { + match pat.resolve(input) { + Err(rest) => { + input = rest; + break; + } + Ok(t) => { + input = t.rest; + if pat.matches_empty_input { + let no_input_consumed = input_len == input.len(); + if no_input_consumed { + break; + } + input_len = input.len(); + } + out.push(t.matched); + } + } + } + Ok(MatchResult::new(Match::Many(out), input)) + } + PatternData::Identifier => match input.pop_front() { + None => Err(default()), + Some(t) => + if t.is_variant(syntax::token::variant::VariantMarker::Ident) { + Ok(MatchResult::new(Match::Identifier(t), input)) + } else { + input.push_front(t); + Err(input) + }, + }, + PatternData::Block(body) => match input.pop_front() { + Some(syntax::Item::Block(tokens)) => + body.resolve(tokens.into_iter().rev().map_into().collect()), + Some(t) => { + input.push_front(t); + Err(input) + } + None => Err(default()), + }, + PatternData::NotBlock => match input.pop_front() { + Some(t @ syntax::Item::Block(_)) => { + input.push_front(t); + Err(input) + } + None => Err(default()), + Some(t) => Ok(MatchResult::new(Match::NotBlock(t), input)), }, } } diff --git 
a/lib/rust/parser/src/macros/resolver.rs b/lib/rust/parser/src/macros/resolver.rs new file mode 100644 index 0000000000..52d32a47ae --- /dev/null +++ b/lib/rust/parser/src/macros/resolver.rs @@ -0,0 +1,387 @@ +//! Macro resolver implementation. Refer to the docs of the main parser module to learn more. + +use crate::prelude::*; + +use crate::macros; +use crate::macros::pattern; +use crate::syntax; +use crate::syntax::token; +use crate::syntax::token::Token; + +use enso_data_structures::im_list; +use enso_data_structures::im_list::List; +use std::collections::VecDeque; + + + +// ================== +// === SegmentMap === +// ================== + +/// A tree-like structure encoding potential macro matches. The keys are code representations of +/// [`macros::SegmentDefinition`] headers (first tokens of sections). Each key is associated with +/// one or more [`SegmentEntry`], which stores a list of required subsequent segments +/// and a macro definition that should be used when all the segments will be matched. For example, +/// after matching the "if" keyword, this struct will contain one entry "then" with two values, one +/// for the required "else" section, and one without a required section (for the "if ... then ..." +/// case). +#[derive(Default, Debug, Deref, DerefMut)] +pub struct SegmentMap<'s> { + map: HashMap<&'s str, NonEmptyVec>>, +} + +/// Partially matched macro info. See docs of [`SegmentMap`] to learn more. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub struct SegmentEntry<'s> { + /// All the segment headers that are required for the macro definition to be used. + pub required_segments: List>, + /// Definition of the macro that should be used when all the required segments will be matched. + /// It contains [`Pattern`] definition for every segment that will be used after all the + /// segment tokens are discovered. + pub definition: Rc>, +} + + +impl<'a> SegmentMap<'a> { + /// Register a new macro definition in this macro tree. 
+ pub fn register(&mut self, definition: macros::Definition<'a>) { + let header = definition.segments.head.header; + let entry = SegmentEntry { + required_segments: definition.segments.tail.clone(), + definition: Rc::new(definition), + }; + if let Some(node) = self.get_mut(header) { + node.push(entry); + } else { + self.insert(header, NonEmptyVec::singleton(entry)); + } + } +} + + + +// ============================= +// === PartiallyMatchedMacro === +// ============================= + +/// Partially matched macro. It contains the current section being matched, all the sections matched +/// so far, and the macro definition in case the macro was fully matched. Please note that the +/// definition can change during macro resolution. For example, after finding both "if" and "then" +/// sections, the definition of the "if ... then ..." macro will be used. However, after finding the +/// "else" token, the definition will be replaced with the "if ... then ... else ..." macro one. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct PartiallyMatchedMacro<'s> { + pub current_segment: MatchedSegment<'s>, + pub resolved_segments: Vec>, + pub possible_next_segments: SegmentMap<'s>, + pub matched_macro_def: Option>>, +} + +impl<'a> PartiallyMatchedMacro<'a> { + /// A new macro resolver with a special "root" segment definition. The "root" segment does not + /// exist in the source code, it is simply the whole expression being parsed. It is treated + /// as a macro in order to unify the algorithms. + pub fn new_root() -> Self { + let current_segment = MatchedSegment::new(Token("", "", token::Variant::newline())); + let resolved_segments = default(); + let possible_next_segments = default(); + let matched_macro_def = Some(Rc::new(macros::Definition { + segments: im_list::NonEmpty::singleton(macros::SegmentDefinition { + header: "__ROOT__", + pattern: pattern::everything(), + }), + body: Rc::new(|v| { + // Taking the first segment, hardcoded above. 
+ let body = v.pop().0.result; + syntax::operator::resolve_operator_precedence(body.tokens()) + }), + })); + Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def } + } +} + + + +// ====================== +// === MatchedSegment === +// ====================== + +/// A macro segment whose header was matched. Its body contains a list of tokens and nested macros +/// that were found. Please note that the body tokens are not matched against the pattern yet. +/// Because of that, the macro nesting is incorrect for patterns that do not consume all tokens till +/// the end of the stream. For example, the expression `(a) (b)` will be matched in such a way, that +/// the macro `(b)` will be part of the body of the `)` segment of the `(a)` macro. This will be +/// restructured in the pattern matching phase. See the parser module docs to learn more about this +/// process. +#[derive(Debug)] +pub struct MatchedSegment<'s> { + header: Token<'s>, + body: Vec>, +} + +impl<'s> MatchedSegment<'s> { + /// Constructor. + pub fn new(header: Token<'s>) -> Self { + let body = default(); + Self { header, body } + } +} + + + +// =================================== +// === ItemOrPartiallyMatchedMacro === +// =================================== + +/// One of [`syntax::Item`] or [`PartiallyMatchedMacro`]. Used during macro resolution when some +/// items are already resolved as macros, and some are not yet. For example, after matching the +/// expression `(a) x (b)`, the `x` token and the `(b)` macro will be items of the body of the last +/// segment of the `(a)` macro. 
+#[derive(Debug, From)] +#[allow(missing_docs)] +enum ItemOrPartiallyMatchedMacro<'s> { + SyntaxItem(syntax::Item<'s>), + PartiallyMatchedMacro(PartiallyMatchedMacro<'s>), +} + +impl<'s> TryAsRef> for ItemOrPartiallyMatchedMacro<'s> { + fn try_as_ref(&self) -> Option<&syntax::Item<'s>> { + match self { + Self::SyntaxItem(t) => Some(t), + _ => None, + } + } +} + +impl<'s> TryAsRef> for ItemOrPartiallyMatchedMacro<'s> { + fn try_as_ref(&self) -> Option<&PartiallyMatchedMacro<'s>> { + match self { + Self::PartiallyMatchedMacro(t) => Some(t), + _ => None, + } + } +} + + + +// ================ +// === Resolver === +// ================ + +/// Macro resolver capable of resolving nested macro usages. See the docs of the main parser module +/// to learn more about the macro resolution steps. +#[derive(Debug)] +pub struct Resolver<'s> { + current_macro: PartiallyMatchedMacro<'s>, + macro_stack: Vec>, +} + +/// Result of the macro resolution step. +#[derive(Clone, Debug)] +enum Step<'s> { + NewSegmentStarted, + NormalToken(syntax::Item<'s>), + MacroStackPop(syntax::Item<'s>), +} + +impl<'s> Resolver<'s> { + /// New resolver with a special "root" segment definition allowing parsing arbitrary + /// expressions. + pub fn new_root() -> Self { + let current_macro = PartiallyMatchedMacro::new_root(); + let macro_stack = default(); + Self { current_macro, macro_stack } + } + + fn replace_current_with_parent_macro(&mut self, mut parent_macro: PartiallyMatchedMacro<'s>) { + mem::swap(&mut parent_macro, &mut self.current_macro); + let child_macro = parent_macro; + self.current_macro.current_segment.body.push(child_macro.into()); + } + + /// Pop the macro stack if the current token is reserved. For example, when matching the + /// `if a if b then c then d` expression, the token `then` after the token `c` will be + /// considered reserved and the macro resolution of `if b then c` will be popped from the stack. 
+ fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option> { + let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr)); + reserved.and_option_from(|| self.macro_stack.pop()) + } + + /// Run the resolver. Returns the resolved AST. + pub fn run( + mut self, + root_macro_map: &SegmentMap<'s>, + tokens: &mut iter::Peekable>>, + ) -> syntax::Tree<'s> { + event!(TRACE, "Running macro resolver. Registered macros:\n{:#?}", root_macro_map); + let mut opt_item: Option>; + macro_rules! next_token { + () => {{ + opt_item = tokens.next(); + if let Some(token) = opt_item.as_ref() { + event!(TRACE, "New token {:#?}", token); + } + }}; + } + macro_rules! trace_state { + () => { + event!(TRACE, "Current macro:\n{:#?}", self.current_macro); + event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack); + }; + } + next_token!(); + while let Some(token) = opt_item && !token.is_newline() { + let step_result = match token { + syntax::Item::Token(token) => self.process_token(root_macro_map, token), + _ => Step::NormalToken(token), + }; + match step_result { + Step::MacroStackPop(item) => { + trace_state!(); + opt_item = Some(item) + } + Step::NewSegmentStarted => { + trace_state!(); + next_token!() + } + Step::NormalToken(item) => { + self.current_macro.current_segment.body.push(item.into()); + trace_state!(); + next_token!(); + } + } + } + + event!(TRACE, "Finishing resolution. Popping the macro stack."); + while let Some(parent_macro) = self.macro_stack.pop() { + self.replace_current_with_parent_macro(parent_macro); + } + + trace_state!(); + let (tree, rest) = Self::resolve(self.current_macro); + if !rest.is_empty() { + panic!( + "Internal error. 
Not all tokens were consumed by the macro resolver:\n{:#?}", + rest + ); + } + tree + } + + fn process_token(&mut self, root_macro_map: &SegmentMap<'s>, token: Token<'s>) -> Step<'s> { + let repr = &**token.code; + if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) { + event!(TRACE, "Entering next segment of the current macro."); + let mut new_match_tree = + Self::move_to_next_segment(&mut self.current_macro.matched_macro_def, subsegments); + let mut current_segment = MatchedSegment::new(token); + mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments); + mem::swap(&mut self.current_macro.current_segment, &mut current_segment); + self.current_macro.resolved_segments.push(current_segment); + Step::NewSegmentStarted + } else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) { + event!(TRACE, "Next token reserved by parent macro. Resolving current macro."); + self.replace_current_with_parent_macro(parent_macro); + Step::MacroStackPop(token.into()) + } else if let Some(segments) = root_macro_map.get(repr) { + event!(TRACE, "Starting a new nested macro resolution."); + let mut matched_macro_def = default(); + let mut current_macro = PartiallyMatchedMacro { + current_segment: MatchedSegment { header: token, body: default() }, + resolved_segments: default(), + possible_next_segments: Self::move_to_next_segment( + &mut matched_macro_def, + segments, + ), + matched_macro_def, + }; + mem::swap(&mut self.current_macro, &mut current_macro); + self.macro_stack.push(current_macro); + Step::NewSegmentStarted + } else { + event!(TRACE, "Consuming token as current segment body."); + Step::NormalToken(token.into()) + } + } + + /// Resolve the [`PartiallyMatchedMacro`]. Returns the AST and the non-used tokens. For example, + /// the resolution of the `(a)` macro in the `(a) x (b)` expression will return the `(a)` AST + /// and the `x` and `(b)` items (already resolved). 
+ fn resolve(m: PartiallyMatchedMacro<'s>) -> (syntax::Tree<'s>, VecDeque>) { + let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment); + let resolved_segments = segments.mapped(|segment| { + let mut items: VecDeque> = default(); + for item in segment.body { + match item { + ItemOrPartiallyMatchedMacro::SyntaxItem(t) => items.push_back(t), + ItemOrPartiallyMatchedMacro::PartiallyMatchedMacro(unresolved_macro) => { + let (resolved_macro, unused_items) = Self::resolve(unresolved_macro); + items.push_back(resolved_macro.into()); + items.extend(unused_items); + } + } + } + (segment.header, items) + }); + + if let Some(macro_def) = m.matched_macro_def { + let mut def_segments = macro_def.segments.to_vec().into_iter(); + let mut pattern_matched_segments = resolved_segments.mapped(|(header, items)| { + let err = "Internal error. Macro definition and match segments count mismatch."; + let def = def_segments.next().unwrap_or_else(|| panic!("{}", err)); + (header, def.pattern.resolve(items)) + }); + + // Moving not pattern-matched tokens of the last segment to parent. + let mut not_used_items_of_last_segment = VecDeque::new(); + match &mut pattern_matched_segments.last_mut().1 { + Err(rest) => mem::swap(&mut not_used_items_of_last_segment, rest), + Ok(segment) => mem::swap(&mut not_used_items_of_last_segment, &mut segment.rest), + } + + let pattern_matched_segments = + pattern_matched_segments.mapped(|(header, match_result)| match match_result { + Ok(result) => { + if !result.rest.is_empty() { + todo!("Mark unmatched tokens as unexpected."); + } + pattern::MatchedSegment::new(header, result.matched) + } + Err(_unmatched_items) => todo!("Mark unmatched tokens as unexpected."), + }); + + let out = (macro_def.body)(pattern_matched_segments); + (out, not_used_items_of_last_segment) + } else { + todo!("Macro was not matched with any known macro definition. 
This should return an AST node indicating invalid match.") + } + } + + /// Move the resolution to the next segment. Takes possible next segments and merges them in a + /// new [`SegmentMap`]. If after moving to the next segment there is a macro definition that is + /// fully matched, its definition will be recorded. + fn move_to_next_segment( + matched_macro_def: &mut Option>>, + possible_segments: &[SegmentEntry<'s>], + ) -> SegmentMap<'s> { + *matched_macro_def = None; + let mut new_section_tree = SegmentMap::default(); + for segment_entry in possible_segments { + if let Some(first) = segment_entry.required_segments.head() { + let tail = segment_entry.required_segments.tail().cloned().unwrap_or_default(); + let definition = segment_entry.definition.clone_ref(); + let entry = SegmentEntry { required_segments: tail, definition }; + if let Some(node) = new_section_tree.get_mut(&first.header) { + node.push(entry); + } else { + new_section_tree.insert(first.header, NonEmptyVec::singleton(entry)); + } + } else { + *matched_macro_def = Some(segment_entry.definition.clone_ref()); + } + } + new_section_tree + } +} diff --git a/lib/rust/parser/src/main.rs b/lib/rust/parser/src/main.rs index df6b884b99..cfd38dbf7c 100644 --- a/lib/rust/parser/src/main.rs +++ b/lib/rust/parser/src/main.rs @@ -100,15 +100,6 @@ use crate::prelude::*; -use crate::source::VisibleOffset; - -use enso_data_structures::im_list; -use enso_data_structures::im_list::List; -use lexer::Lexer; -use macros::pattern::Pattern; -use syntax::token; -use syntax::token::Token; - // ============== // === Export === @@ -130,682 +121,98 @@ pub mod prelude { -// ================================= -// === SyntaxItemOrMacroResolver === -// ================================= +// ============== +// === Parser === +// ============== -/// One of [`syntax::Item`] or [`MacroResolver`]. -#[derive(Debug)] +/// Enso parser. See the module documentation to learn more about how it works. 
#[allow(missing_docs)] -pub enum SyntaxItemOrMacroResolver<'s> { - SyntaxItem(syntax::Item<'s>), - MacroResolver(MacroResolver<'s>), -} - -impl<'s> From> for SyntaxItemOrMacroResolver<'s> { - fn from(t: syntax::Item<'s>) -> Self { - Self::SyntaxItem(t) - } -} - -impl<'s> From> for SyntaxItemOrMacroResolver<'s> { - fn from(t: MacroResolver<'s>) -> Self { - Self::MacroResolver(t) - } -} - -impl<'s> TryAsRef> for SyntaxItemOrMacroResolver<'s> { - fn try_as_ref(&self) -> Option<&syntax::Item<'s>> { - match self { - Self::SyntaxItem(t) => Some(t), - _ => None, - } - } -} - - -// ====================== -// === MacroMatchTree === -// ====================== - -/// A tree-like structure encoding potential macro matches. The keys are representations of tokens -/// that can be matched. For example, the key could be "if" or "->". Each key is associated with one -/// or more [`PartiallyMatchedMacro`], which stories a list of required segments and a macro -/// definition in case all the segments were matched. For example, for the "if" key, there can be -/// two required segment lists, one for "then" and "else" segments, and one for the "then" segment -/// only. -#[derive(Default, Debug, Deref, DerefMut)] -pub struct MacroMatchTree<'s> { - map: HashMap<&'s str, NonEmptyVec>>, -} - -/// Partially matched macro info. See docs of [`MacroMatchTree`] to learn more. -#[derive(Clone, Debug)] -#[allow(missing_docs)] -pub struct PartiallyMatchedMacro<'s> { - pub required_segments: List>, - pub definition: Rc>, -} - -impl<'a> MacroMatchTree<'a> { - /// Register a new macro definition in this macro tree. 
- pub fn register(&mut self, definition: macros::Definition<'a>) { - let header = definition.segments.head.header; - let entry = PartiallyMatchedMacro { - required_segments: definition.segments.tail.clone(), - definition: Rc::new(definition), - }; - if let Some(node) = self.get_mut(header) { - node.push(entry); - } else { - self.insert(header, NonEmptyVec::singleton(entry)); - } - } -} - - - -// ===================== -// === MacroResolver === -// ===================== - -/// Enso macro resolver. See the docs of the main module to learn more about the macro resolution -/// steps. #[derive(Debug)] -#[allow(missing_docs)] -pub struct MacroResolver<'s> { - pub current_segment: MatchedSegment<'s>, - pub resolved_segments: Vec>, - pub possible_next_segments: MacroMatchTree<'s>, - pub matched_macro_def: Option>>, +pub struct Parser { + pub macros: macros::resolver::SegmentMap<'static>, } -impl<'a> MacroResolver<'a> { - /// A new macro resolver with a special "root" segment definition. The "root" segment does not - /// exist in the source code, it is simply the whole expression being parsed. It is treated - /// as a macro in order to unify the algorithms. - pub fn new_root() -> Self { - let current_segment = - MatchedSegment { header: Token("", "", token::Variant::newline()), body: default() }; - let resolved_segments = default(); - let possible_next_segments = default(); - let matched_macro_def = Some(Rc::new(macros::Definition { - rev_prefix_pattern: None, - segments: im_list::NonEmpty::singleton(macros::SegmentDefinition { - header: "__ROOT__", - pattern: Pattern::Everything, - }), - body: Rc::new(|_, v| { - if v.len() != 1 { - panic!() - } - let t = v.into_vec().pop().unwrap().1; - resolve_operator_precedence(t) - }), - })); - Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def } - } -} - -/// A matched macro segment. Partial macro resolution product. 
-#[derive(Debug)] -pub struct MatchedSegment<'s> { - header: Token<'s>, - body: Vec>, -} - -impl<'s> MatchedSegment<'s> { +impl Parser { /// Constructor. - pub fn new(header: Token<'s>) -> Self { - let body = default(); - Self { header, body } - } -} - - -/// Main macro resolver capable of resolving nested macro usages. See the docs of the main module to -/// learn more about the macro resolution steps. -#[derive(Debug)] -pub struct Resolver<'s> { - current_macro: MacroResolver<'s>, - macro_stack: Vec>, -} - -/// Result of the macro resolution step. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum ResolverStep { - NormalToken, - NewSegmentStarted, - MacroStackPop, -} - -impl<'s> Resolver<'s> { - fn new_root() -> Self { - let current_macro = MacroResolver::new_root(); - let macro_stack = default(); - Self { current_macro, macro_stack } + pub fn new() -> Self { + let macros = macros::built_in::all(); + Self { macros } } - fn run( - mut self, - lexer: &Lexer<'s>, - root_macro_map: &MacroMatchTree<'s>, - tokens: Vec>, - ) -> syntax::Tree<'s> { - let mut stream = tokens.into_iter(); - let mut opt_token: Option>; - macro_rules! next_token { - () => {{ - opt_token = stream.next(); - if let Some(token) = opt_token.as_ref() { - event!(TRACE, "New token {:#?}", token); - } - }}; + /// Main entry point. + pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> { + let tokens = lexer::run(code); + let mut statements = vec![]; + let mut tokens = tokens.into_iter().peekable(); + while tokens.peek().is_some() { + let resolver = macros::resolver::Resolver::new_root(); + let tree = resolver.run(&self.macros, &mut tokens); + statements.push(tree); } - macro_rules! trace_state { - () => { - event!(TRACE, "Current macro:\n{:#?}", self.current_macro); - event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack); - }; - } - next_token!(); - while let Some(token) = opt_token { - let step_result = match &token { - // FIXME: clone? 
- syntax::Item::Token(token) => self.process_token(root_macro_map, token.clone()), - _ => ResolverStep::NormalToken, - }; - match step_result { - ResolverStep::MacroStackPop => { - trace_state!(); - opt_token = Some(token) - } - ResolverStep::NewSegmentStarted => { - trace_state!(); - next_token!() - } - ResolverStep::NormalToken => { - self.current_macro.current_segment.body.push(token.into()); - trace_state!(); - next_token!(); - } - } - } - - while let Some(parent_macro) = self.macro_stack.pop() { - self.replace_current_with_parent_macro(parent_macro); - } - - trace_state!(); - - Self::resolve(lexer, self.current_macro, None) - } - - fn replace_current_with_parent_macro(&mut self, mut parent_macro: MacroResolver<'s>) { - mem::swap(&mut parent_macro, &mut self.current_macro); - let mut child_macro = parent_macro; - if let Some(def) = &child_macro.matched_macro_def { - let pattern = &def.segments.last().pattern; - let child_tokens = mem::take(&mut child_macro.current_segment.body); - // FIXME: the first [`false`] below is invalid. 
- let match_result = pattern.resolve(child_tokens, false, false).unwrap(); - let mut new_child_tokens = match_result.matched; - let new_parent_tokens = match_result.rest; - mem::swap(&mut child_macro.current_segment.body, &mut new_child_tokens); - self.current_macro.current_segment.body.push(child_macro.into()); - self.current_macro.current_segment.body.extend(new_parent_tokens); - } else { - panic!() - } - } - - fn resolve( - lexer: &Lexer<'s>, - m: MacroResolver<'s>, - prefix_tokens: Option>>, - ) -> syntax::Tree<'s> { - let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment); - let sss: NonEmptyVec<(Token, Vec>)> = segments.mapped(|segment| { - let mut ss: Vec> = vec![]; - for item in segment.body { - let resolved_token = match item { - SyntaxItemOrMacroResolver::MacroResolver(m2) => { - if let Some(macro_def) = &m2.matched_macro_def - && let Some(pfx_pattern) = &macro_def.rev_prefix_pattern { - ss.reverse(); - let spacing = m2.current_segment.header.left_offset.visible > VisibleOffset(0); - let mut match_result = pfx_pattern.resolve(ss,spacing,true).unwrap(); - match_result.matched.reverse(); - ss = match_result.rest; - ss.reverse(); - Self::resolve(lexer, m2, Some(match_result.matched)).into() - } else { - Self::resolve(lexer, m2, None).into() - } - }, - SyntaxItemOrMacroResolver::SyntaxItem(t) => t, - }; - ss.push(resolved_token); - } - (segment.header, ss) - }); - - if let Some(macro_def) = m.matched_macro_def { - (macro_def.body)(prefix_tokens, sss) - } else { - todo!("Handling non-fully-resolved macros") - } - } - - fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option> { - let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr)); - if reserved { - self.macro_stack.pop() - } else { - None - } - } - - fn process_token( - &mut self, - root_macro_map: &MacroMatchTree<'s>, - token: Token<'s>, - ) -> ResolverStep { - let repr = &**token.code; - if let Some(subsegments) = 
self.current_macro.possible_next_segments.get(repr) { - event!(TRACE, "Entering next segment of the current macro."); - let mut new_match_tree = - Self::enter(&mut self.current_macro.matched_macro_def, subsegments); - let mut current_segment = MatchedSegment::new(token); - mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments); - mem::swap(&mut self.current_macro.current_segment, &mut current_segment); - self.current_macro.resolved_segments.push(current_segment); - ResolverStep::NewSegmentStarted - } else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) { - event!(TRACE, "Next token reserved by parent macro. Resolving current macro."); - self.replace_current_with_parent_macro(parent_macro); - ResolverStep::MacroStackPop - } else if let Some(segments) = root_macro_map.get(repr) { - event!(TRACE, "Starting a new nested macro resolution."); - let mut matched_macro_def = default(); - let mut current_macro = MacroResolver { - current_segment: MatchedSegment { header: token, body: default() }, - resolved_segments: default(), - possible_next_segments: Self::enter(&mut matched_macro_def, segments), - matched_macro_def, - }; - mem::swap(&mut self.current_macro, &mut current_macro); - self.macro_stack.push(current_macro); - ResolverStep::NewSegmentStarted - } else { - event!(TRACE, "Consuming token as current segment body."); - ResolverStep::NormalToken - } - } - - fn enter( - matched_macro_def: &mut Option>>, - path: &[PartiallyMatchedMacro<'s>], - ) -> MacroMatchTree<'s> { - *matched_macro_def = None; - let mut new_section_tree = MacroMatchTree::default(); - for v in path { - if let Some(first) = v.required_segments.head() { - let tail = v.required_segments.tail().cloned().unwrap_or_default(); - let definition = v.definition.clone_ref(); - let x = PartiallyMatchedMacro { required_segments: tail, definition }; - if let Some(node) = new_section_tree.get_mut(&first.header) { - node.push(x); - } else { - 
new_section_tree.insert(first.header, NonEmptyVec::singleton(x)); - } - } else { - if matched_macro_def.is_some() { - event!(ERROR, "Internal error. Duplicate macro definition."); - } - *matched_macro_def = Some(v.definition.clone_ref()); - } - } - new_section_tree + syntax::Tree::module(statements) } } - -// FIXME: hardcoded values + not finished implementation. -fn precedence_of(operator: &str) -> usize { - match operator { - "+" => 3, - "-" => 3, - "*" => 7, - _ => panic!("Operator not supported: {}", operator), - } -} -// -#[derive(Clone, Copy, Debug, Deref, DerefMut)] -struct WithPrecedence { - #[deref] - #[deref_mut] - elem: T, - precedence: usize, -} - -impl WithPrecedence { - pub fn new(precedence: usize, elem: T) -> Self { - Self { elem, precedence } - } -} - - -fn annotate_tokens_that_need_spacing(items: Vec) -> Vec { - items - .into_iter() - .map(|item| match item { - syntax::Item::Token(_) => item, - syntax::Item::Tree(ast) => - match &*ast.variant { - syntax::tree::Variant::MultiSegmentApp(data) => { - if data.segments.first().header.variant.marker() - != token::variant::VariantMarker::Symbol - { - syntax::Item::Tree(ast.with_error( - "This expression cannot be used in a non-spaced equation.", - )) - } else { - syntax::Item::Tree(ast) - } - } - _ => syntax::Item::Tree(ast), - }, - }) - .collect() -} - -fn resolve_operator_precedence<'s>(items: Vec>) -> syntax::Tree<'s> { - type Tokens<'s> = Vec>; - let mut flattened: Tokens<'s> = default(); - let mut no_space_group: Tokens<'s> = default(); - let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| { - let tokens = mem::take(no_space_group); - if tokens.len() == 1 { - flattened.extend(tokens); - } else { - let tokens = annotate_tokens_that_need_spacing(tokens); - let ast = resolve_operator_precedence_internal(tokens); - flattened.push(ast.into()); - } - }; - for item in items { - if item.span().left_offset.visible.width_in_spaces == 0 || no_space_group.is_empty() { - 
no_space_group.push(item) - } else if !no_space_group.is_empty() { - processs_no_space_group(&mut flattened, &mut no_space_group); - no_space_group.push(item); - } else { - // FIXME: this is unreachable. - flattened.push(item); - } - } - if !no_space_group.is_empty() { - processs_no_space_group(&mut flattened, &mut no_space_group); - } - resolve_operator_precedence_internal(flattened) -} - -fn resolve_operator_precedence_internal(items: Vec>) -> syntax::Tree<'_> { - // Reverse-polish notation encoding. - let mut output: Vec = default(); - let mut operator_stack: Vec> = default(); - let mut last_token_was_ast = false; - let mut last_token_was_opr = false; - for item in items { - let i2 = item.clone(); // FIXME - if let syntax::Item::Token(token) = i2 && let token::Variant::Operator(opr) = token.variant { - // Item is an operator. - let last_token_was_opr_copy = last_token_was_opr; - last_token_was_ast = false; - last_token_was_opr = true; - - let prec = precedence_of(&token.code); - let opr = Token(token.left_offset, token.code, opr); - // let opr = item.span().with(opr); - - if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() { - // Error. Multiple operators next to each other. - match &mut prev_opr.elem { - Err(err) => err.operators.push(opr), - Ok(prev) => { - let operators = NonEmptyVec::new(prev.clone(),vec![opr]); // FIXME: clone? - prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators}); - } - } - } else { - while let Some(prev_opr) = operator_stack.last() - && prev_opr.precedence >= prec - && let Some(prev_opr) = operator_stack.pop() - && let Some(rhs) = output.pop() - { - // Prev operator in the [`operator_stack`] has a higher precedence. 
- let lhs = output.pop().map(token_to_ast); - let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(token_to_ast(rhs))); - output.push(ast.into()); - } - operator_stack.push(WithPrecedence::new(prec, Ok(opr))); - } - } else if last_token_was_ast && let Some(lhs) = output.pop() { - // Multiple non-operators next to each other. - let lhs = token_to_ast(lhs); - let rhs = token_to_ast(item); - let ast = syntax::Tree::app(lhs, rhs); - output.push(ast.into()); - } else { - // Non-operator that follows previously consumed operator. - last_token_was_ast = true; - last_token_was_opr = false; - output.push(item); - } - } - let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(token_to_ast)); - while let Some(opr) = operator_stack.pop() { - let opt_lhs = output.pop().map(token_to_ast); - opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs)); - } - if !output.is_empty() { - panic!( - "Internal error. Not all tokens were consumed while constructing the -expression." - ); - } - syntax::Tree::opr_section_boundary(opt_rhs.unwrap()) // fixme -} - -fn token_to_ast(elem: syntax::Item) -> syntax::Tree { - match elem { - syntax::Item::Token(token) => match token.variant { - token::Variant::Ident(ident) => { - let ii2 = token.with_variant(ident); - syntax::tree::Tree::ident(ii2) - } - _ => panic!(), - }, - syntax::Item::Tree(ast) => ast, - } -} - -fn matched_segments_into_multi_segment_app<'s>( - prefix_tokens: Option>>, - matched_segments: NonEmptyVec<(Token<'s>, Vec>)>, -) -> syntax::Tree<'s> { - // FIXME: remove into_vec and use NonEmptyVec::mapped - let segments = matched_segments - .into_vec() - .into_iter() - .map(|segment| { - let header = segment.0; - let body = - (!segment.1.is_empty()).as_some_from(|| resolve_operator_precedence(segment.1)); - syntax::tree::MultiSegmentAppSegment { header, body } - }) - .collect_vec(); - if let Ok(segments) = NonEmptyVec::try_from(segments) { - let prefix = prefix_tokens.map(resolve_operator_precedence); - 
syntax::Tree::multi_segment_app(prefix, segments) - } else { - panic!() +impl Default for Parser { + fn default() -> Self { + Self::new() } } -// ========================= -// === Macro Definitions === -// ========================= - -fn macro_if_then_else<'s>() -> macros::Definition<'s> { - macro_definition! { - ("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything) - matched_segments_into_multi_segment_app - } -} - -fn macro_if_then<'s>() -> macros::Definition<'s> { - macro_definition! { - ("if", Pattern::Everything, "then", Pattern::Everything) - matched_segments_into_multi_segment_app - } -} - -fn macro_group<'s>() -> macros::Definition<'s> { - macro_definition! { - ("(", Pattern::Everything, ")", Pattern::Nothing) - matched_segments_into_multi_segment_app - } -} - -fn macro_lambda<'s>() -> macros::Definition<'s> { - let prefix = Pattern::Or( - Box::new(Pattern::Item(macros::pattern::Item { has_rhs_spacing: Some(false) })), - Box::new(Pattern::Everything), - ); - macro_definition! 
{ - (prefix, "->", Pattern::Everything) - matched_segments_into_multi_segment_app - } -} - -fn builtin_macros() -> MacroMatchTree<'static> { - let mut macro_map = MacroMatchTree::default(); - macro_map.register(macro_if_then()); - macro_map.register(macro_if_then_else()); - macro_map.register(macro_group()); - macro_map.register(macro_lambda()); - macro_map -} - - - -// ============ -// === Main === -// ============ - -// fn main() { -// lexer::lexer_main(); -// } +// ============= +// === Tests === +// ============= fn main() { init_tracing(TRACE); - // let str = "if a then b else c"; - // let str = "if if * a + b * then y then b"; - // let str = "* a + b *"; - // let str = "* a + * b"; - // let str = "(a) (b) c"; - // let str = "if (a) then b"; - // let str = "foo a-> b"; - // let str = "a+b * c"; - // let str = "foo if a then b"; - // let str = "foo *(a)"; - let str = "foo if a then b else c"; - let mut lexer = Lexer::new(str); - lexer.run(); - - let root_macro_map = builtin_macros(); - - event!(TRACE, "Registered macros:\n{:#?}", root_macro_map); - - let resolver = Resolver::new_root(); - let ast = resolver.run( - &lexer, - &root_macro_map, - lexer.output.iter().map(|t| t.clone().into()).collect_vec(), - ); - println!("{:#?}", ast); - println!("\n\n{}", ast.code()); - + let ast = Parser::new().run("type Option (a) b c"); println!("\n\n==================\n\n"); - - lexer::main(); + println!("{:#?}", ast); +} + +#[cfg(test)] +mod tests { + use super::*; + use enso_parser_syntax_tree_builder::ast_builder; + + macro_rules! test_parse { + ($input:tt = {$($def:tt)*}) => { + assert_eq!( + Parser::new().run($input), + ast_builder! { $($def)* } + ) + }; + } + + #[test] + fn test_expressions() { + test_parse! {"a" = {a}}; + test_parse! {"a b" = {a b}}; + test_parse! 
{"a b c" = {[a b] c}}; + } +} + + + +// ================== +// === Benchmarks === +// ================== + +#[cfg(test)] +mod benches { + use super::*; + extern crate test; + use test::Bencher; + + #[bench] + fn bench_parsing_type_defs(bencher: &mut Bencher) { + let reps = 1_000; + let str = "type Option a b c\n".repeat(reps); + let parser = Parser::new(); + bencher.iter(move || { + parser.run(&str); + }); + } } -// -// -// -// // ============= -// // === Tests === -// // ============= -// -// #[cfg(test)] -// mod test { -// use super::*; -// -// pub fn ident(repr: &str) -> syntax::Tree { -// match token::Variant::to_ident_unchecked(repr) { -// token::Variant::Ident(ident) => span::With::new_no_left_offset_no_start( -// Bytes::from(repr.len()), -// syntax::tree::Type::from(syntax::tree::Ident(ident)), -// ), -// _ => panic!(), -// } -// } -// -// pub fn app_segment( -// header: Token, -// body: Option, -// ) -> syntax::tree::MultiSegmentAppSegment { -// syntax::tree::MultiSegmentAppSegment { header, body } -// } -// } -// -// -// -// #[cfg(test)] -// mod tests { -// use super::*; -// use enso_parser_syntax_tree_builder::ast_builder; -// -// fn one_shot(input: &str) -> syntax::Tree { -// let mut lexer = Lexer::new(input); -// lexer.run(); -// let root_macro_map = builtin_macros(); -// let resolver = Resolver::new_root(); -// let ast = resolver.run( -// &lexer, -// &root_macro_map, -// lexer.output.borrow_vec().iter().map(|t| (*t).into()).collect_vec(), -// ); -// ast -// } -// -// macro_rules! test_parse { -// ($input:tt = {$($def:tt)*}) => { -// assert_eq!( -// one_shot($input).with_removed_span_info(), -// ast_builder! 
{ $($def)* }.with_removed_span_info() -// ) -// }; -// } -// -// #[test] -// fn test_expressions() { -// test_parse!("if a then b" = { {if} a {then} b }); -// test_parse!("if a then b else c" = { {if} a {then} b {else} c }); -// test_parse!("if a b then c d else e f" = { {if} a b {then} c d {else} e f }); -// } -// } diff --git a/lib/rust/parser/src/source/code.rs b/lib/rust/parser/src/source/code.rs index 2772e45092..b3530c7534 100644 --- a/lib/rust/parser/src/source/code.rs +++ b/lib/rust/parser/src/source/code.rs @@ -64,3 +64,17 @@ impl std::borrow::Borrow for Code<'_> { &self.repr } } + +impl<'s> std::ops::AddAssign> for Code<'s> { + #[inline(always)] + fn add_assign(&mut self, other: Code<'s>) { + self.repr.add_assign(other.repr); + } +} + +impl<'s> std::ops::AddAssign<&Code<'s>> for Code<'s> { + #[inline(always)] + fn add_assign(&mut self, other: &Code<'s>) { + self.repr.add_assign(other.repr.clone()); + } +} diff --git a/lib/rust/parser/src/source/span.rs b/lib/rust/parser/src/source/span.rs index 756b69eba2..5228b7aa4f 100644 --- a/lib/rust/parser/src/source/span.rs +++ b/lib/rust/parser/src/source/span.rs @@ -79,6 +79,16 @@ impl<'s> Offset<'s> { pub fn len(&self) -> Bytes { self.code.len() } + + /// Check if the offset is 0. + pub fn is_empty(&self) -> bool { + self.len() == Bytes(0) + } + + /// Check if the offset is bigger than 0. + pub fn exists(&self) -> bool { + self.len() > Bytes(0) + } } impl<'s> AsRef> for Offset<'s> { @@ -94,17 +104,31 @@ impl<'s> From<&'s str> for Offset<'s> { } } +impl<'s> std::ops::AddAssign> for Offset<'s> { + fn add_assign(&mut self, other: Offset<'s>) { + self.visible += other.visible; + self.code += other.code; + } +} + +impl<'s> std::ops::AddAssign<&Offset<'s>> for Offset<'s> { + fn add_assign(&mut self, other: &Offset<'s>) { + self.visible += other.visible; + self.code += &other.code; + } +} + // ============ // === Span === // ============ -/// A span of a given syntactic element (token or AST). 
It contains the left offset code and the -/// information about the length of the element. It does not contain the code of the element. This -/// is done in order to not duplicate the data. For example, some AST nodes contain a lot of tokens. -/// They need to remember their span, but they do not need to remember their code, because it is -/// already stored in the tokens. +/// A span of a given syntactic element (token or AST). It is a monoid that contains the left offset +/// code and the information about the length of the element. It does not contain the code of the +/// element. This is done in order to not duplicate the data. For example, some AST nodes contain a +/// lot of tokens. They need to remember their span, but they do not need to remember their code, +/// because it is already stored in the tokens. #[derive(Clone, Debug, Default, Eq, PartialEq)] #[allow(missing_docs)] pub struct Span<'s> { @@ -114,30 +138,30 @@ pub struct Span<'s> { } impl<'s> Span<'s> { - /// Extend the span with another one. The other span has to be the immediate neighbor of the - /// current span. - #[inline(always)] - pub fn extend<'a, T>(&mut self, other: T) - where - T: Into>, - 's: 'a, { - let other = other.into(); - self.code_length += other.left_offset.len() + other.code_length; + /// Constructor. + pub fn new() -> Self { + default() + } + /// Check whether the span is empty. + pub fn is_empty(&self) -> bool { + self.left_offset.is_empty() && self.code_length.is_zero() } - /// Self consuming version of [`extend`]. - pub fn extended<'a, T>(mut self, other: T) -> Self - where - T: Into>, - 's: 'a, { - self.extend(other); - self + /// Check whether the span is only an offset, without the code part. + pub fn is_only_offset(&self) -> bool { + self.code_length.is_zero() } /// Get the [`Ref`] of the current span. pub fn as_ref(&self) -> Ref<'_, 's> { Ref { left_offset: &self.left_offset, code_length: self.code_length } } + + /// Add the item to this span. 
The item can be anything that implements the span [`Builder`]. + #[allow(clippy::should_implement_trait)] + pub fn add>(self, elem: &mut T) -> Self { + Builder::add_to_span(elem, self) + } } impl<'s> AsRef> for Span<'s> { @@ -146,6 +170,22 @@ impl<'s> AsRef> for Span<'s> { } } +impl<'s, 'a, T> PartialSemigroup for Span<'s> +where + T: Into>, + 's: 'a, +{ + fn concat_mut(&mut self, other: T) { + let other = other.into(); + if self.code_length.is_zero() { + self.left_offset += other.left_offset; + self.code_length = other.code_length; + } else { + self.code_length += other.left_offset.len() + other.code_length; + } + } +} + // =========== @@ -233,254 +273,98 @@ impl<'s> FirstChildTrim<'s> for Span<'s> { #[macro_export] macro_rules! span_builder { ($($arg:ident),* $(,)?) => { - $crate::source::span::Builder::new() $(.add(&mut $arg))* .span + $crate::source::span::Span::new() $(.add(&mut $arg))* }; } -/// A marker struct for span building. The [`T`] parameter can be one of: -/// - [`()`], which means that the structure was not used yet. -/// - [`Option>`], which means that the struct was used to build the span, however, we are -/// unsure whether the span is known in all the cases. -/// - [`Span<'s>`], which means that the total span can be always computed for the provided -/// parameters. -#[derive(Default, Debug)] + +/// Elements implementing this trait can contain a span or multiple spans. If an element is added to +/// an empty span, it means that it is the first element in the span group. In such a case, the left +/// offset of the element will be removed and moved to the resulting span. See the docs of +/// [`FirstChildTrim`] to learn more. #[allow(missing_docs)] -pub struct Builder { - pub span: T, -} - -/// Constructor. -#[allow(non_snake_case)] -pub fn Builder(span: T) -> Builder { - Builder { span } -} - -impl Builder<()> { - /// Constructor. - pub fn new() -> Self { - default() - } -} - -impl Builder { - /// Add a new span to the builder. 
- #[inline(always)] - #[allow(clippy::should_implement_trait)] - pub fn add(self, elem: &mut S) -> Builder - where S: Build { - Builder(elem.build(self)) - } -} - -/// A trait defining the behavior of [`Builder`] for different types containing spans. -/// -/// The trait definition is a little bit strange, consuming the builder as a parameter instead of -/// consuming it as self. This is done because otherwise Rust type checker goes into infinite -/// loops. -#[allow(missing_docs)] -pub trait Build { - type Output; - fn build(&mut self, builder: Builder) -> Self::Output; +pub trait Builder<'s> { + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s>; } // === Instances === -impl<'s> Build<()> for Span<'s> { - type Output = Span<'s>; +impl<'s> Builder<'s> for Span<'s> { #[inline(always)] - fn build(&mut self, _builder: Builder<()>) -> Self::Output { - self.trim_as_first_child() - } -} - -impl<'s> Build> for Span<'s> { - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>) -> Self::Output { - builder.span.extended(&*self) - } -} - -impl<'s> Build>> for Span<'s> { - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>>) -> Self::Output { - match builder.span { - Some(span) => span.extended(&*self), - None => self.trim_as_first_child(), + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + if span.is_only_offset() { + span.concat(&self.trim_as_first_child()) + } else { + span.concat(&*self) } } } -impl<'s> Build<()> for Tree<'s> { - type Output = Span<'s>; +impl<'s> Builder<'s> for Tree<'s> { #[inline(always)] - fn build(&mut self, builder: Builder<()>) -> Self::Output { - Build::build(&mut self.span, builder) + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + Builder::add_to_span(&mut self.span, span) } } -impl<'s> Build> for Tree<'s> { - type Output = Span<'s>; +impl<'s, T> Builder<'s> for Token<'s, T> { #[inline(always)] - fn build(&mut self, builder: Builder>) -> Self::Output { - 
builder.span.extended(&self.span) - } -} - -impl<'s> Build>> for Tree<'s> { - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>>) -> Self::Output { - Build::build(&mut self.span, builder) - } -} - -impl<'s, T> Build<()> for Token<'s, T> { - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, _builder: Builder<()>) -> Self::Output { - self.trim_as_first_child() - } -} - -impl<'s, T> Build> for Token<'s, T> { - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>) -> Self::Output { - builder.span.extended(self.span()) - } -} - -impl<'s, T> Build>> for Token<'s, T> { - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>>) -> Self::Output { - match builder.span { - Some(span) => span.extended(self.span()), - None => self.trim_as_first_child(), + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + if span.is_only_offset() { + span.concat(&self.trim_as_first_child()) + } else { + span.concat(self.span()) } } } -impl Build<()> for Option -where T: Build<()> +impl<'s, T> Builder<'s> for Option +where T: Builder<'s> { - type Output = Option<>::Output>; #[inline(always)] - fn build(&mut self, builder: Builder<()>) -> Self::Output { - self.as_mut().map(|t| Build::build(t, builder)) + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + self.as_mut().map(|t| Builder::add_to_span(t, span)).unwrap_or_default() } } -impl<'s, T> Build>> for Option -where T: Build>> -{ - type Output = Option<>>>::Output>; - #[inline(always)] - fn build(&mut self, builder: Builder>>) -> Self::Output { - self.as_mut().map(|t| Build::build(t, builder)) - } -} - -impl<'s, T> Build> for Option -where T: Build, Output = Span<'s>> -{ - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>) -> Self::Output { - match self.as_mut() { - None => builder.span, - Some(t) => Build::build(t, builder), - } - } -} - -impl Build for Result +impl<'s, T, E> Builder<'s> 
for Result where - T: Build, - E: Build>::Output>, + T: Builder<'s>, + E: Builder<'s>, { - type Output = >::Output; #[inline(always)] - fn build(&mut self, builder: Builder) -> Self::Output { + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { match self { - Ok(t) => Build::build(t, builder), - Err(t) => Build::build(t, builder), + Ok(t) => Builder::add_to_span(t, span), + Err(t) => Builder::add_to_span(t, span), } } } -impl Build for NonEmptyVec -where - T: Build, - [T]: Build<>::Output>, +impl<'s, T> Builder<'s> for NonEmptyVec +where T: Builder<'s> { - type Output = <[T] as Build>::Output; #[inline(always)] - fn build(&mut self, builder: Builder) -> Self::Output { - let b = Build::build(self.first_mut(), builder); - Build::build(self.tail_mut(), Builder(b)) + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + self.into_iter().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum)) } } -impl<'s, T> Build> for Vec -where T: Build, Output = Span<'s>> +impl<'s, T> Builder<'s> for Vec +where T: Builder<'s> { - type Output = Span<'s>; #[inline(always)] - fn build(&mut self, builder: Builder>) -> Self::Output { - let mut out = builder.span; - for elem in self { - out = Build::build(elem, Builder(out)) - } - out + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum)) } } -impl<'s, T> Build>> for Vec -where - T: Build>>, - T::Output: Into>>, +impl<'s, T> Builder<'s> for [T] +where T: Builder<'s> { - type Output = Option>; #[inline(always)] - fn build(&mut self, builder: Builder>>) -> Self::Output { - let mut out = builder.span; - for elem in self { - out = Build::build(elem, Builder(out)).into(); - } - out - } -} - -impl<'s, T> Build> for [T] -where T: Build, Output = Span<'s>> -{ - type Output = Span<'s>; - #[inline(always)] - fn build(&mut self, builder: Builder>) -> Self::Output { - let mut out = builder.span; - for elem in self { - out = Build::build(elem, 
Builder(out)); - } - out - } -} - -impl<'s, T> Build>> for [T] -where - T: Build>>, - T::Output: Into>>, -{ - type Output = Option>; - #[inline(always)] - fn build(&mut self, builder: Builder>>) -> Self::Output { - let mut out = builder.span; - for elem in self { - out = Build::build(elem, Builder(out)).into(); - } - out + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum)) } } diff --git a/lib/rust/parser/src/syntax.rs b/lib/rust/parser/src/syntax.rs index 076f22d313..aee1b6ecff 100644 --- a/lib/rust/parser/src/syntax.rs +++ b/lib/rust/parser/src/syntax.rs @@ -7,6 +7,7 @@ // ============== pub mod item; +pub mod operator; pub mod token; pub mod tree; diff --git a/lib/rust/parser/src/syntax/item.rs b/lib/rust/parser/src/syntax/item.rs index 86ac309f61..8f305ff77f 100644 --- a/lib/rust/parser/src/syntax/item.rs +++ b/lib/rust/parser/src/syntax/item.rs @@ -18,12 +18,13 @@ use crate::syntax::*; #[allow(missing_docs)] pub enum Item<'s> { Token(Token<'s>), + Block(Vec>), Tree(Tree<'s>), } impl<'s> Item<'s> { - /// Check whether the element is the provided token variant. Returns [`false`] if it was an - /// [`Tree`] node. + /// Check whether the element is the provided token variant. Returns [`false`] if it was not a + /// token. pub fn is_variant(&self, variant: token::variant::VariantMarker) -> bool { match self { Item::Token(token) => token.is(variant), @@ -32,20 +33,23 @@ impl<'s> Item<'s> { } /// [`location::Span`] of the element. 
- pub fn span(&self) -> span::Ref<'_, 's> { + pub fn left_visible_offset(&self) -> VisibleOffset { match self { - Self::Token(t) => t.span(), - Self::Tree(t) => t.span.as_ref(), + Self::Token(t) => t.span().left_offset.visible, + Self::Tree(t) => t.span.left_offset.visible, + Self::Block(t) => t.first().map(|t| t.left_visible_offset()).unwrap_or_default(), } } -} -impl<'s> FirstChildTrim<'s> for Item<'s> { - #[inline(always)] - fn trim_as_first_child(&mut self) -> Span<'s> { + /// Convert this item to a [`Tree`]. + pub fn to_ast(self) -> Tree<'s> { match self { - Self::Token(t) => t.trim_as_first_child(), - Self::Tree(t) => t.span.trim_as_first_child(), + Item::Token(token) => match token.variant { + token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)), + _ => todo!(), + }, + Item::Tree(ast) => ast, + Item::Block(_) => todo!(), } } } @@ -81,3 +85,35 @@ pub enum Ref<'s, 'a> { Token(token::Ref<'s, 'a>), Tree(&'a Tree<'s>), } + + + +// ====================== +// === Variant Checks === +// ====================== + +/// For each token variant, generates a function checking if the token is of the given variant. For +/// example, the `is_ident` function checks if the token is an identifier. +macro_rules! generate_variant_checks { + ( + $(#$enum_meta:tt)* + pub enum $enum:ident { + $( + $(#$variant_meta:tt)* + $variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })? + ),* $(,)? + } + ) => { paste!{ + impl<'s> Item<'s> { + $( + $(#[$($variant_meta)*])* + #[allow(missing_docs)] + pub fn [](&self) -> bool { + self.is_variant(token::variant::VariantMarker::$variant) + } + )* + } + }}; +} + +crate::with_token_definition!(generate_variant_checks()); diff --git a/lib/rust/parser/src/syntax/operator.rs b/lib/rust/parser/src/syntax/operator.rs new file mode 100644 index 0000000000..b41001ad9f --- /dev/null +++ b/lib/rust/parser/src/syntax/operator.rs @@ -0,0 +1,172 @@ +//! Operator related functionalities. 
+ +use crate::prelude::*; + +use crate::syntax; +use crate::syntax::token; +use crate::syntax::token::Token; + + + +// ================== +// === Precedence === +// ================== + +// FIXME: The current implementation hard-codes precedence values and does not support precedence +// computations for any operator (according to the spec) +fn precedence_of(operator: &str) -> usize { + match operator { + "+" => 3, + "-" => 3, + "*" => 7, + _ => panic!("Operator not supported: {}", operator), + } +} + +/// An item with an assigned precedence. +#[derive(Clone, Copy, Debug, Deref, DerefMut)] +struct WithPrecedence { + #[deref] + #[deref_mut] + elem: T, + precedence: usize, +} + +impl WithPrecedence { + /// Constructor. + pub fn new(precedence: usize, elem: T) -> Self { + Self { elem, precedence } + } +} + + +/// Annotate expressions that should use spacing, because otherwise they are misleading. For +/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion +/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is +/// not needed. For example, `(.x)` is parsed as `(\t -> t.x)`, which is understandable. +fn annotate_tokens_that_need_spacing(items: Vec) -> Vec { + // TODO: It should be possible to make it faster by iterating over mut vec. To be checked. + items + .into_iter() + .map(|item| match item { + syntax::Item::Block(_) => item, + syntax::Item::Token(_) => item, + syntax::Item::Tree(ast) => syntax::Item::Tree(match &*ast.variant { + syntax::tree::Variant::MultiSegmentApp(data) + if !data.segments.first().header.is_symbol() => + ast.with_error("This expression cannot be used in a non-spaced equation."), + _ => ast, + }), + }) + .collect() +} + +/// Take [`Item`] stream, resolve operators precedence and return the final AST. The precedence +/// resolution algorithm bases on the [Shunting yard algorithm](https://en.wikipedia.org/wiki/Shunting_yard_algorithm). 
+/// It is extended to handle operator sections. +#[inline(always)] +pub fn resolve_operator_precedence<'s>(items: Vec>) -> syntax::Tree<'s> { + type Tokens<'s> = Vec>; + let mut flattened: Tokens<'s> = default(); + let mut no_space_group: Tokens<'s> = default(); + let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| { + let tokens = mem::take(no_space_group); + if tokens.len() == 1 { + flattened.extend(tokens); + } else { + let tokens = annotate_tokens_that_need_spacing(tokens); + let ast = resolve_operator_precedence_internal(tokens); + flattened.push(ast.into()); + } + }; + for item in items { + if item.left_visible_offset().width_in_spaces == 0 || no_space_group.is_empty() { + no_space_group.push(item) + } else if !no_space_group.is_empty() { + processs_no_space_group(&mut flattened, &mut no_space_group); + no_space_group.push(item); + } else { + // FIXME: this is unreachable. + flattened.push(item); + } + } + if !no_space_group.is_empty() { + processs_no_space_group(&mut flattened, &mut no_space_group); + } + resolve_operator_precedence_internal(flattened) +} + +fn resolve_operator_precedence_internal(items: Vec>) -> syntax::Tree<'_> { + // Reverse-polish notation encoding. + let mut was_section_used = false; + let mut output: Vec = default(); + let mut operator_stack: Vec> = default(); + let mut last_token_was_ast = false; + let mut last_token_was_opr = false; + for item in items { + if let syntax::Item::Token(token) = item.clone() + && let token::Variant::Operator(opr) = token.variant { + // Item is an operator. + let last_token_was_opr_copy = last_token_was_opr; + last_token_was_ast = false; + last_token_was_opr = true; + + let prec = precedence_of(&token.code); + let opr = Token(token.left_offset, token.code, opr); + + if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() { + // Error. Multiple operators next to each other. 
+ match &mut prev_opr.elem { + Err(err) => err.operators.push(opr), + Ok(prev) => { + let operators = NonEmptyVec::new(prev.clone(),vec![opr]); + prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators}); + } + } + } else { + while let Some(prev_opr) = operator_stack.last() + && prev_opr.precedence >= prec + && let Some(prev_opr) = operator_stack.pop() + && let Some(rhs) = output.pop() + { + // Prev operator in the [`operator_stack`] has a higher precedence. + let lhs = output.pop().map(|t| t.to_ast()); + if lhs.is_none() { was_section_used = true; } + let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(rhs.to_ast())); + output.push(ast.into()); + } + operator_stack.push(WithPrecedence::new(prec, Ok(opr))); + } + } else if last_token_was_ast && let Some(lhs) = output.pop() { + // Multiple non-operators next to each other. + let lhs = lhs.to_ast(); + let rhs = item.to_ast(); + let ast = syntax::Tree::app(lhs, rhs); + output.push(ast.into()); + } else { + // Non-operator that follows previously consumed operator. + last_token_was_ast = true; + last_token_was_opr = false; + output.push(item); + } + } + let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(|t| t.to_ast())); + while let Some(opr) = operator_stack.pop() { + let opt_lhs = output.pop().map(|t| t.to_ast()); + if opt_lhs.is_none() || opt_rhs.is_none() { + was_section_used = true; + } + opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs)); + } + if !output.is_empty() { + panic!("Internal error. Not all tokens were consumed while constructing the expression."); + } + + // FIXME + let out = opt_rhs.unwrap(); + if was_section_used { + syntax::Tree::opr_section_boundary(out) + } else { + out + } +} diff --git a/lib/rust/parser/src/syntax/tree.rs b/lib/rust/parser/src/syntax/tree.rs index fbfe276273..22ef4c1b41 100644 --- a/lib/rust/parser/src/syntax/tree.rs +++ b/lib/rust/parser/src/syntax/tree.rs @@ -64,6 +64,9 @@ macro_rules! 
with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) pub error: Error, pub ast: Tree<'s>, }, + Module { + pub statements: Vec>, + }, /// A simple identifier, like `foo` or `bar`. Ident { pub token: token::Ident<'s>, @@ -97,8 +100,13 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// `x + y + z` is the section body, and `Vector x y z` is the prefix of this function /// application. MultiSegmentApp { - pub prefix: Option>, pub segments: NonEmptyVec>, + }, + + TypeDef { + pub keyword: Token<'s>, + pub name: Tree<'s>, + pub params: Vec>, } } }};} @@ -158,10 +166,9 @@ impl<'s> Tree<'s> { } } -impl span::Build for Error { - type Output = S; - fn build(&mut self, builder: span::Builder) -> Self::Output { - builder.span +impl<'s> span::Builder<'s> for Error { + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + span } } @@ -178,12 +185,9 @@ pub struct MultipleOperatorError<'s> { pub operators: NonEmptyVec>, } -impl<'s, S> span::Build for MultipleOperatorError<'s> -where NonEmptyVec>: span::Build -{ - type Output = > as span::Build>::Output; - fn build(&mut self, builder: span::Builder) -> Self::Output { - self.operators.build(builder) +impl<'s> span::Builder<'s> for MultipleOperatorError<'s> { + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + self.operators.add_to_span(span) } } @@ -198,12 +202,9 @@ pub struct MultiSegmentAppSegment<'s> { pub body: Option>, } -impl<'s, S> span::Build for MultiSegmentAppSegment<'s> -where Token<'s>: span::Build> -{ - type Output = Span<'s>; - fn build(&mut self, builder: span::Builder) -> Self::Output { - builder.add(&mut self.header).add(&mut self.body).span +impl<'s> span::Builder<'s> for MultiSegmentAppSegment<'s> { + fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> { + span.add(&mut self.header).add(&mut self.body) } } diff --git a/lib/rust/parser/src/syntax/tree/builder/src/lib.rs b/lib/rust/parser/src/syntax/tree/builder/src/lib.rs index 7627226555..e4e3352466 
100644 --- a/lib/rust/parser/src/syntax/tree/builder/src/lib.rs +++ b/lib/rust/parser/src/syntax/tree/builder/src/lib.rs @@ -1,5 +1,7 @@ //! Definition of a macro allowing building mock AST structures, mostly useful for testing. +// === Features === +#![feature(proc_macro_span)] // === Standard Linter Configuration === #![deny(non_ascii_idents)] #![warn(unsafe_code)] @@ -36,8 +38,8 @@ use std::mem; /// braces. You can also place segments in quotes, like `{"("} a {")"}`. #[proc_macro] pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { - let output = expr(tokens); - let output = quote!(syntax::Tree::opr_section_boundary(#output)); + let output = expr(tokens, None); + let output = quote!(crate::syntax::Tree::module(vec![#output])); output.into() } @@ -54,12 +56,13 @@ impl Segment { } } -fn expr(tokens: proc_macro::TokenStream) -> TokenStream { +fn expr(tokens: proc_macro::TokenStream, parent_spacing: Option) -> TokenStream { use proc_macro::TokenTree::*; let mut output = quote! {}; let mut prefix: Option = None; let mut segments: Vec = vec![]; let mut current_segment: Option = None; + let mut last_column: Option = None; let app_to_output = |output: &mut TokenStream, tok| { if output.is_empty() { *output = tok; @@ -67,12 +70,21 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream { *output = quote! {syntax::Tree::app(#output,#tok)}; } }; + let mut inherited_spacing = parent_spacing.unwrap_or(0); for token in tokens { - match token { + let spacing = last_column.map(|t| token.span().start().column - t).unwrap_or(0); + let spacing = spacing + inherited_spacing; + inherited_spacing = 0; + last_column = Some(token.span().end().column); + match &token { // a b c ... Ident(ident) => { let ident = ident.to_string(); - app_to_output(&mut output, quote! {test::ident(#ident)}); + let spacing = " ".repeat(spacing); + app_to_output( + &mut output, + quote! 
{crate::syntax::Tree::ident(crate::syntax::Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident)))}, + ); } // {if} a {then} b {else} c // {"("} a {")"} @@ -83,12 +95,15 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream { } else if !output.is_empty() { prefix = Some(mem::take(&mut output)); } - let body = group.stream().to_string(); - current_segment = Some(Segment::new(quote! {Token::ident(#body)})); // Token::symbol + let ident = group.stream().to_string(); + let spacing = " ".repeat(spacing); + current_segment = Some(Segment::new( + quote! { Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident).into())}, + )); // Token::symbol } // a [b c] d Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => { - app_to_output(&mut output, expr(group.stream())); + app_to_output(&mut output, expr(group.stream(), Some(spacing))); } _ => panic!("Unsupported token {:?}", token), } @@ -114,10 +129,7 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream { .unwrap_or_else(|| quote! {None}); let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()}; output = quote! { - span::With::new_no_left_offset_no_start( - Bytes::from(0), - syntax::tree::Type::MultiSegmentApp(Box::new(syntax::tree::MultiSegmentApp {prefix: #pfx, segments: #segments})) - ) + syntax::Tree::multi_segment_app (#pfx, #segments) } } output diff --git a/lib/rust/prelude/src/data/non_empty_vec.rs b/lib/rust/prelude/src/data/non_empty_vec.rs index 47e8569b69..d8f1449868 100644 --- a/lib/rust/prelude/src/data/non_empty_vec.rs +++ b/lib/rust/prelude/src/data/non_empty_vec.rs @@ -49,6 +49,12 @@ impl NonEmptyVec { NonEmptyVec { elems } } + /// Length of the vector. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.elems.len() + } + /// Construct a `NonEmptyVec` containing a single element. 
/// /// # Examples @@ -207,7 +213,7 @@ impl NonEmptyVec { /// assert_eq!(*vec.first(), 0); /// ``` pub fn first(&self) -> &T { - self.elems.first().expect("The NonEmptyVec always has an item in it.") + self.elems.first().unwrap_or_else(|| unreachable!()) } /// Obtain a mutable reference to the head of the `NonEmptyVec`. @@ -220,7 +226,7 @@ impl NonEmptyVec { /// assert_eq!(*vec.first_mut(), 0); /// ``` pub fn first_mut(&mut self) -> &mut T { - self.elems.first_mut().expect("The NonEmptyVec always has an item in it.") + self.elems.first_mut().unwrap_or_else(|| unreachable!()) } /// Get the tail reference. @@ -243,7 +249,7 @@ impl NonEmptyVec { /// assert_eq!(*vec.last(), 2) /// ``` pub fn last(&self) -> &T { - self.get(self.len() - 1).expect("There is always one element in a NonEmptyVec.") + self.get(self.len() - 1).unwrap_or_else(|| unreachable!()) } /// Obtain a mutable reference to the last element in the `NonEmptyVec`. @@ -256,7 +262,7 @@ impl NonEmptyVec { /// assert_eq!(*vec.last_mut(), 2) /// ``` pub fn last_mut(&mut self) -> &mut T { - self.get_mut(self.len() - 1).expect("There is always one element in a NonEmptyVec.") + self.get_mut(self.len() - 1).unwrap_or_else(|| unreachable!()) } /// Create a draining iterator that removes the specified range in the vector and yields the diff --git a/lib/rust/prelude/src/data/semigroup.rs b/lib/rust/prelude/src/data/semigroup.rs index 12d723b121..788f76f01b 100644 --- a/lib/rust/prelude/src/data/semigroup.rs +++ b/lib/rust/prelude/src/data/semigroup.rs @@ -17,7 +17,7 @@ use std::iter::Extend; /// Mutable Semigroup definition. Impls should satisfy the associativity law: /// `x.concat(y.concat(z)) = x.concat(y).concat(z)`, in symbolic form: /// `x <> (y <> z) = (x <> y) <> z` -pub trait PartialSemigroup: Clone { +pub trait PartialSemigroup: Clone { /// An associative operation. fn concat_mut(&mut self, other: T);