Parser improvements (#3549)

This commit is contained in:
Wojciech Daniło 2022-07-01 05:42:29 +02:00 committed by GitHub
parent 43a893cae6
commit 7c0330290f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 1709 additions and 1057 deletions

View File

@ -81,7 +81,7 @@ impl<T> NonEmpty<T> {
}
/// Convert this list to a vector.
fn to_vec(&self) -> Vec<&T> {
pub fn to_vec(&self) -> Vec<&T> {
let mut out = vec![&self.head];
let mut list = self.tail();
loop {

View File

@ -731,8 +731,15 @@ const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[
];
impl<'s> Lexer<'s> {
/// Run the lexer. Returns [`true`] if the process succeeded.
pub fn run(&mut self) -> bool {
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
/// hierarchy).
pub fn run(self) -> Vec<Item<'s>> {
build_block_hierarchy(self.run_flat())
}
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run_flat(mut self) -> Vec<Token<'s>> {
self.spaces_after_lexeme();
let mut any_parser_matched = true;
while any_parser_matched {
@ -744,10 +751,53 @@ impl<'s> Lexer<'s> {
}
}
}
self.current_char == None
if self.current_char != None {
panic!("Internal error. Lexer did not consume all input.");
}
while self.end_block().is_some() {
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
}
let tokens = self.output;
event!(TRACE, "Tokens:\n{:#?}", tokens);
tokens
}
}
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run_flat(input: &'_ str) -> Vec<Token<'_>> {
Lexer::new(input).run_flat()
}
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
/// hierarchy).
pub fn run(input: &'_ str) -> Vec<Item<'_>> {
Lexer::new(input).run()
}
/// Convert the flat token stream into hierarchical one. The token variants [`BlockStart`] and
/// [`BlockEnd`] will be replaced with [`Item::Group`].
pub fn build_block_hierarchy(tokens: Vec<Token<'_>>) -> Vec<Item<'_>> {
let mut stack = vec![];
let mut out: Vec<Item<'_>> = vec![];
for token in tokens {
match token.variant {
token::Variant::BlockStart(_) => stack.push(mem::take(&mut out)),
token::Variant::BlockEnd(_) => {
let new_out = stack.pop().unwrap();
let block = mem::replace(&mut out, new_out);
out.push(Item::Block(block));
}
_ => out.push(token.into()),
}
}
if !stack.is_empty() {
panic!("Internal error. Block start token not paired with block end token.");
}
out
}
// =============
@ -756,9 +806,7 @@ impl<'s> Lexer<'s> {
/// Lexer main function used for ad-hoc testing during development.
pub fn main() {
let mut lexer = Lexer::new("\n foo\n bar");
println!("{:?}", lexer.run());
println!("{:#?}", lexer.output.iter().collect_vec());
println!("{:#?}", run_flat("\n foo\n bar"));
}
/// Test utils for fast mock tokens creation.
@ -797,9 +845,7 @@ mod tests {
}
fn test_lexer<'s>(input: &'s str, expected: Vec<Token<'s>>) {
let mut lexer = Lexer::new(input);
assert!(lexer.run());
assert_eq!(lexer.output.iter().collect_vec(), expected);
assert_eq!(run_flat(input), expected);
}
fn lexer_case_idents<'s>(idents: &[&'s str]) -> Vec<(&'s str, Vec<Token<'s>>)> {
@ -828,7 +874,7 @@ mod tests {
ident_(" ", "foo"),
newline_("", "\n"),
ident_(" ", "bar"),
// FIXME: here should be block end
block_end_("", ""),
]),
]);
}
@ -1081,10 +1127,8 @@ mod benches {
let str = "test ".repeat(reps);
b.iter(move || {
let mut lexer = Lexer::new(&str);
let ok = lexer.run();
assert!(ok);
assert_eq!(lexer.output.len(), reps);
let lexer = Lexer::new(&str);
assert_eq!(lexer.run().len(), reps);
});
}
}

View File

@ -3,22 +3,23 @@
//! utilities allowing macros management.
//! Read the docs of the main module of this crate to learn more about the parsing process.
//
use crate::prelude::*;
use crate::syntax;
use crate::syntax::token::Token;
use enso_data_structures::im_list;
use pattern::Pattern;
// ==============
// === Export ===
// ==============
pub mod built_in;
pub mod expand;
pub mod pattern;
pub mod resolver;
pub use pattern::Pattern;
@ -38,21 +39,13 @@ pub mod pattern;
#[derivative(Debug)]
#[allow(missing_docs)]
pub struct Definition<'a> {
/// The pattern in this field will be matched from right to left, unlike patterns in segments.
pub rev_prefix_pattern: Option<Pattern>,
pub segments: im_list::NonEmpty<SegmentDefinition<'a>>,
pub segments: im_list::NonEmpty<SegmentDefinition<'a>>,
#[derivative(Debug = "ignore")]
pub body: Rc<Body>,
pub body: Rc<DefinitionBody>,
}
/// All the tokens matched as prefix of the resolved macro.
pub type PrefixTokens<'s> = Option<Vec<syntax::Item<'s>>>;
/// All the sections of the resolved macro.
pub type MatchedSections<'s> = NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>;
/// A function that transforms matched macro tokens into [`syntax::Tree`].
pub type Body = dyn for<'s> Fn(PrefixTokens<'s>, MatchedSections<'s>) -> syntax::Tree<'s>;
pub type DefinitionBody = dyn for<'s> Fn(pattern::MatchedSegments<'s>) -> syntax::Tree<'s>;
@ -93,18 +86,29 @@ impl<'a> SegmentDefinition<'a> {
/// ```
#[macro_export]
macro_rules! macro_definition {
( ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
$crate::macro_definition!{[None] ($($section, $pattern),*) $body}
($def:tt) => {
$crate::macro_definition!{$def $crate::macros::matched_segments_into_multi_segment_app}
};
( ($prefix:expr, $($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
$crate::macro_definition!{[Some($prefix)] ($($section, $pattern),*) $body}
};
( [$prefix:expr] ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
macros::Definition {
rev_prefix_pattern: $prefix,
(($($section:literal, $pattern:expr),* $(,)?) $body:expr) => {
$crate::macros::Definition {
segments: im_list::NonEmpty::try_from(vec![
$(macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(),
$($crate::macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(),
body: Rc::new($body),
}
};
}
fn matched_segments_into_multi_segment_app(
matched_segments: NonEmptyVec<pattern::MatchedSegment<'_>>,
) -> syntax::Tree<'_> {
let segments = matched_segments.mapped(|segment| {
let header = segment.header;
let tokens = segment.result.tokens();
let body = (!tokens.is_empty())
.as_some_from(|| syntax::operator::resolve_operator_precedence(tokens));
syntax::tree::MultiSegmentAppSegment { header, body }
});
syntax::Tree::multi_segment_app(segments)
}

View File

@ -0,0 +1,84 @@
//! Built-in macro definitions.
use crate::macros::pattern::*;
use crate::macros::*;
use crate::syntax::operator;
// =======================
// === Built-in macros ===
// =======================
/// Produce the segment map holding every built-in macro definition.
pub fn all() -> resolver::SegmentMap<'static> {
    let mut registry = resolver::SegmentMap::default();
    // The `if`-family macros are not enabled yet:
    // registry.register(if_then());
    // registry.register(if_then_else());
    for definition in [group(), type_def()] {
        registry.register(definition);
    }
    registry
}
/// If-then-else macro definition (`if <cond> then <expr> else <expr>`). Every section consumes
/// everything up to the next section header.
pub fn if_then_else<'s>() -> Definition<'s> {
    crate::macro_definition! {("if", everything(), "then", everything(), "else", everything())}
}
/// If-then macro definition (`if <cond> then <expr>`). Every section consumes everything up to
/// the next section header.
pub fn if_then<'s>() -> Definition<'s> {
    crate::macro_definition! {("if", everything(), "then", everything())}
}
/// Group macro definition (`( <expr> )`). The body is captured by the opening-paren section; the
/// closing paren consumes nothing.
pub fn group<'s>() -> Definition<'s> {
    crate::macro_definition! {("(", everything(), ")", nothing())}
}
/// New type definition macro definition.
pub fn type_def<'s>() -> Definition<'s> {
    use pattern::*;
    // Pattern DSL reminder: `/ "label"` names a sub-match, `% "msg"` adds error recovery with the
    // given expectation message, and `>>` sequences patterns. See [`crate::macros::pattern`].
    #[rustfmt::skip]
    let pattern =
        identifier() / "name" % "type name" >>
        many(identifier() % "type parameter" / "param") % "type parameters" >>
        block(
            many(identifier() / "constructor") % "type constructors" >>
            everything()
        ) % "type definition body";
    // let pattern2 = Everything;
    crate::macro_definition! {
        ("type", pattern)
        type_def_body
    }
}
/// Build a type-definition tree from the single matched segment of the `type` macro.
///
/// The segment match is converted into a [`VarMap`] and queried for the variables declared by the
/// pattern in [`type_def`]: `name` (the type name) and the nested `param` entries (one per type
/// parameter). The commented-out debug prints previously kept here have been removed, as their
/// accompanying TODO requested.
fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
    // The `type` macro defines exactly one segment (see [`type_def`]), so the last element is
    // the only one.
    let segment = matched_segments.to_vec().pop().unwrap();
    let match_tree = segment.result.into_var_map();
    let mut v = match_tree.view();
    // The pattern guarantees a `name` variable; it captures a single token list.
    let name = &v.query("name").unwrap()[0];
    let name = operator::resolve_operator_precedence(name.clone());
    // Type parameters live one repetition scope deeper than the type name.
    let params = v.nested().query("param").unwrap();
    let params = params
        .iter()
        .map(|tokens| operator::resolve_operator_precedence(tokens.clone()))
        .collect_vec();
    syntax::Tree::type_def(segment.header, name, params)
}

View File

@ -0,0 +1,356 @@
//! Macro expansion utilities. Allow expanding macro variables in the same way as Rust macro rules do.
use crate::macros::pattern::*;
use crate::prelude::*;
use crate::syntax;
// ==============
// === VarMap ===
// ==============
/// A nested map of pattern variables (elements using the [`Pattern::Named`] variant). The validator
/// should be instantiated either with the [`EnabledValidator`] in case of user-defined
/// macros or with the [`DisabledValidator`] in case of built-in macros. The latter is
/// faster but does not provide nice error messages and allows for illegal code expansion, like
/// using two variables that have the same repetition depth, but have different parents (e.g. the
/// variables `$b` and `$e` from the example below).
///
/// To better understand how it works, let's consider the following pattern definition (using the
/// Rust macro rules syntax for simplicity):
///
/// ```text
/// $x:tt
/// $(
/// $a:tt
/// $(
/// $b:tt
/// $c:tt
/// )*
///
/// $d:tt
/// $(
/// $e:tt
/// $f:tt
/// )*
/// )*
/// ```
///
/// The following [`VarMap`] will be generated (some fields simplified for clarity):
///
/// ```text
/// VarMap {
/// map: [
/// ("x", VarMapEntry {
/// tokens: ["x"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["x"], parent: None
/// }}
/// }),
/// ],
/// nested: Some(VarMap {
/// map: [
/// ("a", VarMapEntry {
/// tokens: ["a"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["a","d"], parent: Some (VarScope {
/// locals: ["x"], parent: None
/// })
/// }}
/// }),
/// ("e", VarMapEntry {
/// tokens: ["e"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["a","d"], parent: Some (VarScope {
/// locals: ["x"], parent: None
/// })
/// }}
/// }),
/// ],
/// nested: Some(VarMap {
/// map: [
/// ("b", VarMapEntry {
/// tokens: ["b"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["b","c"], parent: Some (VarScope {
/// locals: ["a","d"], parent: Some (VarScope {
/// locals: ["x"], parent: None
/// })
/// })
/// }}
/// }),
/// ("c", VarMapEntry {
/// tokens: ["c"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["b","c"], parent: Some (VarScope {
/// locals: ["a","d"], parent: Some (VarScope {
/// locals: ["x"], parent: None
/// })
/// })
/// }}
/// }),
/// ("e", VarMapEntry {
/// tokens: ["e"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["e","f"], parent: Some (VarScope {
/// locals: ["a","d"], parent: Some (VarScope {
/// locals: ["x"], parent: None
/// })
/// })
/// }}
/// }),
/// ("f", VarMapEntry {
/// tokens: ["f"],
/// validator: EnabledValidator { scope: VarScope {
/// locals: ["e","f"], parent: Some (VarScope {
/// locals: ["a","d"], parent: Some (VarScope {
/// locals: ["x"], parent: None
/// })
/// })
/// }}
/// }),
/// ],
/// })
/// })
/// }
/// ```
///
/// Validators can be queried during code expansion to check whether these variables belong to
/// the same repetition scope.
#[derive(Clone, Debug, Default)]
pub struct VarMap<'s, V> {
    /// Variable map of the next (deeper) repetition scope, if any.
    nested: Option<Box<VarMap<'s, V>>>,
    /// Variables bound in the current repetition scope, keyed by variable name.
    map: HashMap<String, VarMapEntry<'s, V>>,
}
/// Entry of the [`VarMap`] map.
#[derive(Clone, Debug, Default)]
struct VarMapEntry<'s, V> {
    /// Token streams captured for this variable — one inner `Vec` per repetition iteration.
    pub tokens: Vec<Vec<syntax::Item<'s>>>,
    /// Validator identifying the repetition scope this variable belongs to.
    pub validator: V,
}

impl<'s, V> VarMapEntry<'s, V> {
    /// Constructor.
    pub fn new(validator: V, tokens: Vec<Vec<syntax::Item<'s>>>) -> Self {
        Self { validator, tokens }
    }
}
impl<'s> Match<'s> {
    /// Convert the match into checked [`VarMap`].
    pub fn into_var_map(self) -> VarMap<'s, EnabledValidator> {
        let mut tree = VarMap::default();
        self.build_var_map(&mut tree, &default());
        tree
    }

    /// Convert the match into unchecked [`VarMap`]. The unchecked version has better performance,
    /// but does not provide nice user error messages and allows for illegal code expansion. Read
    /// the docs of [`VarMap`] to learn more.
    pub fn into_unchecked_var_map(self) -> VarMap<'s, DisabledValidator> {
        let mut tree = VarMap::default();
        self.build_var_map(&mut tree, &default());
        tree
    }

    /// Recursively fill `tree` with the variables bound in this match. `validator` describes the
    /// repetition scope variables are inserted into at the current depth.
    fn build_var_map<V: Default + Validator>(self, tree: &mut VarMap<'s, V>, validator: &V) {
        match self {
            // Leaf matches bind no variables.
            Self::Everything(_) => {}
            Self::Nothing => {}
            Self::Identifier(_) => {}
            Self::Expected(_, _) => {}
            Self::NotBlock(_) => {}
            Self::Or(t) => match *t {
                OrMatch::First(first) => first.build_var_map(tree, validator),
                OrMatch::Second(second) => second.build_var_map(tree, validator),
            },
            Self::Seq(first, second) => {
                first.build_var_map(tree, validator);
                second.build_var_map(tree, validator);
            }
            Self::Many(matches) => {
                // Every `Many` introduces one nested repetition scope, shared by all iterations.
                if tree.nested.is_none() {
                    let nested = VarMap::<'s, V>::default();
                    tree.nested = Some(Box::new(nested));
                }
                let nested_validator = V::default();
                nested_validator.set_parent(validator);
                let nested = tree.nested.as_mut().unwrap();
                for m in matches {
                    m.build_var_map(nested, &nested_validator);
                }
            }
            Self::Named(name, t) => {
                validator.insert_local_var(&name);
                // Append this iteration's tokens to the variable's entry, creating the entry on
                // first use with the current scope's validator.
                tree.map
                    .entry(name)
                    .or_insert_with(|| VarMapEntry::new(validator.clone_ref(), default()))
                    .tokens
                    .push(t.tokens());
            }
        }
    }
}
// =================
// === Validator ===
// =================
/// Validator used to check if the macro generation is correct. See the definition of [`VarMap`] to
/// learn more.
#[allow(missing_docs)]
pub trait Validator: PartialEq + Default + CloneRef {
    /// Check whether `name` is a local variable of this scope.
    fn check(&self, name: &str) -> bool;
    /// The validator of the enclosing repetition scope, if any.
    fn parent(&self) -> Option<Self>;
    /// Set the enclosing repetition scope of this validator.
    fn set_parent(&self, parent: &Self);
    /// Register `var` as a local variable of this scope.
    fn insert_local_var(&self, var: &str);
}
/// Disabled validator. See the docs of [`VarMap`] to learn more.
#[derive(Copy, Clone, CloneRef, Debug, Default, PartialEq)]
pub struct DisabledValidator;

/// Enabled validator. See the docs of [`VarMap`] to learn more.
#[derive(Clone, CloneRef, Debug, Default)]
#[allow(missing_docs)]
pub struct EnabledValidator {
    // Shared via `Rc` so every clone of this validator observes later scope mutations.
    scope: Rc<RefCell<VarScope>>,
}

/// A single repetition scope: its local variable names plus a link to the enclosing scope.
#[derive(Clone, Debug, Default)]
struct VarScope {
    locals: HashSet<String>,
    parent: Option<EnabledValidator>,
}

impl PartialEq for EnabledValidator {
    // Equality is scope identity (pointer comparison), not structural equality of contents.
    fn eq(&self, other: &EnabledValidator) -> bool {
        Rc::ptr_eq(&self.scope, &other.scope)
    }
}
impl Validator for EnabledValidator {
#[inline(always)]
fn check(&self, name: &str) -> bool {
self.scope.borrow().locals.contains(name)
}
#[inline(always)]
fn parent(&self) -> Option<Self> {
self.scope.borrow().parent.as_ref().map(|t| t.clone_ref())
}
#[inline(always)]
fn set_parent(&self, parent: &Self) {
self.scope.borrow_mut().parent = Some(parent.clone_ref());
}
#[inline(always)]
fn insert_local_var(&self, var: &str) {
self.scope.borrow_mut().locals.insert(var.to_string());
}
}
impl Validator for DisabledValidator {
    // Always succeeds — scope checking is disabled for built-in macros.
    #[inline(always)]
    fn check(&self, _name: &str) -> bool {
        true
    }

    // No scope hierarchy is tracked.
    #[inline(always)]
    fn parent(&self) -> Option<Self> {
        None
    }

    // No-op — parent scopes are not recorded.
    #[inline(always)]
    fn set_parent(&self, _parent: &Self) {}

    // No-op — local variables are not recorded.
    #[inline(always)]
    fn insert_local_var(&self, _var: &str) {}
}
// ==================
// === VarMapView ===
// ==================
/// A view for a [`VarMap`]. It allows focusing on a specific repetition scope and querying for
/// variables there. See the docs of [`VarMap`] to learn more.
#[derive(Clone, Debug, Default)]
pub struct VarMapView<'t, 's, V> {
    /// The currently focused scope, or [`None`] if the view was narrowed past the deepest scope.
    tree: Option<&'t VarMap<'s, V>>,
    /// Validator of the first variable resolved in this scope; subsequent queries are checked
    /// against it.
    resolved_validator: Option<V>,
    /// Validator of the parent scope, verified lazily when the first query resolves a validator.
    parent_validator_to_check: Option<V>,
}
impl<'t, 's, V> VarMapView<'t, 's, V> {
    /// Create a view focused on the root repetition scope of `tree`.
    pub fn new(tree: &'t VarMap<'s, V>) -> Self {
        Self {
            tree: Some(tree),
            resolved_validator: default(),
            parent_validator_to_check: default(),
        }
    }
}
impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
    /// Get the view for the nested repetition scope. The current scope's resolved validator (if
    /// any) becomes the parent validator the nested view will check against.
    pub fn nested(&self) -> Self {
        Self {
            tree: self.tree.and_then(|t| t.nested.as_deref()),
            resolved_validator: None,
            parent_validator_to_check: self.resolved_validator.as_ref().map(|t| t.clone_ref()),
        }
    }
}
impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
    /// Query for a variable. Returns the captured token streams, or [`None`] if the variable is
    /// not defined in the focused scope.
    pub fn query(&mut self, name: &str) -> Option<&'t Vec<Vec<syntax::Item<'s>>>> {
        self.tree.and_then(|t| {
            t.map.get(name).map(|entry| {
                match &self.resolved_validator {
                    // A validator was already resolved by an earlier query in this view; the new
                    // variable must belong to the same scope.
                    Some(validator) =>
                        if !validator.check(name) {
                            todo!("Report nice error that the name does not belong to the scope.")
                        },
                    None => {
                        let resolved_validator = entry.validator.clone_ref();
                        // First successful query in this view: walk the parent chain of the
                        // resolved validator to confirm it descends from the parent scope's
                        // validator (recorded by [`Self::nested`]).
                        if let Some(parent_validator_to_check) = &self.parent_validator_to_check {
                            let mut ok = false;
                            let mut validator = resolved_validator.clone();
                            loop {
                                if &validator == parent_validator_to_check {
                                    ok = true;
                                    break;
                                } else {
                                    match validator.parent() {
                                        Some(p) => validator = p,
                                        None => break,
                                    }
                                }
                            }
                            if !ok {
                                todo!("Report nice error that the name does not belong to the same scope as previous variables.")
                            }
                            // Checked once per view; later queries use `resolved_validator`.
                            self.parent_validator_to_check = None;
                        }
                        self.resolved_validator = Some(resolved_validator);
                    }
                }
                &entry.tokens
            })
        })
    }
}
impl<'s, V> VarMap<'s, V> {
    /// Create a new view for this var map, focused on its root repetition scope. See
    /// [`VarMapView`] to learn more.
    pub fn view<'t>(&'t self) -> VarMapView<'t, 's, V> {
        VarMapView::new(self)
    }
}

View File

@ -5,121 +5,365 @@ use crate::prelude::*;
use crate::syntax;
use std::collections::VecDeque;
// ===============
// === Pattern ===
// ===============
/// Pattern used to validate incoming token stream against expected macro input.
/// Patterns are used to validate incoming token stream against expected macro input.
///
/// The idea is similar to patterns used in `macro_rules` definitions in Rust. There are a few
/// differences though:
/// 1. This pattern implementation exposes different matchers and operations.
/// 2. This macro implementation never attaches types to tokens, which means that every defined
/// The idea is similar to patterns used in macro rules in Rust with a few differences:
/// 1. These patterns allow for other constructs than macro rules.
/// 2. The macro resolution never reifies tokens as given types, which means that every defined
/// pattern behaves like a TT-muncher in Rust.
#[derive(Clone, Debug, Deref)]
#[allow(missing_docs)]
pub struct Pattern {
#[deref]
pub data: Rc<PatternData>,
pub matches_empty_input: bool,
}
impl Pattern {
/// Constructor.
pub fn new(data: PatternData, matches_empty_input: bool) -> Self {
Self { data: Rc::new(data), matches_empty_input }
}
}
/// Variants of [`Pattern`].
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub enum Pattern {
pub enum PatternData {
/// Consume all items, till the end of the token stream.
Everything,
/// Consume nothing.
Nothing,
/// Consume items matching the first pattern. If the match was unsuccessful, the second match
/// will be tried.
Or(Box<Pattern>, Box<Pattern>),
/// Consume a single item if it matches the configuration.
Item(Item),
Or(Pattern, Pattern),
Seq(Pattern, Pattern),
/// Consume many times (zero or more) the given pattern. If the given pattern succeeds on empty
/// input, it will be repeated as long as it consumes any input.
Many(Pattern),
/// Consume an identifier.
Identifier,
/// Consume a block and run the provided pattern in its body.
Block(Pattern),
/// Indicator of an error. The provided pattern is used to consume input when an error occurs.
/// For example, if you want to consume an identifier, but the identifier is not found, you can
/// use this pattern to consume any token instead and mark it as invalid.
Expected(String, Pattern),
/// Named pattern. Mainly used for splicing the code in the macro definition body.
Named(String, Pattern),
/// Anything that is not a block.
NotBlock,
}
/// Item pattern configuration.
#[derive(Clone, Copy, Debug)]
#[allow(missing_docs)]
pub struct Item {
/// Check whether the token has spaces on right-hand-side. The [`None`] value means that the
/// condition would not be checked.
pub has_rhs_spacing: Option<bool>,
/// Constructor.
pub fn everything() -> Pattern {
Pattern::new(PatternData::Everything, true)
}
// =======================
// === ResolutionError ===
// =======================
/// Pattern resolution error.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct ResolutionError<T> {
/// All the incoming tokens. The resolver consumes vector of tokens and returns it back in case
/// an error happened.
pub tokens: Vec<T>,
pub message: String,
/// Constructor.
pub fn identifier() -> Pattern {
Pattern::new(PatternData::Identifier, false)
}
impl<T> ResolutionError<T> {
/// Constructor.
pub fn new(tokens: Vec<T>, message: impl Into<String>) -> Self {
let message = message.into();
Self { tokens, message }
}
/// Constructor.
pub fn not_block() -> Pattern {
Pattern::new(PatternData::NotBlock, false)
}
/// ==================
/// === Resolution ===
/// ==================
/// Successful pattern match result.
#[derive(Debug, Clone)]
#[allow(missing_docs)]
pub struct Match<T> {
/// All the matched tokens.
pub matched: Vec<T>,
/// The rest of the token stream that was not needed for the successful pattern match.
pub rest: Vec<T>,
/// Constructor.
pub fn nothing() -> Pattern {
Pattern::new(PatternData::Nothing, true)
}
impl<T> Match<T> {
/// Constructor.
pub fn new(matched: Vec<T>, rest: Vec<T>) -> Self {
Self { matched, rest }
}
/// Constructor.
pub fn or(fst: Pattern, snd: Pattern) -> Pattern {
let matches_empty_input = fst.matches_empty_input || snd.matches_empty_input;
Pattern::new(PatternData::Or(fst, snd), matches_empty_input)
}
/// Constructor.
pub fn seq(fst: Pattern, snd: Pattern) -> Pattern {
let matches_empty_input = fst.matches_empty_input && snd.matches_empty_input;
Pattern::new(PatternData::Seq(fst, snd), matches_empty_input)
}
/// Constructor.
pub fn many(item: Pattern) -> Pattern {
Pattern::new(PatternData::Many(item), true)
}
/// Constructor.
pub fn block(body: Pattern) -> Pattern {
Pattern::new(PatternData::Block(body), false)
}
/// Constructor.
pub fn expected(message: impl Into<String>, item: Pattern) -> Pattern {
let matches_empty_input = item.matches_empty_input;
Pattern::new(PatternData::Expected(message.into(), item), matches_empty_input)
}
/// Constructor.
pub fn named(message: impl Into<String>, item: Pattern) -> Pattern {
let matches_empty_input = item.matches_empty_input;
Pattern::new(PatternData::Named(message.into(), item), matches_empty_input)
}
impl Pattern {
/// Match the token stream with this pattern.
pub fn resolve<'s, T: TryAsRef<syntax::Item<'s>>>(
&self,
mut input: Vec<T>,
has_spacing_at_end: bool,
right_to_left_mode: bool,
) -> Result<Match<T>, ResolutionError<T>> {
/// Repeat the current pattern multiple times.
pub fn many(self) -> Self {
many(self)
}
/// Match self or consume any token that is not a block and mark it as invalid.
pub fn expect(self, message: impl Into<String>) -> Self {
self | expected(message, not_block() | nothing())
}
/// Match self or consume any token that is not a block and mark it as invalid.
pub fn named(self, label: impl Into<String>) -> Self {
named(label, self)
}
}
/// The syntax `pattern1 >> pattern2` is a shortcut for `seq(pattern1, pattern2)`.
impl std::ops::Shr for Pattern {
type Output = Pattern;
fn shr(self, rhs: Pattern) -> Self::Output {
seq(self, rhs)
}
}
/// The syntax `pattern1 | pattern2` is a shortcut for `or(pattern1, pattern2)`.
impl std::ops::BitOr for Pattern {
type Output = Pattern;
fn bitor(self, rhs: Pattern) -> Self::Output {
or(self, rhs)
}
}
/// The syntax `pattern % "message"` is a shortcut for `pattern.expect("message")`.
impl<T: Into<String>> std::ops::Rem<T> for Pattern {
type Output = Pattern;
fn rem(self, message: T) -> Self::Output {
self.expect(message)
}
}
/// The syntax `pattern / "label"` is a shortcut for `pattern.named("label")`.
impl<T: Into<String>> Div<T> for Pattern {
type Output = Pattern;
fn div(self, message: T) -> Self::Output {
named(message, self)
}
}
// =============
// === Match ===
// =============
/// The result of applying [`Pattern`] to a token stream. After a successful match, a variant of the
/// [`Pattern`] is transformed to variant of [`Match`] of the same name.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub enum Match<'s> {
Everything(VecDeque<syntax::Item<'s>>),
Nothing,
Or(Box<OrMatch<'s>>),
Seq(Box<Match<'s>>, Box<Match<'s>>),
Many(Vec<Match<'s>>),
Identifier(syntax::Item<'s>),
Expected(String, Box<Match<'s>>),
Named(String, Box<Match<'s>>),
NotBlock(syntax::Item<'s>),
}
/// The result of the [`Pattern::Or`] resolution.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub enum OrMatch<'s> {
First(Match<'s>),
Second(Match<'s>),
}
impl<'s> Match<'s> {
/// Constructor.
pub fn or(m: OrMatch<'s>) -> Self {
Self::Or(Box::new(m))
}
/// Constructor.
pub fn seq(first: Match<'s>, second: Match<'s>) -> Self {
Self::Seq(Box::new(first), Box::new(second))
}
/// Constructor.
pub fn expected(expected: impl Into<String>, second: Match<'s>) -> Self {
Self::Expected(expected.into(), Box::new(second))
}
/// Constructor.
pub fn named(label: impl Into<String>, second: Match<'s>) -> Self {
Self::Named(label.into(), Box::new(second))
}
/// Get all tokens of the match.
pub fn tokens(self) -> Vec<syntax::Item<'s>> {
match self {
Self::Everything => Ok(Match::new(input, default())),
Self::Nothing => Ok(Match::new(default(), input)),
Self::Or(fst, snd) => fst
.resolve(input, has_spacing_at_end, right_to_left_mode)
.or_else(|err| snd.resolve(err.tokens, has_spacing_at_end, right_to_left_mode)),
Self::Item(item) => match input.first() {
None => Err(ResolutionError::new(input, "Expected an item.")),
Some(first) => match first.try_as_ref() {
None => Err(ResolutionError::new(input, "Expected an item.")),
Some(_) => match item.has_rhs_spacing {
Some(spacing) =>
if right_to_left_mode {
if spacing == has_spacing_at_end {
Ok(Match::new(vec![input.pop_front().unwrap()], input))
} else {
Err(ResolutionError::new(input, "Expected an item."))
}
} else {
todo!()
},
None => Ok(Match::new(vec![input.pop_front().unwrap()], input)),
},
},
Self::Everything(tokens) => tokens.into(),
Self::Nothing => default(),
Self::Seq(fst, snd) => fst.tokens().extended(snd.tokens()),
Self::Many(t) => t.into_iter().flat_map(|s| s.tokens()).collect(),
Self::Identifier(ident) => vec![ident],
Self::Expected(_, item) => item.tokens(),
Self::Named(_, item) => item.tokens(),
Self::NotBlock(item) => vec![item],
Self::Or(t) => match *t {
OrMatch::First(fst) => fst.tokens(),
OrMatch::Second(snd) => snd.tokens(),
},
}
}
}
// ===================
// === MatchResult ===
// ===================
/// Result of a successful pattern resolution. It contains a match and the remaining token stream.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct MatchResult<'s> {
pub matched: Match<'s>,
pub rest: VecDeque<syntax::Item<'s>>,
}
impl<'s> MatchResult<'s> {
/// Constructor.
pub fn new(matched: Match<'s>, rest: VecDeque<syntax::Item<'s>>) -> Self {
Self { matched, rest }
}
/// Map the match with the provided function.
pub fn map(mut self, f: impl FnOnce(Match<'s>) -> Match<'s>) -> Self {
self.matched = f(self.matched);
self
}
}
// ======================
// === MatchedSegment ===
// ======================
/// List of matched segments.
pub type MatchedSegments<'s> = NonEmptyVec<MatchedSegment<'s>>;
/// A matched segment. See the [`macros::resolver::Segment`] to learn more.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct MatchedSegment<'s> {
pub header: syntax::Token<'s>,
pub result: Match<'s>,
}
impl<'s> MatchedSegment<'s> {
/// Constructor.
pub fn new(header: syntax::Token<'s>, result: Match<'s>) -> Self {
Self { header, result }
}
}
// ==========================
// === Pattern Resolution ===
// ==========================
impl Pattern {
/// Resolve the pattern. Return [`MatchResult`] if the pattern is matched, otherwise all the
/// input tokens.
pub fn resolve<'s>(
&self,
mut input: VecDeque<syntax::Item<'s>>,
) -> Result<MatchResult<'s>, VecDeque<syntax::Item<'s>>> {
match &*self.data {
PatternData::Expected(msg, item) =>
item.resolve(input).map(|t| t.map(|s| Match::expected(msg, s))),
PatternData::Named(msg, item) =>
item.resolve(input).map(|t| t.map(|s| Match::named(msg, s))),
PatternData::Everything => Ok(MatchResult::new(Match::Everything(input), default())),
PatternData::Nothing => Ok(MatchResult::new(Match::Nothing, input)),
PatternData::Or(fst, snd) => fst
.resolve(input)
.map(|t| t.map(|s| Match::or(OrMatch::First(s))))
.or_else(|t| snd.resolve(t).map(|t| t.map(|s| Match::or(OrMatch::Second(s))))),
PatternData::Seq(fst, snd) => fst
.resolve(input)
.and_then(|t| snd.resolve(t.rest).map(|s| s.map(|x| Match::seq(t.matched, x)))),
PatternData::Many(pat) => {
let mut out = vec![];
let mut input_len = input.len();
loop {
match pat.resolve(input) {
Err(rest) => {
input = rest;
break;
}
Ok(t) => {
input = t.rest;
if pat.matches_empty_input {
let no_input_consumed = input_len == input.len();
if no_input_consumed {
break;
}
input_len = input.len();
}
out.push(t.matched);
}
}
}
Ok(MatchResult::new(Match::Many(out), input))
}
PatternData::Identifier => match input.pop_front() {
None => Err(default()),
Some(t) =>
if t.is_variant(syntax::token::variant::VariantMarker::Ident) {
Ok(MatchResult::new(Match::Identifier(t), input))
} else {
input.push_front(t);
Err(input)
},
},
PatternData::Block(body) => match input.pop_front() {
Some(syntax::Item::Block(tokens)) =>
body.resolve(tokens.into_iter().rev().map_into().collect()),
Some(t) => {
input.push_front(t);
Err(input)
}
None => Err(default()),
},
PatternData::NotBlock => match input.pop_front() {
Some(t @ syntax::Item::Block(_)) => {
input.push_front(t);
Err(input)
}
None => Err(default()),
Some(t) => Ok(MatchResult::new(Match::NotBlock(t), input)),
},
}
}

View File

@ -0,0 +1,387 @@
//! Macro resolver implementation. Refer to the docs of the main parser module to learn more.
use crate::prelude::*;
use crate::macros;
use crate::macros::pattern;
use crate::syntax;
use crate::syntax::token;
use crate::syntax::token::Token;
use enso_data_structures::im_list;
use enso_data_structures::im_list::List;
use std::collections::VecDeque;
// ==================
// === SegmentMap ===
// ==================
/// A tree-like structure encoding potential macro matches. The keys are code representations of
/// [`macros::SegmentDefinition`] headers (first tokens of sections). Each key is associated with
/// one or more [`SegmentEntry`], which stores a list of required subsequent segments
/// and a macro definition that should be used when all the segments will be matched. For example,
/// after matching the "if" keyword, this struct will contain one entry "then" with two values, one
/// for the required "else" section, and one without a required section (for the "if ... then ..."
/// case).
#[derive(Default, Debug, Deref, DerefMut)]
pub struct SegmentMap<'s> {
    /// Candidate entries keyed by the code of their next required header token.
    map: HashMap<&'s str, NonEmptyVec<SegmentEntry<'s>>>,
}
/// Partially matched macro info. See docs of [`SegmentMap`] to learn more.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct SegmentEntry<'s> {
/// All the segment headers that are required for the macro definition to be used.
pub required_segments: List<macros::SegmentDefinition<'s>>,
/// Definition of the macro that should be used when all the required segments will be matched.
/// It contains [`Pattern`] definition for every segment that will be used after all the
/// segment tokens are discovered.
pub definition: Rc<macros::Definition<'s>>,
}
impl<'a> SegmentMap<'a> {
    /// Register a new macro definition in this macro tree.
    pub fn register(&mut self, definition: macros::Definition<'a>) {
        // The first segment's header is the lookup key; the remaining segments become this
        // entry's requirements.
        let header = definition.segments.head.header;
        let entry = SegmentEntry {
            required_segments: definition.segments.tail.clone(),
            definition: Rc::new(definition),
        };
        // Several macros may share the same leading header (e.g. "if ... then ..." and
        // "if ... then ... else ..."), hence the non-empty vector of entries per key.
        if let Some(node) = self.get_mut(header) {
            node.push(entry);
        } else {
            self.insert(header, NonEmptyVec::singleton(entry));
        }
    }
}
// =============================
// === PartiallyMatchedMacro ===
// =============================
/// Partially matched macro. It contains the current section being matched, all the sections matched
/// so far, and the macro definition in case the macro was fully matched. Please note that the
/// definition can change during macro resolution. For example, after finding both "if" and "then"
/// sections, the definition of the "if ... then ..." macro will be used. However, after finding the
/// "else" token, the definition will be replaced with the "if ... then ... else ..." macro one.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct PartiallyMatchedMacro<'s> {
/// The segment whose body is currently being collected.
pub current_segment: MatchedSegment<'s>,
/// Segments whose headers (and bodies) were already consumed, in order.
pub resolved_segments: Vec<MatchedSegment<'s>>,
/// Headers that could legally start the next segment of this macro.
pub possible_next_segments: SegmentMap<'s>,
/// Set when the segments matched so far already form a complete macro definition.
pub matched_macro_def: Option<Rc<macros::Definition<'s>>>,
}
impl<'a> PartiallyMatchedMacro<'a> {
/// A new macro resolver with a special "root" segment definition. The "root" segment does not
/// exist in the source code, it is simply the whole expression being parsed. It is treated
/// as a macro in order to unify the algorithms.
pub fn new_root() -> Self {
// The root segment header is a synthetic newline token with empty code.
let current_segment = MatchedSegment::new(Token("", "", token::Variant::newline()));
let resolved_segments = default();
let possible_next_segments = default();
// The root macro consists of a single "__ROOT__" segment matching everything.
let matched_macro_def = Some(Rc::new(macros::Definition {
segments: im_list::NonEmpty::singleton(macros::SegmentDefinition {
header: "__ROOT__",
pattern: pattern::everything(),
}),
body: Rc::new(|v| {
// Taking the first segment, hardcoded above.
let body = v.pop().0.result;
syntax::operator::resolve_operator_precedence(body.tokens())
}),
}));
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
}
}
// ======================
// === MatchedSegment ===
// ======================
/// A macro segment which header was matched. Its body contains a list of tokens and nested macros
/// that were found. Please note that the body tokens are not matched against the pattern yet.
/// Because of that, the macro nesting is incorrect for patterns that do not consume all tokens till
/// the end of the stream. For example, the expression `(a) (b)` will be matched in such a way, that
/// the macro `(b)` will be part of the body of the `)` segment of the `(a)` macro. This will be
/// restructured in the pattern matching phase. See the parser module docs to learn more about this
/// process.
#[derive(Debug)]
pub struct MatchedSegment<'s> {
// The token that opened this segment (e.g. "if" or "then").
header: Token<'s>,
// Everything collected after the header, before the next segment header.
body: Vec<ItemOrPartiallyMatchedMacro<'s>>,
}
impl<'s> MatchedSegment<'s> {
    /// Construct a segment with the given header and an empty body.
    pub fn new(header: Token<'s>) -> Self {
        Self { header, body: default() }
    }
}
// ===================================
// === ItemOrPartiallyMatchedMacro ===
// ===================================
/// One of [`syntax::Item`] or [`PartiallyMatchedMacro`]. Used during macro resolution when some
/// items are already resolved as macros, and some are not yet. For example, after matching the
/// expression `(a) x (b)`, the `x` token and the `(b)` macro will be items of the body of the last
/// segment of the `(a)` macro.
#[derive(Debug, From)]
#[allow(missing_docs)]
enum ItemOrPartiallyMatchedMacro<'s> {
/// A plain syntax item that is not part of a (known) macro.
SyntaxItem(syntax::Item<'s>),
/// A nested macro discovered while collecting this segment's body.
PartiallyMatchedMacro(PartiallyMatchedMacro<'s>),
}
impl<'s> TryAsRef<syntax::Item<'s>> for ItemOrPartiallyMatchedMacro<'s> {
    fn try_as_ref(&self) -> Option<&syntax::Item<'s>> {
        // Borrow the inner item only for the `SyntaxItem` variant.
        if let Self::SyntaxItem(item) = self {
            Some(item)
        } else {
            None
        }
    }
}
impl<'s> TryAsRef<PartiallyMatchedMacro<'s>> for ItemOrPartiallyMatchedMacro<'s> {
    fn try_as_ref(&self) -> Option<&PartiallyMatchedMacro<'s>> {
        // Borrow the inner macro only for the `PartiallyMatchedMacro` variant.
        if let Self::PartiallyMatchedMacro(partial) = self {
            Some(partial)
        } else {
            None
        }
    }
}
// ================
// === Resolver ===
// ================
/// Macro resolver capable of resolving nested macro usages. See the docs of the main parser module
/// to learn more about the macro resolution steps.
#[derive(Debug)]
pub struct Resolver<'s> {
// The macro currently being matched.
current_macro: PartiallyMatchedMacro<'s>,
// Parent macros whose matching was suspended when a nested macro started.
macro_stack: Vec<PartiallyMatchedMacro<'s>>,
}
/// Result of the macro resolution step.
#[derive(Clone, Debug)]
enum Step<'s> {
/// A segment header was consumed; advance to the next token.
NewSegmentStarted,
/// The item is plain segment-body content.
NormalToken(syntax::Item<'s>),
/// A parent macro was popped; the carried item must be re-processed in the parent's context.
MacroStackPop(syntax::Item<'s>),
}
impl<'s> Resolver<'s> {
/// New resolver with a special "root" segment definition allowing parsing arbitrary
/// expressions.
pub fn new_root() -> Self {
    Self { current_macro: PartiallyMatchedMacro::new_root(), macro_stack: default() }
}
fn replace_current_with_parent_macro(&mut self, mut parent_macro: PartiallyMatchedMacro<'s>) {
    // After the swap, `parent_macro` holds what used to be the current (child) macro.
    mem::swap(&mut self.current_macro, &mut parent_macro);
    self.current_macro.current_segment.body.push(parent_macro.into());
}
/// Pop the macro stack if the current token is reserved. For example, when matching the
/// `if a if b then c then d` expression, the token `then` after the token `c` will be
/// considered reserved and the macro resolution of `if b then c` will be popped from the stack.
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<PartiallyMatchedMacro<'s>> {
    let is_reserved = self
        .macro_stack
        .iter()
        .any(|partial| partial.possible_next_segments.contains_key(repr));
    if is_reserved {
        self.macro_stack.pop()
    } else {
        None
    }
}
/// Run the resolver. Returns the resolved AST.
pub fn run(
mut self,
root_macro_map: &SegmentMap<'s>,
tokens: &mut iter::Peekable<std::vec::IntoIter<syntax::Item<'s>>>,
) -> syntax::Tree<'s> {
event!(TRACE, "Running macro resolver. Registered macros:\n{:#?}", root_macro_map);
let mut opt_item: Option<syntax::Item<'s>>;
// Fetch the next item from the stream into `opt_item`.
macro_rules! next_token {
() => {{
opt_item = tokens.next();
if let Some(token) = opt_item.as_ref() {
event!(TRACE, "New token {:#?}", token);
}
}};
}
// Log the current resolver state (trace level only).
macro_rules! trace_state {
() => {
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
};
}
next_token!();
// A single run handles one expression; a newline item terminates it.
while let Some(token) = opt_item && !token.is_newline() {
let step_result = match token {
syntax::Item::Token(token) => self.process_token(root_macro_map, token),
_ => Step::NormalToken(token),
};
match step_result {
Step::MacroStackPop(item) => {
trace_state!();
// Do not advance: the item must be re-processed in the parent macro's context.
opt_item = Some(item)
}
Step::NewSegmentStarted => {
trace_state!();
next_token!()
}
Step::NormalToken(item) => {
self.current_macro.current_segment.body.push(item.into());
trace_state!();
next_token!();
}
}
}
event!(TRACE, "Finishing resolution. Popping the macro stack.");
// Fold all still-open nested macros back into their parents.
while let Some(parent_macro) = self.macro_stack.pop() {
self.replace_current_with_parent_macro(parent_macro);
}
trace_state!();
let (tree, rest) = Self::resolve(self.current_macro);
if !rest.is_empty() {
panic!(
"Internal error. Not all tokens were consumed by the macro resolver:\n{:#?}",
rest
);
}
tree
}
/// Process a single token: continue the current macro with a new segment, pop a parent macro
/// whose reserved token was hit, start a new nested macro, or treat the token as plain
/// segment-body content.
fn process_token(&mut self, root_macro_map: &SegmentMap<'s>, token: Token<'s>) -> Step<'s> {
let repr = &**token.code;
if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) {
event!(TRACE, "Entering next segment of the current macro.");
let mut new_match_tree =
Self::move_to_next_segment(&mut self.current_macro.matched_macro_def, subsegments);
let mut current_segment = MatchedSegment::new(token);
// Install the narrowed match tree and archive the finished segment.
mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments);
mem::swap(&mut self.current_macro.current_segment, &mut current_segment);
self.current_macro.resolved_segments.push(current_segment);
Step::NewSegmentStarted
} else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) {
event!(TRACE, "Next token reserved by parent macro. Resolving current macro.");
self.replace_current_with_parent_macro(parent_macro);
// The token itself is handed back so the parent can process it.
Step::MacroStackPop(token.into())
} else if let Some(segments) = root_macro_map.get(repr) {
event!(TRACE, "Starting a new nested macro resolution.");
let mut matched_macro_def = default();
let mut current_macro = PartiallyMatchedMacro {
current_segment: MatchedSegment { header: token, body: default() },
resolved_segments: default(),
possible_next_segments: Self::move_to_next_segment(
&mut matched_macro_def,
segments,
),
matched_macro_def,
};
// Suspend the old current macro on the stack while the nested one is matched.
mem::swap(&mut self.current_macro, &mut current_macro);
self.macro_stack.push(current_macro);
Step::NewSegmentStarted
} else {
event!(TRACE, "Consuming token as current segment body.");
Step::NormalToken(token.into())
}
}
/// Resolve the [`PartiallyMatchedMacro`]. Returns the AST and the non-used tokens. For example,
/// the resolution of the `(a)` macro in the `(a) x (b)` expression will return the `(a)` AST
/// and the `x` and `(b)` items (already resolved).
fn resolve(m: PartiallyMatchedMacro<'s>) -> (syntax::Tree<'s>, VecDeque<syntax::Item<'s>>) {
let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment);
// First, recursively resolve nested macros so every segment body is a flat item list.
let resolved_segments = segments.mapped(|segment| {
let mut items: VecDeque<syntax::Item<'s>> = default();
for item in segment.body {
match item {
ItemOrPartiallyMatchedMacro::SyntaxItem(t) => items.push_back(t),
ItemOrPartiallyMatchedMacro::PartiallyMatchedMacro(unresolved_macro) => {
let (resolved_macro, unused_items) = Self::resolve(unresolved_macro);
items.push_back(resolved_macro.into());
items.extend(unused_items);
}
}
}
(segment.header, items)
});
if let Some(macro_def) = m.matched_macro_def {
let mut def_segments = macro_def.segments.to_vec().into_iter();
// Match each segment's items against the corresponding pattern from the definition.
let mut pattern_matched_segments = resolved_segments.mapped(|(header, items)| {
let err = "Internal error. Macro definition and match segments count mismatch.";
let def = def_segments.next().unwrap_or_else(|| panic!("{}", err));
(header, def.pattern.resolve(items))
});
// Moving not pattern-matched tokens of the last segment to parent.
let mut not_used_items_of_last_segment = VecDeque::new();
match &mut pattern_matched_segments.last_mut().1 {
Err(rest) => mem::swap(&mut not_used_items_of_last_segment, rest),
Ok(segment) => mem::swap(&mut not_used_items_of_last_segment, &mut segment.rest),
}
let pattern_matched_segments =
pattern_matched_segments.mapped(|(header, match_result)| match match_result {
Ok(result) => {
if !result.rest.is_empty() {
todo!("Mark unmatched tokens as unexpected.");
}
pattern::MatchedSegment::new(header, result.matched)
}
Err(_unmatched_items) => todo!("Mark unmatched tokens as unexpected."),
});
// Build the AST using the body function of the fully matched macro definition.
let out = (macro_def.body)(pattern_matched_segments);
(out, not_used_items_of_last_segment)
} else {
todo!("Macro was not matched with any known macro definition. This should return an AST node indicating invalid match.")
}
}
/// Move the resolution to the next segment. Takes possible next segments and merges them in a
/// new [`SegmentMap`]. If after moving to the next segment there is a macro definition that is
/// fully matched, its definition will be recorded.
fn move_to_next_segment(
    matched_macro_def: &mut Option<Rc<macros::Definition<'s>>>,
    possible_segments: &[SegmentEntry<'s>],
) -> SegmentMap<'s> {
    *matched_macro_def = None;
    let mut new_section_tree = SegmentMap::default();
    for segment_entry in possible_segments {
        match segment_entry.required_segments.head() {
            // More segments are required — register them under the next expected header.
            Some(first) => {
                let required_segments =
                    segment_entry.required_segments.tail().cloned().unwrap_or_default();
                let definition = segment_entry.definition.clone_ref();
                let new_entry = SegmentEntry { required_segments, definition };
                match new_section_tree.get_mut(&first.header) {
                    Some(node) => node.push(new_entry),
                    None => {
                        new_section_tree.insert(first.header, NonEmptyVec::singleton(new_entry));
                    }
                }
            }
            // No further segments required — this definition is fully matched.
            None => *matched_macro_def = Some(segment_entry.definition.clone_ref()),
        }
    }
    new_section_tree
}
}

View File

@ -100,15 +100,6 @@
use crate::prelude::*;
use crate::source::VisibleOffset;
use enso_data_structures::im_list;
use enso_data_structures::im_list::List;
use lexer::Lexer;
use macros::pattern::Pattern;
use syntax::token;
use syntax::token::Token;
// ==============
// === Export ===
@ -130,682 +121,98 @@ pub mod prelude {
// =================================
// === SyntaxItemOrMacroResolver ===
// =================================
// ==============
// === Parser ===
// ==============
/// One of [`syntax::Item`] or [`MacroResolver`].
#[derive(Debug)]
/// Enso parser. See the module documentation to learn more about how it works.
#[allow(missing_docs)]
pub enum SyntaxItemOrMacroResolver<'s> {
SyntaxItem(syntax::Item<'s>),
MacroResolver(MacroResolver<'s>),
}
impl<'s> From<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
fn from(t: syntax::Item<'s>) -> Self {
Self::SyntaxItem(t)
}
}
impl<'s> From<MacroResolver<'s>> for SyntaxItemOrMacroResolver<'s> {
fn from(t: MacroResolver<'s>) -> Self {
Self::MacroResolver(t)
}
}
impl<'s> TryAsRef<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
fn try_as_ref(&self) -> Option<&syntax::Item<'s>> {
match self {
Self::SyntaxItem(t) => Some(t),
_ => None,
}
}
}
// ======================
// === MacroMatchTree ===
// ======================
/// A tree-like structure encoding potential macro matches. The keys are representations of tokens
/// that can be matched. For example, the key could be "if" or "->". Each key is associated with one
/// or more [`PartiallyMatchedMacro`], which stores a list of required segments and a macro
/// definition in case all the segments were matched. For example, for the "if" key, there can be
/// two required segment lists, one for "then" and "else" segments, and one for the "then" segment
/// only.
#[derive(Default, Debug, Deref, DerefMut)]
pub struct MacroMatchTree<'s> {
map: HashMap<&'s str, NonEmptyVec<PartiallyMatchedMacro<'s>>>,
}
/// Partially matched macro info. See docs of [`MacroMatchTree`] to learn more.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct PartiallyMatchedMacro<'s> {
pub required_segments: List<macros::SegmentDefinition<'s>>,
pub definition: Rc<macros::Definition<'s>>,
}
impl<'a> MacroMatchTree<'a> {
/// Register a new macro definition in this macro tree.
pub fn register(&mut self, definition: macros::Definition<'a>) {
let header = definition.segments.head.header;
let entry = PartiallyMatchedMacro {
required_segments: definition.segments.tail.clone(),
definition: Rc::new(definition),
};
if let Some(node) = self.get_mut(header) {
node.push(entry);
} else {
self.insert(header, NonEmptyVec::singleton(entry));
}
}
}
// =====================
// === MacroResolver ===
// =====================
/// Enso macro resolver. See the docs of the main module to learn more about the macro resolution
/// steps.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct MacroResolver<'s> {
pub current_segment: MatchedSegment<'s>,
pub resolved_segments: Vec<MatchedSegment<'s>>,
pub possible_next_segments: MacroMatchTree<'s>,
pub matched_macro_def: Option<Rc<macros::Definition<'s>>>,
pub struct Parser {
pub macros: macros::resolver::SegmentMap<'static>,
}
impl<'a> MacroResolver<'a> {
/// A new macro resolver with a special "root" segment definition. The "root" segment does not
/// exist in the source code, it is simply the whole expression being parsed. It is treated
/// as a macro in order to unify the algorithms.
pub fn new_root() -> Self {
let current_segment =
MatchedSegment { header: Token("", "", token::Variant::newline()), body: default() };
let resolved_segments = default();
let possible_next_segments = default();
let matched_macro_def = Some(Rc::new(macros::Definition {
rev_prefix_pattern: None,
segments: im_list::NonEmpty::singleton(macros::SegmentDefinition {
header: "__ROOT__",
pattern: Pattern::Everything,
}),
body: Rc::new(|_, v| {
if v.len() != 1 {
panic!()
}
let t = v.into_vec().pop().unwrap().1;
resolve_operator_precedence(t)
}),
}));
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
}
}
/// A matched macro segment. Partial macro resolution product.
#[derive(Debug)]
pub struct MatchedSegment<'s> {
header: Token<'s>,
body: Vec<SyntaxItemOrMacroResolver<'s>>,
}
impl<'s> MatchedSegment<'s> {
impl Parser {
/// Constructor.
pub fn new(header: Token<'s>) -> Self {
let body = default();
Self { header, body }
}
}
/// Main macro resolver capable of resolving nested macro usages. See the docs of the main module to
/// learn more about the macro resolution steps.
#[derive(Debug)]
pub struct Resolver<'s> {
current_macro: MacroResolver<'s>,
macro_stack: Vec<MacroResolver<'s>>,
}
/// Result of the macro resolution step.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ResolverStep {
NormalToken,
NewSegmentStarted,
MacroStackPop,
}
impl<'s> Resolver<'s> {
fn new_root() -> Self {
let current_macro = MacroResolver::new_root();
let macro_stack = default();
Self { current_macro, macro_stack }
pub fn new() -> Self {
let macros = macros::built_in::all();
Self { macros }
}
fn run(
mut self,
lexer: &Lexer<'s>,
root_macro_map: &MacroMatchTree<'s>,
tokens: Vec<syntax::Item<'s>>,
) -> syntax::Tree<'s> {
let mut stream = tokens.into_iter();
let mut opt_token: Option<syntax::Item<'s>>;
macro_rules! next_token {
() => {{
opt_token = stream.next();
if let Some(token) = opt_token.as_ref() {
event!(TRACE, "New token {:#?}", token);
}
}};
/// Main entry point.
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
let tokens = lexer::run(code);
let mut statements = vec![];
let mut tokens = tokens.into_iter().peekable();
while tokens.peek().is_some() {
let resolver = macros::resolver::Resolver::new_root();
let tree = resolver.run(&self.macros, &mut tokens);
statements.push(tree);
}
macro_rules! trace_state {
() => {
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
};
}
next_token!();
while let Some(token) = opt_token {
let step_result = match &token {
// FIXME: clone?
syntax::Item::Token(token) => self.process_token(root_macro_map, token.clone()),
_ => ResolverStep::NormalToken,
};
match step_result {
ResolverStep::MacroStackPop => {
trace_state!();
opt_token = Some(token)
}
ResolverStep::NewSegmentStarted => {
trace_state!();
next_token!()
}
ResolverStep::NormalToken => {
self.current_macro.current_segment.body.push(token.into());
trace_state!();
next_token!();
}
}
}
while let Some(parent_macro) = self.macro_stack.pop() {
self.replace_current_with_parent_macro(parent_macro);
}
trace_state!();
Self::resolve(lexer, self.current_macro, None)
}
fn replace_current_with_parent_macro(&mut self, mut parent_macro: MacroResolver<'s>) {
mem::swap(&mut parent_macro, &mut self.current_macro);
let mut child_macro = parent_macro;
if let Some(def) = &child_macro.matched_macro_def {
let pattern = &def.segments.last().pattern;
let child_tokens = mem::take(&mut child_macro.current_segment.body);
// FIXME: the first [`false`] below is invalid.
let match_result = pattern.resolve(child_tokens, false, false).unwrap();
let mut new_child_tokens = match_result.matched;
let new_parent_tokens = match_result.rest;
mem::swap(&mut child_macro.current_segment.body, &mut new_child_tokens);
self.current_macro.current_segment.body.push(child_macro.into());
self.current_macro.current_segment.body.extend(new_parent_tokens);
} else {
panic!()
}
}
fn resolve(
lexer: &Lexer<'s>,
m: MacroResolver<'s>,
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
) -> syntax::Tree<'s> {
let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment);
let sss: NonEmptyVec<(Token, Vec<syntax::Item<'s>>)> = segments.mapped(|segment| {
let mut ss: Vec<syntax::Item<'s>> = vec![];
for item in segment.body {
let resolved_token = match item {
SyntaxItemOrMacroResolver::MacroResolver(m2) => {
if let Some(macro_def) = &m2.matched_macro_def
&& let Some(pfx_pattern) = &macro_def.rev_prefix_pattern {
ss.reverse();
let spacing = m2.current_segment.header.left_offset.visible > VisibleOffset(0);
let mut match_result = pfx_pattern.resolve(ss,spacing,true).unwrap();
match_result.matched.reverse();
ss = match_result.rest;
ss.reverse();
Self::resolve(lexer, m2, Some(match_result.matched)).into()
} else {
Self::resolve(lexer, m2, None).into()
}
},
SyntaxItemOrMacroResolver::SyntaxItem(t) => t,
};
ss.push(resolved_token);
}
(segment.header, ss)
});
if let Some(macro_def) = m.matched_macro_def {
(macro_def.body)(prefix_tokens, sss)
} else {
todo!("Handling non-fully-resolved macros")
}
}
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<MacroResolver<'s>> {
let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr));
if reserved {
self.macro_stack.pop()
} else {
None
}
}
fn process_token(
&mut self,
root_macro_map: &MacroMatchTree<'s>,
token: Token<'s>,
) -> ResolverStep {
let repr = &**token.code;
if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) {
event!(TRACE, "Entering next segment of the current macro.");
let mut new_match_tree =
Self::enter(&mut self.current_macro.matched_macro_def, subsegments);
let mut current_segment = MatchedSegment::new(token);
mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments);
mem::swap(&mut self.current_macro.current_segment, &mut current_segment);
self.current_macro.resolved_segments.push(current_segment);
ResolverStep::NewSegmentStarted
} else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) {
event!(TRACE, "Next token reserved by parent macro. Resolving current macro.");
self.replace_current_with_parent_macro(parent_macro);
ResolverStep::MacroStackPop
} else if let Some(segments) = root_macro_map.get(repr) {
event!(TRACE, "Starting a new nested macro resolution.");
let mut matched_macro_def = default();
let mut current_macro = MacroResolver {
current_segment: MatchedSegment { header: token, body: default() },
resolved_segments: default(),
possible_next_segments: Self::enter(&mut matched_macro_def, segments),
matched_macro_def,
};
mem::swap(&mut self.current_macro, &mut current_macro);
self.macro_stack.push(current_macro);
ResolverStep::NewSegmentStarted
} else {
event!(TRACE, "Consuming token as current segment body.");
ResolverStep::NormalToken
}
}
fn enter(
matched_macro_def: &mut Option<Rc<macros::Definition<'s>>>,
path: &[PartiallyMatchedMacro<'s>],
) -> MacroMatchTree<'s> {
*matched_macro_def = None;
let mut new_section_tree = MacroMatchTree::default();
for v in path {
if let Some(first) = v.required_segments.head() {
let tail = v.required_segments.tail().cloned().unwrap_or_default();
let definition = v.definition.clone_ref();
let x = PartiallyMatchedMacro { required_segments: tail, definition };
if let Some(node) = new_section_tree.get_mut(&first.header) {
node.push(x);
} else {
new_section_tree.insert(first.header, NonEmptyVec::singleton(x));
}
} else {
if matched_macro_def.is_some() {
event!(ERROR, "Internal error. Duplicate macro definition.");
}
*matched_macro_def = Some(v.definition.clone_ref());
}
}
new_section_tree
syntax::Tree::module(statements)
}
}
// FIXME: hardcoded values + not finished implementation.
fn precedence_of(operator: &str) -> usize {
    // Higher numbers bind tighter; `*` binds tighter than `+`/`-`.
    match operator {
        "+" | "-" => 3,
        "*" => 7,
        _ => panic!("Operator not supported: {}", operator),
    }
}
//
/// An element annotated with its operator precedence. Dereferences to the wrapped element.
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
struct WithPrecedence<T> {
#[deref]
#[deref_mut]
elem: T,
precedence: usize,
}
impl<T> WithPrecedence<T> {
/// Constructor.
pub fn new(precedence: usize, elem: T) -> Self {
Self { elem, precedence }
}
}
/// Mark trees that are not allowed inside a no-space group. A multi-segment application whose
/// first header is not a symbol token is wrapped in an error; all other items pass through
/// unchanged.
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
items
.into_iter()
.map(|item| match item {
syntax::Item::Token(_) => item,
syntax::Item::Tree(ast) =>
match &*ast.variant {
syntax::tree::Variant::MultiSegmentApp(data) => {
if data.segments.first().header.variant.marker()
!= token::variant::VariantMarker::Symbol
{
syntax::Item::Tree(ast.with_error(
"This expression cannot be used in a non-spaced equation.",
))
} else {
syntax::Item::Tree(ast)
}
}
_ => syntax::Item::Tree(ast),
},
})
.collect()
}
/// Resolve operator precedence of the provided items. Runs of items not separated by spaces are
/// grouped and resolved first, so spacing acts like implicit parentheses (e.g. in `a+b * c` the
/// `a+b` group is resolved before `*` is applied).
fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
    type Tokens<'s> = Vec<syntax::Item<'s>>;
    let mut flattened: Tokens<'s> = default();
    let mut no_space_group: Tokens<'s> = default();
    // Flush the accumulated no-space group: a single item passes through unchanged; longer
    // groups are resolved recursively and appended as one already-resolved item.
    let process_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
        let tokens = mem::take(no_space_group);
        if tokens.len() == 1 {
            flattened.extend(tokens);
        } else {
            let tokens = annotate_tokens_that_need_spacing(tokens);
            let ast = resolve_operator_precedence_internal(tokens);
            flattened.push(ast.into());
        }
    };
    for item in items {
        if item.span().left_offset.visible.width_in_spaces == 0 || no_space_group.is_empty() {
            no_space_group.push(item)
        } else {
            // A space-separated item ends the current (non-empty) no-space group. The previous
            // trailing `else` branch was unreachable (it required an empty group AND a non-zero
            // offset, which the first condition already accepts) and has been removed.
            process_no_space_group(&mut flattened, &mut no_space_group);
            no_space_group.push(item);
        }
    }
    // Flush the trailing group, if any.
    if !no_space_group.is_empty() {
        process_no_space_group(&mut flattened, &mut no_space_group);
    }
    resolve_operator_precedence_internal(flattened)
}
/// Resolve operator precedence of a flat item list using an operator-precedence algorithm with a
/// reverse-polish output queue and an operator stack (shunting-yard style).
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
// Reverse-polish notation encoding.
let mut output: Vec<syntax::Item> = default();
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
let mut last_token_was_ast = false;
let mut last_token_was_opr = false;
for item in items {
let i2 = item.clone(); // FIXME
if let syntax::Item::Token(token) = i2 && let token::Variant::Operator(opr) = token.variant {
// Item is an operator.
let last_token_was_opr_copy = last_token_was_opr;
last_token_was_ast = false;
last_token_was_opr = true;
let prec = precedence_of(&token.code);
let opr = Token(token.left_offset, token.code, opr);
// let opr = item.span().with(opr);
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
// Error. Multiple operators next to each other.
match &mut prev_opr.elem {
Err(err) => err.operators.push(opr),
Ok(prev) => {
let operators = NonEmptyVec::new(prev.clone(),vec![opr]); // FIXME: clone?
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
}
}
} else {
// Pop all stacked operators of higher-or-equal precedence and apply them.
while let Some(prev_opr) = operator_stack.last()
&& prev_opr.precedence >= prec
&& let Some(prev_opr) = operator_stack.pop()
&& let Some(rhs) = output.pop()
{
// Prev operator in the [`operator_stack`] has a higher precedence.
let lhs = output.pop().map(token_to_ast);
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(token_to_ast(rhs)));
output.push(ast.into());
}
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
}
} else if last_token_was_ast && let Some(lhs) = output.pop() {
// Multiple non-operators next to each other.
let lhs = token_to_ast(lhs);
let rhs = token_to_ast(item);
let ast = syntax::Tree::app(lhs, rhs);
output.push(ast.into());
} else {
// Non-operator that follows previously consumed operator.
last_token_was_ast = true;
last_token_was_opr = false;
output.push(item);
}
}
// Drain the remaining operators, folding right-to-left.
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(token_to_ast));
while let Some(opr) = operator_stack.pop() {
let opt_lhs = output.pop().map(token_to_ast);
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
}
if !output.is_empty() {
panic!(
"Internal error. Not all tokens were consumed while constructing the
expression."
);
}
syntax::Tree::opr_section_boundary(opt_rhs.unwrap()) // fixme
}
/// Convert a single item to a tree. Trees pass through; identifier tokens become ident nodes;
/// anything else is an internal error.
fn token_to_ast(elem: syntax::Item) -> syntax::Tree {
    match elem {
        syntax::Item::Tree(ast) => ast,
        syntax::Item::Token(token) => match token.variant {
            token::Variant::Ident(ident) => syntax::tree::Tree::ident(token.with_variant(ident)),
            _ => panic!(),
        },
    }
}
fn matched_segments_into_multi_segment_app<'s>(
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
matched_segments: NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>,
) -> syntax::Tree<'s> {
// FIXME: remove into_vec and use NonEmptyVec::mapped
let segments = matched_segments
.into_vec()
.into_iter()
.map(|segment| {
let header = segment.0;
let body =
(!segment.1.is_empty()).as_some_from(|| resolve_operator_precedence(segment.1));
syntax::tree::MultiSegmentAppSegment { header, body }
})
.collect_vec();
if let Ok(segments) = NonEmptyVec::try_from(segments) {
let prefix = prefix_tokens.map(resolve_operator_precedence);
syntax::Tree::multi_segment_app(prefix, segments)
} else {
panic!()
impl Default for Parser {
fn default() -> Self {
Self::new()
}
}
// =========================
// === Macro Definitions ===
// =========================
/// The `if ... then ... else ...` macro definition.
fn macro_if_then_else<'s>() -> macros::Definition<'s> {
macro_definition! {
("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
matched_segments_into_multi_segment_app
}
}
/// The `if ... then ...` macro definition.
fn macro_if_then<'s>() -> macros::Definition<'s> {
macro_definition! {
("if", Pattern::Everything, "then", Pattern::Everything)
matched_segments_into_multi_segment_app
}
}
/// The `(...)` grouping macro definition; the closing paren consumes nothing.
fn macro_group<'s>() -> macros::Definition<'s> {
macro_definition! {
("(", Pattern::Everything, ")", Pattern::Nothing)
matched_segments_into_multi_segment_app
}
}
/// The `... -> ...` lambda macro definition. The prefix is either a single non-spaced item or
/// everything to the left.
fn macro_lambda<'s>() -> macros::Definition<'s> {
let prefix = Pattern::Or(
Box::new(Pattern::Item(macros::pattern::Item { has_rhs_spacing: Some(false) })),
Box::new(Pattern::Everything),
);
macro_definition! {
(prefix, "->", Pattern::Everything)
matched_segments_into_multi_segment_app
}
}
/// All built-in macro definitions registered in one match tree.
fn builtin_macros() -> MacroMatchTree<'static> {
let mut macro_map = MacroMatchTree::default();
macro_map.register(macro_if_then());
macro_map.register(macro_if_then_else());
macro_map.register(macro_group());
macro_map.register(macro_lambda());
macro_map
}
// ============
// === Main ===
// ============
// fn main() {
// lexer::lexer_main();
// }
// =============
// === Tests ===
// =============
/// Ad-hoc entry point used for manual experiments with the lexer/parser pipeline.
fn main() {
init_tracing(TRACE);
// Alternative inputs kept for quick manual experiments:
// let str = "if a then b else c";
// let str = "if if * a + b * then y then b";
// let str = "* a + b *";
// let str = "* a + * b";
// let str = "(a) (b) c";
// let str = "if (a) then b";
// let str = "foo a-> b";
// let str = "a+b * c";
// let str = "foo if a then b";
// let str = "foo *(a)";
let str = "foo if a then b else c";
// Manual pipeline: lex the input, then run the macro resolver over the lexed output.
let mut lexer = Lexer::new(str);
lexer.run();
let root_macro_map = builtin_macros();
event!(TRACE, "Registered macros:\n{:#?}", root_macro_map);
let resolver = Resolver::new_root();
let ast = resolver.run(
&lexer,
&root_macro_map,
lexer.output.iter().map(|t| t.clone().into()).collect_vec(),
);
println!("{:#?}", ast);
println!("\n\n{}", ast.code());
// Second demo path using the high-level `Parser` API on a different input.
let ast = Parser::new().run("type Option (a) b c");
println!("\n\n==================\n\n");
lexer::main();
println!("{:#?}", ast);
}
#[cfg(test)]
mod tests {
use super::*;
use enso_parser_syntax_tree_builder::ast_builder;
/// Assert that parsing the input produces the AST described by the `ast_builder!` DSL.
macro_rules! test_parse {
($input:tt = {$($def:tt)*}) => {
assert_eq!(
Parser::new().run($input),
ast_builder! { $($def)* }
)
};
}
#[test]
fn test_expressions() {
test_parse! {"a" = {a}};
test_parse! {"a b" = {a b}};
// Application is left-associative: `a b c` parses as `(a b) c`.
test_parse! {"a b c" = {[a b] c}};
}
}
// ==================
// === Benchmarks ===
// ==================
#[cfg(test)]
mod benches {
use super::*;
extern crate test;
use test::Bencher;
/// Parse 1000 repetitions of a simple type definition. Parser construction happens outside
/// the timed closure, so only `run` is measured.
#[bench]
fn bench_parsing_type_defs(bencher: &mut Bencher) {
let reps = 1_000;
let str = "type Option a b c\n".repeat(reps);
let parser = Parser::new();
bencher.iter(move || {
parser.run(&str);
});
}
}
//
//
//
// // =============
// // === Tests ===
// // =============
//
// #[cfg(test)]
// mod test {
// use super::*;
//
// pub fn ident(repr: &str) -> syntax::Tree {
// match token::Variant::to_ident_unchecked(repr) {
// token::Variant::Ident(ident) => span::With::new_no_left_offset_no_start(
// Bytes::from(repr.len()),
// syntax::tree::Type::from(syntax::tree::Ident(ident)),
// ),
// _ => panic!(),
// }
// }
//
// pub fn app_segment(
// header: Token,
// body: Option<syntax::Tree>,
// ) -> syntax::tree::MultiSegmentAppSegment {
// syntax::tree::MultiSegmentAppSegment { header, body }
// }
// }
//
//
//
// #[cfg(test)]
// mod tests {
// use super::*;
// use enso_parser_syntax_tree_builder::ast_builder;
//
// fn one_shot(input: &str) -> syntax::Tree {
// let mut lexer = Lexer::new(input);
// lexer.run();
// let root_macro_map = builtin_macros();
// let resolver = Resolver::new_root();
// let ast = resolver.run(
// &lexer,
// &root_macro_map,
// lexer.output.borrow_vec().iter().map(|t| (*t).into()).collect_vec(),
// );
// ast
// }
//
// macro_rules! test_parse {
// ($input:tt = {$($def:tt)*}) => {
// assert_eq!(
// one_shot($input).with_removed_span_info(),
// ast_builder! { $($def)* }.with_removed_span_info()
// )
// };
// }
//
// #[test]
// fn test_expressions() {
// test_parse!("if a then b" = { {if} a {then} b });
// test_parse!("if a then b else c" = { {if} a {then} b {else} c });
// test_parse!("if a b then c d else e f" = { {if} a b {then} c d {else} e f });
// }
// }

View File

@ -64,3 +64,17 @@ impl std::borrow::Borrow<str> for Code<'_> {
&self.repr
}
}
impl<'s> std::ops::AddAssign<Code<'s>> for Code<'s> {
    #[inline(always)]
    fn add_assign(&mut self, other: Code<'s>) {
        // Appending code appends the underlying representation.
        self.repr += other.repr;
    }
}
impl<'s> std::ops::AddAssign<&Code<'s>> for Code<'s> {
    #[inline(always)]
    fn add_assign(&mut self, other: &Code<'s>) {
        // The borrowed representation is cloned and appended to the owned one.
        self.repr += other.repr.clone();
    }
}

View File

@ -79,6 +79,16 @@ impl<'s> Offset<'s> {
pub fn len(&self) -> Bytes {
self.code.len()
}
/// Check if the offset is 0.
pub fn is_empty(&self) -> bool {
self.len() == Bytes(0)
}
/// Check if the offset is bigger than 0.
pub fn exists(&self) -> bool {
self.len() > Bytes(0)
}
}
impl<'s> AsRef<Offset<'s>> for Offset<'s> {
@ -94,17 +104,31 @@ impl<'s> From<&'s str> for Offset<'s> {
}
}
impl<'s> std::ops::AddAssign<Offset<'s>> for Offset<'s> {
    fn add_assign(&mut self, other: Offset<'s>) {
        // Both the visible width and the underlying source code are accumulated.
        self.visible += other.visible;
        self.code += other.code;
    }
}
impl<'s> std::ops::AddAssign<&Offset<'s>> for Offset<'s> {
    fn add_assign(&mut self, other: &Offset<'s>) {
        // Same as the owned variant, but the code is appended by reference.
        self.visible += other.visible;
        self.code += &other.code;
    }
}
// ============
// === Span ===
// ============
/// A span of a given syntactic element (token or AST). It contains the left offset code and the
/// information about the length of the element. It does not contain the code of the element. This
/// is done in order to not duplicate the data. For example, some AST nodes contain a lot of tokens.
/// They need to remember their span, but they do not need to remember their code, because it is
/// already stored in the tokens.
/// A span of a given syntactic element (token or AST). It is a monoid that contains the left offset
/// code and the information about the length of the element. It does not contain the code of the
/// element. This is done in order to not duplicate the data. For example, some AST nodes contain a
/// lot of tokens. They need to remember their span, but they do not need to remember their code,
/// because it is already stored in the tokens.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Span<'s> {
@ -114,30 +138,30 @@ pub struct Span<'s> {
}
impl<'s> Span<'s> {
/// Extend the span with another one. The other span has to be the immediate neighbor of the
/// current span.
#[inline(always)]
pub fn extend<'a, T>(&mut self, other: T)
where
T: Into<Ref<'s, 'a>>,
's: 'a, {
let other = other.into();
self.code_length += other.left_offset.len() + other.code_length;
/// Constructor.
pub fn new() -> Self {
default()
}
/// Check whether the span is empty.
pub fn is_empty(&self) -> bool {
self.left_offset.is_empty() && self.code_length.is_zero()
}
/// Self consuming version of [`extend`].
pub fn extended<'a, T>(mut self, other: T) -> Self
where
T: Into<Ref<'s, 'a>>,
's: 'a, {
self.extend(other);
self
/// Check whether the span is only an offset, without the code part.
pub fn is_only_offset(&self) -> bool {
self.code_length.is_zero()
}
/// Get the [`Ref`] of the current span.
pub fn as_ref(&self) -> Ref<'_, 's> {
Ref { left_offset: &self.left_offset, code_length: self.code_length }
}
/// Add the item to this span. The item can be anything that implements the span [`Builder`].
#[allow(clippy::should_implement_trait)]
pub fn add<T: Builder<'s>>(self, elem: &mut T) -> Self {
Builder::add_to_span(elem, self)
}
}
impl<'s> AsRef<Span<'s>> for Span<'s> {
@ -146,6 +170,22 @@ impl<'s> AsRef<Span<'s>> for Span<'s> {
}
}
impl<'s, 'a, T> PartialSemigroup<T> for Span<'s>
where
    T: Into<Ref<'s, 'a>>,
    's: 'a,
{
    fn concat_mut(&mut self, other: T) {
        let other = other.into();
        match self.code_length.is_zero() {
            // An offset-only span acts as the identity element: absorb the other span wholesale.
            true => {
                self.left_offset += other.left_offset;
                self.code_length = other.code_length;
            }
            // Otherwise the other span follows this one, so its left offset becomes part of this
            // span's total length.
            false => self.code_length += other.left_offset.len() + other.code_length,
        }
    }
}
// ===========
@ -233,254 +273,98 @@ impl<'s> FirstChildTrim<'s> for Span<'s> {
#[macro_export]
macro_rules! span_builder {
($($arg:ident),* $(,)?) => {
$crate::source::span::Builder::new() $(.add(&mut $arg))* .span
$crate::source::span::Span::new() $(.add(&mut $arg))*
};
}
/// A marker struct for span building. The [`T`] parameter can be one of:
/// - [`()`], which means that the structure was not used yet.
/// - [`Option<Span<'s>>`], which means that the struct was used to build the span, however, we are
/// unsure whether the span is known in all the cases.
/// - [`Span<'s>`], which means that the total span can be always computed for the provided
/// parameters.
#[derive(Default, Debug)]
/// Elements implementing this trait can contain a span or multiple spans. If an element is added to
/// an empty span, it means that it is the first element in the span group. In such a case, the left
/// offset of the element will be removed and moved to the resulting span. See the docs of
/// [`FirstChildTrim`] to learn more.
#[allow(missing_docs)]
pub struct Builder<T = ()> {
pub span: T,
}
/// Constructor.
#[allow(non_snake_case)]
pub fn Builder<T>(span: T) -> Builder<T> {
Builder { span }
}
impl Builder<()> {
/// Constructor.
pub fn new() -> Self {
default()
}
}
impl<T> Builder<T> {
/// Add a new span to the builder.
#[inline(always)]
#[allow(clippy::should_implement_trait)]
pub fn add<S>(self, elem: &mut S) -> Builder<S::Output>
where S: Build<T> {
Builder(elem.build(self))
}
}
/// A trait defining the behavior of [`Builder`] for different types containing spans.
///
/// The trait definition is a little bit strange, consuming the builder as a parameter instead of
/// consuming it as self. This is done because otherwise Rust type checker goes into infinite
/// loops.
#[allow(missing_docs)]
pub trait Build<T> {
type Output;
fn build(&mut self, builder: Builder<T>) -> Self::Output;
pub trait Builder<'s> {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s>;
}
// === Instances ===
impl<'s> Build<()> for Span<'s> {
type Output = Span<'s>;
impl<'s> Builder<'s> for Span<'s> {
#[inline(always)]
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
self.trim_as_first_child()
}
}
impl<'s> Build<Span<'s>> for Span<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
builder.span.extended(&*self)
}
}
impl<'s> Build<Option<Span<'s>>> for Span<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
match builder.span {
Some(span) => span.extended(&*self),
None => self.trim_as_first_child(),
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
if span.is_only_offset() {
span.concat(&self.trim_as_first_child())
} else {
span.concat(&*self)
}
}
}
impl<'s> Build<()> for Tree<'s> {
type Output = Span<'s>;
impl<'s> Builder<'s> for Tree<'s> {
#[inline(always)]
fn build(&mut self, builder: Builder<()>) -> Self::Output {
Build::build(&mut self.span, builder)
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
Builder::add_to_span(&mut self.span, span)
}
}
impl<'s> Build<Span<'s>> for Tree<'s> {
type Output = Span<'s>;
impl<'s, T> Builder<'s> for Token<'s, T> {
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
builder.span.extended(&self.span)
}
}
impl<'s> Build<Option<Span<'s>>> for Tree<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
Build::build(&mut self.span, builder)
}
}
impl<'s, T> Build<()> for Token<'s, T> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
self.trim_as_first_child()
}
}
impl<'s, T> Build<Span<'s>> for Token<'s, T> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
builder.span.extended(self.span())
}
}
impl<'s, T> Build<Option<Span<'s>>> for Token<'s, T> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
match builder.span {
Some(span) => span.extended(self.span()),
None => self.trim_as_first_child(),
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
if span.is_only_offset() {
span.concat(&self.trim_as_first_child())
} else {
span.concat(self.span())
}
}
}
impl<T> Build<()> for Option<T>
where T: Build<()>
impl<'s, T> Builder<'s> for Option<T>
where T: Builder<'s>
{
type Output = Option<<T as Build<()>>::Output>;
#[inline(always)]
fn build(&mut self, builder: Builder<()>) -> Self::Output {
self.as_mut().map(|t| Build::build(t, builder))
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.as_mut().map(|t| Builder::add_to_span(t, span)).unwrap_or_default()
}
}
impl<'s, T> Build<Option<Span<'s>>> for Option<T>
where T: Build<Option<Span<'s>>>
{
type Output = Option<<T as Build<Option<Span<'s>>>>::Output>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
self.as_mut().map(|t| Build::build(t, builder))
}
}
impl<'s, T> Build<Span<'s>> for Option<T>
where T: Build<Span<'s>, Output = Span<'s>>
{
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
match self.as_mut() {
None => builder.span,
Some(t) => Build::build(t, builder),
}
}
}
impl<S, T, E> Build<S> for Result<T, E>
impl<'s, T, E> Builder<'s> for Result<T, E>
where
T: Build<S>,
E: Build<S, Output = <T as Build<S>>::Output>,
T: Builder<'s>,
E: Builder<'s>,
{
type Output = <T as Build<S>>::Output;
#[inline(always)]
fn build(&mut self, builder: Builder<S>) -> Self::Output {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
match self {
Ok(t) => Build::build(t, builder),
Err(t) => Build::build(t, builder),
Ok(t) => Builder::add_to_span(t, span),
Err(t) => Builder::add_to_span(t, span),
}
}
}
impl<S, T> Build<S> for NonEmptyVec<T>
where
T: Build<S>,
[T]: Build<<T as Build<S>>::Output>,
impl<'s, T> Builder<'s> for NonEmptyVec<T>
where T: Builder<'s>
{
type Output = <[T] as Build<T::Output>>::Output;
#[inline(always)]
fn build(&mut self, builder: Builder<S>) -> Self::Output {
let b = Build::build(self.first_mut(), builder);
Build::build(self.tail_mut(), Builder(b))
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.into_iter().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
}
}
impl<'s, T> Build<Span<'s>> for Vec<T>
where T: Build<Span<'s>, Output = Span<'s>>
impl<'s, T> Builder<'s> for Vec<T>
where T: Builder<'s>
{
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out))
}
out
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
}
}
impl<'s, T> Build<Option<Span<'s>>> for Vec<T>
where
T: Build<Option<Span<'s>>>,
T::Output: Into<Option<Span<'s>>>,
impl<'s, T> Builder<'s> for [T]
where T: Builder<'s>
{
type Output = Option<Span<'s>>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out)).into();
}
out
}
}
impl<'s, T> Build<Span<'s>> for [T]
where T: Build<Span<'s>, Output = Span<'s>>
{
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out));
}
out
}
}
impl<'s, T> Build<Option<Span<'s>>> for [T]
where
T: Build<Option<Span<'s>>>,
T::Output: Into<Option<Span<'s>>>,
{
type Output = Option<Span<'s>>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out)).into();
}
out
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
}
}

View File

@ -7,6 +7,7 @@
// ==============
pub mod item;
pub mod operator;
pub mod token;
pub mod tree;

View File

@ -18,12 +18,13 @@ use crate::syntax::*;
#[allow(missing_docs)]
pub enum Item<'s> {
Token(Token<'s>),
Block(Vec<Item<'s>>),
Tree(Tree<'s>),
}
impl<'s> Item<'s> {
/// Check whether the element is the provided token variant. Returns [`false`] if it was an
/// [`Tree`] node.
/// Check whether the element is the provided token variant. Returns [`false`] if it was not a
/// token.
pub fn is_variant(&self, variant: token::variant::VariantMarker) -> bool {
match self {
Item::Token(token) => token.is(variant),
@ -32,20 +33,23 @@ impl<'s> Item<'s> {
}
/// [`location::Span`] of the element.
pub fn span(&self) -> span::Ref<'_, 's> {
pub fn left_visible_offset(&self) -> VisibleOffset {
match self {
Self::Token(t) => t.span(),
Self::Tree(t) => t.span.as_ref(),
Self::Token(t) => t.span().left_offset.visible,
Self::Tree(t) => t.span.left_offset.visible,
Self::Block(t) => t.first().map(|t| t.left_visible_offset()).unwrap_or_default(),
}
}
}
impl<'s> FirstChildTrim<'s> for Item<'s> {
#[inline(always)]
fn trim_as_first_child(&mut self) -> Span<'s> {
/// Convert this item to a [`Tree`].
pub fn to_ast(self) -> Tree<'s> {
match self {
Self::Token(t) => t.trim_as_first_child(),
Self::Tree(t) => t.span.trim_as_first_child(),
Item::Token(token) => match token.variant {
token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
_ => todo!(),
},
Item::Tree(ast) => ast,
Item::Block(_) => todo!(),
}
}
}
@ -81,3 +85,35 @@ pub enum Ref<'s, 'a> {
Token(token::Ref<'s, 'a>),
Tree(&'a Tree<'s>),
}
// ======================
// === Variant Checks ===
// ======================
/// For each token variant, generates a function checking if the token is of the given variant. For
/// example, the `is_ident` function checks if the token is an identifier.
macro_rules! generate_variant_checks {
    (
        $(#$enum_meta:tt)*
        pub enum $enum:ident {
            $(
                $(#$variant_meta:tt)*
                $variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })?
            ),* $(,)?
        }
    ) => { paste!{
        impl<'s> Item<'s> {
            $(
                $(#[$($variant_meta)*])*
                #[allow(missing_docs)]
                // The generated method is named `is_<variant>` in snake_case, e.g. `is_ident`.
                pub fn [<is_ $variant:snake:lower>](&self) -> bool {
                    self.is_variant(token::variant::VariantMarker::$variant)
                }
            )*
        }
    }};
}
crate::with_token_definition!(generate_variant_checks());

View File

@ -0,0 +1,172 @@
//! Operator related functionalities.
use crate::prelude::*;
use crate::syntax;
use crate::syntax::token;
use crate::syntax::token::Token;
// ==================
// === Precedence ===
// ==================
// FIXME: The current implementation hard-codes precedence values and does not support precedence
// computations for any operator (according to the spec)
/// Return the precedence of the given operator. Higher values bind more tightly.
///
/// # Panics
/// Panics when the operator is not one of the hard-coded supported ones.
fn precedence_of(operator: &str) -> usize {
    match operator {
        // Additive operators bind weaker than multiplicative ones.
        "+" | "-" => 3,
        "*" => 7,
        _ => panic!("Operator not supported: {}", operator),
    }
}
/// An item with an assigned precedence.
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
struct WithPrecedence<T> {
    // The wrapped element; deref targets it so the wrapper is transparent in most uses.
    #[deref]
    #[deref_mut]
    elem: T,
    // Precedence of [`elem`]; a higher value means stronger binding (see [`precedence_of`]).
    precedence: usize,
}

impl<T> WithPrecedence<T> {
    /// Constructor.
    pub fn new(precedence: usize, elem: T) -> Self {
        Self { elem, precedence }
    }
}
/// Annotate expressions that should use spacing, because otherwise they are misleading. For
/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion
/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is
/// not needed. For example, `(.x)` is parsed as `(\t -> t.x)`, which is understandable.
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
    // TODO: It should be possible to make it faster by iterating over mut vec. To be checked.
    let mut annotated = Vec::with_capacity(items.len());
    for item in items {
        let item = match item {
            syntax::Item::Tree(ast) => {
                // Only a multi-segment application whose first segment header is not a symbol is
                // misleading without spacing.
                let misleading = matches!(
                    &*ast.variant,
                    syntax::tree::Variant::MultiSegmentApp(data)
                        if !data.segments.first().header.is_symbol()
                );
                let ast = if misleading {
                    ast.with_error("This expression cannot be used in a non-spaced equation.")
                } else {
                    ast
                };
                syntax::Item::Tree(ast)
            }
            // Tokens and blocks pass through unchanged.
            other => other,
        };
        annotated.push(item);
    }
    annotated
}
/// Take [`Item`] stream, resolve operators precedence and return the final AST. The precedence
/// resolution algorithm bases on the [Shunting yard algorithm](https://en.wikipedia.org/wiki/Shunting_yard_algorithm).
/// It is extended to handle operator sections.
#[inline(always)]
pub fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
    type Tokens<'s> = Vec<syntax::Item<'s>>;
    let mut flattened: Tokens<'s> = default();
    let mut no_space_group: Tokens<'s> = default();
    // Flush the current no-space group: a single item passes through unchanged, while a longer
    // group is first resolved recursively into a single sub-expression.
    let process_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
        let tokens = mem::take(no_space_group);
        if tokens.len() == 1 {
            flattened.extend(tokens);
        } else {
            let tokens = annotate_tokens_that_need_spacing(tokens);
            let ast = resolve_operator_precedence_internal(tokens);
            flattened.push(ast.into());
        }
    };
    for item in items {
        // Items that touch their predecessor (no visible space) accumulate in
        // [`no_space_group`]. The very first item also lands there regardless of its offset.
        if item.left_visible_offset().width_in_spaces == 0 || no_space_group.is_empty() {
            no_space_group.push(item)
        } else {
            // A spaced item terminates the current group, which is guaranteed non-empty here
            // (an empty group is handled by the branch above), and starts a new one. The
            // previous `else` branch covering an empty group was unreachable and was removed.
            process_no_space_group(&mut flattened, &mut no_space_group);
            no_space_group.push(item);
        }
    }
    if !no_space_group.is_empty() {
        process_no_space_group(&mut flattened, &mut no_space_group);
    }
    resolve_operator_precedence_internal(flattened)
}
/// Resolve operator precedence of an already-flattened item stream using the shunting-yard
/// algorithm, producing a single expression tree. Spacing-based grouping is assumed to have been
/// handled by the caller ([`resolve_operator_precedence`]).
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
    // Reverse-polish notation encoding.
    // `was_section_used` records whether any operator was missing an operand; in that case the
    // final result is wrapped in an operator-section boundary.
    let mut was_section_used = false;
    // Operand stack (the RPN output) and operator stack of the shunting-yard algorithm.
    let mut output: Vec<syntax::Item> = default();
    let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
    let mut last_token_was_ast = false;
    let mut last_token_was_opr = false;
    for item in items {
        if let syntax::Item::Token(token) = item.clone()
        && let token::Variant::Operator(opr) = token.variant {
            // Item is an operator.
            let last_token_was_opr_copy = last_token_was_opr;
            last_token_was_ast = false;
            last_token_was_opr = true;
            let prec = precedence_of(&token.code);
            let opr = Token(token.left_offset, token.code, opr);
            if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
                // Error. Multiple operators next to each other.
                match &mut prev_opr.elem {
                    Err(err) => err.operators.push(opr),
                    Ok(prev) => {
                        let operators = NonEmptyVec::new(prev.clone(),vec![opr]);
                        prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
                    }
                }
            } else {
                // Pop operators of higher or equal precedence and apply them to operands from
                // the output stack before pushing the new operator.
                while let Some(prev_opr) = operator_stack.last()
                && prev_opr.precedence >= prec
                && let Some(prev_opr) = operator_stack.pop()
                && let Some(rhs) = output.pop()
                {
                    // Prev operator in the [`operator_stack`] has a higher precedence.
                    let lhs = output.pop().map(|t| t.to_ast());
                    // A missing left operand means this is an operator section.
                    if lhs.is_none() { was_section_used = true; }
                    let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(rhs.to_ast()));
                    output.push(ast.into());
                }
                operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
            }
        } else if last_token_was_ast && let Some(lhs) = output.pop() {
            // Multiple non-operators next to each other.
            let lhs = lhs.to_ast();
            let rhs = item.to_ast();
            let ast = syntax::Tree::app(lhs, rhs);
            output.push(ast.into());
        } else {
            // Non-operator that follows previously consumed operator.
            last_token_was_ast = true;
            last_token_was_opr = false;
            output.push(item);
        }
    }
    // Drain the remaining operators, folding the operand stack into the final expression.
    let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(|t| t.to_ast()));
    while let Some(opr) = operator_stack.pop() {
        let opt_lhs = output.pop().map(|t| t.to_ast());
        if opt_lhs.is_none() || opt_rhs.is_none() {
            was_section_used = true;
        }
        opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
    }
    if !output.is_empty() {
        panic!("Internal error. Not all tokens were consumed while constructing the expression.");
    }
    // FIXME
    // NOTE(review): `opt_rhs` is `None` for an empty input stream, so this unwrap panics on
    // empty input — confirm callers never pass an empty item list.
    let out = opt_rhs.unwrap();
    // An operator with a missing operand denotes a section, which is marked explicitly in the
    // resulting tree.
    if was_section_used {
        syntax::Tree::opr_section_boundary(out)
    } else {
        out
    }
}

View File

@ -64,6 +64,9 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub error: Error,
pub ast: Tree<'s>,
},
Module {
pub statements: Vec<Tree<'s>>,
},
/// A simple identifier, like `foo` or `bar`.
Ident {
pub token: token::Ident<'s>,
@ -97,8 +100,13 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// `x + y + z` is the section body, and `Vector x y z` is the prefix of this function
/// application.
MultiSegmentApp {
pub prefix: Option<Tree<'s>>,
pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
},
TypeDef {
pub keyword: Token<'s>,
pub name: Tree<'s>,
pub params: Vec<Tree<'s>>,
}
}
}};}
@ -158,10 +166,9 @@ impl<'s> Tree<'s> {
}
}
impl<S> span::Build<S> for Error {
type Output = S;
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
builder.span
impl<'s> span::Builder<'s> for Error {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
span
}
}
@ -178,12 +185,9 @@ pub struct MultipleOperatorError<'s> {
pub operators: NonEmptyVec<token::Operator<'s>>,
}
impl<'s, S> span::Build<S> for MultipleOperatorError<'s>
where NonEmptyVec<token::Operator<'s>>: span::Build<S>
{
type Output = <NonEmptyVec<token::Operator<'s>> as span::Build<S>>::Output;
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
self.operators.build(builder)
impl<'s> span::Builder<'s> for MultipleOperatorError<'s> {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.operators.add_to_span(span)
}
}
@ -198,12 +202,9 @@ pub struct MultiSegmentAppSegment<'s> {
pub body: Option<Tree<'s>>,
}
impl<'s, S> span::Build<S> for MultiSegmentAppSegment<'s>
where Token<'s>: span::Build<S, Output = Span<'s>>
{
type Output = Span<'s>;
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
builder.add(&mut self.header).add(&mut self.body).span
impl<'s> span::Builder<'s> for MultiSegmentAppSegment<'s> {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
span.add(&mut self.header).add(&mut self.body)
}
}

View File

@ -1,5 +1,7 @@
//! Definition of a macro allowing building mock AST structures, mostly useful for testing.
// === Features ===
#![feature(proc_macro_span)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
@ -36,8 +38,8 @@ use std::mem;
/// braces. You can also place segments in quotes, like `{"("} a {")"}`.
#[proc_macro]
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
let output = expr(tokens);
let output = quote!(syntax::Tree::opr_section_boundary(#output));
let output = expr(tokens, None);
let output = quote!(crate::syntax::Tree::module(vec![#output]));
output.into()
}
@ -54,12 +56,13 @@ impl Segment {
}
}
fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
fn expr(tokens: proc_macro::TokenStream, parent_spacing: Option<usize>) -> TokenStream {
use proc_macro::TokenTree::*;
let mut output = quote! {};
let mut prefix: Option<TokenStream> = None;
let mut segments: Vec<Segment> = vec![];
let mut current_segment: Option<Segment> = None;
let mut last_column: Option<usize> = None;
let app_to_output = |output: &mut TokenStream, tok| {
if output.is_empty() {
*output = tok;
@ -67,12 +70,21 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
*output = quote! {syntax::Tree::app(#output,#tok)};
}
};
let mut inherited_spacing = parent_spacing.unwrap_or(0);
for token in tokens {
match token {
let spacing = last_column.map(|t| token.span().start().column - t).unwrap_or(0);
let spacing = spacing + inherited_spacing;
inherited_spacing = 0;
last_column = Some(token.span().end().column);
match &token {
// a b c ...
Ident(ident) => {
let ident = ident.to_string();
app_to_output(&mut output, quote! {test::ident(#ident)});
let spacing = " ".repeat(spacing);
app_to_output(
&mut output,
quote! {crate::syntax::Tree::ident(crate::syntax::Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident)))},
);
}
// {if} a {then} b {else} c
// {"("} a {")"}
@ -83,12 +95,15 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
} else if !output.is_empty() {
prefix = Some(mem::take(&mut output));
}
let body = group.stream().to_string();
current_segment = Some(Segment::new(quote! {Token::ident(#body)})); // Token::symbol
let ident = group.stream().to_string();
let spacing = " ".repeat(spacing);
current_segment = Some(Segment::new(
quote! { Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident).into())},
)); // Token::symbol
}
// a [b c] d
Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => {
app_to_output(&mut output, expr(group.stream()));
app_to_output(&mut output, expr(group.stream(), Some(spacing)));
}
_ => panic!("Unsupported token {:?}", token),
}
@ -114,10 +129,7 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
.unwrap_or_else(|| quote! {None});
let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()};
output = quote! {
span::With::new_no_left_offset_no_start(
Bytes::from(0),
syntax::tree::Type::MultiSegmentApp(Box::new(syntax::tree::MultiSegmentApp {prefix: #pfx, segments: #segments}))
)
syntax::Tree::multi_segment_app (#pfx, #segments)
}
}
output

View File

@ -49,6 +49,12 @@ impl<T> NonEmptyVec<T> {
NonEmptyVec { elems }
}
/// Length of the vector.
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
self.elems.len()
}
/// Construct a `NonEmptyVec` containing a single element.
///
/// # Examples
@ -207,7 +213,7 @@ impl<T> NonEmptyVec<T> {
/// assert_eq!(*vec.first(), 0);
/// ```
pub fn first(&self) -> &T {
self.elems.first().expect("The NonEmptyVec always has an item in it.")
self.elems.first().unwrap_or_else(|| unreachable!())
}
/// Obtain a mutable reference to the head of the `NonEmptyVec`.
@ -220,7 +226,7 @@ impl<T> NonEmptyVec<T> {
/// assert_eq!(*vec.first_mut(), 0);
/// ```
pub fn first_mut(&mut self) -> &mut T {
self.elems.first_mut().expect("The NonEmptyVec always has an item in it.")
self.elems.first_mut().unwrap_or_else(|| unreachable!())
}
/// Get the tail reference.
@ -243,7 +249,7 @@ impl<T> NonEmptyVec<T> {
/// assert_eq!(*vec.last(), 2)
/// ```
pub fn last(&self) -> &T {
self.get(self.len() - 1).expect("There is always one element in a NonEmptyVec.")
self.get(self.len() - 1).unwrap_or_else(|| unreachable!())
}
/// Obtain a mutable reference to the last element in the `NonEmptyVec`.
@ -256,7 +262,7 @@ impl<T> NonEmptyVec<T> {
/// assert_eq!(*vec.last_mut(), 2)
/// ```
pub fn last_mut(&mut self) -> &mut T {
self.get_mut(self.len() - 1).expect("There is always one element in a NonEmptyVec.")
self.get_mut(self.len() - 1).unwrap_or_else(|| unreachable!())
}
/// Create a draining iterator that removes the specified range in the vector and yields the

View File

@ -17,7 +17,7 @@ use std::iter::Extend;
/// Mutable Semigroup definition. Impls should satisfy the associativity law:
/// `x.concat(y.concat(z)) = x.concat(y).concat(z)`, in symbolic form:
/// `x <> (y <> z) = (x <> y) <> z`
pub trait PartialSemigroup<T>: Clone {
pub trait PartialSemigroup<T = Self>: Clone {
/// An associative operation.
fn concat_mut(&mut self, other: T);