mirror of
https://github.com/enso-org/enso.git
synced 2024-11-23 08:08:34 +03:00
Parser improvements (#3549)
This commit is contained in:
parent
43a893cae6
commit
7c0330290f
@ -81,7 +81,7 @@ impl<T> NonEmpty<T> {
|
||||
}
|
||||
|
||||
/// Convert this list to a vector.
|
||||
fn to_vec(&self) -> Vec<&T> {
|
||||
pub fn to_vec(&self) -> Vec<&T> {
|
||||
let mut out = vec![&self.head];
|
||||
let mut list = self.tail();
|
||||
loop {
|
||||
|
@ -731,8 +731,15 @@ const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[
|
||||
];
|
||||
|
||||
impl<'s> Lexer<'s> {
|
||||
/// Run the lexer. Returns [`true`] if the process succeeded.
|
||||
pub fn run(&mut self) -> bool {
|
||||
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
|
||||
/// hierarchy).
|
||||
pub fn run(self) -> Vec<Item<'s>> {
|
||||
build_block_hierarchy(self.run_flat())
|
||||
}
|
||||
|
||||
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
|
||||
/// as start and end tokens).
|
||||
pub fn run_flat(mut self) -> Vec<Token<'s>> {
|
||||
self.spaces_after_lexeme();
|
||||
let mut any_parser_matched = true;
|
||||
while any_parser_matched {
|
||||
@ -744,10 +751,53 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
}
|
||||
}
|
||||
self.current_char == None
|
||||
if self.current_char != None {
|
||||
panic!("Internal error. Lexer did not consume all input.");
|
||||
}
|
||||
while self.end_block().is_some() {
|
||||
let block_end = self.marker_token(token::Variant::block_end());
|
||||
self.submit_token(block_end);
|
||||
}
|
||||
let tokens = self.output;
|
||||
event!(TRACE, "Tokens:\n{:#?}", tokens);
|
||||
tokens
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
|
||||
/// as start and end tokens).
|
||||
pub fn run_flat(input: &'_ str) -> Vec<Token<'_>> {
|
||||
Lexer::new(input).run_flat()
|
||||
}
|
||||
|
||||
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
|
||||
/// hierarchy).
|
||||
pub fn run(input: &'_ str) -> Vec<Item<'_>> {
|
||||
Lexer::new(input).run()
|
||||
}
|
||||
|
||||
/// Convert the flat token stream into hierarchical one. The token variants [`BlockStart`] and
|
||||
/// [`BlockEnd`] will be replaced with [`Item::Group`].
|
||||
pub fn build_block_hierarchy(tokens: Vec<Token<'_>>) -> Vec<Item<'_>> {
|
||||
let mut stack = vec![];
|
||||
let mut out: Vec<Item<'_>> = vec![];
|
||||
for token in tokens {
|
||||
match token.variant {
|
||||
token::Variant::BlockStart(_) => stack.push(mem::take(&mut out)),
|
||||
token::Variant::BlockEnd(_) => {
|
||||
let new_out = stack.pop().unwrap();
|
||||
let block = mem::replace(&mut out, new_out);
|
||||
out.push(Item::Block(block));
|
||||
}
|
||||
_ => out.push(token.into()),
|
||||
}
|
||||
}
|
||||
if !stack.is_empty() {
|
||||
panic!("Internal error. Block start token not paired with block end token.");
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
@ -756,9 +806,7 @@ impl<'s> Lexer<'s> {
|
||||
|
||||
/// Lexer main function used for ad-hoc testing during development.
|
||||
pub fn main() {
|
||||
let mut lexer = Lexer::new("\n foo\n bar");
|
||||
println!("{:?}", lexer.run());
|
||||
println!("{:#?}", lexer.output.iter().collect_vec());
|
||||
println!("{:#?}", run_flat("\n foo\n bar"));
|
||||
}
|
||||
|
||||
/// Test utils for fast mock tokens creation.
|
||||
@ -797,9 +845,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn test_lexer<'s>(input: &'s str, expected: Vec<Token<'s>>) {
|
||||
let mut lexer = Lexer::new(input);
|
||||
assert!(lexer.run());
|
||||
assert_eq!(lexer.output.iter().collect_vec(), expected);
|
||||
assert_eq!(run_flat(input), expected);
|
||||
}
|
||||
|
||||
fn lexer_case_idents<'s>(idents: &[&'s str]) -> Vec<(&'s str, Vec<Token<'s>>)> {
|
||||
@ -828,7 +874,7 @@ mod tests {
|
||||
ident_(" ", "foo"),
|
||||
newline_("", "\n"),
|
||||
ident_(" ", "bar"),
|
||||
// FIXME: here should be block end
|
||||
block_end_("", ""),
|
||||
]),
|
||||
]);
|
||||
}
|
||||
@ -1081,10 +1127,8 @@ mod benches {
|
||||
let str = "test ".repeat(reps);
|
||||
|
||||
b.iter(move || {
|
||||
let mut lexer = Lexer::new(&str);
|
||||
let ok = lexer.run();
|
||||
assert!(ok);
|
||||
assert_eq!(lexer.output.len(), reps);
|
||||
let lexer = Lexer::new(&str);
|
||||
assert_eq!(lexer.run().len(), reps);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -3,22 +3,23 @@
|
||||
//! utilities allowing macros management.
|
||||
//! Read the docs of the main module of this crate to learn more about the parsing process.
|
||||
|
||||
//
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
use crate::syntax::token::Token;
|
||||
|
||||
use enso_data_structures::im_list;
|
||||
use pattern::Pattern;
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod built_in;
|
||||
pub mod expand;
|
||||
pub mod pattern;
|
||||
pub mod resolver;
|
||||
|
||||
pub use pattern::Pattern;
|
||||
|
||||
|
||||
|
||||
@ -38,21 +39,13 @@ pub mod pattern;
|
||||
#[derivative(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Definition<'a> {
|
||||
/// The pattern in this field will be matched from right to left, unlike patterns in segments.
|
||||
pub rev_prefix_pattern: Option<Pattern>,
|
||||
pub segments: im_list::NonEmpty<SegmentDefinition<'a>>,
|
||||
pub segments: im_list::NonEmpty<SegmentDefinition<'a>>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub body: Rc<Body>,
|
||||
pub body: Rc<DefinitionBody>,
|
||||
}
|
||||
|
||||
/// All the tokens matched as prefix of the resolved macro.
|
||||
pub type PrefixTokens<'s> = Option<Vec<syntax::Item<'s>>>;
|
||||
|
||||
/// All the sections of the resolved macro.
|
||||
pub type MatchedSections<'s> = NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>;
|
||||
|
||||
/// A function that transforms matched macro tokens into [`syntax::Tree`].
|
||||
pub type Body = dyn for<'s> Fn(PrefixTokens<'s>, MatchedSections<'s>) -> syntax::Tree<'s>;
|
||||
pub type DefinitionBody = dyn for<'s> Fn(pattern::MatchedSegments<'s>) -> syntax::Tree<'s>;
|
||||
|
||||
|
||||
|
||||
@ -93,18 +86,29 @@ impl<'a> SegmentDefinition<'a> {
|
||||
/// ```
|
||||
#[macro_export]
|
||||
macro_rules! macro_definition {
|
||||
( ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
|
||||
$crate::macro_definition!{[None] ($($section, $pattern),*) $body}
|
||||
($def:tt) => {
|
||||
$crate::macro_definition!{$def $crate::macros::matched_segments_into_multi_segment_app}
|
||||
};
|
||||
( ($prefix:expr, $($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
|
||||
$crate::macro_definition!{[Some($prefix)] ($($section, $pattern),*) $body}
|
||||
};
|
||||
( [$prefix:expr] ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
|
||||
macros::Definition {
|
||||
rev_prefix_pattern: $prefix,
|
||||
(($($section:literal, $pattern:expr),* $(,)?) $body:expr) => {
|
||||
$crate::macros::Definition {
|
||||
segments: im_list::NonEmpty::try_from(vec![
|
||||
$(macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(),
|
||||
$($crate::macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(),
|
||||
body: Rc::new($body),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
fn matched_segments_into_multi_segment_app(
|
||||
matched_segments: NonEmptyVec<pattern::MatchedSegment<'_>>,
|
||||
) -> syntax::Tree<'_> {
|
||||
let segments = matched_segments.mapped(|segment| {
|
||||
let header = segment.header;
|
||||
let tokens = segment.result.tokens();
|
||||
let body = (!tokens.is_empty())
|
||||
.as_some_from(|| syntax::operator::resolve_operator_precedence(tokens));
|
||||
syntax::tree::MultiSegmentAppSegment { header, body }
|
||||
});
|
||||
syntax::Tree::multi_segment_app(segments)
|
||||
}
|
||||
|
84
lib/rust/parser/src/macros/built_in.rs
Normal file
84
lib/rust/parser/src/macros/built_in.rs
Normal file
@ -0,0 +1,84 @@
|
||||
//! Built-in macro definitions.
|
||||
|
||||
use crate::macros::pattern::*;
|
||||
use crate::macros::*;
|
||||
|
||||
use crate::syntax::operator;
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === Built-in macros ===
|
||||
// =======================
|
||||
|
||||
/// All built-in macro definitions.
|
||||
pub fn all() -> resolver::SegmentMap<'static> {
|
||||
let mut macro_map = resolver::SegmentMap::default();
|
||||
// macro_map.register(if_then());
|
||||
// macro_map.register(if_then_else());
|
||||
macro_map.register(group());
|
||||
macro_map.register(type_def());
|
||||
macro_map
|
||||
}
|
||||
|
||||
/// If-then-else macro definition.
|
||||
pub fn if_then_else<'s>() -> Definition<'s> {
|
||||
crate::macro_definition! {("if", everything(), "then", everything(), "else", everything())}
|
||||
}
|
||||
|
||||
/// If-then macro definition.
|
||||
pub fn if_then<'s>() -> Definition<'s> {
|
||||
crate::macro_definition! {("if", everything(), "then", everything())}
|
||||
}
|
||||
|
||||
/// Group macro definition.
|
||||
pub fn group<'s>() -> Definition<'s> {
|
||||
crate::macro_definition! {("(", everything(), ")", nothing())}
|
||||
}
|
||||
|
||||
/// New type definition macro definition.
|
||||
pub fn type_def<'s>() -> Definition<'s> {
|
||||
use pattern::*;
|
||||
#[rustfmt::skip]
|
||||
let pattern =
|
||||
identifier() / "name" % "type name" >>
|
||||
many(identifier() % "type parameter" / "param") % "type parameters" >>
|
||||
block(
|
||||
many(identifier() / "constructor") % "type constructors" >>
|
||||
everything()
|
||||
) % "type definition body";
|
||||
// let pattern2 = Everything;
|
||||
crate::macro_definition! {
|
||||
("type", pattern)
|
||||
type_def_body
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: The comments in the code were left in order to allow easy debugging of this struct. They
|
||||
// should be removed in the future.
|
||||
fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
|
||||
let segment = matched_segments.to_vec().pop().unwrap();
|
||||
// println!(">>>");
|
||||
// println!("{:#?}", segment);
|
||||
// println!(">>>");
|
||||
let match_tree = segment.result.into_var_map();
|
||||
// println!("{:#?}", match_tree);
|
||||
// println!("\n\n------------- 1");
|
||||
|
||||
let mut v = match_tree.view();
|
||||
let name = &v.query("name").unwrap()[0];
|
||||
let name = operator::resolve_operator_precedence(name.clone());
|
||||
// println!("{:#?}", name);
|
||||
// println!("\n\n------------- 2");
|
||||
|
||||
let params = v.nested().query("param").unwrap();
|
||||
// println!("{:#?}", params);
|
||||
// println!("\n\n------------- 3");
|
||||
|
||||
let params = params
|
||||
.iter()
|
||||
.map(|tokens| operator::resolve_operator_precedence(tokens.clone()))
|
||||
.collect_vec();
|
||||
// println!("{:#?}", params);
|
||||
syntax::Tree::type_def(segment.header, name, params)
|
||||
}
|
356
lib/rust/parser/src/macros/expand.rs
Normal file
356
lib/rust/parser/src/macros/expand.rs
Normal file
@ -0,0 +1,356 @@
|
||||
//! Macro expansion utilities. Allow expanding macro variables in the same way as Rust macro rules
//! do.
|
||||
|
||||
use crate::macros::pattern::*;
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === VarMap ===
|
||||
// ==============
|
||||
|
||||
/// A nested map of pattern variables (elements using the [`Pattern::Named`] variant). The validator
|
||||
/// should be instantiated either with the [`EnabledValidator`] in case of user-defined
|
||||
/// macros or with the [`DisabledValidator`] in case of built-in macros. The latter is
|
||||
/// faster but does not provide nice error messages and allows for illegal code expansion, like
|
||||
/// using two variables that have the same repetition depth, but have different parents (e.g. the
|
||||
/// variables `$b` and `$e` from the example below).
|
||||
///
|
||||
/// To better understand how it works, let's consider the following pattern definition (using the
|
||||
/// Rust macro rules syntax for simplicity):
|
||||
///
|
||||
/// ```text
|
||||
/// $x:tt
|
||||
/// $(
|
||||
/// $a:tt
|
||||
/// $(
|
||||
/// $b:tt
|
||||
/// $c:tt
|
||||
/// )*
|
||||
///
|
||||
/// $d:tt
|
||||
/// $(
|
||||
/// $e:tt
|
||||
/// $f:tt
|
||||
/// )*
|
||||
/// )*
|
||||
/// ```
|
||||
///
|
||||
/// The following [`VarMap`] will be generated (some fields simplified for clarity):
|
||||
///
|
||||
/// ```text
|
||||
/// VarMap {
|
||||
/// map: [
|
||||
/// ("x", VarMapEntry {
|
||||
/// tokens: ["x"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// }}
|
||||
/// }),
|
||||
/// ],
|
||||
/// nested: Some(VarMap {
|
||||
/// map: [
|
||||
/// ("a", VarMapEntry {
|
||||
/// tokens: ["a"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["a","d"], parent: Some (VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// })
|
||||
/// }}
|
||||
/// }),
|
||||
/// ("d", VarMapEntry {
|
||||
/// tokens: ["d"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["a","d"], parent: Some (VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// })
|
||||
/// }}
|
||||
/// }),
|
||||
/// ],
|
||||
/// nested: Some(VarMap {
|
||||
/// map: [
|
||||
/// ("b", VarMapEntry {
|
||||
/// tokens: ["b"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["b","c"], parent: Some (VarScope {
|
||||
/// locals: ["a","d"], parent: Some (VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// })
|
||||
/// })
|
||||
/// }}
|
||||
/// }),
|
||||
/// ("c", VarMapEntry {
|
||||
/// tokens: ["c"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["b","c"], parent: Some (VarScope {
|
||||
/// locals: ["a","d"], parent: Some (VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// })
|
||||
/// })
|
||||
/// }}
|
||||
/// }),
|
||||
/// ("e", VarMapEntry {
|
||||
/// tokens: ["e"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["e","f"], parent: Some (VarScope {
|
||||
/// locals: ["a","d"], parent: Some (VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// })
|
||||
/// })
|
||||
/// }}
|
||||
/// }),
|
||||
/// ("f", VarMapEntry {
|
||||
/// tokens: ["f"],
|
||||
/// validator: EnabledValidator { scope: VarScope {
|
||||
/// locals: ["e","f"], parent: Some (VarScope {
|
||||
/// locals: ["a","d"], parent: Some (VarScope {
|
||||
/// locals: ["x"], parent: None
|
||||
/// })
|
||||
/// })
|
||||
/// }}
|
||||
/// }),
|
||||
/// ],
|
||||
/// })
|
||||
/// })
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Validators can be queried during code expansion to check whether these variables belong to
|
||||
/// the same repetition scope.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct VarMap<'s, V> {
|
||||
nested: Option<Box<VarMap<'s, V>>>,
|
||||
map: HashMap<String, VarMapEntry<'s, V>>,
|
||||
}
|
||||
|
||||
/// Entry of the [`VarMap`] map.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
struct VarMapEntry<'s, V> {
|
||||
pub tokens: Vec<Vec<syntax::Item<'s>>>,
|
||||
pub validator: V,
|
||||
}
|
||||
|
||||
impl<'s, V> VarMapEntry<'s, V> {
|
||||
/// Constructor.
|
||||
pub fn new(validator: V, tokens: Vec<Vec<syntax::Item<'s>>>) -> Self {
|
||||
Self { validator, tokens }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Match<'s> {
|
||||
/// Convert the match into checked [`VarMap`].
|
||||
pub fn into_var_map(self) -> VarMap<'s, EnabledValidator> {
|
||||
let mut tree = VarMap::default();
|
||||
self.build_var_map(&mut tree, &default());
|
||||
tree
|
||||
}
|
||||
|
||||
/// Convert the match into unchecked [`VarMap`]. The unchecked version has better performance,
|
||||
/// but does not provide nice user error messages and allows for illegal code expansion. Read
|
||||
/// the docs of [`VarMap`] to learn more.
|
||||
pub fn into_unchecked_var_map(self) -> VarMap<'s, DisabledValidator> {
|
||||
let mut tree = VarMap::default();
|
||||
self.build_var_map(&mut tree, &default());
|
||||
tree
|
||||
}
|
||||
|
||||
fn build_var_map<V: Default + Validator>(self, tree: &mut VarMap<'s, V>, validator: &V) {
|
||||
match self {
|
||||
Self::Everything(_) => {}
|
||||
Self::Nothing => {}
|
||||
Self::Identifier(_) => {}
|
||||
Self::Expected(_, _) => {}
|
||||
Self::NotBlock(_) => {}
|
||||
Self::Or(t) => match *t {
|
||||
OrMatch::First(first) => first.build_var_map(tree, validator),
|
||||
OrMatch::Second(second) => second.build_var_map(tree, validator),
|
||||
},
|
||||
Self::Seq(first, second) => {
|
||||
first.build_var_map(tree, validator);
|
||||
second.build_var_map(tree, validator);
|
||||
}
|
||||
Self::Many(matches) => {
|
||||
if tree.nested.is_none() {
|
||||
let nested = VarMap::<'s, V>::default();
|
||||
tree.nested = Some(Box::new(nested));
|
||||
}
|
||||
let nested_validator = V::default();
|
||||
nested_validator.set_parent(validator);
|
||||
let nested = tree.nested.as_mut().unwrap();
|
||||
for m in matches {
|
||||
m.build_var_map(nested, &nested_validator);
|
||||
}
|
||||
}
|
||||
Self::Named(name, t) => {
|
||||
validator.insert_local_var(&name);
|
||||
tree.map
|
||||
.entry(name)
|
||||
.or_insert_with(|| VarMapEntry::new(validator.clone_ref(), default()))
|
||||
.tokens
|
||||
.push(t.tokens());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === Validator ===
|
||||
// =================
|
||||
|
||||
/// Validator used to check if the macro generation is correct. See the definition of [`VarMap`] to
|
||||
/// learn more.
|
||||
#[allow(missing_docs)]
|
||||
pub trait Validator: PartialEq + Default + CloneRef {
|
||||
fn check(&self, name: &str) -> bool;
|
||||
fn parent(&self) -> Option<Self>;
|
||||
fn set_parent(&self, parent: &Self);
|
||||
fn insert_local_var(&self, var: &str);
|
||||
}
|
||||
|
||||
/// Disabled validator. See the docs of [`VarMap`] to learn more.
|
||||
#[derive(Copy, Clone, CloneRef, Debug, Default, PartialEq)]
|
||||
pub struct DisabledValidator;
|
||||
|
||||
/// Enabled validator. See the docs of [`VarMap`] to learn more.
|
||||
#[derive(Clone, CloneRef, Debug, Default)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct EnabledValidator {
|
||||
scope: Rc<RefCell<VarScope>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
struct VarScope {
|
||||
locals: HashSet<String>,
|
||||
parent: Option<EnabledValidator>,
|
||||
}
|
||||
|
||||
impl PartialEq for EnabledValidator {
|
||||
fn eq(&self, other: &EnabledValidator) -> bool {
|
||||
Rc::ptr_eq(&self.scope, &other.scope)
|
||||
}
|
||||
}
|
||||
|
||||
impl Validator for EnabledValidator {
    /// Check whether `name` was registered as a local variable of this scope.
    #[inline(always)]
    fn check(&self, name: &str) -> bool {
        let scope = self.scope.borrow();
        scope.locals.contains(name)
    }

    /// Get a new reference to the validator of the parent scope, if a parent was set.
    #[inline(always)]
    fn parent(&self) -> Option<Self> {
        let scope = self.scope.borrow();
        scope.parent.as_ref().map(|parent| parent.clone_ref())
    }

    /// Set the validator of the parent scope, replacing the previous one.
    #[inline(always)]
    fn set_parent(&self, parent: &Self) {
        let parent = parent.clone_ref();
        self.scope.borrow_mut().parent = Some(parent);
    }

    /// Register `var` as a local variable of this scope.
    #[inline(always)]
    fn insert_local_var(&self, var: &str) {
        let mut scope = self.scope.borrow_mut();
        scope.locals.insert(var.to_string());
    }
}
|
||||
|
||||
impl Validator for DisabledValidator {
|
||||
#[inline(always)]
|
||||
fn check(&self, _name: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn parent(&self) -> Option<Self> {
|
||||
None
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn set_parent(&self, _parent: &Self) {}
|
||||
|
||||
#[inline(always)]
|
||||
fn insert_local_var(&self, _var: &str) {}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === VarMapView ===
|
||||
// ==================
|
||||
|
||||
/// A view for a [`VarMap`]. It allows focusing on a specific repetition scope and querying for
|
||||
/// variables there. See the docs of [`VarMap`] to learn more.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct VarMapView<'t, 's, V> {
|
||||
tree: Option<&'t VarMap<'s, V>>,
|
||||
resolved_validator: Option<V>,
|
||||
parent_validator_to_check: Option<V>,
|
||||
}
|
||||
|
||||
impl<'t, 's, V> VarMapView<'t, 's, V> {
|
||||
/// Constructor.
|
||||
pub fn new(tree: &'t VarMap<'s, V>) -> Self {
|
||||
let resolved_validator = default();
|
||||
let parent_validator_to_check = default();
|
||||
Self { tree: Some(tree), resolved_validator, parent_validator_to_check }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
    /// Get the view for the nested repetition scope. The validator resolved in this view (if any)
    /// becomes the parent validator that the first query of the nested view is checked against.
    /// If there is no nested scope, the returned view contains no variables.
    pub fn nested(&self) -> Self {
        Self {
            tree: self.tree.and_then(|tree| tree.nested.as_deref()),
            resolved_validator: None,
            parent_validator_to_check: self.resolved_validator.as_ref().map(|v| v.clone_ref()),
        }
    }
}
|
||||
|
||||
impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
|
||||
/// Query for a variable.
|
||||
pub fn query(&mut self, name: &str) -> Option<&'t Vec<Vec<syntax::Item<'s>>>> {
|
||||
self.tree.and_then(|t| {
|
||||
t.map.get(name).map(|entry| {
|
||||
match &self.resolved_validator {
|
||||
Some(validator) =>
|
||||
if !validator.check(name) {
|
||||
todo!("Report nice error that the name does not belong to the scope.")
|
||||
},
|
||||
None => {
|
||||
let resolved_validator = entry.validator.clone_ref();
|
||||
if let Some(parent_validator_to_check) = &self.parent_validator_to_check {
|
||||
let mut ok = false;
|
||||
let mut validator = resolved_validator.clone();
|
||||
loop {
|
||||
if &validator == parent_validator_to_check {
|
||||
ok = true;
|
||||
break;
|
||||
} else {
|
||||
match validator.parent() {
|
||||
Some(p) => validator = p,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
if !ok {
|
||||
todo!("Report nice error that the name does not belong to the same scope as previous variables.")
|
||||
}
|
||||
self.parent_validator_to_check = None;
|
||||
}
|
||||
self.resolved_validator = Some(resolved_validator);
|
||||
}
|
||||
}
|
||||
&entry.tokens
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, V> VarMap<'s, V> {
|
||||
/// Create a new view for this var map.
|
||||
pub fn view<'t>(&'t self) -> VarMapView<'t, 's, V> {
|
||||
VarMapView::new(self)
|
||||
}
|
||||
}
|
@ -5,121 +5,365 @@ use crate::prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
|
||||
use std::collections::VecDeque;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Pattern ===
|
||||
// ===============
|
||||
|
||||
/// Pattern used to validate incoming token stream against expected macro input.
|
||||
/// Patterns are used to validate incoming token stream against expected macro input.
|
||||
///
|
||||
/// The idea is similar to patterns used in `macro_rules` definitions in Rust. There are a few
|
||||
/// differences though:
|
||||
/// 1. This pattern implementation exposes different matchers and operations.
|
||||
/// 2. This macro implementation never attaches types to tokens, which means that every defined
|
||||
/// The idea is similar to patterns used in macro rules in Rust with a few differences:
|
||||
/// 1. These patterns allow for other constructs than macro rules.
|
||||
/// 2. The macro resolution never reifies tokens as given types, which means that every defined
|
||||
/// pattern behaves like a TT-muncher in Rust.
|
||||
#[derive(Clone, Debug, Deref)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Pattern {
|
||||
#[deref]
|
||||
pub data: Rc<PatternData>,
|
||||
pub matches_empty_input: bool,
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
/// Constructor.
|
||||
pub fn new(data: PatternData, matches_empty_input: bool) -> Self {
|
||||
Self { data: Rc::new(data), matches_empty_input }
|
||||
}
|
||||
}
|
||||
|
||||
/// Variants of [`Pattern`].
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Pattern {
|
||||
pub enum PatternData {
|
||||
/// Consume all items, till the end of the token stream.
|
||||
Everything,
|
||||
/// Consume nothing.
|
||||
Nothing,
|
||||
/// Consume items matching the first pattern. If the match was unsuccessful, the second match
|
||||
/// will be tried.
|
||||
Or(Box<Pattern>, Box<Pattern>),
|
||||
/// Consume a single item if it matches the configuration.
|
||||
Item(Item),
|
||||
Or(Pattern, Pattern),
|
||||
Seq(Pattern, Pattern),
|
||||
/// Consume many times (zero or more) the given pattern. If the given pattern succeeds on empty
|
||||
/// input, it will be repeated as long as it consumes any input.
|
||||
Many(Pattern),
|
||||
/// Consume an identifier.
|
||||
Identifier,
|
||||
/// Consume a block and run the provided pattern in its body.
|
||||
Block(Pattern),
|
||||
/// Indicator of an error. The provided pattern is used to consume input when an error occurs.
|
||||
/// For example, if you want to consume an identifier, but the identifier is not found, you can
|
||||
/// use this pattern to consume any token instead and mark it as invalid.
|
||||
Expected(String, Pattern),
|
||||
/// Named pattern. Mainly used for splicing the code in the macro definition body.
|
||||
Named(String, Pattern),
|
||||
/// Anything that is not a block.
|
||||
NotBlock,
|
||||
}
|
||||
|
||||
/// Item pattern configuration.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Item {
|
||||
/// Check whether the token has spaces on right-hand-side. The [`None`] value means that the
|
||||
/// condition would not be checked.
|
||||
pub has_rhs_spacing: Option<bool>,
|
||||
/// Constructor.
|
||||
pub fn everything() -> Pattern {
|
||||
Pattern::new(PatternData::Everything, true)
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === ResolutionError ===
|
||||
// =======================
|
||||
|
||||
/// Pattern resolution error.
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct ResolutionError<T> {
|
||||
/// All the incoming tokens. The resolver consumes vector of tokens and returns it back in case
|
||||
/// an error happened.
|
||||
pub tokens: Vec<T>,
|
||||
pub message: String,
|
||||
/// Constructor.
|
||||
pub fn identifier() -> Pattern {
|
||||
Pattern::new(PatternData::Identifier, false)
|
||||
}
|
||||
|
||||
impl<T> ResolutionError<T> {
|
||||
/// Constructor.
|
||||
pub fn new(tokens: Vec<T>, message: impl Into<String>) -> Self {
|
||||
let message = message.into();
|
||||
Self { tokens, message }
|
||||
}
|
||||
/// Constructor.
|
||||
pub fn not_block() -> Pattern {
|
||||
Pattern::new(PatternData::NotBlock, false)
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// ==================
|
||||
/// === Resolution ===
|
||||
/// ==================
|
||||
|
||||
/// Successful pattern match result.
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Match<T> {
|
||||
/// All the matched tokens.
|
||||
pub matched: Vec<T>,
|
||||
/// The rest of the token stream that was not needed for the successful pattern match.
|
||||
pub rest: Vec<T>,
|
||||
/// Constructor.
|
||||
pub fn nothing() -> Pattern {
|
||||
Pattern::new(PatternData::Nothing, true)
|
||||
}
|
||||
|
||||
impl<T> Match<T> {
|
||||
/// Constructor.
|
||||
pub fn new(matched: Vec<T>, rest: Vec<T>) -> Self {
|
||||
Self { matched, rest }
|
||||
}
|
||||
/// Constructor.
|
||||
pub fn or(fst: Pattern, snd: Pattern) -> Pattern {
|
||||
let matches_empty_input = fst.matches_empty_input || snd.matches_empty_input;
|
||||
Pattern::new(PatternData::Or(fst, snd), matches_empty_input)
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn seq(fst: Pattern, snd: Pattern) -> Pattern {
|
||||
let matches_empty_input = fst.matches_empty_input && snd.matches_empty_input;
|
||||
Pattern::new(PatternData::Seq(fst, snd), matches_empty_input)
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn many(item: Pattern) -> Pattern {
|
||||
Pattern::new(PatternData::Many(item), true)
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn block(body: Pattern) -> Pattern {
|
||||
Pattern::new(PatternData::Block(body), false)
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn expected(message: impl Into<String>, item: Pattern) -> Pattern {
|
||||
let matches_empty_input = item.matches_empty_input;
|
||||
Pattern::new(PatternData::Expected(message.into(), item), matches_empty_input)
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn named(message: impl Into<String>, item: Pattern) -> Pattern {
|
||||
let matches_empty_input = item.matches_empty_input;
|
||||
Pattern::new(PatternData::Named(message.into(), item), matches_empty_input)
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
/// Match the token stream with this pattern.
|
||||
pub fn resolve<'s, T: TryAsRef<syntax::Item<'s>>>(
|
||||
&self,
|
||||
mut input: Vec<T>,
|
||||
has_spacing_at_end: bool,
|
||||
right_to_left_mode: bool,
|
||||
) -> Result<Match<T>, ResolutionError<T>> {
|
||||
/// Repeat the current pattern multiple times.
|
||||
pub fn many(self) -> Self {
|
||||
many(self)
|
||||
}
|
||||
|
||||
/// Match self or consume any token that is not a block and mark it as invalid.
|
||||
pub fn expect(self, message: impl Into<String>) -> Self {
|
||||
self | expected(message, not_block() | nothing())
|
||||
}
|
||||
|
||||
/// Attach a label to the current pattern. Shortcut for `named(label, self)`.
|
||||
pub fn named(self, label: impl Into<String>) -> Self {
|
||||
named(label, self)
|
||||
}
|
||||
}
|
||||
|
||||
/// The syntax `pattern1 >> pattern2` is a shortcut for `seq(pattern1, pattern2)`.
|
||||
impl std::ops::Shr for Pattern {
|
||||
type Output = Pattern;
|
||||
fn shr(self, rhs: Pattern) -> Self::Output {
|
||||
seq(self, rhs)
|
||||
}
|
||||
}
|
||||
|
||||
/// The syntax `pattern1 | pattern2` is a shortcut for `or(pattern1, pattern2)`.
|
||||
impl std::ops::BitOr for Pattern {
|
||||
type Output = Pattern;
|
||||
fn bitor(self, rhs: Pattern) -> Self::Output {
|
||||
or(self, rhs)
|
||||
}
|
||||
}
|
||||
|
||||
/// The syntax `pattern % "message"` is a shortcut for `pattern.expect("message")`.
|
||||
impl<T: Into<String>> std::ops::Rem<T> for Pattern {
|
||||
type Output = Pattern;
|
||||
fn rem(self, message: T) -> Self::Output {
|
||||
self.expect(message)
|
||||
}
|
||||
}
|
||||
|
||||
/// The syntax `pattern / "label"` is a shortcut for `pattern.named("label")`.
|
||||
impl<T: Into<String>> Div<T> for Pattern {
|
||||
type Output = Pattern;
|
||||
fn div(self, message: T) -> Self::Output {
|
||||
named(message, self)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Match ===
|
||||
// =============
|
||||
|
||||
/// The result of applying [`Pattern`] to a token stream. After a successful match, a variant of the
|
||||
/// [`Pattern`] is transformed to variant of [`Match`] of the same name.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Match<'s> {
|
||||
Everything(VecDeque<syntax::Item<'s>>),
|
||||
Nothing,
|
||||
Or(Box<OrMatch<'s>>),
|
||||
Seq(Box<Match<'s>>, Box<Match<'s>>),
|
||||
Many(Vec<Match<'s>>),
|
||||
Identifier(syntax::Item<'s>),
|
||||
Expected(String, Box<Match<'s>>),
|
||||
Named(String, Box<Match<'s>>),
|
||||
NotBlock(syntax::Item<'s>),
|
||||
}
|
||||
|
||||
/// The result of the [`PatternData::Or`] resolution.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum OrMatch<'s> {
|
||||
First(Match<'s>),
|
||||
Second(Match<'s>),
|
||||
}
|
||||
|
||||
impl<'s> Match<'s> {
|
||||
/// Constructor.
|
||||
pub fn or(m: OrMatch<'s>) -> Self {
|
||||
Self::Or(Box::new(m))
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn seq(first: Match<'s>, second: Match<'s>) -> Self {
|
||||
Self::Seq(Box::new(first), Box::new(second))
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn expected(expected: impl Into<String>, second: Match<'s>) -> Self {
|
||||
Self::Expected(expected.into(), Box::new(second))
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn named(label: impl Into<String>, second: Match<'s>) -> Self {
|
||||
Self::Named(label.into(), Box::new(second))
|
||||
}
|
||||
|
||||
/// Get all tokens of the match.
|
||||
pub fn tokens(self) -> Vec<syntax::Item<'s>> {
|
||||
match self {
|
||||
Self::Everything => Ok(Match::new(input, default())),
|
||||
Self::Nothing => Ok(Match::new(default(), input)),
|
||||
Self::Or(fst, snd) => fst
|
||||
.resolve(input, has_spacing_at_end, right_to_left_mode)
|
||||
.or_else(|err| snd.resolve(err.tokens, has_spacing_at_end, right_to_left_mode)),
|
||||
Self::Item(item) => match input.first() {
|
||||
None => Err(ResolutionError::new(input, "Expected an item.")),
|
||||
Some(first) => match first.try_as_ref() {
|
||||
None => Err(ResolutionError::new(input, "Expected an item.")),
|
||||
Some(_) => match item.has_rhs_spacing {
|
||||
Some(spacing) =>
|
||||
if right_to_left_mode {
|
||||
if spacing == has_spacing_at_end {
|
||||
Ok(Match::new(vec![input.pop_front().unwrap()], input))
|
||||
} else {
|
||||
Err(ResolutionError::new(input, "Expected an item."))
|
||||
}
|
||||
} else {
|
||||
todo!()
|
||||
},
|
||||
None => Ok(Match::new(vec![input.pop_front().unwrap()], input)),
|
||||
},
|
||||
},
|
||||
Self::Everything(tokens) => tokens.into(),
|
||||
Self::Nothing => default(),
|
||||
Self::Seq(fst, snd) => fst.tokens().extended(snd.tokens()),
|
||||
Self::Many(t) => t.into_iter().flat_map(|s| s.tokens()).collect(),
|
||||
Self::Identifier(ident) => vec![ident],
|
||||
Self::Expected(_, item) => item.tokens(),
|
||||
Self::Named(_, item) => item.tokens(),
|
||||
Self::NotBlock(item) => vec![item],
|
||||
Self::Or(t) => match *t {
|
||||
OrMatch::First(fst) => fst.tokens(),
|
||||
OrMatch::Second(snd) => snd.tokens(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === MatchResult ===
|
||||
// ===================
|
||||
|
||||
/// Result of a successful pattern resolution. It contains a match and the remaining token stream.
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MatchResult<'s> {
|
||||
pub matched: Match<'s>,
|
||||
pub rest: VecDeque<syntax::Item<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> MatchResult<'s> {
|
||||
/// Constructor.
|
||||
pub fn new(matched: Match<'s>, rest: VecDeque<syntax::Item<'s>>) -> Self {
|
||||
Self { matched, rest }
|
||||
}
|
||||
|
||||
/// Map the match with the provided function.
|
||||
pub fn map(mut self, f: impl FnOnce(Match<'s>) -> Match<'s>) -> Self {
|
||||
self.matched = f(self.matched);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === MatchedSegment ===
|
||||
// ======================
|
||||
|
||||
/// List of matched segments.
|
||||
pub type MatchedSegments<'s> = NonEmptyVec<MatchedSegment<'s>>;
|
||||
|
||||
/// A matched segment. See the [`macros::resolver::Segment`] to learn more.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MatchedSegment<'s> {
|
||||
pub header: syntax::Token<'s>,
|
||||
pub result: Match<'s>,
|
||||
}
|
||||
|
||||
impl<'s> MatchedSegment<'s> {
|
||||
/// Constructor.
|
||||
pub fn new(header: syntax::Token<'s>, result: Match<'s>) -> Self {
|
||||
Self { header, result }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==========================
|
||||
// === Pattern Resolution ===
|
||||
// ==========================
|
||||
|
||||
impl Pattern {
|
||||
/// Resolve the pattern. Return [`MatchResult`] if the pattern is matched, otherwise all the
|
||||
/// input tokens.
|
||||
pub fn resolve<'s>(
|
||||
&self,
|
||||
mut input: VecDeque<syntax::Item<'s>>,
|
||||
) -> Result<MatchResult<'s>, VecDeque<syntax::Item<'s>>> {
|
||||
match &*self.data {
|
||||
PatternData::Expected(msg, item) =>
|
||||
item.resolve(input).map(|t| t.map(|s| Match::expected(msg, s))),
|
||||
PatternData::Named(msg, item) =>
|
||||
item.resolve(input).map(|t| t.map(|s| Match::named(msg, s))),
|
||||
PatternData::Everything => Ok(MatchResult::new(Match::Everything(input), default())),
|
||||
PatternData::Nothing => Ok(MatchResult::new(Match::Nothing, input)),
|
||||
PatternData::Or(fst, snd) => fst
|
||||
.resolve(input)
|
||||
.map(|t| t.map(|s| Match::or(OrMatch::First(s))))
|
||||
.or_else(|t| snd.resolve(t).map(|t| t.map(|s| Match::or(OrMatch::Second(s))))),
|
||||
PatternData::Seq(fst, snd) => fst
|
||||
.resolve(input)
|
||||
.and_then(|t| snd.resolve(t.rest).map(|s| s.map(|x| Match::seq(t.matched, x)))),
|
||||
PatternData::Many(pat) => {
|
||||
let mut out = vec![];
|
||||
let mut input_len = input.len();
|
||||
loop {
|
||||
match pat.resolve(input) {
|
||||
Err(rest) => {
|
||||
input = rest;
|
||||
break;
|
||||
}
|
||||
Ok(t) => {
|
||||
input = t.rest;
|
||||
if pat.matches_empty_input {
|
||||
let no_input_consumed = input_len == input.len();
|
||||
if no_input_consumed {
|
||||
break;
|
||||
}
|
||||
input_len = input.len();
|
||||
}
|
||||
out.push(t.matched);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(MatchResult::new(Match::Many(out), input))
|
||||
}
|
||||
PatternData::Identifier => match input.pop_front() {
|
||||
None => Err(default()),
|
||||
Some(t) =>
|
||||
if t.is_variant(syntax::token::variant::VariantMarker::Ident) {
|
||||
Ok(MatchResult::new(Match::Identifier(t), input))
|
||||
} else {
|
||||
input.push_front(t);
|
||||
Err(input)
|
||||
},
|
||||
},
|
||||
PatternData::Block(body) => match input.pop_front() {
|
||||
Some(syntax::Item::Block(tokens)) =>
|
||||
body.resolve(tokens.into_iter().rev().map_into().collect()),
|
||||
Some(t) => {
|
||||
input.push_front(t);
|
||||
Err(input)
|
||||
}
|
||||
None => Err(default()),
|
||||
},
|
||||
PatternData::NotBlock => match input.pop_front() {
|
||||
Some(t @ syntax::Item::Block(_)) => {
|
||||
input.push_front(t);
|
||||
Err(input)
|
||||
}
|
||||
None => Err(default()),
|
||||
Some(t) => Ok(MatchResult::new(Match::NotBlock(t), input)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
387
lib/rust/parser/src/macros/resolver.rs
Normal file
387
lib/rust/parser/src/macros/resolver.rs
Normal file
@ -0,0 +1,387 @@
|
||||
//! Macro resolver implementation. Refer to the docs of the main parser module to learn more.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::macros;
|
||||
use crate::macros::pattern;
|
||||
use crate::syntax;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::token::Token;
|
||||
|
||||
use enso_data_structures::im_list;
|
||||
use enso_data_structures::im_list::List;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === SegmentMap ===
|
||||
// ==================
|
||||
|
||||
/// A tree-like structure encoding potential macro matches. The keys are code representations of
|
||||
/// [`macros::SegmentDefinition`] headers (first tokens of sections). Each key is associated with
|
||||
/// one or more [`SegmentEntry`], which stories a list of required subsequent segments
|
||||
/// and a macro definition that should be used when all the segments will be matched. For example,
|
||||
/// after matching the "if" keyword, this struct will contain one entry "then" with two values, one
|
||||
/// for the required "else" section, and one without a required section (for the "if ... then ..."
|
||||
/// case).
|
||||
#[derive(Default, Debug, Deref, DerefMut)]
|
||||
pub struct SegmentMap<'s> {
|
||||
map: HashMap<&'s str, NonEmptyVec<SegmentEntry<'s>>>,
|
||||
}
|
||||
|
||||
/// Partially matched macro info. See docs of [`SegmentMap`] to learn more.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct SegmentEntry<'s> {
|
||||
/// All the segment headers that are required for the macro definition to be used.
|
||||
pub required_segments: List<macros::SegmentDefinition<'s>>,
|
||||
/// Definition of the macro that should be used when all the required segments will be matched.
|
||||
/// It contains [`Pattern`] definition for every segment that will be used after all the
|
||||
/// segment tokens are discovered.
|
||||
pub definition: Rc<macros::Definition<'s>>,
|
||||
}
|
||||
|
||||
|
||||
impl<'a> SegmentMap<'a> {
|
||||
/// Register a new macro definition in this macro tree.
|
||||
pub fn register(&mut self, definition: macros::Definition<'a>) {
|
||||
let header = definition.segments.head.header;
|
||||
let entry = SegmentEntry {
|
||||
required_segments: definition.segments.tail.clone(),
|
||||
definition: Rc::new(definition),
|
||||
};
|
||||
if let Some(node) = self.get_mut(header) {
|
||||
node.push(entry);
|
||||
} else {
|
||||
self.insert(header, NonEmptyVec::singleton(entry));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============================
|
||||
// === PartiallyMatchedMacro ===
|
||||
// =============================
|
||||
|
||||
/// Partially matched macro. It contains the current section being matched, all the sections matched
|
||||
/// so far, and the macro definition in case the macro was fully matched. Please note that the
|
||||
/// definition can change during macro resolution. For example, after finding both "if" and "then"
|
||||
/// sections, the definition of the "if ... then ..." macro will be used. However, after finding the
|
||||
/// "else" token, the definition will be replaced with the "if ... then ... else ..." macro one.
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct PartiallyMatchedMacro<'s> {
|
||||
pub current_segment: MatchedSegment<'s>,
|
||||
pub resolved_segments: Vec<MatchedSegment<'s>>,
|
||||
pub possible_next_segments: SegmentMap<'s>,
|
||||
pub matched_macro_def: Option<Rc<macros::Definition<'s>>>,
|
||||
}
|
||||
|
||||
impl<'a> PartiallyMatchedMacro<'a> {
|
||||
/// A new macro resolver with a special "root" segment definition. The "root" segment does not
|
||||
/// exist in the source code, it is simply the whole expression being parsed. It is treated
|
||||
/// as a macro in order to unify the algorithms.
|
||||
pub fn new_root() -> Self {
|
||||
let current_segment = MatchedSegment::new(Token("", "", token::Variant::newline()));
|
||||
let resolved_segments = default();
|
||||
let possible_next_segments = default();
|
||||
let matched_macro_def = Some(Rc::new(macros::Definition {
|
||||
segments: im_list::NonEmpty::singleton(macros::SegmentDefinition {
|
||||
header: "__ROOT__",
|
||||
pattern: pattern::everything(),
|
||||
}),
|
||||
body: Rc::new(|v| {
|
||||
// Taking the first segment, hardcoded above.
|
||||
let body = v.pop().0.result;
|
||||
syntax::operator::resolve_operator_precedence(body.tokens())
|
||||
}),
|
||||
}));
|
||||
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === MatchedSegment ===
|
||||
// ======================
|
||||
|
||||
/// A macro segment which header was matched. Its body contains a list of tokens and nested macros
|
||||
/// that were found. Please note that the body tokens are not matched against the pattern yet.
|
||||
/// Because of that, the macro nesting is incorrect for patterns that do not consume all tokens till
|
||||
/// the end of the stream. For example, the expression `(a) (b)` will be matched in such a way, that
|
||||
/// the macro `(b)` will be part of the body of the `)` segment of the `(a)` macro. This will be
|
||||
/// restructured in the patter matching phase. See the parser module docs to learn more about this
|
||||
/// process.
|
||||
#[derive(Debug)]
|
||||
pub struct MatchedSegment<'s> {
|
||||
header: Token<'s>,
|
||||
body: Vec<ItemOrPartiallyMatchedMacro<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> MatchedSegment<'s> {
|
||||
/// Constructor.
|
||||
pub fn new(header: Token<'s>) -> Self {
|
||||
let body = default();
|
||||
Self { header, body }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================================
|
||||
// === ItemOrPartiallyMatchedMacro ===
|
||||
// ===================================
|
||||
|
||||
/// One of [`syntax::Item`] or [`PartiallyMatchedMacro`]. Used during macro resolution when some
|
||||
/// items are already resolved as macros, and some are not yet. For example, after matching the
|
||||
/// expression `(a) x (b)`, the `x` token and the `(b)` macro will be items of the body of the last
|
||||
/// segment of the `(a)` macro.
|
||||
#[derive(Debug, From)]
|
||||
#[allow(missing_docs)]
|
||||
enum ItemOrPartiallyMatchedMacro<'s> {
|
||||
SyntaxItem(syntax::Item<'s>),
|
||||
PartiallyMatchedMacro(PartiallyMatchedMacro<'s>),
|
||||
}
|
||||
|
||||
impl<'s> TryAsRef<syntax::Item<'s>> for ItemOrPartiallyMatchedMacro<'s> {
    /// Borrow the contained syntax item, or return `None` if this is a partially matched macro.
    fn try_as_ref(&self) -> Option<&syntax::Item<'s>> {
        match self {
            Self::SyntaxItem(item) => Some(item),
            Self::PartiallyMatchedMacro(_) => None,
        }
    }
}
|
||||
|
||||
impl<'s> TryAsRef<PartiallyMatchedMacro<'s>> for ItemOrPartiallyMatchedMacro<'s> {
    /// Borrow the contained partially matched macro, or return `None` if this is a syntax item.
    fn try_as_ref(&self) -> Option<&PartiallyMatchedMacro<'s>> {
        match self {
            Self::PartiallyMatchedMacro(partial_macro) => Some(partial_macro),
            Self::SyntaxItem(_) => None,
        }
    }
}
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Resolver ===
|
||||
// ================
|
||||
|
||||
/// Macro resolver capable of resolving nested macro usages. See the docs of the main parser module
|
||||
/// to learn more about the macro resolution steps.
|
||||
#[derive(Debug)]
|
||||
pub struct Resolver<'s> {
|
||||
current_macro: PartiallyMatchedMacro<'s>,
|
||||
macro_stack: Vec<PartiallyMatchedMacro<'s>>,
|
||||
}
|
||||
|
||||
/// Result of the macro resolution step.
|
||||
#[derive(Clone, Debug)]
|
||||
enum Step<'s> {
|
||||
NewSegmentStarted,
|
||||
NormalToken(syntax::Item<'s>),
|
||||
MacroStackPop(syntax::Item<'s>),
|
||||
}
|
||||
|
||||
impl<'s> Resolver<'s> {
|
||||
/// New resolver with a special "root" segment definition allowing parsing arbitrary
|
||||
/// expressions.
|
||||
pub fn new_root() -> Self {
|
||||
let current_macro = PartiallyMatchedMacro::new_root();
|
||||
let macro_stack = default();
|
||||
Self { current_macro, macro_stack }
|
||||
}
|
||||
|
||||
fn replace_current_with_parent_macro(&mut self, mut parent_macro: PartiallyMatchedMacro<'s>) {
|
||||
mem::swap(&mut parent_macro, &mut self.current_macro);
|
||||
let child_macro = parent_macro;
|
||||
self.current_macro.current_segment.body.push(child_macro.into());
|
||||
}
|
||||
|
||||
/// Pop the macro stack if the current token is reserved. For example, when matching the
|
||||
/// `if a if b then c then d` expression, the token `then` after the token `c` will be
|
||||
/// considered reserved and the macro resolution of `if b then c` will be popped from the stack.
|
||||
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<PartiallyMatchedMacro<'s>> {
|
||||
let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr));
|
||||
reserved.and_option_from(|| self.macro_stack.pop())
|
||||
}
|
||||
|
||||
/// Run the resolver. Returns the resolved AST.
|
||||
pub fn run(
|
||||
mut self,
|
||||
root_macro_map: &SegmentMap<'s>,
|
||||
tokens: &mut iter::Peekable<std::vec::IntoIter<syntax::Item<'s>>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
event!(TRACE, "Running macro resolver. Registered macros:\n{:#?}", root_macro_map);
|
||||
let mut opt_item: Option<syntax::Item<'s>>;
|
||||
macro_rules! next_token {
|
||||
() => {{
|
||||
opt_item = tokens.next();
|
||||
if let Some(token) = opt_item.as_ref() {
|
||||
event!(TRACE, "New token {:#?}", token);
|
||||
}
|
||||
}};
|
||||
}
|
||||
macro_rules! trace_state {
|
||||
() => {
|
||||
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
|
||||
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
|
||||
};
|
||||
}
|
||||
next_token!();
|
||||
while let Some(token) = opt_item && !token.is_newline() {
|
||||
let step_result = match token {
|
||||
syntax::Item::Token(token) => self.process_token(root_macro_map, token),
|
||||
_ => Step::NormalToken(token),
|
||||
};
|
||||
match step_result {
|
||||
Step::MacroStackPop(item) => {
|
||||
trace_state!();
|
||||
opt_item = Some(item)
|
||||
}
|
||||
Step::NewSegmentStarted => {
|
||||
trace_state!();
|
||||
next_token!()
|
||||
}
|
||||
Step::NormalToken(item) => {
|
||||
self.current_macro.current_segment.body.push(item.into());
|
||||
trace_state!();
|
||||
next_token!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
event!(TRACE, "Finishing resolution. Popping the macro stack.");
|
||||
while let Some(parent_macro) = self.macro_stack.pop() {
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
}
|
||||
|
||||
trace_state!();
|
||||
let (tree, rest) = Self::resolve(self.current_macro);
|
||||
if !rest.is_empty() {
|
||||
panic!(
|
||||
"Internal error. Not all tokens were consumed by the macro resolver:\n{:#?}",
|
||||
rest
|
||||
);
|
||||
}
|
||||
tree
|
||||
}
|
||||
|
||||
fn process_token(&mut self, root_macro_map: &SegmentMap<'s>, token: Token<'s>) -> Step<'s> {
|
||||
let repr = &**token.code;
|
||||
if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) {
|
||||
event!(TRACE, "Entering next segment of the current macro.");
|
||||
let mut new_match_tree =
|
||||
Self::move_to_next_segment(&mut self.current_macro.matched_macro_def, subsegments);
|
||||
let mut current_segment = MatchedSegment::new(token);
|
||||
mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments);
|
||||
mem::swap(&mut self.current_macro.current_segment, &mut current_segment);
|
||||
self.current_macro.resolved_segments.push(current_segment);
|
||||
Step::NewSegmentStarted
|
||||
} else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) {
|
||||
event!(TRACE, "Next token reserved by parent macro. Resolving current macro.");
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
Step::MacroStackPop(token.into())
|
||||
} else if let Some(segments) = root_macro_map.get(repr) {
|
||||
event!(TRACE, "Starting a new nested macro resolution.");
|
||||
let mut matched_macro_def = default();
|
||||
let mut current_macro = PartiallyMatchedMacro {
|
||||
current_segment: MatchedSegment { header: token, body: default() },
|
||||
resolved_segments: default(),
|
||||
possible_next_segments: Self::move_to_next_segment(
|
||||
&mut matched_macro_def,
|
||||
segments,
|
||||
),
|
||||
matched_macro_def,
|
||||
};
|
||||
mem::swap(&mut self.current_macro, &mut current_macro);
|
||||
self.macro_stack.push(current_macro);
|
||||
Step::NewSegmentStarted
|
||||
} else {
|
||||
event!(TRACE, "Consuming token as current segment body.");
|
||||
Step::NormalToken(token.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the [`PartiallyMatchedMacro`]. Returns the AST and the non-used tokens. For example,
|
||||
/// the resolution of the `(a)` macro in the `(a) x (b)` expression will return the `(a)` AST
|
||||
/// and the `x` and `(b)` items (already resolved).
|
||||
fn resolve(m: PartiallyMatchedMacro<'s>) -> (syntax::Tree<'s>, VecDeque<syntax::Item<'s>>) {
|
||||
let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment);
|
||||
let resolved_segments = segments.mapped(|segment| {
|
||||
let mut items: VecDeque<syntax::Item<'s>> = default();
|
||||
for item in segment.body {
|
||||
match item {
|
||||
ItemOrPartiallyMatchedMacro::SyntaxItem(t) => items.push_back(t),
|
||||
ItemOrPartiallyMatchedMacro::PartiallyMatchedMacro(unresolved_macro) => {
|
||||
let (resolved_macro, unused_items) = Self::resolve(unresolved_macro);
|
||||
items.push_back(resolved_macro.into());
|
||||
items.extend(unused_items);
|
||||
}
|
||||
}
|
||||
}
|
||||
(segment.header, items)
|
||||
});
|
||||
|
||||
if let Some(macro_def) = m.matched_macro_def {
|
||||
let mut def_segments = macro_def.segments.to_vec().into_iter();
|
||||
let mut pattern_matched_segments = resolved_segments.mapped(|(header, items)| {
|
||||
let err = "Internal error. Macro definition and match segments count mismatch.";
|
||||
let def = def_segments.next().unwrap_or_else(|| panic!("{}", err));
|
||||
(header, def.pattern.resolve(items))
|
||||
});
|
||||
|
||||
// Moving not pattern-matched tokens of the last segment to parent.
|
||||
let mut not_used_items_of_last_segment = VecDeque::new();
|
||||
match &mut pattern_matched_segments.last_mut().1 {
|
||||
Err(rest) => mem::swap(&mut not_used_items_of_last_segment, rest),
|
||||
Ok(segment) => mem::swap(&mut not_used_items_of_last_segment, &mut segment.rest),
|
||||
}
|
||||
|
||||
let pattern_matched_segments =
|
||||
pattern_matched_segments.mapped(|(header, match_result)| match match_result {
|
||||
Ok(result) => {
|
||||
if !result.rest.is_empty() {
|
||||
todo!("Mark unmatched tokens as unexpected.");
|
||||
}
|
||||
pattern::MatchedSegment::new(header, result.matched)
|
||||
}
|
||||
Err(_unmatched_items) => todo!("Mark unmatched tokens as unexpected."),
|
||||
});
|
||||
|
||||
let out = (macro_def.body)(pattern_matched_segments);
|
||||
(out, not_used_items_of_last_segment)
|
||||
} else {
|
||||
todo!("Macro was not matched with any known macro definition. This should return an AST node indicating invalid match.")
|
||||
}
|
||||
}
|
||||
|
||||
/// Move the resolution to the next segment. Takes possible next segments and merges them in a
|
||||
/// new [`SegmentMap`]. If after moving to the next segment there is a macro definition that is
|
||||
/// fully matched, its definition will be recorded.
|
||||
fn move_to_next_segment(
|
||||
matched_macro_def: &mut Option<Rc<macros::Definition<'s>>>,
|
||||
possible_segments: &[SegmentEntry<'s>],
|
||||
) -> SegmentMap<'s> {
|
||||
*matched_macro_def = None;
|
||||
let mut new_section_tree = SegmentMap::default();
|
||||
for segment_entry in possible_segments {
|
||||
if let Some(first) = segment_entry.required_segments.head() {
|
||||
let tail = segment_entry.required_segments.tail().cloned().unwrap_or_default();
|
||||
let definition = segment_entry.definition.clone_ref();
|
||||
let entry = SegmentEntry { required_segments: tail, definition };
|
||||
if let Some(node) = new_section_tree.get_mut(&first.header) {
|
||||
node.push(entry);
|
||||
} else {
|
||||
new_section_tree.insert(first.header, NonEmptyVec::singleton(entry));
|
||||
}
|
||||
} else {
|
||||
*matched_macro_def = Some(segment_entry.definition.clone_ref());
|
||||
}
|
||||
}
|
||||
new_section_tree
|
||||
}
|
||||
}
|
@ -100,15 +100,6 @@
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::source::VisibleOffset;
|
||||
|
||||
use enso_data_structures::im_list;
|
||||
use enso_data_structures::im_list::List;
|
||||
use lexer::Lexer;
|
||||
use macros::pattern::Pattern;
|
||||
use syntax::token;
|
||||
use syntax::token::Token;
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
@ -130,682 +121,98 @@ pub mod prelude {
|
||||
|
||||
|
||||
|
||||
// =================================
|
||||
// === SyntaxItemOrMacroResolver ===
|
||||
// =================================
|
||||
// ==============
|
||||
// === Parser ===
|
||||
// ==============
|
||||
|
||||
/// One of [`syntax::Item`] or [`MacroResolver`].
|
||||
#[derive(Debug)]
|
||||
/// Enso parser. See the module documentation to learn more about how it works.
|
||||
#[allow(missing_docs)]
|
||||
pub enum SyntaxItemOrMacroResolver<'s> {
|
||||
SyntaxItem(syntax::Item<'s>),
|
||||
MacroResolver(MacroResolver<'s>),
|
||||
}
|
||||
|
||||
impl<'s> From<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
|
||||
fn from(t: syntax::Item<'s>) -> Self {
|
||||
Self::SyntaxItem(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<MacroResolver<'s>> for SyntaxItemOrMacroResolver<'s> {
|
||||
fn from(t: MacroResolver<'s>) -> Self {
|
||||
Self::MacroResolver(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> TryAsRef<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
    /// Borrow the contained syntax item, or return `None` if this is a macro resolver.
    fn try_as_ref(&self) -> Option<&syntax::Item<'s>> {
        match self {
            Self::SyntaxItem(item) => Some(item),
            Self::MacroResolver(_) => None,
        }
    }
}
|
||||
|
||||
|
||||
// ======================
|
||||
// === MacroMatchTree ===
|
||||
// ======================
|
||||
|
||||
/// A tree-like structure encoding potential macro matches. The keys are representations of tokens
|
||||
/// that can be matched. For example, the key could be "if" or "->". Each key is associated with one
|
||||
/// or more [`PartiallyMatchedMacro`], which stories a list of required segments and a macro
|
||||
/// definition in case all the segments were matched. For example, for the "if" key, there can be
|
||||
/// two required segment lists, one for "then" and "else" segments, and one for the "then" segment
|
||||
/// only.
|
||||
#[derive(Default, Debug, Deref, DerefMut)]
|
||||
pub struct MacroMatchTree<'s> {
|
||||
map: HashMap<&'s str, NonEmptyVec<PartiallyMatchedMacro<'s>>>,
|
||||
}
|
||||
|
||||
/// Partially matched macro info. See docs of [`MacroMatchTree`] to learn more.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct PartiallyMatchedMacro<'s> {
|
||||
pub required_segments: List<macros::SegmentDefinition<'s>>,
|
||||
pub definition: Rc<macros::Definition<'s>>,
|
||||
}
|
||||
|
||||
impl<'a> MacroMatchTree<'a> {
|
||||
/// Register a new macro definition in this macro tree.
|
||||
pub fn register(&mut self, definition: macros::Definition<'a>) {
|
||||
let header = definition.segments.head.header;
|
||||
let entry = PartiallyMatchedMacro {
|
||||
required_segments: definition.segments.tail.clone(),
|
||||
definition: Rc::new(definition),
|
||||
};
|
||||
if let Some(node) = self.get_mut(header) {
|
||||
node.push(entry);
|
||||
} else {
|
||||
self.insert(header, NonEmptyVec::singleton(entry));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =====================
|
||||
// === MacroResolver ===
|
||||
// =====================
|
||||
|
||||
/// Enso macro resolver. See the docs of the main module to learn more about the macro resolution
|
||||
/// steps.
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MacroResolver<'s> {
|
||||
pub current_segment: MatchedSegment<'s>,
|
||||
pub resolved_segments: Vec<MatchedSegment<'s>>,
|
||||
pub possible_next_segments: MacroMatchTree<'s>,
|
||||
pub matched_macro_def: Option<Rc<macros::Definition<'s>>>,
|
||||
pub struct Parser {
|
||||
pub macros: macros::resolver::SegmentMap<'static>,
|
||||
}
|
||||
|
||||
impl<'a> MacroResolver<'a> {
|
||||
/// A new macro resolver with a special "root" segment definition. The "root" segment does not
|
||||
/// exist in the source code, it is simply the whole expression being parsed. It is treated
|
||||
/// as a macro in order to unify the algorithms.
|
||||
pub fn new_root() -> Self {
|
||||
let current_segment =
|
||||
MatchedSegment { header: Token("", "", token::Variant::newline()), body: default() };
|
||||
let resolved_segments = default();
|
||||
let possible_next_segments = default();
|
||||
let matched_macro_def = Some(Rc::new(macros::Definition {
|
||||
rev_prefix_pattern: None,
|
||||
segments: im_list::NonEmpty::singleton(macros::SegmentDefinition {
|
||||
header: "__ROOT__",
|
||||
pattern: Pattern::Everything,
|
||||
}),
|
||||
body: Rc::new(|_, v| {
|
||||
if v.len() != 1 {
|
||||
panic!()
|
||||
}
|
||||
let t = v.into_vec().pop().unwrap().1;
|
||||
resolve_operator_precedence(t)
|
||||
}),
|
||||
}));
|
||||
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
|
||||
}
|
||||
}
|
||||
|
||||
/// A matched macro segment. Partial macro resolution product.
|
||||
#[derive(Debug)]
|
||||
pub struct MatchedSegment<'s> {
|
||||
header: Token<'s>,
|
||||
body: Vec<SyntaxItemOrMacroResolver<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> MatchedSegment<'s> {
|
||||
impl Parser {
|
||||
/// Constructor.
|
||||
pub fn new(header: Token<'s>) -> Self {
|
||||
let body = default();
|
||||
Self { header, body }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Main macro resolver capable of resolving nested macro usages. See the docs of the main module to
|
||||
/// learn more about the macro resolution steps.
|
||||
#[derive(Debug)]
|
||||
pub struct Resolver<'s> {
|
||||
current_macro: MacroResolver<'s>,
|
||||
macro_stack: Vec<MacroResolver<'s>>,
|
||||
}
|
||||
|
||||
/// Result of the macro resolution step.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
enum ResolverStep {
|
||||
NormalToken,
|
||||
NewSegmentStarted,
|
||||
MacroStackPop,
|
||||
}
|
||||
|
||||
impl<'s> Resolver<'s> {
|
||||
fn new_root() -> Self {
|
||||
let current_macro = MacroResolver::new_root();
|
||||
let macro_stack = default();
|
||||
Self { current_macro, macro_stack }
|
||||
pub fn new() -> Self {
|
||||
let macros = macros::built_in::all();
|
||||
Self { macros }
|
||||
}
|
||||
|
||||
fn run(
|
||||
mut self,
|
||||
lexer: &Lexer<'s>,
|
||||
root_macro_map: &MacroMatchTree<'s>,
|
||||
tokens: Vec<syntax::Item<'s>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
let mut stream = tokens.into_iter();
|
||||
let mut opt_token: Option<syntax::Item<'s>>;
|
||||
macro_rules! next_token {
|
||||
() => {{
|
||||
opt_token = stream.next();
|
||||
if let Some(token) = opt_token.as_ref() {
|
||||
event!(TRACE, "New token {:#?}", token);
|
||||
}
|
||||
}};
|
||||
/// Main entry point.
|
||||
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
|
||||
let tokens = lexer::run(code);
|
||||
let mut statements = vec![];
|
||||
let mut tokens = tokens.into_iter().peekable();
|
||||
while tokens.peek().is_some() {
|
||||
let resolver = macros::resolver::Resolver::new_root();
|
||||
let tree = resolver.run(&self.macros, &mut tokens);
|
||||
statements.push(tree);
|
||||
}
|
||||
macro_rules! trace_state {
|
||||
() => {
|
||||
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
|
||||
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
|
||||
};
|
||||
}
|
||||
next_token!();
|
||||
while let Some(token) = opt_token {
|
||||
let step_result = match &token {
|
||||
// FIXME: clone?
|
||||
syntax::Item::Token(token) => self.process_token(root_macro_map, token.clone()),
|
||||
_ => ResolverStep::NormalToken,
|
||||
};
|
||||
match step_result {
|
||||
ResolverStep::MacroStackPop => {
|
||||
trace_state!();
|
||||
opt_token = Some(token)
|
||||
}
|
||||
ResolverStep::NewSegmentStarted => {
|
||||
trace_state!();
|
||||
next_token!()
|
||||
}
|
||||
ResolverStep::NormalToken => {
|
||||
self.current_macro.current_segment.body.push(token.into());
|
||||
trace_state!();
|
||||
next_token!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(parent_macro) = self.macro_stack.pop() {
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
}
|
||||
|
||||
trace_state!();
|
||||
|
||||
Self::resolve(lexer, self.current_macro, None)
|
||||
}
|
||||
|
||||
fn replace_current_with_parent_macro(&mut self, mut parent_macro: MacroResolver<'s>) {
|
||||
mem::swap(&mut parent_macro, &mut self.current_macro);
|
||||
let mut child_macro = parent_macro;
|
||||
if let Some(def) = &child_macro.matched_macro_def {
|
||||
let pattern = &def.segments.last().pattern;
|
||||
let child_tokens = mem::take(&mut child_macro.current_segment.body);
|
||||
// FIXME: the first [`false`] below is invalid.
|
||||
let match_result = pattern.resolve(child_tokens, false, false).unwrap();
|
||||
let mut new_child_tokens = match_result.matched;
|
||||
let new_parent_tokens = match_result.rest;
|
||||
mem::swap(&mut child_macro.current_segment.body, &mut new_child_tokens);
|
||||
self.current_macro.current_segment.body.push(child_macro.into());
|
||||
self.current_macro.current_segment.body.extend(new_parent_tokens);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve(
|
||||
lexer: &Lexer<'s>,
|
||||
m: MacroResolver<'s>,
|
||||
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment);
|
||||
let sss: NonEmptyVec<(Token, Vec<syntax::Item<'s>>)> = segments.mapped(|segment| {
|
||||
let mut ss: Vec<syntax::Item<'s>> = vec![];
|
||||
for item in segment.body {
|
||||
let resolved_token = match item {
|
||||
SyntaxItemOrMacroResolver::MacroResolver(m2) => {
|
||||
if let Some(macro_def) = &m2.matched_macro_def
|
||||
&& let Some(pfx_pattern) = ¯o_def.rev_prefix_pattern {
|
||||
ss.reverse();
|
||||
let spacing = m2.current_segment.header.left_offset.visible > VisibleOffset(0);
|
||||
let mut match_result = pfx_pattern.resolve(ss,spacing,true).unwrap();
|
||||
match_result.matched.reverse();
|
||||
ss = match_result.rest;
|
||||
ss.reverse();
|
||||
Self::resolve(lexer, m2, Some(match_result.matched)).into()
|
||||
} else {
|
||||
Self::resolve(lexer, m2, None).into()
|
||||
}
|
||||
},
|
||||
SyntaxItemOrMacroResolver::SyntaxItem(t) => t,
|
||||
};
|
||||
ss.push(resolved_token);
|
||||
}
|
||||
(segment.header, ss)
|
||||
});
|
||||
|
||||
if let Some(macro_def) = m.matched_macro_def {
|
||||
(macro_def.body)(prefix_tokens, sss)
|
||||
} else {
|
||||
todo!("Handling non-fully-resolved macros")
|
||||
}
|
||||
}
|
||||
|
||||
/// Pop the top of the macro stack, but only when `repr` is a possible next segment of at least
/// one macro on the stack. Returns [`None`] and leaves the stack untouched otherwise.
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<MacroResolver<'s>> {
    let reserved_by_ancestor = self
        .macro_stack
        .iter()
        .any(|ancestor| ancestor.possible_next_segments.contains_key(repr));
    if !reserved_by_ancestor {
        return None;
    }
    self.macro_stack.pop()
}
|
||||
|
||||
fn process_token(
|
||||
&mut self,
|
||||
root_macro_map: &MacroMatchTree<'s>,
|
||||
token: Token<'s>,
|
||||
) -> ResolverStep {
|
||||
let repr = &**token.code;
|
||||
if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) {
|
||||
event!(TRACE, "Entering next segment of the current macro.");
|
||||
let mut new_match_tree =
|
||||
Self::enter(&mut self.current_macro.matched_macro_def, subsegments);
|
||||
let mut current_segment = MatchedSegment::new(token);
|
||||
mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments);
|
||||
mem::swap(&mut self.current_macro.current_segment, &mut current_segment);
|
||||
self.current_macro.resolved_segments.push(current_segment);
|
||||
ResolverStep::NewSegmentStarted
|
||||
} else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) {
|
||||
event!(TRACE, "Next token reserved by parent macro. Resolving current macro.");
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
ResolverStep::MacroStackPop
|
||||
} else if let Some(segments) = root_macro_map.get(repr) {
|
||||
event!(TRACE, "Starting a new nested macro resolution.");
|
||||
let mut matched_macro_def = default();
|
||||
let mut current_macro = MacroResolver {
|
||||
current_segment: MatchedSegment { header: token, body: default() },
|
||||
resolved_segments: default(),
|
||||
possible_next_segments: Self::enter(&mut matched_macro_def, segments),
|
||||
matched_macro_def,
|
||||
};
|
||||
mem::swap(&mut self.current_macro, &mut current_macro);
|
||||
self.macro_stack.push(current_macro);
|
||||
ResolverStep::NewSegmentStarted
|
||||
} else {
|
||||
event!(TRACE, "Consuming token as current segment body.");
|
||||
ResolverStep::NormalToken
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the match tree of segments that may follow after one more segment has been matched.
///
/// `matched_macro_def` is first reset to [`None`]. Then, for every partially matched macro in
/// `path`:
/// - if it still has required segments, the first of them becomes a key in the returned tree
///   and the macro, with that segment removed, is stored under the key;
/// - if it has no required segments left, its definition is written to `matched_macro_def`. An
///   error is logged when a definition was already stored there during this call.
fn enter(
|
||||
matched_macro_def: &mut Option<Rc<macros::Definition<'s>>>,
|
||||
path: &[PartiallyMatchedMacro<'s>],
|
||||
) -> MacroMatchTree<'s> {
|
||||
*matched_macro_def = None;
|
||||
let mut new_section_tree = MacroMatchTree::default();
|
||||
for v in path {
|
||||
if let Some(first) = v.required_segments.head() {
|
||||
// The remaining required segments; empty when `first` was the only one.
let tail = v.required_segments.tail().cloned().unwrap_or_default();
|
||||
let definition = v.definition.clone_ref();
|
||||
let x = PartiallyMatchedMacro { required_segments: tail, definition };
|
||||
// Several macros can share the same next segment header, so they are grouped per header.
if let Some(node) = new_section_tree.get_mut(&first.header) {
|
||||
node.push(x);
|
||||
} else {
|
||||
new_section_tree.insert(first.header, NonEmptyVec::singleton(x));
|
||||
}
|
||||
} else {
|
||||
// No required segments left: this macro is fully matched at this point.
if matched_macro_def.is_some() {
|
||||
event!(ERROR, "Internal error. Duplicate macro definition.");
|
||||
}
|
||||
*matched_macro_def = Some(v.definition.clone_ref());
|
||||
}
|
||||
}
|
||||
new_section_tree
|
||||
// NOTE(review): `statements` is not defined anywhere in this function; the line below looks
// like residue of a different revision that got interleaved here. Confirm and remove.
syntax::Tree::module(statements)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// FIXME: hardcoded values + not finished implementation.
|
||||
/// Precedence of the given operator. Only `+`, `-` (both `3`) and `*` (`7`) are known.
///
/// # Panics
/// Panics for every other operator.
fn precedence_of(operator: &str) -> usize {
    match operator {
        "+" | "-" => 3,
        "*" => 7,
        unsupported => panic!("Operator not supported: {}", unsupported),
    }
}
|
||||
//
|
||||
/// An element paired with a precedence value. The `Deref` and `DerefMut` derives are pointed at
/// the `elem` field by the `#[deref]` / `#[deref_mut]` attributes.
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
|
||||
struct WithPrecedence<T> {
|
||||
/// The wrapped element.
#[deref]
|
||||
#[deref_mut]
|
||||
elem: T,
|
||||
/// The precedence attached to `elem`.
precedence: usize,
|
||||
}
|
||||
|
||||
impl<T> WithPrecedence<T> {
|
||||
pub fn new(precedence: usize, elem: T) -> Self {
|
||||
Self { elem, precedence }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
|
||||
items
|
||||
.into_iter()
|
||||
.map(|item| match item {
|
||||
syntax::Item::Token(_) => item,
|
||||
syntax::Item::Tree(ast) =>
|
||||
match &*ast.variant {
|
||||
syntax::tree::Variant::MultiSegmentApp(data) => {
|
||||
if data.segments.first().header.variant.marker()
|
||||
!= token::variant::VariantMarker::Symbol
|
||||
{
|
||||
syntax::Item::Tree(ast.with_error(
|
||||
"This expression cannot be used in a non-spaced equation.",
|
||||
))
|
||||
} else {
|
||||
syntax::Item::Tree(ast)
|
||||
}
|
||||
}
|
||||
_ => syntax::Item::Tree(ast),
|
||||
},
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
|
||||
type Tokens<'s> = Vec<syntax::Item<'s>>;
|
||||
let mut flattened: Tokens<'s> = default();
|
||||
let mut no_space_group: Tokens<'s> = default();
|
||||
let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
|
||||
let tokens = mem::take(no_space_group);
|
||||
if tokens.len() == 1 {
|
||||
flattened.extend(tokens);
|
||||
} else {
|
||||
let tokens = annotate_tokens_that_need_spacing(tokens);
|
||||
let ast = resolve_operator_precedence_internal(tokens);
|
||||
flattened.push(ast.into());
|
||||
}
|
||||
};
|
||||
for item in items {
|
||||
if item.span().left_offset.visible.width_in_spaces == 0 || no_space_group.is_empty() {
|
||||
no_space_group.push(item)
|
||||
} else if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
no_space_group.push(item);
|
||||
} else {
|
||||
// FIXME: this is unreachable.
|
||||
flattened.push(item);
|
||||
}
|
||||
}
|
||||
if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
}
|
||||
resolve_operator_precedence_internal(flattened)
|
||||
}
|
||||
|
||||
/// Build a single tree out of a flat list of items by means of an operand list (`output`) and
/// an operator stack.
///
/// - An operator token is pushed on `operator_stack` together with its [`precedence_of`] value.
///   Before that, stacked operators with a precedence greater than or equal to the new one are
///   popped and applied to the items on top of `output`.
/// - An operator directly following another operator is merged with the stacked one into a
///   multiple-operator error.
/// - A non-operator directly following a non-operator is combined with it into an application.
/// - After the loop the remaining operators are popped and applied; the final tree is wrapped
///   with `opr_section_boundary`.
///
/// # Panics
/// Panics if items remain in `output` after all operators were applied, if no tree was produced
/// at all (the final `unwrap`), and wherever [`precedence_of`] or [`token_to_ast`] panic.
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
|
||||
// Reverse-polish notation encoding.
|
||||
let mut output: Vec<syntax::Item> = default();
|
||||
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
|
||||
let mut last_token_was_ast = false;
|
||||
let mut last_token_was_opr = false;
|
||||
for item in items {
|
||||
// The clone is matched on below, so that `item` itself stays available for the other branches.
let i2 = item.clone(); // FIXME
|
||||
if let syntax::Item::Token(token) = i2 && let token::Variant::Operator(opr) = token.variant {
|
||||
// Item is an operator.
|
||||
// Remember the previous state before the flags are overwritten.
let last_token_was_opr_copy = last_token_was_opr;
|
||||
last_token_was_ast = false;
|
||||
last_token_was_opr = true;
|
||||
|
||||
let prec = precedence_of(&token.code);
|
||||
let opr = Token(token.left_offset, token.code, opr);
|
||||
// let opr = item.span().with(opr);
|
||||
|
||||
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
|
||||
// Error. Multiple operators next to each other.
|
||||
match &mut prev_opr.elem {
|
||||
// An error is already recorded on the stack: extend it with this operator.
Err(err) => err.operators.push(opr),
|
||||
// First collision: replace the stacked operator by an error holding both operators.
Ok(prev) => {
|
||||
let operators = NonEmptyVec::new(prev.clone(),vec![opr]); // FIXME: clone?
|
||||
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while let Some(prev_opr) = operator_stack.last()
|
||||
&& prev_opr.precedence >= prec
|
||||
&& let Some(prev_opr) = operator_stack.pop()
|
||||
&& let Some(rhs) = output.pop()
|
||||
{
|
||||
// Prev operator in the [`operator_stack`] has a higher or equal precedence, so it is
// applied first. The left-hand side is optional: `output` may be empty at this point.
|
||||
let lhs = output.pop().map(token_to_ast);
|
||||
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(token_to_ast(rhs)));
|
||||
output.push(ast.into());
|
||||
}
|
||||
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
|
||||
}
|
||||
} else if last_token_was_ast && let Some(lhs) = output.pop() {
|
||||
// Multiple non-operators next to each other.
|
||||
let lhs = token_to_ast(lhs);
|
||||
let rhs = token_to_ast(item);
|
||||
let ast = syntax::Tree::app(lhs, rhs);
|
||||
output.push(ast.into());
|
||||
} else {
|
||||
// Non-operator that follows previously consumed operator.
|
||||
last_token_was_ast = true;
|
||||
last_token_was_opr = false;
|
||||
output.push(item);
|
||||
}
|
||||
}
|
||||
// NOTE(review): `and_option_from` is a project helper; presumably it evaluates the closure only
// when the flag is `true`, i.e. a trailing operand is taken only if the input ended with one.
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(token_to_ast));
|
||||
// Apply the remaining operators from the top of the stack, nesting the result on the right.
while let Some(opr) = operator_stack.pop() {
|
||||
let opt_lhs = output.pop().map(token_to_ast);
|
||||
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
|
||||
}
|
||||
if !output.is_empty() {
|
||||
panic!(
|
||||
"Internal error. Not all tokens were consumed while constructing the
|
||||
expression."
|
||||
);
|
||||
}
|
||||
// Panics when nothing was produced, e.g. for an empty `items` list.
syntax::Tree::opr_section_boundary(opt_rhs.unwrap()) // fixme
|
||||
}
|
||||
|
||||
fn token_to_ast(elem: syntax::Item) -> syntax::Tree {
|
||||
match elem {
|
||||
syntax::Item::Token(token) => match token.variant {
|
||||
token::Variant::Ident(ident) => {
|
||||
let ii2 = token.with_variant(ident);
|
||||
syntax::tree::Tree::ident(ii2)
|
||||
}
|
||||
_ => panic!(),
|
||||
},
|
||||
syntax::Item::Tree(ast) => ast,
|
||||
}
|
||||
}
|
||||
|
||||
fn matched_segments_into_multi_segment_app<'s>(
|
||||
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
|
||||
matched_segments: NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>,
|
||||
) -> syntax::Tree<'s> {
|
||||
// FIXME: remove into_vec and use NonEmptyVec::mapped
|
||||
let segments = matched_segments
|
||||
.into_vec()
|
||||
.into_iter()
|
||||
.map(|segment| {
|
||||
let header = segment.0;
|
||||
let body =
|
||||
(!segment.1.is_empty()).as_some_from(|| resolve_operator_precedence(segment.1));
|
||||
syntax::tree::MultiSegmentAppSegment { header, body }
|
||||
})
|
||||
.collect_vec();
|
||||
if let Ok(segments) = NonEmptyVec::try_from(segments) {
|
||||
let prefix = prefix_tokens.map(resolve_operator_precedence);
|
||||
syntax::Tree::multi_segment_app(prefix, segments)
|
||||
} else {
|
||||
panic!()
|
||||
/// The default parser is the one created by [`Parser::new`].
impl Default for Parser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =========================
|
||||
// === Macro Definitions ===
|
||||
// =========================
|
||||
|
||||
/// Definition of the three-segment macro with the headers `if`, `then` and `else`. Every
/// segment uses [`Pattern::Everything`] and the result is built by
/// [`matched_segments_into_multi_segment_app`].
fn macro_if_then_else<'s>() -> macros::Definition<'s> {
|
||||
macro_definition! {
|
||||
("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
/// Definition of the two-segment macro with the headers `if` and `then`. Both segments use
/// [`Pattern::Everything`] and the result is built by
/// [`matched_segments_into_multi_segment_app`].
fn macro_if_then<'s>() -> macros::Definition<'s> {
|
||||
macro_definition! {
|
||||
("if", Pattern::Everything, "then", Pattern::Everything)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
/// Definition of the parenthesized group macro: the `(` segment uses [`Pattern::Everything`],
/// the `)` segment uses [`Pattern::Nothing`]. The result is built by
/// [`matched_segments_into_multi_segment_app`].
fn macro_group<'s>() -> macros::Definition<'s> {
|
||||
macro_definition! {
|
||||
("(", Pattern::Everything, ")", Pattern::Nothing)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
/// Definition of the `->` macro. Unlike the other built-in macros it is given a prefix pattern
/// in front of its only segment header; the segment itself uses [`Pattern::Everything`] and the
/// result is built by [`matched_segments_into_multi_segment_app`].
fn macro_lambda<'s>() -> macros::Definition<'s> {
|
||||
// The prefix is either a single item with `has_rhs_spacing: Some(false)` or, failing that,
// everything. NOTE(review): presumably the first alternative covers `a-> b`; confirm.
let prefix = Pattern::Or(
|
||||
Box::new(Pattern::Item(macros::pattern::Item { has_rhs_spacing: Some(false) })),
|
||||
Box::new(Pattern::Everything),
|
||||
);
|
||||
macro_definition! {
|
||||
(prefix, "->", Pattern::Everything)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
fn builtin_macros() -> MacroMatchTree<'static> {
|
||||
let mut macro_map = MacroMatchTree::default();
|
||||
macro_map.register(macro_if_then());
|
||||
macro_map.register(macro_if_then_else());
|
||||
macro_map.register(macro_group());
|
||||
macro_map.register(macro_lambda());
|
||||
macro_map
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Main ===
|
||||
// ============
|
||||
|
||||
// fn main() {
|
||||
// lexer::lexer_main();
|
||||
// }
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
fn main() {
|
||||
init_tracing(TRACE);
|
||||
// let str = "if a then b else c";
|
||||
// let str = "if if * a + b * then y then b";
|
||||
// let str = "* a + b *";
|
||||
// let str = "* a + * b";
|
||||
// let str = "(a) (b) c";
|
||||
// let str = "if (a) then b";
|
||||
// let str = "foo a-> b";
|
||||
// let str = "a+b * c";
|
||||
// let str = "foo if a then b";
|
||||
// let str = "foo *(a)";
|
||||
let str = "foo if a then b else c";
|
||||
let mut lexer = Lexer::new(str);
|
||||
lexer.run();
|
||||
|
||||
let root_macro_map = builtin_macros();
|
||||
|
||||
event!(TRACE, "Registered macros:\n{:#?}", root_macro_map);
|
||||
|
||||
let resolver = Resolver::new_root();
|
||||
let ast = resolver.run(
|
||||
&lexer,
|
||||
&root_macro_map,
|
||||
lexer.output.iter().map(|t| t.clone().into()).collect_vec(),
|
||||
);
|
||||
println!("{:#?}", ast);
|
||||
println!("\n\n{}", ast.code());
|
||||
|
||||
let ast = Parser::new().run("type Option (a) b c");
|
||||
println!("\n\n==================\n\n");
|
||||
|
||||
lexer::main();
|
||||
println!("{:#?}", ast);
|
||||
}
|
||||
|
||||
// Unit tests of the parser. Expected trees are written with the `ast_builder!` macro.
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use enso_parser_syntax_tree_builder::ast_builder;
|
||||
|
||||
// `test_parse! { "input" = { expected } }` parses the input with a fresh `Parser` and asserts
// that the result equals the tree described by the `ast_builder!` tokens in braces.
macro_rules! test_parse {
|
||||
($input:tt = {$($def:tt)*}) => {
|
||||
assert_eq!(
|
||||
Parser::new().run($input),
|
||||
ast_builder! { $($def)* }
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
// A single identifier, a two-item application and a three-item application.
#[test]
|
||||
fn test_expressions() {
|
||||
test_parse! {"a" = {a}};
|
||||
test_parse! {"a b" = {a b}};
|
||||
test_parse! {"a b c" = {[a b] c}};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Benchmarks ===
|
||||
// ==================
|
||||
|
||||
#[cfg(test)]
mod benches {
    use super::*;
    extern crate test;
    use test::Bencher;

    /// Measure parsing of an input that consists of the same type definition line repeated
    /// one thousand times. The parser is created once, outside of the measured closure.
    #[bench]
    fn bench_parsing_type_defs(bencher: &mut Bencher) {
        const REPETITIONS: usize = 1_000;
        let parser = Parser::new();
        let source = "type Option a b c\n".repeat(REPETITIONS);
        bencher.iter(move || {
            parser.run(&source);
        });
    }
}
|
||||
//
|
||||
//
|
||||
//
|
||||
// // =============
|
||||
// // === Tests ===
|
||||
// // =============
|
||||
//
|
||||
// #[cfg(test)]
|
||||
// mod test {
|
||||
// use super::*;
|
||||
//
|
||||
// pub fn ident(repr: &str) -> syntax::Tree {
|
||||
// match token::Variant::to_ident_unchecked(repr) {
|
||||
// token::Variant::Ident(ident) => span::With::new_no_left_offset_no_start(
|
||||
// Bytes::from(repr.len()),
|
||||
// syntax::tree::Type::from(syntax::tree::Ident(ident)),
|
||||
// ),
|
||||
// _ => panic!(),
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// pub fn app_segment(
|
||||
// header: Token,
|
||||
// body: Option<syntax::Tree>,
|
||||
// ) -> syntax::tree::MultiSegmentAppSegment {
|
||||
// syntax::tree::MultiSegmentAppSegment { header, body }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
//
|
||||
//
|
||||
// #[cfg(test)]
|
||||
// mod tests {
|
||||
// use super::*;
|
||||
// use enso_parser_syntax_tree_builder::ast_builder;
|
||||
//
|
||||
// fn one_shot(input: &str) -> syntax::Tree {
|
||||
// let mut lexer = Lexer::new(input);
|
||||
// lexer.run();
|
||||
// let root_macro_map = builtin_macros();
|
||||
// let resolver = Resolver::new_root();
|
||||
// let ast = resolver.run(
|
||||
// &lexer,
|
||||
// &root_macro_map,
|
||||
// lexer.output.borrow_vec().iter().map(|t| (*t).into()).collect_vec(),
|
||||
// );
|
||||
// ast
|
||||
// }
|
||||
//
|
||||
// macro_rules! test_parse {
|
||||
// ($input:tt = {$($def:tt)*}) => {
|
||||
// assert_eq!(
|
||||
// one_shot($input).with_removed_span_info(),
|
||||
// ast_builder! { $($def)* }.with_removed_span_info()
|
||||
// )
|
||||
// };
|
||||
// }
|
||||
//
|
||||
// #[test]
|
||||
// fn test_expressions() {
|
||||
// test_parse!("if a then b" = { {if} a {then} b });
|
||||
// test_parse!("if a then b else c" = { {if} a {then} b {else} c });
|
||||
// test_parse!("if a b then c d else e f" = { {if} a b {then} c d {else} e f });
|
||||
// }
|
||||
// }
|
||||
|
@ -64,3 +64,17 @@ impl std::borrow::Borrow<str> for Code<'_> {
|
||||
&self.repr
|
||||
}
|
||||
}
|
||||
|
||||
/// `code += other` for an owned right-hand side. The operation is forwarded to `add_assign` of
/// the `repr` field, which takes over the `repr` of `other`.
impl<'s> std::ops::AddAssign<Code<'s>> for Code<'s> {
|
||||
#[inline(always)]
|
||||
fn add_assign(&mut self, other: Code<'s>) {
|
||||
self.repr.add_assign(other.repr);
|
||||
}
|
||||
}
|
||||
|
||||
/// `code += &other` for a borrowed right-hand side. The `repr` of `other` is cloned and the
/// operation is forwarded to `add_assign` of the `repr` field.
impl<'s> std::ops::AddAssign<&Code<'s>> for Code<'s> {
|
||||
#[inline(always)]
|
||||
fn add_assign(&mut self, other: &Code<'s>) {
|
||||
self.repr.add_assign(other.repr.clone());
|
||||
}
|
||||
}
|
||||
|
@ -79,6 +79,16 @@ impl<'s> Offset<'s> {
|
||||
pub fn len(&self) -> Bytes {
|
||||
// The length of the offset is the length of its `code` field.
self.code.len()
|
||||
}
|
||||
|
||||
/// Check whether the offset has zero length, i.e. whether [`Self::len`] equals `Bytes(0)`.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == Bytes(0)
|
||||
}
|
||||
|
||||
/// Check whether the offset has a non-zero length, i.e. whether [`Self::len`] is greater than
/// `Bytes(0)`.
|
||||
pub fn exists(&self) -> bool {
|
||||
self.len() > Bytes(0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> AsRef<Offset<'s>> for Offset<'s> {
|
||||
@ -94,17 +104,31 @@ impl<'s> From<&'s str> for Offset<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// `offset += other` for an owned right-hand side: both the `visible` and the `code` field of
/// `other` are added to the corresponding fields of `self`.
impl<'s> std::ops::AddAssign<Offset<'s>> for Offset<'s> {
|
||||
fn add_assign(&mut self, other: Offset<'s>) {
|
||||
self.visible += other.visible;
|
||||
self.code += other.code;
|
||||
}
|
||||
}
|
||||
|
||||
/// `offset += &other` for a borrowed right-hand side: both the `visible` and the `code` field
/// of `other` are added to the corresponding fields of `self`; the code is added by reference.
impl<'s> std::ops::AddAssign<&Offset<'s>> for Offset<'s> {
|
||||
fn add_assign(&mut self, other: &Offset<'s>) {
|
||||
self.visible += other.visible;
|
||||
self.code += &other.code;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Span ===
|
||||
// ============
|
||||
|
||||
/// A span of a given syntactic element (token or AST). It contains the left offset code and the
|
||||
/// information about the length of the element. It does not contain the code of the element. This
|
||||
/// is done in order to not duplicate the data. For example, some AST nodes contain a lot of tokens.
|
||||
/// They need to remember their span, but they do not need to remember their code, because it is
|
||||
/// already stored in the tokens.
|
||||
/// A span of a given syntactic element (token or AST). It is a monoid that contains the left offset
|
||||
/// code and the information about the length of the element. It does not contain the code of the
|
||||
/// element. This is done in order to not duplicate the data. For example, some AST nodes contain a
|
||||
/// lot of tokens. They need to remember their span, but they do not need to remember their code,
|
||||
/// because it is already stored in the tokens.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Span<'s> {
|
||||
@ -114,30 +138,30 @@ pub struct Span<'s> {
|
||||
}
|
||||
|
||||
impl<'s> Span<'s> {
|
||||
/// Extend the span with another one. The other span has to be the immediate neighbor of the
|
||||
/// current span.
|
||||
#[inline(always)]
|
||||
pub fn extend<'a, T>(&mut self, other: T)
|
||||
where
|
||||
T: Into<Ref<'s, 'a>>,
|
||||
's: 'a, {
|
||||
let other = other.into();
|
||||
self.code_length += other.left_offset.len() + other.code_length;
|
||||
/// Constructor.
|
||||
pub fn new() -> Self {
|
||||
default()
|
||||
}
|
||||
/// Check whether the span is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.left_offset.is_empty() && self.code_length.is_zero()
|
||||
}
|
||||
|
||||
/// Self consuming version of [`extend`].
|
||||
pub fn extended<'a, T>(mut self, other: T) -> Self
|
||||
where
|
||||
T: Into<Ref<'s, 'a>>,
|
||||
's: 'a, {
|
||||
self.extend(other);
|
||||
self
|
||||
/// Check whether the span is only an offset, without the code part.
|
||||
pub fn is_only_offset(&self) -> bool {
|
||||
self.code_length.is_zero()
|
||||
}
|
||||
|
||||
/// Get the [`Ref`] of the current span.
|
||||
pub fn as_ref(&self) -> Ref<'_, 's> {
|
||||
Ref { left_offset: &self.left_offset, code_length: self.code_length }
|
||||
}
|
||||
|
||||
/// Add the item to this span. The item can be anything that implements the span [`Builder`].
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn add<T: Builder<'s>>(self, elem: &mut T) -> Self {
|
||||
Builder::add_to_span(elem, self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> AsRef<Span<'s>> for Span<'s> {
|
||||
@ -146,6 +170,22 @@ impl<'s> AsRef<Span<'s>> for Span<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T> PartialSemigroup<T> for Span<'s>
|
||||
where
|
||||
T: Into<Ref<'s, 'a>>,
|
||||
's: 'a,
|
||||
{
|
||||
fn concat_mut(&mut self, other: T) {
|
||||
let other = other.into();
|
||||
if self.code_length.is_zero() {
|
||||
self.left_offset += other.left_offset;
|
||||
self.code_length = other.code_length;
|
||||
} else {
|
||||
self.code_length += other.left_offset.len() + other.code_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
@ -233,254 +273,98 @@ impl<'s> FirstChildTrim<'s> for Span<'s> {
|
||||
#[macro_export]
|
||||
macro_rules! span_builder {
|
||||
($($arg:ident),* $(,)?) => {
|
||||
$crate::source::span::Builder::new() $(.add(&mut $arg))* .span
|
||||
$crate::source::span::Span::new() $(.add(&mut $arg))*
|
||||
};
|
||||
}
|
||||
|
||||
/// A marker struct for span building. The [`T`] parameter can be one of:
|
||||
/// - [`()`], which means that the structure was not used yet.
|
||||
/// - [`Option<Span<'s>>`], which means that the struct was used to build the span, however, we are
|
||||
/// unsure whether the span is known in all the cases.
|
||||
/// - [`Span<'s>`], which means that the total span can be always computed for the provided
|
||||
/// parameters.
|
||||
#[derive(Default, Debug)]
|
||||
|
||||
/// Elements implementing this trait can contain a span or multiple spans. If an element is added to
|
||||
/// an empty span, it means that it is the first element in the span group. In such a case, the left
|
||||
/// offset of the element will be removed and moved to the resulting span. See the docs of
|
||||
/// [`FirstChildTrim`] to learn more.
|
||||
#[allow(missing_docs)]
|
||||
pub struct Builder<T = ()> {
|
||||
pub span: T,
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
#[allow(non_snake_case)]
|
||||
pub fn Builder<T>(span: T) -> Builder<T> {
|
||||
Builder { span }
|
||||
}
|
||||
|
||||
impl Builder<()> {
|
||||
/// Constructor.
|
||||
pub fn new() -> Self {
|
||||
default()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Builder<T> {
|
||||
/// Add a new span to the builder.
|
||||
#[inline(always)]
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn add<S>(self, elem: &mut S) -> Builder<S::Output>
|
||||
where S: Build<T> {
|
||||
Builder(elem.build(self))
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait defining the behavior of [`Builder`] for different types containing spans.
|
||||
///
|
||||
/// The trait definition is a little bit strange, consuming the builder as a parameter instead of
|
||||
/// consuming it as self. This is done because otherwise Rust type checker goes into infinite
|
||||
/// loops.
|
||||
#[allow(missing_docs)]
|
||||
pub trait Build<T> {
|
||||
type Output;
|
||||
fn build(&mut self, builder: Builder<T>) -> Self::Output;
|
||||
pub trait Builder<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s>;
|
||||
}
|
||||
|
||||
|
||||
// === Instances ===
|
||||
|
||||
impl<'s> Build<()> for Span<'s> {
|
||||
type Output = Span<'s>;
|
||||
impl<'s> Builder<'s> for Span<'s> {
|
||||
#[inline(always)]
|
||||
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
|
||||
self.trim_as_first_child()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Span<'s>> for Span<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
builder.span.extended(&*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Option<Span<'s>>> for Span<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
match builder.span {
|
||||
Some(span) => span.extended(&*self),
|
||||
None => self.trim_as_first_child(),
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
if span.is_only_offset() {
|
||||
span.concat(&self.trim_as_first_child())
|
||||
} else {
|
||||
span.concat(&*self)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<()> for Tree<'s> {
|
||||
type Output = Span<'s>;
|
||||
impl<'s> Builder<'s> for Tree<'s> {
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<()>) -> Self::Output {
|
||||
Build::build(&mut self.span, builder)
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
Builder::add_to_span(&mut self.span, span)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Span<'s>> for Tree<'s> {
|
||||
type Output = Span<'s>;
|
||||
impl<'s, T> Builder<'s> for Token<'s, T> {
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
builder.span.extended(&self.span)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Option<Span<'s>>> for Tree<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
Build::build(&mut self.span, builder)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<()> for Token<'s, T> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
|
||||
self.trim_as_first_child()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for Token<'s, T> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
builder.span.extended(self.span())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for Token<'s, T> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
match builder.span {
|
||||
Some(span) => span.extended(self.span()),
|
||||
None => self.trim_as_first_child(),
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
if span.is_only_offset() {
|
||||
span.concat(&self.trim_as_first_child())
|
||||
} else {
|
||||
span.concat(self.span())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Build<()> for Option<T>
|
||||
where T: Build<()>
|
||||
impl<'s, T> Builder<'s> for Option<T>
|
||||
where T: Builder<'s>
|
||||
{
|
||||
type Output = Option<<T as Build<()>>::Output>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<()>) -> Self::Output {
|
||||
self.as_mut().map(|t| Build::build(t, builder))
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
self.as_mut().map(|t| Builder::add_to_span(t, span)).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for Option<T>
|
||||
where T: Build<Option<Span<'s>>>
|
||||
{
|
||||
type Output = Option<<T as Build<Option<Span<'s>>>>::Output>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
self.as_mut().map(|t| Build::build(t, builder))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for Option<T>
|
||||
where T: Build<Span<'s>, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
match self.as_mut() {
|
||||
None => builder.span,
|
||||
Some(t) => Build::build(t, builder),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T, E> Build<S> for Result<T, E>
|
||||
impl<'s, T, E> Builder<'s> for Result<T, E>
|
||||
where
|
||||
T: Build<S>,
|
||||
E: Build<S, Output = <T as Build<S>>::Output>,
|
||||
T: Builder<'s>,
|
||||
E: Builder<'s>,
|
||||
{
|
||||
type Output = <T as Build<S>>::Output;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<S>) -> Self::Output {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
match self {
|
||||
Ok(t) => Build::build(t, builder),
|
||||
Err(t) => Build::build(t, builder),
|
||||
Ok(t) => Builder::add_to_span(t, span),
|
||||
Err(t) => Builder::add_to_span(t, span),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T> Build<S> for NonEmptyVec<T>
|
||||
where
|
||||
T: Build<S>,
|
||||
[T]: Build<<T as Build<S>>::Output>,
|
||||
impl<'s, T> Builder<'s> for NonEmptyVec<T>
|
||||
where T: Builder<'s>
|
||||
{
|
||||
type Output = <[T] as Build<T::Output>>::Output;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<S>) -> Self::Output {
|
||||
let b = Build::build(self.first_mut(), builder);
|
||||
Build::build(self.tail_mut(), Builder(b))
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
self.into_iter().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for Vec<T>
|
||||
where T: Build<Span<'s>, Output = Span<'s>>
|
||||
impl<'s, T> Builder<'s> for Vec<T>
|
||||
where T: Builder<'s>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out))
|
||||
}
|
||||
out
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for Vec<T>
|
||||
where
|
||||
T: Build<Option<Span<'s>>>,
|
||||
T::Output: Into<Option<Span<'s>>>,
|
||||
impl<'s, T> Builder<'s> for [T]
|
||||
where T: Builder<'s>
|
||||
{
|
||||
type Output = Option<Span<'s>>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out)).into();
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for [T]
|
||||
where T: Build<Span<'s>, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out));
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for [T]
|
||||
where
|
||||
T: Build<Option<Span<'s>>>,
|
||||
T::Output: Into<Option<Span<'s>>>,
|
||||
{
|
||||
type Output = Option<Span<'s>>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out)).into();
|
||||
}
|
||||
out
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
// ==============
|
||||
|
||||
pub mod item;
|
||||
pub mod operator;
|
||||
pub mod token;
|
||||
pub mod tree;
|
||||
|
||||
|
@ -18,12 +18,13 @@ use crate::syntax::*;
|
||||
#[allow(missing_docs)]
|
||||
pub enum Item<'s> {
|
||||
Token(Token<'s>),
|
||||
Block(Vec<Item<'s>>),
|
||||
Tree(Tree<'s>),
|
||||
}
|
||||
|
||||
impl<'s> Item<'s> {
|
||||
/// Check whether the element is the provided token variant. Returns [`false`] if it was an
|
||||
/// [`Tree`] node.
|
||||
/// Check whether the element is the provided token variant. Returns [`false`] if it was not a
|
||||
/// token.
|
||||
pub fn is_variant(&self, variant: token::variant::VariantMarker) -> bool {
|
||||
match self {
|
||||
Item::Token(token) => token.is(variant),
|
||||
@ -32,20 +33,23 @@ impl<'s> Item<'s> {
|
||||
}
|
||||
|
||||
/// [`location::Span`] of the element.
|
||||
pub fn span(&self) -> span::Ref<'_, 's> {
|
||||
pub fn left_visible_offset(&self) -> VisibleOffset {
|
||||
match self {
|
||||
Self::Token(t) => t.span(),
|
||||
Self::Tree(t) => t.span.as_ref(),
|
||||
Self::Token(t) => t.span().left_offset.visible,
|
||||
Self::Tree(t) => t.span.left_offset.visible,
|
||||
Self::Block(t) => t.first().map(|t| t.left_visible_offset()).unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> FirstChildTrim<'s> for Item<'s> {
|
||||
#[inline(always)]
|
||||
fn trim_as_first_child(&mut self) -> Span<'s> {
|
||||
/// Convert this item to a [`Tree`].
|
||||
pub fn to_ast(self) -> Tree<'s> {
|
||||
match self {
|
||||
Self::Token(t) => t.trim_as_first_child(),
|
||||
Self::Tree(t) => t.span.trim_as_first_child(),
|
||||
Item::Token(token) => match token.variant {
|
||||
token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
|
||||
_ => todo!(),
|
||||
},
|
||||
Item::Tree(ast) => ast,
|
||||
Item::Block(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -81,3 +85,35 @@ pub enum Ref<'s, 'a> {
|
||||
Token(token::Ref<'s, 'a>),
|
||||
Tree(&'a Tree<'s>),
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === Variant Checks ===
|
||||
// ======================
|
||||
|
||||
/// Generates one `is_<variant>` predicate on [`Item`] per token variant. Each predicate forwards to
/// [`Item::is_variant`] with the matching `VariantMarker`; for example, `is_ident` checks whether
/// the item is an identifier token.
///
/// The macro expects the token enum definition as its input, so it is meant to be driven through
/// `with_token_definition!`. Variant fields are matched only so that the input parses; they are
/// not used in the generated code.
// NOTE(review): `$variant_attr` is captured as a single token tree after `#`, but it is emitted
// inside an additional repetition and an additional pair of brackets. Confirm that this expands
// as intended for variants that carry attributes.
macro_rules! generate_variant_checks {
    (
        $(#$enum_attr:tt)*
        pub enum $enum_name:ident {
            $(
                $(#$variant_attr:tt)*
                $variant_name:ident $({ $(pub $field_name:ident : $field_type:ty),* $(,)? })?
            ),* $(,)?
        }
    ) => { paste!{
        impl<'s> Item<'s> {
            $(
                $(#[$($variant_attr)*])*
                #[allow(missing_docs)]
                pub fn [<is_ $variant_name:snake:lower>](&self) -> bool {
                    self.is_variant(token::variant::VariantMarker::$variant_name)
                }
            )*
        }
    }};
}
|
||||
|
||||
crate::with_token_definition!(generate_variant_checks());
|
||||
|
172
lib/rust/parser/src/syntax/operator.rs
Normal file
172
lib/rust/parser/src/syntax/operator.rs
Normal file
@ -0,0 +1,172 @@
|
||||
//! Operator related functionalities.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::token::Token;
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Precedence ===
|
||||
// ==================
|
||||
|
||||
// FIXME: Precedence values are hard-coded here. According to the spec it should be possible to
// compute the precedence of any operator, which this implementation does not support.
/// Look up the precedence assigned to the textual form of an operator.
///
/// Only `+`, `-` and `*` are known; `+` and `-` share one precedence level and `*` has a higher
/// one.
///
/// # Panics
/// Panics with `Operator not supported: <operator>` for every other input.
fn precedence_of(operator: &str) -> usize {
    const ADDITIVE: usize = 3;
    const MULTIPLICATIVE: usize = 7;
    match operator {
        "+" | "-" => ADDITIVE,
        "*" => MULTIPLICATIVE,
        unsupported => panic!("Operator not supported: {}", unsupported),
    }
}
|
||||
|
||||
/// An item with an assigned precedence.
|
||||
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
|
||||
struct WithPrecedence<T> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
elem: T,
|
||||
precedence: usize,
|
||||
}
|
||||
|
||||
impl<T> WithPrecedence<T> {
|
||||
/// Constructor.
|
||||
pub fn new(precedence: usize, elem: T) -> Self {
|
||||
Self { elem, precedence }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Annotate expressions that should use spacing, because otherwise they are misleading. For
|
||||
/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion
|
||||
/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is
|
||||
/// not needed. For example, `(.x)` is parsed as `(\t -> t.x)`, which is understandable.
|
||||
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
|
||||
// TODO: It should be possible to make it faster by iterating over mut vec. To be checked.
|
||||
items
|
||||
.into_iter()
|
||||
.map(|item| match item {
|
||||
syntax::Item::Block(_) => item,
|
||||
syntax::Item::Token(_) => item,
|
||||
syntax::Item::Tree(ast) => syntax::Item::Tree(match &*ast.variant {
|
||||
syntax::tree::Variant::MultiSegmentApp(data)
|
||||
if !data.segments.first().header.is_symbol() =>
|
||||
ast.with_error("This expression cannot be used in a non-spaced equation."),
|
||||
_ => ast,
|
||||
}),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Take [`Item`] stream, resolve operators precedence and return the final AST. The precedence
|
||||
/// resolution algorithm bases on the [Shunting yard algorithm](https://en.wikipedia.org/wiki/Shunting_yard_algorithm).
|
||||
/// It is extended to handle operator sections.
|
||||
#[inline(always)]
|
||||
pub fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
|
||||
type Tokens<'s> = Vec<syntax::Item<'s>>;
|
||||
let mut flattened: Tokens<'s> = default();
|
||||
let mut no_space_group: Tokens<'s> = default();
|
||||
let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
|
||||
let tokens = mem::take(no_space_group);
|
||||
if tokens.len() == 1 {
|
||||
flattened.extend(tokens);
|
||||
} else {
|
||||
let tokens = annotate_tokens_that_need_spacing(tokens);
|
||||
let ast = resolve_operator_precedence_internal(tokens);
|
||||
flattened.push(ast.into());
|
||||
}
|
||||
};
|
||||
for item in items {
|
||||
if item.left_visible_offset().width_in_spaces == 0 || no_space_group.is_empty() {
|
||||
no_space_group.push(item)
|
||||
} else if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
no_space_group.push(item);
|
||||
} else {
|
||||
// FIXME: this is unreachable.
|
||||
flattened.push(item);
|
||||
}
|
||||
}
|
||||
if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
}
|
||||
resolve_operator_precedence_internal(flattened)
|
||||
}
|
||||
|
||||
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
|
||||
// Reverse-polish notation encoding.
|
||||
let mut was_section_used = false;
|
||||
let mut output: Vec<syntax::Item> = default();
|
||||
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
|
||||
let mut last_token_was_ast = false;
|
||||
let mut last_token_was_opr = false;
|
||||
for item in items {
|
||||
if let syntax::Item::Token(token) = item.clone()
|
||||
&& let token::Variant::Operator(opr) = token.variant {
|
||||
// Item is an operator.
|
||||
let last_token_was_opr_copy = last_token_was_opr;
|
||||
last_token_was_ast = false;
|
||||
last_token_was_opr = true;
|
||||
|
||||
let prec = precedence_of(&token.code);
|
||||
let opr = Token(token.left_offset, token.code, opr);
|
||||
|
||||
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
|
||||
// Error. Multiple operators next to each other.
|
||||
match &mut prev_opr.elem {
|
||||
Err(err) => err.operators.push(opr),
|
||||
Ok(prev) => {
|
||||
let operators = NonEmptyVec::new(prev.clone(),vec![opr]);
|
||||
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while let Some(prev_opr) = operator_stack.last()
|
||||
&& prev_opr.precedence >= prec
|
||||
&& let Some(prev_opr) = operator_stack.pop()
|
||||
&& let Some(rhs) = output.pop()
|
||||
{
|
||||
// Prev operator in the [`operator_stack`] has a higher precedence.
|
||||
let lhs = output.pop().map(|t| t.to_ast());
|
||||
if lhs.is_none() { was_section_used = true; }
|
||||
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(rhs.to_ast()));
|
||||
output.push(ast.into());
|
||||
}
|
||||
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
|
||||
}
|
||||
} else if last_token_was_ast && let Some(lhs) = output.pop() {
|
||||
// Multiple non-operators next to each other.
|
||||
let lhs = lhs.to_ast();
|
||||
let rhs = item.to_ast();
|
||||
let ast = syntax::Tree::app(lhs, rhs);
|
||||
output.push(ast.into());
|
||||
} else {
|
||||
// Non-operator that follows previously consumed operator.
|
||||
last_token_was_ast = true;
|
||||
last_token_was_opr = false;
|
||||
output.push(item);
|
||||
}
|
||||
}
|
||||
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(|t| t.to_ast()));
|
||||
while let Some(opr) = operator_stack.pop() {
|
||||
let opt_lhs = output.pop().map(|t| t.to_ast());
|
||||
if opt_lhs.is_none() || opt_rhs.is_none() {
|
||||
was_section_used = true;
|
||||
}
|
||||
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
|
||||
}
|
||||
if !output.is_empty() {
|
||||
panic!("Internal error. Not all tokens were consumed while constructing the expression.");
|
||||
}
|
||||
|
||||
// FIXME
|
||||
let out = opt_rhs.unwrap();
|
||||
if was_section_used {
|
||||
syntax::Tree::opr_section_boundary(out)
|
||||
} else {
|
||||
out
|
||||
}
|
||||
}
|
@ -64,6 +64,9 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub error: Error,
|
||||
pub ast: Tree<'s>,
|
||||
},
|
||||
Module {
|
||||
pub statements: Vec<Tree<'s>>,
|
||||
},
|
||||
/// A simple identifier, like `foo` or `bar`.
|
||||
Ident {
|
||||
pub token: token::Ident<'s>,
|
||||
@ -97,8 +100,13 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
/// `x + y + z` is the section body, and `Vector x y z` is the prefix of this function
|
||||
/// application.
|
||||
MultiSegmentApp {
|
||||
pub prefix: Option<Tree<'s>>,
|
||||
pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
|
||||
},
|
||||
|
||||
TypeDef {
|
||||
pub keyword: Token<'s>,
|
||||
pub name: Tree<'s>,
|
||||
pub params: Vec<Tree<'s>>,
|
||||
}
|
||||
}
|
||||
}};}
|
||||
@ -158,10 +166,9 @@ impl<'s> Tree<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> span::Build<S> for Error {
|
||||
type Output = S;
|
||||
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
|
||||
builder.span
|
||||
impl<'s> span::Builder<'s> for Error {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span
|
||||
}
|
||||
}
|
||||
|
||||
@ -178,12 +185,9 @@ pub struct MultipleOperatorError<'s> {
|
||||
pub operators: NonEmptyVec<token::Operator<'s>>,
|
||||
}
|
||||
|
||||
impl<'s, S> span::Build<S> for MultipleOperatorError<'s>
|
||||
where NonEmptyVec<token::Operator<'s>>: span::Build<S>
|
||||
{
|
||||
type Output = <NonEmptyVec<token::Operator<'s>> as span::Build<S>>::Output;
|
||||
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
|
||||
self.operators.build(builder)
|
||||
impl<'s> span::Builder<'s> for MultipleOperatorError<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
self.operators.add_to_span(span)
|
||||
}
|
||||
}
|
||||
|
||||
@ -198,12 +202,9 @@ pub struct MultiSegmentAppSegment<'s> {
|
||||
pub body: Option<Tree<'s>>,
|
||||
}
|
||||
|
||||
impl<'s, S> span::Build<S> for MultiSegmentAppSegment<'s>
|
||||
where Token<'s>: span::Build<S, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
|
||||
builder.add(&mut self.header).add(&mut self.body).span
|
||||
impl<'s> span::Builder<'s> for MultiSegmentAppSegment<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span.add(&mut self.header).add(&mut self.body)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
//! Definition of a macro allowing building mock AST structures, mostly useful for testing.
|
||||
|
||||
// === Features ===
|
||||
#![feature(proc_macro_span)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
@ -36,8 +38,8 @@ use std::mem;
|
||||
/// braces. You can also place segments in quotes, like `{"("} a {")"}`.
|
||||
#[proc_macro]
|
||||
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||
let output = expr(tokens);
|
||||
let output = quote!(syntax::Tree::opr_section_boundary(#output));
|
||||
let output = expr(tokens, None);
|
||||
let output = quote!(crate::syntax::Tree::module(vec![#output]));
|
||||
output.into()
|
||||
}
|
||||
|
||||
@ -54,12 +56,13 @@ impl Segment {
|
||||
}
|
||||
}
|
||||
|
||||
fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
|
||||
fn expr(tokens: proc_macro::TokenStream, parent_spacing: Option<usize>) -> TokenStream {
|
||||
use proc_macro::TokenTree::*;
|
||||
let mut output = quote! {};
|
||||
let mut prefix: Option<TokenStream> = None;
|
||||
let mut segments: Vec<Segment> = vec![];
|
||||
let mut current_segment: Option<Segment> = None;
|
||||
let mut last_column: Option<usize> = None;
|
||||
let app_to_output = |output: &mut TokenStream, tok| {
|
||||
if output.is_empty() {
|
||||
*output = tok;
|
||||
@ -67,12 +70,21 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
|
||||
*output = quote! {syntax::Tree::app(#output,#tok)};
|
||||
}
|
||||
};
|
||||
let mut inherited_spacing = parent_spacing.unwrap_or(0);
|
||||
for token in tokens {
|
||||
match token {
|
||||
let spacing = last_column.map(|t| token.span().start().column - t).unwrap_or(0);
|
||||
let spacing = spacing + inherited_spacing;
|
||||
inherited_spacing = 0;
|
||||
last_column = Some(token.span().end().column);
|
||||
match &token {
|
||||
// a b c ...
|
||||
Ident(ident) => {
|
||||
let ident = ident.to_string();
|
||||
app_to_output(&mut output, quote! {test::ident(#ident)});
|
||||
let spacing = " ".repeat(spacing);
|
||||
app_to_output(
|
||||
&mut output,
|
||||
quote! {crate::syntax::Tree::ident(crate::syntax::Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident)))},
|
||||
);
|
||||
}
|
||||
// {if} a {then} b {else} c
|
||||
// {"("} a {")"}
|
||||
@ -83,12 +95,15 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
|
||||
} else if !output.is_empty() {
|
||||
prefix = Some(mem::take(&mut output));
|
||||
}
|
||||
let body = group.stream().to_string();
|
||||
current_segment = Some(Segment::new(quote! {Token::ident(#body)})); // Token::symbol
|
||||
let ident = group.stream().to_string();
|
||||
let spacing = " ".repeat(spacing);
|
||||
current_segment = Some(Segment::new(
|
||||
quote! { Token(#spacing, #ident, syntax::token::Variant::new_ident_unchecked(#ident).into())},
|
||||
)); // Token::symbol
|
||||
}
|
||||
// a [b c] d
|
||||
Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => {
|
||||
app_to_output(&mut output, expr(group.stream()));
|
||||
app_to_output(&mut output, expr(group.stream(), Some(spacing)));
|
||||
}
|
||||
_ => panic!("Unsupported token {:?}", token),
|
||||
}
|
||||
@ -114,10 +129,7 @@ fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
|
||||
.unwrap_or_else(|| quote! {None});
|
||||
let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()};
|
||||
output = quote! {
|
||||
span::With::new_no_left_offset_no_start(
|
||||
Bytes::from(0),
|
||||
syntax::tree::Type::MultiSegmentApp(Box::new(syntax::tree::MultiSegmentApp {prefix: #pfx, segments: #segments}))
|
||||
)
|
||||
syntax::Tree::multi_segment_app (#pfx, #segments)
|
||||
}
|
||||
}
|
||||
output
|
||||
|
@ -49,6 +49,12 @@ impl<T> NonEmptyVec<T> {
|
||||
NonEmptyVec { elems }
|
||||
}
|
||||
|
||||
/// Length of the vector, i.e. the number of elements currently stored in it.
|
||||
// The lint asks for an `is_empty` companion of `len`. NOTE(review): it is presumably omitted
// because a `NonEmptyVec` can never be empty - confirm.
#[allow(clippy::len_without_is_empty)]
|
||||
pub fn len(&self) -> usize {
|
||||
// Delegates to the length of the underlying `elems` storage.
self.elems.len()
|
||||
}
|
||||
|
||||
/// Construct a `NonEmptyVec` containing a single element.
|
||||
///
|
||||
/// # Examples
|
||||
@ -207,7 +213,7 @@ impl<T> NonEmptyVec<T> {
|
||||
/// assert_eq!(*vec.first(), 0);
|
||||
/// ```
|
||||
pub fn first(&self) -> &T {
|
||||
self.elems.first().expect("The NonEmptyVec always has an item in it.")
|
||||
self.elems.first().unwrap_or_else(|| unreachable!())
|
||||
}
|
||||
|
||||
/// Obtain a mutable reference to the head of the `NonEmptyVec`.
|
||||
@ -220,7 +226,7 @@ impl<T> NonEmptyVec<T> {
|
||||
/// assert_eq!(*vec.first_mut(), 0);
|
||||
/// ```
|
||||
pub fn first_mut(&mut self) -> &mut T {
|
||||
self.elems.first_mut().expect("The NonEmptyVec always has an item in it.")
|
||||
self.elems.first_mut().unwrap_or_else(|| unreachable!())
|
||||
}
|
||||
|
||||
/// Get the tail reference.
|
||||
@ -243,7 +249,7 @@ impl<T> NonEmptyVec<T> {
|
||||
/// assert_eq!(*vec.last(), 2)
|
||||
/// ```
|
||||
pub fn last(&self) -> &T {
|
||||
self.get(self.len() - 1).expect("There is always one element in a NonEmptyVec.")
|
||||
self.get(self.len() - 1).unwrap_or_else(|| unreachable!())
|
||||
}
|
||||
|
||||
/// Obtain a mutable reference to the last element in the `NonEmptyVec`.
|
||||
@ -256,7 +262,7 @@ impl<T> NonEmptyVec<T> {
|
||||
/// assert_eq!(*vec.last_mut(), 2)
|
||||
/// ```
|
||||
pub fn last_mut(&mut self) -> &mut T {
|
||||
self.get_mut(self.len() - 1).expect("There is always one element in a NonEmptyVec.")
|
||||
self.get_mut(self.len() - 1).unwrap_or_else(|| unreachable!())
|
||||
}
|
||||
|
||||
/// Create a draining iterator that removes the specified range in the vector and yields the
|
||||
|
@ -17,7 +17,7 @@ use std::iter::Extend;
|
||||
/// Mutable Semigroup definition. Impls should satisfy the associativity law:
|
||||
/// `x.concat(y.concat(z)) = x.concat(y).concat(z)`, in symbolic form:
|
||||
/// `x <> (y <> z) = (x <> y) <> z`
|
||||
pub trait PartialSemigroup<T>: Clone {
|
||||
pub trait PartialSemigroup<T = Self>: Clone {
|
||||
/// An associative operation.
|
||||
fn concat_mut(&mut self, other: T);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user