Mirror of https://github.com/enso-org/enso.git (synced 2024-12-22 13:41:39 +03:00)

Implement part of the Enso lexer in rust (#1109)

This commit is contained in:
parent 170df5ebd8
commit e64c0384b0
@@ -14,6 +14,8 @@ members = [
    "lib/rust/flexer-testing/definition",
    "lib/rust/flexer-testing/generation",
    "lib/rust/lazy-reader",
    "lib/rust/lexer/definition",
    "lib/rust/lexer/generation",
    "lib/rust/parser",
]

@@ -42,7 +42,7 @@ My_Package
│ ├── Helper.enso
│ └── Util.enso
└── visualization (optional)
    └──
    └──
```

### The `src` Directory
@@ -86,6 +86,11 @@ deactivated by using `flexer::pop_state(state)` or
from which they can inherit rules. This is fantastic for removing the need to
repeat yourself when defining the lexer.

When inheriting rules from a parent group, the rules from the parent group are
matched strictly _after_ the rules from the child group. This means that groups
are able to selectively "override" the rules of their parents. Rules are still
matched in order for each group's set of rules.
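A minimal sketch of what this inheritance looks like in practice, based on the `define_group` and `create_rule` calls that appear later in this diff. The idea that `define_group`'s second argument names the parent group is our assumption for illustration; only the `None` form appears verbatim in this commit.

```rust
// Hypothetical sketch: a child group inheriting rules from a parent group.
let mut registry = group::Registry::default();
let parent_id = registry.define_group("PARENT", None);
// Assumption: `Some(parent_id)` registers PARENT as CHILD's parent.
let child_id  = registry.define_group("CHILD", Some(parent_id));

// The child's own rules are matched first; the inherited parent rules are
// matched strictly after them, so the child can "override" its parent.
registry.group_mut(child_id).create_rule(&Pattern::char('a'), "self.on_child_a(reader)");
registry.group_mut(parent_id).create_rule(&Pattern::any(),    "self.on_parent_any(reader)");
```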
### Patterns

Rules are defined to match _patterns_. Patterns are regular-grammar-like
@@ -15,11 +15,32 @@ identify blocks

<!-- MarkdownTOC levels="2,3" autolink="true" -->

- [Lexer Architecture](#lexer-architecture)
  - [Libraries in the Lexer Definition](#libraries-in-the-lexer-definition)
- [Lexer Functionality](#lexer-functionality)
- [The Lexer AST](#the-lexer-ast)

<!-- /MarkdownTOC -->

## Lexer Architecture

The structure of the flexer's code generation forces the lexer to be split into
two parts: the definition, and the generation. As the latter is the point from
which the lexer will be used, the second subproject is the one that is graced
with the name `lexer`.

### Libraries in the Lexer Definition

The lexer generation subproject needs to be able to make the assumption that all
imports will be in the same place (relative to the crate root). To this end, the
definition subproject exports public modules `library` and `prelude`. These are
re-imported and used in the generation subproject to ensure that all components
are found at the same paths relative to the crate root.

This does mean, however, that all imports from _within_ the current crate in the
definition subproject must be imported from the `library` module, not from their
paths directly from the crate root.
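As a concrete sketch of this convention, the definition crate's `lib.rs` (added later in this diff) exposes the `library` module, and a generation crate can re-export it under the same name. The generation-side line is our assumption for illustration, since that crate's contents are not shown in this commit.

```rust
// In the lexer definition crate (verbatim from lib.rs later in this diff):
pub mod library {
    pub use crate::token;
}

// In the generation crate (hypothetical sketch): re-export under the same
// name, so `library::token::...` paths resolve identically from either root.
pub use lexer_definition::library;
```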
## Lexer Functionality

The lexer needs to provide the following functionality as part of the parser.
@@ -42,13 +63,21 @@ for use by the GUI.

It contains the following constructs:

- `Var`: Variable identifiers.
- `Ref`: Referrent identifiers.
- `Opr`: Operator identifiers.
- `Number`: Numbers.
- `Text`: Text.
- `Invalid`: Invalid constructs that cannot be lexed.
- `Referent`: Referrent identifiers (e.g. `Some_Ref_Ident`).
- `Variable`: Variable identifiers (e.g. `some_var_ident`).
- `External`: External identifiers (e.g. `someJavaName`).
- `Blank`: The blank name `_`.
- `Operator`: Operator identifiers (e.g. `-->>`).
- `Modifier`: Modifier operators (e.g. `+=`).
- `Number`: Numbers (`16_FFFF`).
- `DanglingBase`: An explicit base without an associated number (e.g. `16_`).
- `Text`: Text (e.g. `"Some text goes here."`).
- `Line`: A line in a block that contains tokens.
- `BlankLine`: A line in a block that contains only whitespace.
- `Block`: Syntactic blocks in the language.
- `InvalidSuffix`: Invalid tokens when in a given state that would otherwise be
  valid.
- `Unrecognized`: Tokens that the lexer doesn't recognise.
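These constructs map onto the `Token` constructors added in `token.rs` at the end of this diff. A small illustrative sketch of building a few of them (the length values follow the rules in that file; the example values are our own):

```rust
use lexer_definition::library::token::Token;

// Each constructor takes the lexeme and the number of trailing spaces.
let var      = Token::Variable("some_var_ident", 0);  // length 14
let referent = Token::Referent("Some_Ref_Ident", 1);  // length 14, offset 1
let external = Token::External("someJavaName", 0);    // length 12
let operator = Token::Operator("-->>", 0);            // length 4
let modifier = Token::Modifier("+", 0);               // length 2: `+` plus the implicit `=`
let number   = Token::Number("16", "FFFF", 0);        // length 7: base, `_`, and digits
```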
The distinction is made here between the various kinds of identifiers in order
to keep lexing fast, but also in order to allow macros to switch on the kinds of
@@ -17,6 +17,24 @@ specific nodes on the AST.

<!-- /MarkdownTOC -->

> The actionables for this section are:
>
> - Work out how to ensure that precedence and associativity isn't broken by the
>   macro resolution phase.
> - Work out how to handle the special case for `,`. We don't want comma to be
>   subject to the variable precedence functionality, as conventional spacing
>   for defining lists goes `[x, y, z]` and that should be allowed without the
>   variable precedence happening.
> - Work out how to handle the special case for `-`. The expression `-n` should
>   be treated as an application of the unary operator negate, while `- n`
>   should be treated as part of a larger expression (e.g. a section,
>   subtraction).
> - As Enso has no syntactic marker for the introduction of a lambda, we need to
>   have a special case for `->` so that it has appropriate precedence on its
>   left and right sides. Ideally, `map.fold 0 $ y -> foo $ y` is resolved as
>   `(map.fold 0) $ (y -> (foo $ y))`. This makes writing code much more
>   natural.

## Resolution Algorithm

The operator resolution process uses a version of the classic
@@ -106,9 +106,9 @@ A qualified export statement only exports the name of the exported module
In a `from` export, any mentioned items become available as though they were
defined in the exporting module.

Please note it is explicitly forbidden for export statements across modules
to form a cycle. If export statements cycle is detected, a compile error will
be reported.
Please note it is explicitly forbidden for export statements across modules to
form a cycle. If export statements cycle is detected, a compile error will be
reported.

## Project Main Module

@@ -19,9 +19,11 @@ giving Enso code a uniform identity.
<!-- MarkdownTOC levels="2,3" autolink="true" -->

- [Naming Constructs](#naming-constructs)
  - [External Identifiers](#external-identifiers)
- [Pattern Contexts](#pattern-contexts)
- [Localised Naming](#localised-naming)
- [Operator Naming](#operator-naming)
  - [Modifier Operators](#modifier-operators)
- [Reserved Names](#reserved-names)

<!-- /MarkdownTOC -->

@@ -69,6 +71,23 @@ Identifiers are introduced by:
- Using them in a pattern matching context (free variables).
- Using them in a type ascription (free variables).

### External Identifiers

As Enso has the ability to interface with many other programming languages in a
highly-integrated fashion, it needs to be able to use naming styles from other
languages natively. To do this, we have the concept of a _third_ kind of
identifier, called the 'external' identifier.

An external identifier is one that doesn't match either the variable or referent
forms described above, for example `someJavaName`. It is not an _exclusive_
category, however. Common styles of naming functions in Python, for example,
will usually lex as variable identifiers.

> The actionables for this section are:
>
> - Work out how and where to make a variable/referent distinction for external
>   names.

## Pattern Contexts

A pattern context is a span in the code where variable identifiers (as described
@@ -117,13 +136,19 @@ Operator names are those built solely from operator symbols (e.g. `+` or `<*>`).
Operator symbols are defined as characters in the following set.

```
!$%&*+-/<>?^~|:\,.()[]{}=
;!$%&*+-/<>?^~|:\\=
```

Please note that not every sequence that can be created from the above is a
_valid_ operator name, as some may collide with built-in language constructs
(e.g. `[` and `]`, which start and end a vector literal respectively).

### Modifier Operators

Barring specially defined operators (`=`, `==`, `!=`, `#=`, `>=` and `<=`), any
operator that ends with an equals sign `=` is called a _modifier_ operator.
These will, in the future, have special treatment in the language.

## Reserved Names

Even though we do not intend to reserve any names at the level of the lexer or
@@ -1,6 +1,6 @@
name: Cycle_Test
version: 0.0.1
enso-version: 0.1.1-rc5
license: ''
author: ''
maintainer: ''
license: ""
author: ""
maintainer: ""

@@ -23,13 +23,13 @@ pub type AnyAst = Ast<Shape>;
#[derive(Debug,Clone)]
pub struct Ast<T> {
    /// A unique identifier.
    uid: Option<Uuid>,
    uid : Option<Uuid>,
    /// Length in number of chars of this ast node.
    len: usize,
    len : usize,
    /// The number of trailing spaces.
    off: usize,
    off : usize,
    /// The ast node itself.
    ast: T,
    ast : T,
}

// The set of all ast nodes.

@@ -40,7 +40,7 @@ impl Logger {
    }

    fn dec_indent(&self) {
        self.indent.update(|t|t-1);
        self.indent.update(|t|t.saturating_sub(1));
    }
}

@@ -1,5 +1,5 @@
[package]
name = "optics"
name = "enso-optics"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2018"
@@ -131,6 +131,8 @@ impl<T> NonEmptyVec<T> {
    /// use enso_prelude::NonEmptyVec;
    /// let mut vec = NonEmptyVec::with_capacity(0, 10);
    /// assert_eq!(vec.capacity(),10);
    /// vec.shrink_to_fit();
    /// assert!(vec.capacity() < 10);
    /// ```
    pub fn shrink_to_fit(&mut self) {
        self.elems.shrink_to_fit();
@@ -165,9 +167,10 @@ impl<T> NonEmptyVec<T> {
    /// let mut vec = NonEmptyVec::new(0,vec![1]);
    /// assert!(vec.pop().is_some());
    /// assert!(vec.pop().is_none());
    /// assert_eq!(vec.len(),1);
    /// ```
    pub fn pop(&mut self) -> Option<T> {
        (self.len() != 1).and_option(self.elems.pop())
        (self.len() > 1).and_option_from(||self.elems.pop())
    }

    /// Obtain a mutable reference to the element in the vector at the specified `index`.
@@ -3,12 +3,12 @@
//! defines several aliases and utils which may find their place in new
//! libraries in the future.

#![feature(specialization)]
#![feature(test)]
#![warn(unsafe_code)]
#![feature(trait_alias)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![feature(specialization)]
#![feature(trait_alias)]
#![warn(unsafe_code)]

mod clone;
mod collections;

@@ -6,8 +6,6 @@
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]
#![allow(unused_imports)]
#![allow(clippy::all)]

//! This file contains the code defining a lexer for the following small language. Due to the way in
//! which the code-generation from the flexer is used, it has to be defined in a separate crate from
@@ -35,7 +33,6 @@ use flexer::automata::pattern::Pattern;
use flexer::group::Registry;
use flexer::prelude::logger::Disabled;
use flexer::prelude::reader::BookmarkManager;
use flexer::prelude::reader::decoder::DecoderUTF8;
@@ -128,12 +125,10 @@ impl TestLexer {
    }
}

/// Implementations of functionality used by the lexer.
///
/// These functions are provided by the user, by hand, and must all take a reader.
#[allow(missing_docs)]
/// Rules for the root state.
#[allow(dead_code,missing_docs)]
impl TestLexer {
    pub fn on_first_word<R:LazyReader>(&mut self, _reader:&mut R) {
    fn on_first_word<R:LazyReader>(&mut self, _reader:&mut R) {
        let str = self.current_match.clone();
        let ast = Token::Word(str);
        self.output.push(ast);
@@ -141,28 +136,65 @@ impl TestLexer {
        self.push_state(id);
    }

    pub fn on_spaced_word<R:LazyReader>(&mut self, _reader:&mut R) {
    fn on_err_suffix_first_word<R:LazyReader>(&mut self, _reader:&mut R) {
        let ast = Token::Unrecognized(self.current_match.clone());
        self.output.push(ast);
    }

    fn on_no_err_suffix_first_word<R:LazyReader>(&mut self, _reader:&mut R) {}

    fn rules_in_root(lexer:&mut TestLexer) {
        let a_word = Pattern::char('a').many1();
        let b_word = Pattern::char('b').many1();
        let any    = Pattern::any();
        let end    = Pattern::eof();

        let root_group_id = lexer.initial_state;
        let root_group    = lexer.groups_mut().group_mut(root_group_id);

        root_group.create_rule(&a_word,"self.on_first_word(reader)");
        root_group.create_rule(&b_word,"self.on_first_word(reader)");
        root_group.create_rule(&end, "self.on_no_err_suffix_first_word(reader)");
        root_group.create_rule(&any, "self.on_err_suffix_first_word(reader)");
    }
}

/// Rules for the "seen first word" state.
#[allow(dead_code,missing_docs)]
impl TestLexer {
    fn on_spaced_word<R:LazyReader>(&mut self, _reader:&mut R) {
        let str = self.current_match.clone();
        let ast = Token::Word(String::from(str.trim()));
        self.output.push(ast);
    }

    pub fn on_err_suffix_first_word<R:LazyReader>(&mut self, _reader:&mut R) {
        let ast = Token::Unrecognized(self.current_match.clone());
        self.output.push(ast);
    }

    pub fn on_err_suffix<R:LazyReader>(&mut self, reader:&mut R) {
    fn on_err_suffix<R:LazyReader>(&mut self, reader:&mut R) {
        self.on_err_suffix_first_word(reader);
        self.pop_state();
    }

    pub fn on_no_err_suffix_first_word<R:LazyReader>(&mut self, _reader:&mut R) {}

    pub fn on_no_err_suffix<R:LazyReader>(&mut self, reader:&mut R) {
    fn on_no_err_suffix<R:LazyReader>(&mut self, reader:&mut R) {
        self.on_no_err_suffix_first_word(reader);
        self.pop_state();
    }

    fn rules_in_seen_first_word(lexer:&mut TestLexer) {
        let a_word        = Pattern::char('a').many1();
        let b_word        = Pattern::char('b').many1();
        let space         = Pattern::char(' ');
        let spaced_a_word = &space >> &a_word;
        let spaced_b_word = &space >> &b_word;
        let any           = Pattern::any();
        let end           = Pattern::eof();

        let seen_first_word_group_id = lexer.seen_first_word_state;
        let seen_first_word_group    = lexer.groups_mut().group_mut(seen_first_word_group_id);

        seen_first_word_group.create_rule(&spaced_a_word,"self.on_spaced_word(reader)");
        seen_first_word_group.create_rule(&spaced_b_word,"self.on_spaced_word(reader)");
        seen_first_word_group.create_rule(&end, "self.on_no_err_suffix(reader)");
        seen_first_word_group.create_rule(&any, "self.on_err_suffix(reader)");
    }
}


@@ -172,27 +204,8 @@ impl flexer::Definition for TestLexer {
    fn define() -> Self {
        let mut lexer = TestLexer::new();

        let a_word        = Pattern::char('a').many1();
        let b_word        = Pattern::char('b').many1();
        let space         = Pattern::char(' ');
        let spaced_a_word = &space >> &a_word;
        let spaced_b_word = &space >> &b_word;
        let any           = Pattern::any();
        let end           = Pattern::eof();

        let root_group_id = lexer.initial_state;
        let root_group    = lexer.groups_mut().group_mut(root_group_id);
        root_group.create_rule(&a_word,"self.on_first_word(reader)");
        root_group.create_rule(&b_word,"self.on_first_word(reader)");
        root_group.create_rule(&end, "self.on_no_err_suffix_first_word(reader)");
        root_group.create_rule(&any, "self.on_err_suffix_first_word(reader)");

        let seen_first_word_group_id = lexer.seen_first_word_state;
        let seen_first_word_group = lexer.groups_mut().group_mut(seen_first_word_group_id);
        seen_first_word_group.create_rule(&spaced_a_word,"self.on_spaced_word(reader)");
        seen_first_word_group.create_rule(&spaced_b_word,"self.on_spaced_word(reader)");
        seen_first_word_group.create_rule(&end, "self.on_no_err_suffix(reader)");
        seen_first_word_group.create_rule(&any, "self.on_err_suffix(reader)");
        TestLexer::rules_in_seen_first_word(&mut lexer);
        TestLexer::rules_in_root(&mut lexer);

        lexer
    }
@@ -200,6 +213,16 @@ impl flexer::Definition for TestLexer {
    fn groups(&self) -> &Registry {
        self.lexer.groups()
    }

    fn set_up(&mut self) {}

    fn tear_down(&mut self) {}
}

impl Default for TestLexer {
    fn default() -> Self {
        TestLexer::new()
    }
}


@@ -225,7 +248,7 @@ pub struct TestState {
// === Trait Impls ===

impl flexer::State for TestState {
    fn new() -> Self {
    fn new(_logger:&impl AnyLogger) -> Self {
        let mut lexer_states = group::Registry::default();
        let initial_state = lexer_states.define_group("ROOT",None);
        let seen_first_word_state = lexer_states.define_group("SEEN FIRST WORD",None);
@@ -112,7 +112,8 @@ impl NFA {
                    self.connect(state,end);
                }
                end
            }
        },
        Pattern::Always => current,
    }
}

@@ -29,19 +29,21 @@ pub enum Pattern {
    /// The pattern that triggers when a sequence of patterns is encountered.
    Seq(Vec<Pattern>),
    /// The pattern that triggers on 0..N repetitions of given pattern.
    Many(Box<Pattern>)
    Many(Box<Pattern>),
    /// The pattern that always triggers.
    Always,
}

impl Pattern {

    /// A pattern that never triggers.
    pub fn never() -> Self {
        Pattern::symbols(Symbol::from(1)..=Symbol::from(0))
        Pattern::symbol(Symbol::INVALID_SYMBOL)
    }

    /// A pattern that always triggers
    pub fn always() -> Self {
        Pattern::symbols(Symbol::from(u32::min_value())..=Symbol::from(u32::max_value()))
        Pattern::Always
    }

    /// A pattern that triggers on any character.
@@ -50,18 +52,18 @@ impl Pattern {
    }

    /// A pattern that triggers on 0..N repetitions of the pattern described by `self`.
    pub fn many(self) -> Self {
        Many(Box::new(self))
    pub fn many(&self) -> Self {
        Many(Box::new(self.clone()))
    }

    /// A pattern that triggers on 1..N repetitions of the pattern described by `self`.
    pub fn many1(self) -> Self {
    pub fn many1(&self) -> Self {
        self.clone() >> self.many()
    }

    /// A pattern that triggers on 0..=1 repetitions of the pattern described by `self`.
    pub fn opt(self) -> Self {
        self | Self::always()
    pub fn opt(&self) -> Self {
        self.clone() | Self::always()
    }

    /// A pattern that triggers on the given character.
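A brief usage sketch of these by-reference combinators (our own illustration; every call and operator used here is defined in this file):

```rust
// One-or-more 'a's, optionally followed by a single 'b'.
let a_word   = Pattern::char('a').many1();
let maybe_b  = Pattern::char('b').opt();
let combined = a_word >> maybe_b;

// Alternation composes the same way.
let either = combined | Pattern::all_of("ab");
```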
@@ -91,7 +93,12 @@ impl Pattern {

    /// Pattern that triggers when sequence of characters given by `chars` is encountered.
    pub fn all_of(chars:&str) -> Self {
        chars.chars().fold(Self::never(),|pat,char| pat >> Self::char(char))
        let mut chars_iter = chars.chars();
        if let Some(first) = chars_iter.next() {
            chars_iter.fold(Self::char(first),|pat, char| pat >> Self::char(char))
        } else {
            Pattern::never()
        }
    }

    /// The pattern that triggers on any characters contained in `chars`.
@@ -105,11 +112,12 @@ impl Pattern {
        let char_iter  = chars.chars().map(|char| char as u32);
        let char_iter2 = iter::once(0).chain(char_iter).chain(iter::once(max));
        let mut codes  = char_iter2.collect_vec();

        codes.sort();
        codes.iter().tuple_windows().fold(Self::never(),|pat,(start,end)| {
        codes.iter().tuple_windows().fold(Self::never(),|pat,(prev_code,next_code)| {
            let start = prev_code + 1;
            let end   = next_code - 1;
            if end < start {pat} else {
                pat | Pattern::symbols(Symbol::from(*start)..=Symbol::from(*end))
                pat | Pattern::symbols(Symbol::from(start)..=Symbol::from(end))
            }
        })
    }
@@ -158,3 +166,29 @@ impl Shr<Pattern> for Pattern {
    }
}
gen_ref_versions!(Pattern,Shr,shr);



// =================
// === Utilities ===
// =================

/// Quote a character as a character pattern.
///
/// It is equivalent to `Pattern::char(...)`.
#[macro_export]
macro_rules! c {
    ($char:literal) => {
        Pattern::char($char)
    }
}

/// Quote a string as a literal pattern.
///
/// It is equivalent to `Pattern::all_of(...)`.
#[macro_export]
macro_rules! l {
    ($lit:literal) => {
        Pattern::all_of($lit)
    }
}
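A short usage sketch of the new `c!` and `l!` macros (our own illustration):

```rust
// `c!` quotes a character pattern, `l!` a literal string pattern.
let open_paren = c!('(');     // equivalent to Pattern::char('(')
let keyword    = l!("type");  // equivalent to Pattern::all_of("type")
let rule       = &open_paren >> &keyword;
```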
@@ -14,11 +14,14 @@ pub struct Symbol {
}

impl Symbol {
    /// A representation of the end of the file.
    pub const EOF_CODE:Symbol = Symbol{value:u32::max_value()};

    /// A representation of the null symbol.
    pub const NULL:Symbol = Symbol{value:0};
    /// A representation of the end of the file.
    pub const EOF_CODE:Symbol = Symbol{value:u32::max_value()};
    /// A representation of an arbitrary invalid unicode symbol.
    pub const INVALID_SYMBOL:Symbol = Symbol{value:0xFFFF};
    /// A representation of the group reaching its end without matching.
    pub const INCOMPLETE_GROUP:Symbol = Symbol{value:u32::max_value() - 1};
}
@@ -33,8 +33,8 @@ use crate as flexer;
/// overhead.
pub fn specialize
( definition       : &impl flexer::State
, state_type_name  : impl Into<String>
, output_type_name : impl Into<String>
, state_type_name  : impl Str
, output_type_name : impl Str
) -> Result<String,GenError> {
    let group_registry = definition.groups();
    let mut body_items = Vec::new();
@@ -59,7 +59,7 @@ pub fn wrap_in_impl_for
) -> Result<ItemImpl,GenError> {
    let state_name:Ident = str_to_ident(state_name.into().as_str())?;
    let mut tree:ItemImpl = parse_quote! {
        #[allow(missing_docs,dead_code)]
        #[allow(missing_docs,dead_code,clippy::all)]
        impl #state_name {}
    };
    tree.items.extend(body);
@@ -68,14 +68,15 @@ pub fn wrap_in_impl_for

/// Generate the `run` function for the specialized lexer.
///
/// This function is what the user of the lexer will call
pub fn run_function(output_type_name:impl Into<String>) -> Result<ImplItem,GenError> {
    let output_type_name:Ident = str_to_ident(output_type_name)?;
    let tree:ImplItem = parse_quote! {
/// This function is what the user of the lexer will call to begin execution.
pub fn run_function(output_type_name:impl Str) -> Result<ImplItem,GenError> {
    let output_type_name = str_to_path(output_type_name)?;
    let tree:ImplItem = parse_quote! {
        pub fn run<R:LazyReader>(&mut self, mut reader:R) -> LexingResult<#output_type_name> {
            self.set_up();
            reader.advance_char(&mut self.bookmarks);
            while self.run_current_state(&mut reader) == StageStatus::ExitSuccess {}
            match self.status {
            let result = match self.status {
                StageStatus::ExitFinished => LexingResult::success(
                    mem::take(&mut self.output)
                ),
@@ -83,7 +84,9 @@ pub fn run_function(output_type_name:impl Into<String>) -> Result<ImplItem,GenEr
                    mem::take(&mut self.output)
                ),
                _ => LexingResult::partial(mem::take(&mut self.output))
            }
            };
            self.tear_down();
            result
        }
    };
    Ok(tree)
@@ -94,16 +97,19 @@ pub fn run_current_state_function() -> ImplItem {
    let tree:ImplItem = parse_quote! {
        fn run_current_state<R:LazyReader>(&mut self, reader:&mut R) -> StageStatus {
            self.status = StageStatus::Initial;
            let mut finished = false;

            // Runs until reaching a state that no longer says to continue.
            while let Some(next_state) = self.status.continue_as() {
                self.logger.info(||format!("Current character is {:?}.",reader.character()));
                self.logger.info(||format!("Continuing in {:?}.",next_state));
                self.logger.debug(||format!("Current character is {:?}.",reader.character().char));
                self.logger.debug(||format!("Continuing in {:?}.",next_state));
                self.status = self.step(next_state,reader);

                if reader.finished() {
                if finished && reader.finished(self.bookmarks()) {
                    self.logger.info("Input finished.");
                    self.status = StageStatus::ExitFinished
                }
                finished = reader.character().is_eof();

                if self.status.should_continue() {
                    match reader.character().char {
@@ -111,6 +117,9 @@ pub fn run_current_state_function() -> ImplItem {
                            reader.append_result(char);
                            self.logger.info(||format!("Result is {:?}.",reader.result()));
                        },
                        Err(flexer::prelude::reader::Error::EOF) => {
                            self.logger.info("Reached EOF.");
                        },
                        Err(flexer::prelude::reader::Error::EndOfGroup) => {
                            let current_state = self.current_state();
                            let group_name = self.groups().group(current_state).name.as_str();
@@ -439,6 +448,8 @@ pub enum GenError {
    BadExpression(String),
    /// The provided string is not a valid rust literal.
    BadLiteral(String),
    /// The provided string is not a valid rust path.
    BadPath(String),
}


@@ -453,6 +464,7 @@ impl Display for GenError {
        GenError::BadIdentifier(str) => write!(f,"`{}` is not a valid rust identifier.",str),
        GenError::BadExpression(str) => write!(f,"`{}` is not a valid rust expression.",str),
        GenError::BadLiteral(str)    => write!(f,"`{}` is not a valid rust literal.",str),
        GenError::BadPath(str)       => write!(f,"`{}` is not a valid rust path.",str),
    }
}
}
@@ -512,12 +524,13 @@ impl Into<Arm> for Branch {
// =================

/// Convert a string to an identifier.
pub fn str_to_ident(str:impl Into<String>) -> Result<Ident,GenError> {
    let string = str.into();
    match parse_str(string.as_ref()) {
        Ok(literal) => Ok(literal),
        Err(_)      => Err(GenError::BadIdentifier(string))
    }
pub fn str_to_ident(str:impl Str) -> Result<Ident,GenError> {
    parse_str(str.as_ref()).map_err(|_| GenError::BadIdentifier(str.into()))
}

/// Convert a string to a path.
pub fn str_to_path(str:impl Str) -> Result<Path,GenError> {
    parse_str(str.as_ref()).map_err(|_| GenError::BadPath(str.into()))
}

/// Convert the syntax tree into a string.
@@ -5,6 +5,8 @@ use crate::automata::pattern::Pattern;
use crate::group::rule::Rule;

use itertools::Itertools;
use std::fmt::Display;
use wasm_bindgen::__rt::core::fmt::Formatter;

pub mod rule;

@@ -234,6 +236,12 @@ impl Into<Registry> for Group {
    }
}

impl Display for Group {
    fn fmt(&self, f:&mut Formatter<'_>) -> std::fmt::Result {
        write!(f,"Group {}",self.name)
    }
}



// =============
@@ -289,11 +297,12 @@ pub mod tests {
    fn complex_rules(count:usize) -> Registry {
        let mut group = Group::default();
        for ix in 0..count {
            let string  = ix.to_string();
            let all     = Pattern::all_of(&string);
            let any     = Pattern::any_of(&string);
            let none    = Pattern::none_of(&string);
            let pattern = Pattern::many(all >> any >> none);
            let string       = ix.to_string();
            let all          = Pattern::all_of(&string);
            let any          = Pattern::any_of(&string);
            let none         = Pattern::none_of(&string);
            let all_any_none = all >> any >> none;
            let pattern      = Pattern::many(&all_any_none);
            group.add_rule(Rule::new(pattern.clone(),""));
        }
        group.into()
@@ -154,6 +154,7 @@
//! use flexer::generate;
//! # use flexer::group;
//! use flexer::generate::GenError;
//! use flexer::prelude::AnyLogger;
//! # use flexer::prelude::reader::BookmarkManager;
//! # use flexer::State;
//! #
@@ -195,7 +196,7 @@
//! # }
//!
//! impl flexer::State for LexerState {
//!     fn new() -> Self {
//!     fn new(_logger:&impl AnyLogger) -> Self {
//!         // Here we construct all of the elements needed for our lexer state. This function can
//!         // contain arbitrarily complex logic and is only called once at initialization time.
//!         let mut lexer_states = group::Registry::default();
@@ -251,6 +252,7 @@
//! # use flexer::generate;
//! # use flexer::group;
//! # use flexer::prelude::GenError;
//! # use flexer::prelude::AnyLogger;
//! use flexer::prelude::logger::Disabled;
//! # use flexer::prelude::reader::BookmarkManager;
//! # use flexer::State;
@@ -295,7 +297,7 @@
//! # }
//! #
//! # impl flexer::State for LexerState {
//! #     fn new() -> Self {
//! #     fn new(_logger:&impl AnyLogger) -> Self {
//! #         // Here we construct all of the elements needed for our lexer state. This function can
//! #         // contain arbitrarily complex logic and is only called once at initialization time.
//! #         let mut lexer_states = group::Registry::default();
@@ -351,7 +353,7 @@
//! # use flexer::Flexer;
//! # use flexer::generate;
//! # use flexer::group;
//! use flexer::prelude::AnyLogger;
//! # use flexer::prelude::AnyLogger;
//! # use flexer::prelude::GenError;
//! # use flexer::prelude::logger::Disabled;
//! # use flexer::prelude::reader::BookmarkManager;
@@ -397,7 +399,7 @@
//! # }
//! #
//! # impl flexer::State for LexerState {
//! #     fn new() -> Self {
//! #     fn new(_logger:&impl AnyLogger) -> Self {
//! #         // Here we construct all of the elements needed for our lexer state. This function can
//! #         // contain arbitrarily complex logic and is only called once at initialization time.
//! #         let mut lexer_states = group::Registry::default();
@@ -504,7 +506,7 @@
//! # }
//! #
//! # impl flexer::State for LexerState {
//! #     fn new() -> Self {
//! #     fn new(_logger:&impl AnyLogger) -> Self {
//! #         // Here we construct all of the elements needed for our lexer state. This function can
//! #         // contain arbitrarily complex logic and is only called once at initialization time.
//! #         let mut lexer_states = group::Registry::default();
@@ -638,7 +640,7 @@
//! # }
//! #
//! # impl flexer::State for LexerState {
//! #     fn new() -> Self {
//! #     fn new(_logger:&impl AnyLogger) -> Self {
//! #         // Here we construct all of the elements needed for our lexer state. This function can
//! #         // contain arbitrarily complex logic and is only called once at initialization time.
//! #         let mut lexer_states = group::Registry::default();
@@ -740,6 +742,12 @@
//!     fn groups(&self) -> &Registry {
//!         self.lexer.groups()
//!     }
//!
//!     /// Code you want to run before lexing begins.
//!     fn set_up(&mut self) {}
//!
//!     /// Code you want to run after lexing finishes.
//!     fn tear_down(&mut self) {}
//! }
//! ```
//!
@@ -820,7 +828,7 @@
//! # }
//! #
//! # impl flexer::State for LexerState {
//! #     fn new() -> Self {
//! #     fn new(_logger:&impl AnyLogger) -> Self {
//! #         // Here we construct all of the elements needed for our lexer state. This function can
//! #         // contain arbitrarily complex logic and is only called once at initialization time.
//! #         let mut lexer_states = group::Registry::default();
@@ -922,6 +930,12 @@
//! # fn groups(&self) -> &Registry {
//! #     self.lexer.groups()
//! # }
//! #
//! # /// Code you want to run before lexing begins.
//! # fn set_up(&mut self) {}
//! #
//! # /// Code you want to run after lexing finishes.
//! # fn tear_down(&mut self) {}
//! # }
//!
//! impl Lexer {
@@ -1000,6 +1014,7 @@
//! of lexing languages of a high complexity.

use crate::prelude::*;
use prelude::logger::*;

use crate::generate::GenError;
use prelude::logger::AnyLogger;
@@ -1085,7 +1100,7 @@ where Definition : State,
        let logger = <Logger>::sub(&parent_logger,"Flexer");
        let status = default();
        let output = default();
        let definition = Definition::new();
        let definition = Definition::new(&logger);
        let initial_state_id = definition.initial_state();
        let mut state_stack = NonEmptyVec::singleton(initial_state_id);
        let current_match = default();
@@ -1098,7 +1113,7 @@ where Definition : State,
impl<Definition,Output,Logger> Flexer<Definition,Output,Logger>
where Definition : State,
      Output     : Clone,
      Logger     : AnyLogger {
      Logger     : AnyLogger<Owned=Logger> {
    /// Get the lexer result.
    pub fn result(&mut self) -> &Output {
        &self.output
@@ -1116,7 +1131,9 @@ where Definition : State,

    /// Tell the lexer to enter the state described by `state`.
    pub fn push_state(&mut self, state:group::Identifier) {
        self.logger.info(||format!("Pushing state {:?}",state));
        self.logger.group_begin(
            ||format!("Enter State: {}",self.groups().group(state).name.as_str())
        );
        self.state_stack.push(state);
    }

@@ -1125,21 +1142,36 @@ where Definition : State,
    /// It will never end the initial state of the lexer.
    pub fn pop_state(&mut self) -> Option<group::Identifier> {
        let result = self.state_stack.pop();
        self.logger.info(||format!("Popped state {:?}",result));
        match result {
            None        => (),
            Some(ident) => debug!(self.logger,"Leave State: {self.groups().group(ident)}"),
        };
        self.logger.group_end();
        result
    }

    /// End states until the specified `state` is reached, leaving the lexer in `state`.
    ///
    /// If `state` does not exist on the lexer's stack, then the lexer will be left in the root
    /// state.
    pub fn pop_states_until(&mut self, state:group::Identifier) -> Vec<group::Identifier> {
        let non_opt_root_state_position =
            self.state_stack.iter().positions(|elem| *elem == state).last().unwrap_or(0);
        let range  = (non_opt_root_state_position + 1)..self.state_stack.len();
        let states = self.state_stack.drain(range).collect();
        self.logger.info(||format!("Popped states {:?}",states));
        states
    /// state. Additionally, this function cannot pop the final occurrence of the root state.
    pub fn pop_states_until(&mut self, state:group::Identifier) -> group::Identifier {
        while self.current_state() != state && self.current_state() != self.initial_state() {
            self.pop_state();
        }
        *self.state_stack.last()
    }

    /// End states up to and including the first instance of `state`, returning the identifier of
    /// the new state the lexer is in.
    ///
    /// If `state` does not exist on the lexer's stack, the lexer will be left in the root state.
    /// Additionally, this function cannot pop the final occurrence of the root state.
    pub fn pop_states_including(&mut self, state:group::Identifier) -> group::Identifier {
        while self.current_state() != state && self.current_state() != self.initial_state() {
            self.pop_state();
        }
        self.pop_state();
        *self.state_stack.last()
    }

    /// Check if the lexer is currently in the state described by `state`.
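The replacement `pop_states_until` walks the stack one state at a time instead of draining a computed range. A self-contained sketch of the new semantics, using a plain `Vec<usize>` in place of the stack of `group::Identifier`s (our own illustration, not code from this commit):

```rust
/// Sketch: pop states until `state` (or the initial state) is on top, and
/// return the state the "lexer" is left in.
fn pop_states_until(stack: &mut Vec<usize>, state: usize, initial: usize) -> usize {
    while *stack.last().unwrap() != state && *stack.last().unwrap() != initial {
        stack.pop();
    }
    *stack.last().unwrap()
}

fn main() {
    // The stack grows as states are pushed: initial (0), then 1, then 2, then 3.
    let mut stack = vec![0, 1, 2, 3];
    assert_eq!(pop_states_until(&mut stack, 1, 0), 1);
    assert_eq!(stack, vec![0, 1]);

    // If `state` is absent, we stop at the initial state instead of emptying the stack.
    assert_eq!(pop_states_until(&mut stack, 42, 0), 0);
    assert_eq!(stack, vec![0]);
}
```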
@@ -1309,7 +1341,7 @@ pub trait State {
    /// Create a new instance of the lexer's state.
    ///
    /// This function is guaranteed to be called at most once per run of the lexer.
    fn new() -> Self;
    fn new(parent_logger:&impl AnyLogger) -> Self;
    /// Return the _initial_ lexing state.
    fn initial_state(&self) -> group::Identifier;
    /// Return a reference to the group registry for a given lexer.
@@ -1339,4 +1371,8 @@ pub trait Definition {
    fn define() -> Self;
    /// Obtain the registry of groups for the lexer.
    fn groups(&self) -> &group::Registry;
    /// Run before any lexing takes place.
    fn set_up(&mut self);
    /// Run after lexing has completed.
    fn tear_down(&mut self);
}
@@ -53,7 +53,7 @@ pub struct LexerState {
    initial_state:group::Identifier,
}
impl flexer::State for LexerState {
    fn new() -> Self {
    fn new(_logger:&impl AnyLogger) -> Self {
        let mut lexer_states = group::Registry::default();
        let initial_state = lexer_states.define_group("ROOT",None);
        LexerState{lexer_states,initial_state}
@@ -143,6 +143,14 @@ impl flexer::Definition for Lexer1 {
    fn groups(&self) -> &Registry {
        self.lexer.groups()
    }

    fn set_up(&mut self) {
        unimplemented!()
    }

    fn tear_down(&mut self) {
        unimplemented!()
    }
}

#[test]
@@ -204,6 +212,14 @@ impl flexer::Definition for Lexer2 {
    fn groups(&self) -> &Registry {
        self.lexer.groups()
    }

    fn set_up(&mut self) {
        unimplemented!()
    }

    fn tear_down(&mut self) {
        unimplemented!()
    }
}

#[test]
@@ -268,6 +284,14 @@ impl flexer::Definition for Lexer3 {
    fn groups(&self) -> &Registry {
        self.lexer.groups()
    }

    fn set_up(&mut self) {
        unimplemented!()
    }

    fn tear_down(&mut self) {
        unimplemented!()
    }
}

pub struct LexerState1 {
@@ -275,7 +299,7 @@ pub struct LexerState1 {
    initial_state:group::Identifier,
}
impl flexer::State for LexerState1 {
    fn new() -> Self {
    fn new(_logger:&impl AnyLogger) -> Self {
        let mut lexer_states = group::Registry::default();
        let initial_state = lexer_states.define_group("ROOT",None);
        LexerState1 {lexer_states,initial_state}
@@ -366,6 +390,14 @@ impl flexer::Definition for Lexer4 {
    fn groups(&self) -> &Registry {
        self.lexer.groups()
    }

    fn set_up(&mut self) {
        unimplemented!()
    }

    fn tear_down(&mut self) {
        unimplemented!()
    }
}

pub struct LexerState2 {
@@ -373,7 +405,7 @@ pub struct LexerState2 {
    initial_state:group::Identifier,
}
impl flexer::State for LexerState2 {
    fn new() -> Self {
    fn new(_logger:&impl AnyLogger) -> Self {
        let mut lexer_states = group::Registry::default();
        let initial_state = lexer_states.define_group("ROOT",None);
        LexerState2 {lexer_states,initial_state}
@@ -410,5 +442,5 @@ pub fn test_bad_output_name() {
    let result = lexer.specialize();
    assert!(result.is_err());
    let message = result.unwrap_err().to_string();
    assert_eq!(message,"`Bad output name` is not a valid rust identifier.");
    assert_eq!(message,"`Bad output name` is not a valid rust path.");
}
@@ -39,6 +39,17 @@ pub struct Char<Error> {
    pub size: usize,
}

impl Char<crate::Error> {
    /// Check if the character represents the end of file.
    pub fn is_eof(&self) -> bool {
        match self.char {
            Ok(_)                  => false,
            Err(crate::Error::EOF) => true,
            Err(_)                 => false
        }
    }
}



// =====================
@@ -14,9 +14,10 @@ pub mod decoder;

use enso_prelude::*;

use crate::decoder::Char;
use crate::decoder::InvalidChar;
use decoder::Decoder;
use crate::decoder::{Char, InvalidChar};
use crate::Error::EOF;



// ============
@@ -79,10 +80,10 @@ pub enum Error {
impl Error {
    /// The `u32` value that corresponds to EOF.
    pub const END_OF_FILE:u32 = u32::max_value();
    /// The `u32` value that corresponds to an invalid character.
    pub const INVALID_CHAR:u32 = u32::max_value() - 1;
    /// The `u32` value that corresponds to an invalid unicode character.
    pub const INVALID_CHAR:u32 = 0xFFFF;
    /// The `u32` value corresponding to the end of group.
    pub const END_OF_GROUP:u32 = u32::max_value() - 2;
    pub const END_OF_GROUP:u32 = u32::max_value() - 1;
}

@@ -145,7 +146,10 @@ pub trait LazyReader {
    /// Get the current character from the reader.
    fn character(&self) -> decoder::Char<Error>;
    /// Check if the reader has finished reading.
    fn finished(&self) -> bool;
    ///
    /// A reader is finished when it has no further input left to read, and when it does not need to
    /// rewind to any point.
    fn finished(&self, bookmarks:&BookmarkManager) -> bool;
    /// Check if the reader is empty.
    fn empty(&self) -> bool;
    /// Fill the buffer with words from the input.
@@ -240,8 +244,9 @@ impl<D:Decoder, R:Read<Item=D::Word>> LazyReader for Reader<D,R> {
        self.character
    }

    fn finished(&self) -> bool {
        self.empty() && self.character.char == Err(EOF)
    fn finished(&self, _bookmarks:&BookmarkManager) -> bool {
        let rewinded = self.max_possible_rewind_len(_bookmarks) != 0;
        self.empty() && rewinded
    }

    fn empty(&self) -> bool {
lib/rust/lexer/definition/Cargo.toml (new file, 18 lines)
@@ -0,0 +1,18 @@
[package]
name = "lexer-definition"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2018"

publish = false

[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true

[dependencies]
flexer = { path = "../../flexer", version = "0.1.0" }
enso-prelude = { path = "../../enso-prelude", version = "0.1.0" }

uuid = { version = "0.8.1" , features = ["serde","v4","wasm-bindgen"] }
lib/rust/lexer/definition/src/lexer.rs (new file, 1159 lines)
File diff suppressed because it is too large.

lib/rust/lexer/definition/src/lib.rs (new file, 29 lines)
@@ -0,0 +1,29 @@
#![feature(test)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]

//! This library defines the lexer for the syntax of the Enso language.

pub mod lexer;
pub mod token;

/// A module that can be re-exported under the same name in the generation crate.
///
/// This is necessary to avoid issues with paths getting wonky when the code is generated from the
/// Enso lexer definition. In this project, imports should _not_ be made from the crate root
/// _except_ through use of this `library` module.
pub mod library {
    pub use crate::token;
}

/// A collection of functionality for working with the lexer definition.
pub mod prelude {
    pub use flexer::prelude::*;
    pub use flexer::prelude::logger::*;
}
lib/rust/lexer/definition/src/token.rs (new file, 570 lines)
@@ -0,0 +1,570 @@
//! This file defines the various tokens requried by the Enso lexer.
|
||||
//!
|
||||
//! This file makes heavy use of terminology from the Enso design documentation, particularly the
|
||||
//! [syntax](https://dev.enso.org/docs/enso/syntax) documentation. For the sake of brevity, many
|
||||
//! terms will _not_ be defined here.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Token ===
|
||||
// =============
|
||||
|
||||
/// A lexer token.
|
||||
#[derive(Clone,Debug,Eq,PartialEq)]
|
||||
pub struct Token {
|
||||
/// The shape of the token.
|
||||
pub shape : Shape,
|
||||
/// The length (in characters) of this token.
|
||||
pub length : usize,
|
||||
/// The number of trailing spaces after this token before the next.
|
||||
pub offset : usize,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
/// Get the length that the token takes up in the program source.
|
||||
pub fn source_length(&self) -> usize {
|
||||
self.length + self.offset
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructors for the various forms of token.
|
||||
#[allow(non_snake_case)]
|
||||
impl Token {
|
||||
/// Construct a token representing a referent identifier.
|
||||
pub fn Referent(name:impl Str, offset:usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Referent(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a variable identifier.
|
||||
pub fn Variable(name:impl Str, offset:usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Variable(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing an external identifier.
|
||||
pub fn External(name:impl Str, offset:usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::External(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a blank identifier.
|
||||
pub fn Blank(offset:usize) -> Token {
|
||||
let shape = Shape::Blank;
|
||||
let length = 1;
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing an operator.
|
||||
pub fn Operator(name:impl Str, offset:usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Operator(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a modifier operator.
|
||||
pub fn Modifier(name:impl Str, offset:usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count() + 1;
|
||||
let shape = Shape::Modifier(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a number literal.
|
||||
pub fn Number(base:impl Str, num:impl Into<String>, offset:usize) -> Token {
|
||||
let str = num.into();
|
||||
let base_str = base.into();
|
||||
let length = if base_str.is_empty() {
|
||||
str.chars().count()
|
||||
} else {
|
||||
base_str.chars().count() + 1 + str.chars().count()
|
||||
};
|
||||
let shape = Shape::Number{base:base_str,number:str};
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a dangling number base.
|
||||
pub fn DanglingBase(base:impl Str, offset:usize) -> Token {
|
||||
let base_str = base.into();
|
||||
let length = base_str.chars().count() + 1;
|
||||
let shape = Shape::DanglingBase(base_str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a text literal.
|
||||
pub fn Text(text:impl Str, offset:usize) -> Token {
|
||||
let str = text.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Text(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a line of tokens.
|
||||
pub fn Line(tokens:Vec<Token>, offset:usize, trailing_line_ending:LineEnding) -> Token {
|
||||
let line_ending_len = trailing_line_ending.size();
|
||||
let length = tokens.iter().fold(line_ending_len,|l,r| l + r.offset + r.length);
|
||||
let shape = Shape::Line{tokens,trailing_line_ending};
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a blank line.
|
||||
///
|
||||
/// The `offset` for blank lines is from the leftmost column, not from the parent block's
|
||||
/// indentation.
|
||||
pub fn BlankLine(offset:usize, trailing_line_ending:LineEnding) -> Token {
|
||||
let length = trailing_line_ending.size();
|
||||
let shape = Shape::BlankLine(trailing_line_ending);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing a block.
|
||||
pub fn Block
|
||||
( block_type : BlockType
|
||||
, indent : usize
|
||||
, lines : Vec<Token>
|
||||
, offset : usize
|
||||
) -> Token {
|
||||
let length = lines.iter().map(|line| {
|
||||
let line_length = line.length;
|
||||
let line_offset = line.offset;
|
||||
match line.shape {
|
||||
Shape::Line{..} => indent + line_offset + line_length,
|
||||
Shape::BlankLine(_) => line_offset + line_length,
|
||||
_ => unreachable_panic!("Tokens in a blocks should always be lines."),
|
||||
}
|
||||
}).sum();
|
||||
let shape = Shape::Block{block_type,indent,lines};
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing an invalid suffix.
|
||||
pub fn InvalidSuffix(text:impl Str, offset:usize) -> Token {
|
||||
let str = text.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::InvalidSuffix(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
|
||||
/// Construct a token representing an unrecognised lexeme.
|
||||
pub fn Unrecognized(text:impl Str, offset:usize) -> Token {
|
||||
let str = text.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Unrecognized(str);
|
||||
Token{shape,length,offset}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === BlockType ===
|
||||
// =================
|
||||
|
||||
/// The type for an Enso Block token.
|
||||
#[derive(Copy,Clone,Debug,PartialEq,Eq)]
|
||||
pub enum BlockType {
|
||||
/// A block made up of arguments to a function.
|
||||
Continuous,
|
||||
/// A block made up of separate lines.
|
||||
Discontinuous,
|
||||
}
|
||||
|
||||
// ===================
|
||||
// === NewlineType ===
|
||||
// ===================
|
||||
|
||||
/// The type of newline associated with the line.
|
||||
#[derive(Copy,Clone,Debug,Display,PartialEq,Eq)]
|
||||
pub enum LineEnding {
|
||||
/// There is no newline.
|
||||
None,
|
||||
/// The unix-style line-feed (`'\n'`),
|
||||
LF,
|
||||
/// The windows-style carriage-return, line-feed (`"\r\n"`).
|
||||
CRLF
|
||||
}
|
||||
|
||||
impl LineEnding {
|
||||
/// Get the number of rust `char`s that the newline type takes up.
|
||||
pub fn size(self) -> usize {
|
||||
match self {
|
||||
Self::None => 0,
|
||||
Self::LF => 1,
|
||||
Self::CRLF => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Default for LineEnding {
|
||||
fn default() -> Self {
|
||||
LineEnding::None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Shape ===
|
||||
// =============
|
||||
|
||||
/// The shapes of tokens needed by the Enso lexer.
|
||||
///
|
||||
/// This is a very small set of shapes, because the [`Token`] type only deals with the tokens that
|
||||
/// the lexer works with, not the full complexity of Enso's syntax.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone,Debug,PartialEq,Eq)]
|
||||
pub enum Shape {
|
||||
// === Identifiers ===
|
||||
|
||||
/// An identifier in referent form.
|
||||
Referent(String),
|
||||
/// An identifier in variable form.
|
||||
Variable(String),
|
||||
/// An identifier not conforming to the Enso identifier rules (e.g. a Java identifier).
|
||||
External(String),
|
||||
/// A blank identifier (`_`).
|
||||
Blank,
|
||||
/// An operator identifier.
|
||||
Operator(String),
|
||||
/// A modifier identifier.
|
||||
Modifier(String),
|
||||
|
||||
    // === Literals ===

    /// A literal number.
    Number{base:String, number:String},
    /// A dangling base from a number literal.
    DanglingBase(String),
    /// A text literal.
    ///
    /// This is currently way too simplistic to actually represent text, but it is a good
    /// placeholder.
    Text(String),


    // === Lines ===

    /// A line containing tokens.
    ///
    /// The offset for a line is always zero, as it is contained in a block with a defined
    /// indentation.
    Line{
        /// The tokens on the line.
        tokens : Vec<Token>,
        /// The line ending that _ends_ the line.
        ///
        /// Please note that the concept of 'ending' the line is a bit strange, as blocks are
        /// treated as tokens in their own right, and hence are included in lines.
        trailing_line_ending : LineEnding
    },
    /// A blank line.
    ///
    /// The offset for a blank line is from the leftmost column, as it may be negative from the
    /// block's indentation level.
    BlankLine(LineEnding),


    // === Block ===

    /// A block of tokens.
    Block {
        /// The type of the block.
        block_type : BlockType,
        /// The leading indentation of the block.
        indent : usize,
        /// The lines in the block.
        lines : Vec<Token>,
    },


    // === Errors ===

    /// An invalid suffix.
    InvalidSuffix(String),
    /// An unrecognized token.
    Unrecognized(String),
}

impl Shape {

    /// Construct an identifier in referent form.
    pub fn referent(name:impl Into<String>) -> Shape {
        Shape::Referent(name.into())
    }

    /// Construct an identifier in variable form.
    pub fn variable(name:impl Into<String>) -> Shape {
        Shape::Variable(name.into())
    }

    /// Construct an identifier in external form.
    pub fn external(name:impl Into<String>) -> Shape {
        Shape::External(name.into())
    }

    /// Construct a blank identifier.
    ///
    /// This is provided as a function for completeness.
    pub fn blank() -> Shape {
        Shape::Blank
    }

    /// Construct an operator identifier.
    pub fn operator(opr:impl Into<String>) -> Shape {
        Shape::Operator(opr.into())
    }

    /// Construct a modifier identifier.
    pub fn modifier(opr:impl Into<String>) -> Shape {
        Shape::Modifier(opr.into())
    }

    /// Construct a number literal.
    pub fn number(base:impl Into<String>, num:impl Into<String>) -> Shape {
        Shape::Number{base:base.into(),number:num.into()}
    }

    /// Construct a dangling base literal.
    pub fn dangling_base(base:impl Into<String>) -> Shape {
        Shape::DanglingBase(base.into())
    }

    /// Construct a text literal.
    pub fn text(text:impl Into<String>) -> Shape {
        Shape::Text(text.into())
    }

    /// Construct a line that contains tokens.
    pub fn line(tokens:Vec<Token>, trailing_line_ending:LineEnding) -> Shape {
        Shape::Line{tokens,trailing_line_ending}
    }

    /// Construct a line that is blank.
    pub fn blank_line(trailing_line_ending:LineEnding) -> Shape {
        Shape::BlankLine(trailing_line_ending)
    }

    /// Construct a block containing lines.
    pub fn block(block_type:BlockType, indent:usize, lines:Vec<Token>) -> Shape {
        Shape::Block{block_type,indent,lines}
    }

    /// Construct an invalid suffix.
    pub fn invalid_suffix(text:impl Into<String>) -> Shape {
        Shape::InvalidSuffix(text.into())
    }

    /// Construct an unrecognised token.
    pub fn unrecognized(text:impl Into<String>) -> Shape {
        Shape::Unrecognized(text.into())
    }
}


// ==============
// === Stream ===
// ==============

/// A representation of the Enso token stream.
#[derive(Clone,Debug,Default,PartialEq)]
pub struct Stream {
    /// The tokens in the token stream.
    tokens:Vec<Token>
}

impl Stream {
    /// Append the provided `token` to the token stream.
    pub fn append(&mut self, token:Token) {
        self.tokens.push(token)
    }

    /// Get a reference to the tokens in the stream.
    pub fn tokens(&self) -> &Vec<Token> {
        &self.tokens
    }

    /// Get the total source length spanned by the token stream (the sum of each token's offset
    /// and length).
    pub fn tokens_len(&self) -> usize {
        self.tokens.iter().map(|token|token.length + token.offset).sum()
    }
}

impl Deref for Stream {
    type Target = Vec<Token>;

    fn deref(&self) -> &Self::Target {
        &self.tokens
    }
}

impl DerefMut for Stream {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.tokens
    }
}


// === Trait Impls ===

impl From<Vec<Token>> for Stream {
    fn from(tokens:Vec<Token>) -> Self {
        Stream{tokens}
    }
}

impl Into<Vec<Token>> for Stream {
    fn into(self) -> Vec<Token> {
        self.tokens
    }
}

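For illustration, a minimal usage sketch of `Stream` (hedged: it assumes the `Token` constructors exercised in the tests below, where each token records its `offset` and a `length` computed from its contents):

```rust
use lexer_definition::library::token::{self, Token};

fn stream_length_sketch() {
    // "some_var" is 8 characters at offset 0; "Ident" is 5 characters with one
    // space of leading offset, so the stream spans 8 + 0 + 5 + 1 = 14 characters.
    let mut stream = token::Stream::from(vec![Token::Variable("some_var",0)]);
    stream.append(Token::Referent("Ident",1));
    assert_eq!(stream.tokens_len(),14);
}
```
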
// =============
// === Tests ===
// =============

#[cfg(test)]
mod tests {
    use super::*;
    use crate::token::BlockType;


    // === Testing Utilities ===

    /// Asserts that the `token` has the provided `shape`.
    pub fn assert_shape(token:&Token, shape:Shape) {
        assert_eq!(token.shape,shape);
    }

    /// Asserts that the `token` has the provided `length`.
    pub fn assert_length(token:&Token, length:usize) {
        assert_eq!(token.length,length)
    }


    // === Tests for Token Construction ===

    #[test]
    fn construct_referent_token() {
        let token = Token::Referent("Some_Ref_Name",0);
        assert_shape(&token,Shape::referent("Some_Ref_Name"));
        assert_length(&token,13);
    }

    #[test]
    fn construct_variable_token() {
        let token = Token::Variable("some_variable_name",0);
        assert_shape(&token,Shape::variable("some_variable_name"));
        assert_length(&token,18);
    }

    #[test]
    fn construct_external_name_token() {
        let token = Token::External("camelCase",0);
        assert_shape(&token,Shape::external("camelCase"));
        assert_length(&token,9);
    }

    #[test]
    fn construct_blank_token() {
        let token = Token::Blank(0);
        assert_shape(&token,Shape::blank());
        assert_length(&token,1);
    }

    #[test]
    fn construct_operator_token() {
        let token = Token::Operator("==>",0);
        assert_shape(&token,Shape::operator("==>"));
        assert_length(&token,3);
    }

    #[test]
    fn construct_modifier_token() {
        let token = Token::Modifier("+",0);
        assert_shape(&token,Shape::modifier("+"));
        assert_length(&token,2);
    }

    #[test]
    fn construct_number_token() {
        let token = Token::Number("","1231",0);
        assert_shape(&token,Shape::number("","1231"));
        assert_length(&token,4);
    }

    #[test]
    fn construct_dangling_base_token() {
        let token = Token::DanglingBase("15",0);
        assert_shape(&token,Shape::dangling_base("15"));
        assert_length(&token,3);
    }

    #[test]
    fn construct_text_token() {
        let token = Token::Text("some prose goes here",0);
        assert_shape(&token,Shape::text("some prose goes here"));
        assert_length(&token,20);
        // TODO [AA] Make this internally account for length of quotes.
    }

    #[test]
    fn construct_line_token() {
        let tokens = vec![Token::Variable("aa",0),Token::Referent("Abc",1)];
        let token = Token::Line(tokens.clone(), 4, LineEnding::LF);
        assert_shape(&token,Shape::line(tokens.clone(), LineEnding::LF));
        assert_length(&token,7);
    }

    #[test]
    fn construct_blank_line_token() {
        let token = Token::BlankLine(13,LineEnding::LF);
        assert_shape(&token, Shape::blank_line(LineEnding::LF));
        assert_length(&token,1);
    }

    #[test]
    fn construct_block_token_lf() {
        let lines = vec![
            Token::Line(vec![],0,LineEnding::LF),
            Token::Line(vec![],4,LineEnding::LF)
        ];
        let token = Token::Block(BlockType::Continuous,4,lines.clone(),0);
        assert_shape(&token,Shape::block(BlockType::Continuous,4,lines.clone()));
        assert_length(&token,14);
    }

    #[test]
    fn construct_block_token_crlf() {
        let lines = vec![
            Token::Line(vec![],0,LineEnding::CRLF),
            Token::Line(vec![],4,LineEnding::CRLF)
        ];
        let token = Token::Block(BlockType::Continuous,4,lines.clone(),0);
        assert_shape(&token,Shape::block(BlockType::Continuous,4,lines.clone()));
        assert_length(&token,16);
    }

    #[test]
    fn construct_invalid_suffix_token() {
        let token = Token::InvalidSuffix("aaa",0);
        assert_shape(&token,Shape::invalid_suffix("aaa"));
        assert_length(&token,3);
    }

    #[test]
    fn construct_unrecognized_token() {
        let token = Token::Unrecognized("a",0);
        assert_shape(&token,Shape::unrecognized("a"));
        assert_length(&token,1);
    }
}
21 lib/rust/lexer/generation/Cargo.toml Normal file
@ -0,0 +1,21 @@
[package]
name = "lexer"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2018"

publish = false

[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true

[dependencies]
flexer = { path = "../../flexer", version = "0.1.0" }
enso-prelude = { path = "../../enso-prelude", version = "0.1.0" }
lexer-definition = { path = "../definition", version = "0.1.0" }

[build-dependencies]
flexer = { path = "../../flexer", version = "0.1.0" }
lexer-definition = { path = "../definition", version = "0.1.0" }
32 lib/rust/lexer/generation/build.rs Normal file
@ -0,0 +1,32 @@
use std::fs::File;
use lexer_definition::lexer::EnsoLexer;
use std::io::prelude::*;
use flexer::Definition;
use flexer::State;



/// Generates the lexer engine and saves the result into the file `src/engine.rs`.
///
/// The content of the generated file can be used with the `include!` macro.
fn generate_engine() -> std::io::Result<()> {
    let definition_path = "../definition/src/lexer.rs";
    let output_directory = "src/generated";
    let _ = std::fs::create_dir(output_directory);
    let output_path = "src/generated/engine.rs";
    let definition_error = format!("The lexer definition should exist at {}.",definition_path);
    let output_error = format!("Cannot open output file at {}.",output_path);
    let mut lexer_def = File::open(definition_path).expect(definition_error.as_str());
    let mut contents = String::new();
    let mut file = File::create(output_path).expect(output_error.as_str());
    let lexer = EnsoLexer::define();
    let engine = lexer.specialize().unwrap();
    lexer_def.read_to_string(&mut contents).expect("Unable to read lexer definition.");
    file.write_all(contents.as_bytes()).expect("Unable to write lexer definition.");
    file.write_all(engine.as_bytes()).expect("Unable to write lexer specialization.");
    Ok(())
}

fn main() -> std::io::Result<()> {
    generate_engine()
}
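The doc comment above notes that the generated source can also be consumed with the `include!` macro. A hedged sketch of that alternative (the module shown here is hypothetical; this crate instead commits to `pub mod engine;` inside `src/generated.rs`, shown next):

```rust
// Hypothetical consumer; not what this commit actually does.
pub mod engine {
    include!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/generated/engine.rs"));
}
```
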
3 lib/rust/lexer/generation/src/generated.rs Normal file
@ -0,0 +1,3 @@
//! This module re-exports the generated lexer sources.

pub mod engine;
25 lib/rust/lexer/generation/src/lib.rs Normal file
@ -0,0 +1,25 @@
#![feature(test)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]

//! This module exports the interface to the generated Enso lexer.

pub mod generated;

/// Support libraries for the lexer definition.
///
/// This is an intentional re-export in this crate's namespace.
mod library {
    pub use lexer_definition::library::*;
}

/// A library of commonly useful functionality.
mod prelude {
    pub use lexer_definition::prelude::*;
}
759 lib/rust/lexer/generation/tests/enso_lexer.rs Normal file
@ -0,0 +1,759 @@
#![feature(test)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]

//! This file contains tests for the Enso Lexer.

// TODO [AA] Tests for error scenarios once it's done.

use flexer::*;
use lexer_definition::library::*;

use flexer::prelude::reader::decoder::DecoderUTF8;
use flexer::prelude::Reader;
use lexer::generated::engine::EnsoLexer;
use lexer_definition::library::token::Token;
use lexer_definition::token::BlockType;
use lexer_definition::token::LineEnding;



// =================
// === Utilities ===
// =================

/// Assert that `result` is a success with tokens `expected`.
fn assert_succeeds_as(result:&LexingResult<token::Stream>, expected:token::Stream) {
    match result.kind {
        ResultKind::Success => assert_eq!(result.tokens,expected),
        _ => panic!("Lexing failed.")
    }
}

/// Assert that the provided input lexes as `expected`.
fn assert_lexes(input:impl AsRef<str>, expected:token::Stream) {
    let input_len = input.as_ref().chars().count();
    let result = lex(input);
    assert_succeeds_as(&result,expected);
    let tokens_vec : Vec<_> = result.tokens.into();
    let total_length : usize = tokens_vec.iter().map(|token| token.offset + token.length).sum();
    assert_eq!(total_length,input_len);
}

/// Lex the provided string.
fn lex(input:impl AsRef<str>) -> LexingResult<token::Stream> {
    let mut lexer = EnsoLexer::new();
    let reader = Reader::new(input.as_ref().as_bytes(),DecoderUTF8());
    lexer.run(reader)
}

/// Asserts that the input is a block and has a length equal to `length`.
fn assert_block_has_length(input:impl AsRef<str>, expected_length:usize) {
    let result = lex(input);
    match result.kind {
        ResultKind::Success => {
            let tokens = result.tokens.tokens();
            match tokens.first().expect("Token should be present.") {
                Token{shape:token::Shape::Block{..},length,..} =>
                    assert_eq!(*length,expected_length),
                _ => panic!("Token not a block."),
            }
        },
        _ => panic!("Lexing failed"),
    }
}

/// Makes the test text have unix line endings to ensure consistency regardless of git checkout
/// style.
fn make_unix_line_endings(input:&str) -> String {
    let string = String::from(input);
    string.chars().filter(|c| *c != '\r').collect()
}


// =================
// === Operators ===
// =================

#[test]
fn function_operator() {
    let input = "->";
    let expected = token::Stream::from(vec![Token::Operator("->",0)]);
    assert_lexes(input,expected);
}

#[test]
fn bind_operator() {
    let input = "<-";
    let expected = token::Stream::from(vec![Token::Operator("<-",0)]);
    assert_lexes(input,expected);
}

#[test]
fn left_pipe_operator() {
    let input = "<|";
    let expected = token::Stream::from(vec![Token::Operator("<|",0)]);
    assert_lexes(input,expected);
}

#[test]
fn right_pipe_operator() {
    let input = "|>";
    let expected = token::Stream::from(vec![Token::Operator("|>",0)]);
    assert_lexes(input,expected);
}

#[test]
fn eq_operator() {
    let input = "=";
    let expected = token::Stream::from(vec![Token::Operator("=",0)]);
    assert_lexes(input,expected);
}

#[test]
fn eq_compare_operator() {
    let input = "==";
    let expected = token::Stream::from(vec![Token::Operator("==",0)]);
    assert_lexes(input,expected);
}

#[test]
fn geq_operator() {
    let input = ">=";
    let expected = token::Stream::from(vec![Token::Operator(">=",0)]);
    assert_lexes(input,expected);
}

#[test]
fn neq_operator() {
    let input = "!=";
    let expected = token::Stream::from(vec![Token::Operator("!=",0)]);
    assert_lexes(input,expected);
}

#[test]
fn dot_operator() {
    let input = ".";
    let expected = token::Stream::from(vec![Token::Operator(".",0)]);
    assert_lexes(input,expected);
}

#[test]
fn comma_operator() {
    let input = ",";
    let expected = token::Stream::from(vec![Token::Operator(",",0)]);
    assert_lexes(input,expected);
}

#[test]
fn double_dot_operator() {
    let input = "..";
    let expected = token::Stream::from(vec![Token::Operator("..",0)]);
    assert_lexes(input,expected);
}

#[test]
fn triple_dot_operator() {
    let input = "...";
    let expected = token::Stream::from(vec![Token::Operator("...",0)]);
    assert_lexes(input,expected);
}

#[test]
fn error_operator() {
    let input = "!";
    let expected = token::Stream::from(vec![Token::Operator("!",0)]);
    assert_lexes(input,expected);
}

#[test]
fn type_ascription_operator() {
    let input = ":";
    let expected = token::Stream::from(vec![Token::Operator(":",0)]);
    assert_lexes(input,expected);
}

#[test]
fn in_operator() {
    let input = "in";
    let expected = token::Stream::from(vec![Token::Operator("in",0)]);
    assert_lexes(input,expected);
}

#[test]
fn typeset_union_operator() {
    let input = "|";
    let expected = token::Stream::from(vec![Token::Operator("|",0)]);
    assert_lexes(input,expected);
}

#[test]
fn typeset_intersection_operator() {
    let input = "&";
    let expected = token::Stream::from(vec![Token::Operator("&",0)]);
    assert_lexes(input,expected);
}

#[test]
fn typeset_subtraction_operator() {
    let input = "\\";
    let expected = token::Stream::from(vec![Token::Operator("\\",0)]);
    assert_lexes(input,expected);
}

#[test]
fn disable_comment() {
    let input = "#";
    let expected = token::Stream::from(vec![Token::Operator("#",0)]);
    assert_lexes(input,expected);
}

#[test]
fn doc_comment() {
    let input = "##";
    let expected = token::Stream::from(vec![Token::Operator("##",0)]);
    assert_lexes(input,expected);
}

#[test]
fn arbitrary_left_operator() {
    let input = "<!!-";
    let expected = token::Stream::from(vec![Token::Operator("<!!-",0)]);
    assert_lexes(input,expected);
}

#[test]
fn arbitrary_right_operator() {
    let input = "-->>";
    let expected = token::Stream::from(vec![Token::Operator("-->>",0)]);
    assert_lexes(input,expected);
}

#[test]
fn modifier_plus() {
    let input = "+=";
    let expected = token::Stream::from(vec![Token::Modifier("+",0)]);
    assert_lexes(input,expected);
}

#[test]
fn modifier_minus() {
    let input = "-=";
    let expected = token::Stream::from(vec![Token::Modifier("-",0)]);
    assert_lexes(input,expected);
}

#[test]
fn arbitrary_modifier() {
    let input = "<%=";
    let expected = token::Stream::from(vec![Token::Modifier("<%",0)]);
    assert_lexes(input,expected);
}

#[test]
fn invalid_eq_suffix() {
    let input = "===";
    let expected = token::Stream::from(vec![Token::Operator("==",0),Token::InvalidSuffix("=",0)]);
    assert_lexes(input,expected);
}

#[test]
fn invalid_dots_suffix() {
    let input = "....";
    let expected = token::Stream::from(vec![Token::Operator("...",0),Token::InvalidSuffix(".",0)]);
    assert_lexes(input,expected);
}

#[test]
fn invalid_modifier_suffix() {
    let input = "+==";
    let expected = token::Stream::from(vec![Token::Operator("+",0),Token::InvalidSuffix("==",0)]);
    assert_lexes(input,expected);
}



// ===================
// === Identifiers ===
// ===================

#[test]
fn variable_ident() {
    let input = "some_variable_name";
    let expected = token::Stream::from(vec![Token::Variable("some_variable_name",0)]);
    assert_lexes(input,expected)
}

#[test]
fn referent_ident() {
    let input = "Some_Referent_Name";
    let expected = token::Stream::from(vec![Token::Referent("Some_Referent_Name",0)]);
    assert_lexes(input,expected)
}

#[test]
fn external_ident() {
    let input = "__camelCaseIdentifier";
    let expected = token::Stream::from(vec![Token::External("__camelCaseIdentifier",0)]);
    assert_lexes(input,expected)
}

#[test]
fn blank_ident() {
    let input = "_";
    let expected = token::Stream::from(vec![Token::Blank(0)]);
    assert_lexes(input,expected)
}

#[test]
fn ticked_variable_ident() {
    let input = "some_variable_name'";
    let expected = token::Stream::from(vec![Token::Variable("some_variable_name'",0)]);
    assert_lexes(input,expected)
}

#[test]
fn ticked_referent_ident() {
    let input = "Some_Referent_Name'";
    let expected = token::Stream::from(vec![Token::Referent("Some_Referent_Name'",0)]);
    assert_lexes(input,expected)
}

#[test]
fn multi_ticked_variable_ident() {
    let input = "some_variable_name'''";
    let expected = token::Stream::from(vec![Token::Variable("some_variable_name'''",0)]);
    assert_lexes(input,expected)
}

#[test]
fn multi_ticked_referent_ident() {
    let input = "Some_Referent_Name'''";
    let expected = token::Stream::from(vec![Token::Referent("Some_Referent_Name'''",0)]);
    assert_lexes(input,expected)
}

#[test]
fn variable_with_numbers() {
    let input = "some0_1";
    let expected = token::Stream::from(vec![Token::Variable("some0_1",0)]);
    assert_lexes(input,expected)
}

#[test]
fn referent_with_numbers() {
    let input = "Some_1821";
    let expected = token::Stream::from(vec![Token::Referent("Some_1821",0)]);
    assert_lexes(input,expected)
}

#[test]
fn tick_not_at_end_variable() {
    let input = "some_var'iable";
    let expected = token::Stream::from(vec![
        Token::Variable("some_var'",0),
        Token::InvalidSuffix("iable",0),
    ]);
    assert_lexes(input,expected)
}

#[test]
fn trailing_underscore() {
    let input = "some_var_";
    let expected = token::Stream::from(vec![Token::External("some_var_",0)]);
    assert_lexes(input,expected)
}

#[test]
fn trailing_underscore_with_tick() {
    let input = "some_var_'";
    let expected = token::Stream::from(vec![Token::External("some_var_'",0)]);
    assert_lexes(input,expected)
}

#[test]
fn invalid_suffix() {
    let input = "some_varД";
    let expected = token::Stream::from(vec![
        Token::Variable("some_var",0),
        Token::InvalidSuffix("Д",0),
    ]);
    assert_lexes(input,expected)
}

#[test]
fn unrecognized_token() {
    let input = "some_var`";
    let expected = token::Stream::from(vec![
        Token::Variable("some_var",0),
        Token::Unrecognized("`",0),
    ]);
    assert_lexes(input,expected)
}

#[test]
fn chained_identifiers() {
    let input = "my_func A' someJavaValue some_python_value";
    let expected = token::Stream::from(vec![
        Token::Variable("my_func",0),
        Token::Referent("A'",1),
        Token::External("someJavaValue",1),
        Token::Variable("some_python_value",1),
    ]);
    assert_lexes(input,expected)
}



// ===============
// === Numbers ===
// ===============

#[test]
fn integer() {
    let input = "13831";
    let expected = token::Stream::from(vec![Token::Number("","13831",0)]);
    assert_lexes(input,expected);
}

#[test]
fn integer_with_explicit_base() {
    let input = "10_13831";
    let expected = token::Stream::from(vec![Token::Number("10","13831",0)]);
    assert_lexes(input,expected);
}

#[test]
fn dangling_base() {
    let input = "10_";
    let expected = token::Stream::from(vec![Token::DanglingBase("10",0)]);
    assert_lexes(input,expected);
}

#[test]
fn hex_number() {
    let input = "16_ff";
    let expected = token::Stream::from(vec![Token::Number("16","ff",0)]);
    assert_lexes(input,expected);
}

#[test]
fn decimal() {
    let input = "2.71828";
    let expected = token::Stream::from(vec![Token::Number("","2.71828",0)]);
    assert_lexes(input,expected);
}

#[test]
fn decimal_with_explicit_base() {
    let input = "10_2.71828";
    let expected = token::Stream::from(vec![Token::Number("10","2.71828",0)]);
    assert_lexes(input,expected);
}

#[test]
fn error_base() {
    let input = "10.2_2";
    let expected = token::Stream::from(vec![
        Token::Number("","10.2",0),
        Token::InvalidSuffix("_2",0),
    ]);
    assert_lexes(input,expected);
}

#[test]
fn offset_number() {
    let input = "    10.2";
    let expected = token::Stream::from(vec![
        Token::Number("","10.2",4),
    ]);
    assert_lexes(input,expected);
}



// ============
// === Text ===
// ============



// ==============
// === Blocks ===
// ==============

#[test]
fn block_function_call() {
    let input = make_unix_line_endings(
r#"f
    argument_1
    argument_2
    fn a1 a2 a3
    argument_4
    argument_5"#);
    let block_fn_args =
        Token::Block(
            BlockType::Continuous,
            4,
            vec![
                Token::Line(
                    vec![Token::Variable("argument_1",0)],
                    0,
                    LineEnding::LF
                ),
                Token::Line(
                    vec![
                        Token::Variable("argument_2",0),
                    ],
                    0,
                    LineEnding::LF
                ),
                Token::Line(
                    vec![
                        Token::Variable("fn",0),
                        Token::Variable("a1",1),
                        Token::Variable("a2",1),
                        Token::Variable("a3",1),
                    ],
                    0,
                    LineEnding::LF
                ),
                Token::Line(
                    vec![
                        Token::Variable("argument_4",0),
                    ],
                    0,
                    LineEnding::LF
                ),
                Token::Line(
                    vec![
                        Token::Variable("argument_5",0),
                    ],
                    0,
                    LineEnding::None
                ),
            ],
            0
        );
    let top_level_first_line = Token::Line(
        vec![
            Token::Variable("f",0),
            block_fn_args
        ],
        0,
        LineEnding::LF
    );
    let top_level_block = token::Stream::from(vec![
        Token::Block(
            BlockType::Continuous,
            0,
            vec![top_level_first_line],
            0
        )
    ]);
    assert_lexes(input,top_level_block);
}

#[test]
fn block_empty_lines() {
    let input = "f\r\n    a\n\n    b\n";
    let nested_block = Token::Block(
        BlockType::Continuous,
        4,
        vec![
            Token::Line(vec![Token::Variable("a",0)],0,LineEnding::LF),
            Token::BlankLine(0,LineEnding::LF),
            Token::Line(vec![Token::Variable("b",0)],0,LineEnding::LF),
        ],
        0
    );
    let top_line = Token::Line(
        vec![
            Token::Variable("f",0),
            nested_block
        ],
        0,
        LineEnding::CRLF
    );
    let expected = token::Stream::from(vec![
        Token::Block(
            BlockType::Continuous,
            0,
            vec![top_line],
            0
        )
    ]);
    assert_lexes(input,expected);
}

#[test]
fn block_top_level() {
    let input = make_unix_line_endings(
r#"

foo
bar
baz
"#);
    let expected = token::Stream::from(vec![
        Token::Block(
            BlockType::Continuous,
            0,
            vec![
                Token::BlankLine(0,LineEnding::LF),
                Token::BlankLine(0,LineEnding::LF),
                Token::Line(vec![Token::Variable("foo",0)],0,LineEnding::LF),
                Token::Line(vec![Token::Variable("bar",0)],0,LineEnding::LF),
                Token::Line(vec![Token::Variable("baz",0)],0,LineEnding::LF),
            ],
            0
        )
    ]);
    assert_lexes(input,expected);
}

#[test]
fn block_with_operator() {
    let input = make_unix_line_endings(
r#"x ->
    foo x 1
"#);
    let nested_block = Token::Block(
        BlockType::Discontinuous,
        4,
        vec![
            Token::Line(vec![
                Token::Variable("foo",0),
                Token::Variable("x",1),
                Token::Number("","1",1),
            ], 0, LineEnding::LF)
        ],
        0
    );
    let expected = token::Stream::from(vec![
        Token::Block(
            BlockType::Continuous,
            0,
            vec![
                Token::Line(vec![
                    Token::Variable("x",0),
                    Token::Operator("->",1),
                    nested_block
                ], 0, LineEnding::LF)
            ],
            0
        )
    ]);
    assert_lexes(input,expected);
}

#[test]
fn block_with_nesting() {
    let input = make_unix_line_endings(r#"
some_long_thing
    foo ->
        Bar
        baz

    quux
"#);
    let function_block = Token::Block(
        BlockType::Discontinuous,
        8,
        vec![
            Token::Line(vec![Token::Referent("Bar",0)],0,LineEnding::LF),
            Token::Line(vec![Token::Variable("baz",0)],0,LineEnding::LF),
            Token::BlankLine(0,LineEnding::LF),
        ],
        0
    );
    let foo_block = Token::Block(
        BlockType::Continuous,
        4,
        vec![
            Token::Line(vec![
                Token::Variable("foo",0),
                Token::Operator("->",1),
                function_block,
            ], 0, LineEnding::LF),
            Token::Line(vec![Token::Variable("quux",0)],0,LineEnding::LF),
        ],
        0
    );
    let expected = token::Stream::from(vec![
        Token::Block(
            BlockType::Continuous,
            0,
            vec![
                Token::BlankLine(0,LineEnding::LF),
                Token::Line(vec![
                    Token::Variable("some_long_thing",0),
                    foo_block
                ], 0, LineEnding::LF),
            ],
            0
        )
    ]);
    assert_lexes(input,expected);
}

#[test]
fn block_extra_indented_blank_lines() {
    let input = "a\n    b\n        \n  \n    c";
    let indented_block = Token::Block(
        BlockType::Continuous,
        4,
        vec![
            Token::Line(vec![Token::Variable("b",0)],0,LineEnding::LF),
            Token::BlankLine(8,LineEnding::LF),
            Token::BlankLine(2,LineEnding::LF),
            Token::Line(vec![Token::Variable("c",0)],0,LineEnding::None),
        ],
        0
    );
    let top_level_line = Token::Line(vec![
        Token::Variable("a",0),
        indented_block
    ],0,LineEnding::LF);
    let expected = token::Stream::from(vec![
        Token::Block(
            BlockType::Continuous,
            0,
            vec![top_level_line],
            0
        )
    ]);
    assert_lexes(input,expected);
}

#[test]
fn block_length_unix() {
    let input = "a\n    b\n    c";
    assert_block_has_length(input,13);
}

#[test]
fn block_length_windows() {
    let input = "a\r\n    b\r\n    c";
    assert_block_has_length(input,15);
}

#[test]
fn block_length_mixed() {
    let input = "a\r\n    b\n    c\n    d";
    assert_block_has_length(input,20);
}



// ================
// === Combined ===
// ================

@ -33,7 +33,6 @@ case class ParserDef() extends flexer.Parser[AST.Module] {
    val lowerLetter: Pattern = range('a', 'z')
    val upperLetter: Pattern = range('A', 'Z')
    val digit: Pattern = range('0', '9')
    val hex: Pattern = digit | range('a', 'f') | range('A', 'F')
    val alphaNum: Pattern = digit | lowerLetter | upperLetter
    val space: Pattern = ' '.many1
    val newline: Pattern = '\n'