Initial parser implementation in Rust (#3341)

Wojciech Daniło 2022-05-17 05:13:20 +02:00 committed by GitHub
parent 0b34346c19
commit 9e219d698c
98 changed files with 5105 additions and 14354 deletions

Cargo.lock (generated)

@ -44,6 +44,15 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "anyhow"
version = "1.0.57"
@ -948,6 +957,7 @@ version = "0.2.0"
dependencies = [
"criterion",
"enso-prelude",
"failure",
"itertools 0.9.0",
"rustversion",
"serde",
@ -973,25 +983,6 @@ dependencies = [
"debug-scene-visualization",
]
[[package]]
name = "enso-flexer"
version = "0.2.1"
dependencies = [
"enso-automata",
"enso-lazy-reader",
"enso-logger",
"enso-macro-utils",
"enso-prelude",
"itertools 0.8.2",
"nonempty",
"proc-macro2",
"quote",
"syn",
"unicode-segmentation",
"wasm-bindgen",
"wasm-bindgen-test",
]
[[package]]
name = "enso-formatter"
version = "0.1.0"
@ -1096,14 +1087,6 @@ dependencies = [
"wasm-bindgen-test",
]
[[package]]
name = "enso-lazy-reader"
version = "0.2.0"
dependencies = [
"enso-prelude",
"itertools 0.8.2",
]
[[package]]
name = "enso-logger"
version = "0.3.1"
@ -1132,6 +1115,38 @@ dependencies = [
"enso-prelude",
]
[[package]]
name = "enso-parser"
version = "0.1.0"
dependencies = [
"enso-data-structures",
"enso-parser-syntax-tree-builder",
"enso-parser-syntax-tree-visitor",
"enso-prelude",
"enso-shapely-macros",
"enso-types",
]
[[package]]
name = "enso-parser-syntax-tree-builder"
version = "0.1.0"
dependencies = [
"enso-macro-utils",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "enso-parser-syntax-tree-visitor"
version = "0.1.0"
dependencies = [
"enso-macro-utils",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "enso-prelude"
version = "0.2.6"
@ -1139,7 +1154,6 @@ dependencies = [
"anyhow",
"backtrace",
"boolinator",
"bumpalo",
"cfg-if 1.0.0",
"colored",
"derivative",
@ -1159,6 +1173,9 @@ dependencies = [
"serde_json",
"shrinkwraprs 0.3.0",
"smallvec 1.8.0",
"tracing",
"tracing-subscriber",
"tracing-wasm",
"wasm-bindgen",
"wasm-bindgen-test",
"weak-table",
@ -2576,25 +2593,6 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexer"
version = "0.1.0"
dependencies = [
"criterion",
"enso-flexer",
"enso-prelude",
"lexer-definition",
]
[[package]]
name = "lexer-definition"
version = "0.1.0"
dependencies = [
"enso-flexer",
"enso-prelude",
"uuid",
]
[[package]]
name = "libc"
version = "0.2.125"
@ -2849,12 +2847,6 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "nonempty"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f962080273ac958f790079cfc886b5b9d722969dbd7b03f473902bdfe5c69b1"
[[package]]
name = "normalize-line-endings"
version = "0.3.0"
@ -3170,18 +3162,6 @@ dependencies = [
"websocket",
]
[[package]]
name = "parser-new"
version = "0.1.0"
dependencies = [
"criterion",
"enso-data-structures",
"enso-logger",
"enso-prelude",
"itertools 0.10.3",
"lexer",
]
[[package]]
name = "paste"
version = "0.1.18"
@ -3884,6 +3864,15 @@ dependencies = [
"opaque-debug",
]
[[package]]
name = "sharded-slab"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
dependencies = [
"lazy_static",
]
[[package]]
name = "shrinkwraprs"
version = "0.2.3"
@ -4097,6 +4086,15 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.44"
@ -4297,9 +4295,21 @@ dependencies = [
"cfg-if 1.0.0",
"log 0.4.17",
"pin-project-lite 0.2.9",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.26"
@ -4307,6 +4317,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f"
dependencies = [
"lazy_static",
"valuable",
]
[[package]]
@ -4319,6 +4330,42 @@ dependencies = [
"tracing",
]
[[package]]
name = "tracing-log"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922"
dependencies = [
"lazy_static",
"log 0.4.17",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bc28f93baff38037f64e6f43d34cfa1605f27a49c34e8a04c5e78b0babf2596"
dependencies = [
"ansi_term",
"sharded-slab",
"smallvec 1.8.0",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
name = "tracing-wasm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4575c663a174420fa2d78f4108ff68f65bf2fbb7dd89f33749b6e826b3626e07"
dependencies = [
"tracing",
"tracing-subscriber",
"wasm-bindgen",
]
[[package]]
name = "traitobject"
version = "0.1.0"
@ -4428,6 +4475,12 @@ dependencies = [
"sha1",
]
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "value-bag"
version = "1.0.0-alpha.9"


@ -9,6 +9,8 @@ members = [
"build/enso-formatter",
"build/rust-scripts",
"lib/rust/*",
"lib/rust/parser/src/syntax/tree/visitor",
"lib/rust/parser/src/syntax/tree/builder",
"lib/rust/profiler/data",
"integration-test"
]


@ -8,6 +8,7 @@ use crate::config::InitialView;
use crate::executor::web::EventLoopExecutor;
use crate::initializer::setup_global_executor;
use crate::Ide;
use enso_frp::future::EventOutputExt;
use enso_web::Closure;
use enso_web::HtmlDivElement;


@ -59,7 +59,6 @@
use wasm_bindgen::prelude::*;
// ==============
// === Export ===
// ==============


@ -38,15 +38,9 @@ use ensogl_text as text;
// === Export ===
// ==============
pub mod entry;
pub mod wide;
// ==============
// === Export ===
// ==============
pub mod entry;
pub use entry::View as Entry;


@ -207,7 +207,7 @@ commands.build.rust = async function (argv) {
console.log('Minimizing the WASM binary.')
await gzip(paths.wasm.main, paths.wasm.mainGz)
const releaseLimitMb = 4.36
const releaseLimitMb = 4.37
let limitMb = releaseLimitMb + allowExtraMb
await checkWasmSize(paths.wasm.mainGz, limitMb)
}


@ -6,8 +6,8 @@ edition = "2021"
description = "A finite-automata-based lexing engine."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/automata"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/automata"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["lexer", "finite-automata"]


@ -6,8 +6,8 @@ edition = "2021"
description = "A collection of useful data structures."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/data"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/data"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = []
@ -23,7 +23,7 @@ enso-prelude = { version = "^0.2.1", path = "../prelude" }
serde = { version = "1.0" , features = ["derive"] }
typenum = { version = "1.11.2" }
rustversion = { version = "1.0" }
failure = { version = "0.1.6" }
[dev-dependencies]
itertools = "0.9.0"


@ -21,10 +21,10 @@ pub type Branches<K, V, S> = HashMap<K, HashMapTree<K, V, S>, S>;
/// more branches accessible by the given key type.
#[derive(Derivative)]
#[derivative(Clone)]
#[derivative(Debug(bound = "K:Eq+Hash+Debug , V:Debug , S:BuildHasher"))]
#[derivative(Default(bound = "K:Eq+Hash , V:Default , S:BuildHasher+Default"))]
#[derivative(PartialEq(bound = "K:Eq+Hash , V:PartialEq , S:BuildHasher"))]
#[derivative(Eq(bound = "K:Eq+Hash , V:Eq , S:BuildHasher"))]
#[derivative(Debug(bound = "K:Eq+Hash+Debug, V:Debug, S:BuildHasher"))]
#[derivative(Default(bound = "K:Eq+Hash, V:Default, S:BuildHasher+Default"))]
#[derivative(PartialEq(bound = "K:Eq+Hash, V:PartialEq, S:BuildHasher"))]
#[derivative(Eq(bound = "K:Eq+Hash, V:Eq, S:BuildHasher"))]
pub struct HashMapTree<K, V, S = RandomState> {
/// Value of the current tree node.
pub value: V,


@ -0,0 +1,207 @@
//! An immutable linked list implementation.
use crate::prelude::*;
// ============
// === List ===
// ============
/// Immutable linked list containing values of type [`T`]. Every node of the list is kept in an
/// [`Rc`], so cloning the list, or any of its tails, is very cheap: it only bumps a reference count.
#[derive(Derivative, Deref)]
#[derivative(Clone(bound = ""))]
#[derivative(Default(bound = ""))]
pub struct List<T> {
#[allow(missing_docs)]
pub data: Option<NonEmpty<T>>,
}
/// Non-empty list. It is guaranteed to have at least one element. See [`List`] to learn more.
#[derive(Derivative, Deref, Debug)]
#[derivative(Clone(bound = ""))]
pub struct NonEmpty<T> {
#[allow(missing_docs)]
pub node: Rc<Node<T>>,
}
/// A node of the [`List`]. Contains the current value and a link to the list's [`tail`].
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct Node<T> {
pub head: T,
pub tail: List<T>,
}
impl<T> Node<T> {
/// Constructor.
pub fn singleton(head: T) -> Self {
let tail = default();
Self { head, tail }
}
}
impl<T> NonEmpty<T> {
/// Constructor.
pub fn singleton(head: T) -> Self {
let node = Rc::new(Node::singleton(head));
Self { node }
}
/// Convert this non-empty list to a list of unknown length.
pub fn into_list(self) -> List<T> {
let data = Some(self);
List { data }
}
/// Prepend the element to this list.
pub fn prepend(self, head: T) -> Self {
self.into_list().prepend(head)
}
/// Get the head element of this list.
pub fn head(&self) -> &T {
&self.head
}
/// Get the tail of this list.
pub fn tail(&self) -> &List<T> {
&self.tail
}
/// Get the last element of this list.
pub fn last(&self) -> &T {
self.tail.last().unwrap_or_else(|| self.head())
}
/// Check whether this list is empty.
pub fn is_empty(&self) -> bool {
false
}
/// Convert this list to a vector.
fn to_vec(&self) -> Vec<&T> {
let mut out = vec![&self.head];
let mut list = self.tail();
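// Follow the tail links, pushing a reference to each element until the end of the list.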
loop {
match list.head() {
None => break,
Some(head) => {
out.push(head);
match list.tail() {
None => break,
Some(tail) => list = tail,
}
}
}
}
out
}
}
impl<T> List<T> {
/// Prepend the element to the list.
pub fn prepend(self, head: T) -> NonEmpty<T> {
let tail = self;
let node = Rc::new(Node { head, tail });
NonEmpty { node }
}
/// Get the head element.
pub fn head(&self) -> Option<&T> {
self.as_ref().map(|t| t.head())
}
/// Get the tail of this list.
pub fn tail(&self) -> Option<&List<T>> {
self.as_ref().map(|t| t.tail())
}
/// Get the last element of this list.
pub fn last(&self) -> Option<&T> {
self.data.as_ref().map(|t| t.last())
}
/// Check whether this list is empty.
pub fn is_empty(&self) -> bool {
self.is_none()
}
/// Convert this list to a vector.
fn to_vec(&self) -> Vec<&T> {
self.data.as_ref().map(|t| t.to_vec()).unwrap_or_default()
}
/// Borrow this list as an optional non-empty list. The option is [`None`] if the list is empty.
pub fn as_non_empty(&self) -> &Option<NonEmpty<T>> {
&self.data
}
/// Convert this list to a non-empty list. Return [`None`] if the list is empty.
pub fn into_non_empty(self) -> Option<NonEmpty<T>> {
self.data
}
}
impl<T> From<NonEmpty<T>> for List<T> {
fn from(list: NonEmpty<T>) -> Self {
list.into_list()
}
}
impl<T: Debug> Debug for List<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Debug::fmt(&self.to_vec(), f)
}
}
impl<'a, T> IntoIterator for &'a List<T> {
type Item = &'a T;
type IntoIter = std::vec::IntoIter<&'a T>;
fn into_iter(self) -> Self::IntoIter {
self.to_vec().into_iter()
}
}
impl<'a, T> IntoIterator for &'a NonEmpty<T> {
type Item = &'a T;
type IntoIter = std::vec::IntoIter<&'a T>;
fn into_iter(self) -> Self::IntoIter {
self.to_vec().into_iter()
}
}
impl<T> FromIterator<T> for List<T> {
// Clippy reports a false positive here, as we cannot add a bound to `I` requiring it to be a
// double-ended iterator.
#[allow(clippy::needless_collect)]
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let vec: Vec<T> = iter.into_iter().collect();
let mut list = List::default();
for item in vec.into_iter().rev() {
list = list.prepend(item).into()
}
list
}
}
impl<T> From<Vec<T>> for List<T> {
fn from(v: Vec<T>) -> Self {
let mut out = List::default();
for item in v.into_iter().rev() {
out = out.prepend(item).into_list();
}
out
}
}
impl<T> TryFrom<Vec<T>> for NonEmpty<T> {
type Error = failure::Error;
fn try_from(v: Vec<T>) -> Result<Self, Self::Error> {
let err = "Cannot convert empty Vec to NonEmpty one.";
List::<T>::from(v).into_non_empty().ok_or_else(|| failure::err_msg(err))
}
}
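The following is a minimal usage sketch for the list API added above (not part of the commit; the `demo` function is illustrative). It assumes the module is reachable as `enso_data_structures::im_list`, per the `lib.rs` change below.

use enso_data_structures::im_list::List;
use enso_data_structures::im_list::NonEmpty;

fn demo() {
    // `From<Vec<T>>` preserves element order, so the head is the first element.
    let list: List<usize> = List::from(vec![1, 2, 3]);
    assert_eq!(list.head(), Some(&1));
    // Prepending yields a `NonEmpty` list; the old nodes are shared via `Rc`, so this is O(1).
    let longer: NonEmpty<usize> = list.clone().prepend(0);
    assert_eq!(longer.last(), &3);
    // Iterating over a reference yields references to the elements, in order.
    let items: Vec<&usize> = (&longer).into_iter().collect();
    assert_eq!(items, vec![&0usize, &1, &2, &3]);
}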


@ -24,6 +24,7 @@
pub mod dependency_graph;
pub mod diet;
pub mod hash_map_tree;
pub mod im_list;
pub mod index;
pub mod opt_vec;


@ -1,5 +1,6 @@
//! Functionality for producing debug information.
// === Features ===
#![feature(extern_types)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]


@ -6,8 +6,8 @@ edition = "2021"
description = "A library for supporting generic programming."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/generics"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/generics"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["generic"]


@ -6,8 +6,8 @@ edition = "2021"
description = "An efficient logger for writing applications in Rust."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/logger"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/logger"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["logging"]


@ -6,8 +6,8 @@ edition = "2021"
description = "Utilities for writing macros."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/macro-utils"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/macro-utils"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["macro", "utility"]


@ -6,8 +6,8 @@ edition = "2021"
description = "An implementation of functional optics."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/optics"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/optics"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["optics","lenses"]


@ -1,24 +1,20 @@
[package]
name = "parser-new"
name = "enso-parser"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
description = "Enso Parser."
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
[dependencies]
enso-data-structures = { version = "0.2.0", path = "../data-structures" }
enso-logger = { version = "0.3.0", path = "../logger" }
enso-prelude = { version = "0.2.0", path = "../prelude" }
lexer = { version = "0.1.0", path = "lexer/generation" }
itertools = { version = "0.10.0" }
enso-prelude = { path = "../prelude" }
enso-data-structures = { path = "../data-structures" }
enso-types = { path = "../types" }
enso-shapely-macros = { path = "../shapely/macros" }
enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" }
enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" }
[build-dependencies]
[dev-dependencies]
criterion = "0.3"


@ -1,28 +0,0 @@
[package]
name = "ast-new"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "An abstract syntax tree for the Enso language."
readme = "README.md"
homepage = "https://github.com/enso-org/enso/lib/rust/ast"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
keywords = ["ast"]
categories = ["parsing"]
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
clap = { version = "2.33.3" }
itertools = { version = "0.10.0" }
proc-macro2 = { version = "1.0.26" }
syn = { version = "1.0.72", features = ["full", "extra-traits", "visit-mut", "visit"] }
uuid = { version = "0.8.1" , features = ["serde","v4","wasm-bindgen"] }


@ -1,366 +0,0 @@
//! This module exports the implementation of the enso abstract syntax tree.
use app::*;
use def::*;
use invalid::*;
use lines::*;
use name::*;
use num::*;
use txt::*;
use uuid::Uuid;
// ===================================
// === Abstract Syntax Tree (Stub) ===
// ===================================
/// An ast node of unknown shape.
pub type AnyAst = Ast<Shape>;
/// An ast node with a unique id and length.
#[derive(Debug, Clone)]
pub struct Ast<T> {
/// A unique identifier.
pub uid: Option<Uuid>,
/// Length in number of chars of this ast node.
pub len: usize,
/// The number of trailing spaces.
pub off: usize,
/// The ast node itself.
pub ast: T,
}
// The set of all ast nodes.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub enum Shape {
Unrecognized(invalid::Unrecognized),
Blank(name::Blank),
Var(name::Var),
Cons(name::Cons),
Opr(name::Opr),
Number(num::Number),
Text(txt::Text),
Prefix(app::Prefix),
Infix(app::Infix),
Module(lines::Module),
Block(lines::Block),
FunDef(def::FunDef),
OprDef(def::OprDef),
VarDef(def::VarDef),
}
// ===================
// === Application ===
// ===================
/// This module exports ast shapes that represent function application.
pub mod app {
use super::*;
/// The ast node for application.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Prefix {
pub func: Box<AnyAst>,
pub arg: Box<AnyAst>,
}
/// The ast node for an infix operator application.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Infix {
pub larg: Box<AnyAst>,
pub opr: Box<Ast<name::Opr>>,
pub rarg: Box<AnyAst>,
}
}
// ======================
// === Block & Module ===
// ======================
/// This module exports ast shapes that are represented as sequence of equally indented lines.
pub mod lines {
use super::*;
/// The ast node for a module that represents the file's root block.
///
/// The module consists of a sequence of possibly empty lines with no leading indentation.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Module {
pub lines: Vec<Option<AnyAst>>,
}
/// The ast node for a block that represents a sequence of equally indented lines.
///
/// Lines may contain some child ast or be empty. Block is used for all code blocks except
/// for the root one, which uses `Module`.
#[derive(Debug, Clone)]
pub struct Block {
/// Absolute's block indent, counting from the module's root.
pub indent: usize,
/// Leading empty lines. Each line is represented by absolute count of spaces
/// it contains, counting from the root.
pub empty_lines: Vec<usize>,
/// First line with non-empty item.
pub first_line: Box<AnyAst>,
/// Rest of lines, each of them optionally having contents.
pub lines: Vec<Option<AnyAst>>,
}
}
// ==================
// === Definition ===
// ==================
/// This module exports ast shapes that represent definition of variable, function etc.
pub mod def {
use super::*;
/// The ast node for a method definition.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct FunDef {
pub name: Box<Ast<name::Var>>,
pub args: Vec<AnyAst>,
pub body: Box<AnyAst>,
}
/// The ast node for an operator definition.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct OprDef {
pub name: Box<Ast<name::Opr>>,
pub args: Vec<AnyAst>,
pub body: Box<AnyAst>,
}
/// The ast node for a variable definition.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct VarDef {
pub name: Box<Ast<name::Var>>,
pub value: Box<AnyAst>,
}
}
// ===================
// === Identifiers ===
// ===================
/// This module exports ast shapes for basic identifiers.
pub mod name {
/// The ast node for the underscore `_`.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy)]
pub struct Blank {}
/// The ast node for a variable.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Var {
pub name: String,
}
/// The ast node for a constructor.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Cons {
pub name: String,
}
/// The ast node for an operator.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Opr {
pub name: String,
}
}
// ===============
// === Invalid ===
// ===============
/// This module exports invalid ast shapes.
pub mod invalid {
/// Unrecognized token.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Unrecognized {
pub str: String,
}
}
// ==============
// === Number ===
// ==============
/// This module exports ast shapes that represent numbers.
pub mod num {
/// The ast node for a number.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Number {
pub number: String,
}
}
// ============
// === Text ===
// ============
/// This module exports ast shapes that represent text (strings).
pub mod txt {
/// The ast node for a string of text.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Text {
pub text: String,
}
}
// === Into<Shape> ===
impl From<Unrecognized> for Shape {
fn from(val: Unrecognized) -> Self {
Self::Unrecognized(val)
}
}
impl From<Blank> for Shape {
fn from(val: Blank) -> Self {
Self::Blank(val)
}
}
impl From<Var> for Shape {
fn from(val: Var) -> Self {
Self::Var(val)
}
}
impl From<Cons> for Shape {
fn from(val: Cons) -> Self {
Self::Cons(val)
}
}
impl From<Opr> for Shape {
fn from(val: Opr) -> Self {
Self::Opr(val)
}
}
impl From<Number> for Shape {
fn from(val: Number) -> Self {
Self::Number(val)
}
}
impl From<Text> for Shape {
fn from(val: Text) -> Self {
Self::Text(val)
}
}
impl From<Prefix> for Shape {
fn from(val: Prefix) -> Self {
Self::Prefix(val)
}
}
impl From<Infix> for Shape {
fn from(val: Infix) -> Self {
Self::Infix(val)
}
}
impl From<Module> for Shape {
fn from(val: Module) -> Self {
Self::Module(val)
}
}
impl From<Block> for Shape {
fn from(val: Block) -> Self {
Self::Block(val)
}
}
impl From<FunDef> for Shape {
fn from(val: FunDef) -> Self {
Self::FunDef(val)
}
}
impl From<OprDef> for Shape {
fn from(val: OprDef) -> Self {
Self::OprDef(val)
}
}
impl From<VarDef> for Shape {
fn from(val: VarDef) -> Self {
Self::VarDef(val)
}
}
// ====================
// === Constructors ===
// ====================
impl AnyAst {
/// Creates a new ast node with random `Uuid` from `Shape`.
pub fn new(ast: impl Into<Shape>) -> Self {
Self { ast: ast.into(), uid: Some(Uuid::new_v4()), len: 0, off: 0 }
}
/// Creates a new ast node with `Shape::Unrecognized`.
pub fn unrecognized(str: String) -> Self {
Self::new(Unrecognized { str })
}
/// Creates a new ast node with `Shape::Blank`.
pub fn blank() -> Self {
Self::new(Blank {})
}
/// Creates a new ast node with `Shape::Var`.
pub fn var(name: String) -> Self {
Self::new(Var { name })
}
/// Creates a new ast node with `Shape::Cons`.
pub fn cons(name: String) -> Self {
Self::new(Cons { name })
}
/// Creates a new ast node with `Shape::Opr`.
pub fn opr(name: String) -> Self {
Self::new(Opr { name })
}
/// Creates a new ast node with `Shape::Number`.
pub fn num(number: i64) -> Self {
Self::new(Number { number: number.to_string() })
}
/// Creates a new ast node with `Shape::Text`.
pub fn text(text: String) -> Self {
Self::new(Text { text })
}
}


@ -1,367 +0,0 @@
//! This module exports scala ast generator.
// === Non-Standard Linter Configuration ===
#![allow(unused_must_use)]
use std::io::prelude::*;
use itertools::Itertools;
use proc_macro2::Span;
use std::collections::HashMap;
use std::fmt::Write;
use std::fs::File;
use syn;
use syn::Ident;
// =======================
// === Scala Generator ===
// =======================
/// A Scala ast generator.
#[derive(Debug, Clone, Default)]
pub struct ScalaGenerator {
/// The content of the file.
code: String,
/// Current indentation.
indent: usize,
/// Inheritance hierarchy.
extends: HashMap<Ident, Ident>,
}
impl ScalaGenerator {
/// Generates a Scala ast from `lib/rust/ast/src/lib.rs`.
pub fn ast() -> std::io::Result<String> {
let mut content = String::new();
let mut file = File::open("lib/rust/ast/src/ast.rs")?;
file.read_to_string(&mut content);
Ok(Self::file("ast", syn::parse_file(content.as_str()).unwrap()))
}
/// Generates a Scala ast definition from a parsed Rust ast definition.
pub fn file(name: &str, file: syn::File) -> String {
let mut this = Self::default();
writeln!(this.code, "package org.enso.ast\n");
writeln!(this.code, "import java.util.UUID\n\n");
this.block(&Ident::new(name, Span::call_site()), &file.items[..]);
this.code
}
/// Generates a block of Scala code.
fn block(&mut self, ident: &Ident, lines: &[syn::Item]) {
write!(self.code, "\n{:i$}object ", "", i = self.indent);
self.typ_name(ident);
writeln!(self.code, " {{");
self.indent += 2;
if self.extends.contains_key(ident) {
write!(self.code, "{:i$}sealed trait ", "", i = self.indent);
self.typ_name(ident);
self.extends(ident);
}
for item in lines {
match item {
syn::Item::Enum(val) => self.adt(val),
syn::Item::Type(val) => {
write!(self.code, "\n{:i$}type ", "", i = self.indent);
self.typ_name(&val.ident);
self.generics(&val.generics);
write!(self.code, " = ");
self.typ(val.ty.as_ref());
writeln!(self.code);
}
syn::Item::Struct(val) =>
if let syn::Fields::Named(fields) = &val.fields {
self.class(&val.ident, &val.generics, fields);
} else {
panic!("All struct fields must be named!");
},
syn::Item::Mod(val) => {
if let Some(content) = &val.content {
self.block(&val.ident, &content.1[..]);
};
}
_ => (),
}
}
self.indent -= 2;
writeln!(self.code, "{:i$}}}", "", i = self.indent);
}
/// Generates a Scala case class.
///
/// `struct Foo { bar:Bar, baz:Baz }` => `case class Foo(bar:Bar, baz:Baz)`
fn class(&mut self, ident: &Ident, generics: &syn::Generics, fields: &syn::FieldsNamed) {
write!(self.code, "{:i$}case class ", "", i = self.indent);
self.typ_name(ident);
self.generics(generics);
write!(self.code, "(");
for (i, field) in fields.named.iter().enumerate() {
if i != 0 {
write!(self.code, ", ");
}
if let Some(ident) = &field.ident {
self.var_name(ident);
}
write!(self.code, ": ");
self.typ(&field.ty);
}
write!(self.code, ")");
self.extends(ident);
}
/// Generates Scala ADT - case classes extending a sealed trait.
///
/// There are two modes of conversion:
///
/// 1) When the Rust enum variant has named fields:
/// ```
/// enum Foo {
/// Bar { x: isize },
/// Baz { y: isize },
/// }
/// ```
/// ===>
/// ```scala
/// sealed trait Foo
/// case class Bar(x:Int) extends Foo
/// case class Baz(y:Int) extends Foo
/// ```
///
/// 2) When the Rust enum variant has one unnamed field with qualified type:
/// ```
/// enum Foo {
/// Bar(barz::Bar),
/// Baz(barz::Baz),
/// }
/// mod barz {
/// pub struct Bar {}
/// pub struct Baz {
/// y: isize,
/// }
/// }
/// ```
/// ===>
/// ```scala
/// sealed trait Foo
/// object barz {
/// sealed trait Barz extends Foo
/// case class Bar() extends Barz
/// case class Baz(y:size) extends Barz
/// }
/// ```
fn adt(&mut self, adt: &syn::ItemEnum) {
write!(self.code, "\n{:i$}sealed trait {}", "", adt.ident, i = self.indent);
self.generics(&adt.generics);
self.extends(&adt.ident);
for variant in &adt.variants {
match &variant.fields {
syn::Fields::Named(fields) => {
self.extends.insert(variant.ident.clone(), adt.ident.clone());
self.class(&variant.ident, &adt.generics, fields);
}
syn::Fields::Unnamed(fields) => {
if let Some(syn::Type::Path(path)) = fields.unnamed.first().map(|f| &f.ty) {
let path = path.path.segments.iter().rev().take(2).collect_tuple();
if let Some((class, object)) = path {
self.extends.insert(object.ident.clone(), adt.ident.clone());
self.extends.insert(class.ident.clone(), object.ident.clone());
}
}
}
_ => (),
}
}
}
/// Generates Scala class extension.
///
/// `foo` => `extends Foo`
fn extends(&mut self, ident: &Ident) {
if let Some(name) = self.extends.get(ident).cloned() {
write!(self.code, " extends ");
self.typ_name(&name);
}
writeln!(self.code);
}
/// Generates Scala type parameters.
///
/// `<Foo, Bar>` = `[Foo, Bar]`
fn generics(&mut self, generics: &syn::Generics) {
if generics.params.is_empty() {
return;
}
write!(self.code, "[");
for (i, param) in generics.params.iter().enumerate() {
if i != 0 {
write!(self.code, ", ");
}
if let syn::GenericParam::Type(typ) = param {
self.typ_name(&typ.ident)
}
}
write!(self.code, "]");
}
/// Generates a qualified scala type with type arguments.
///
/// `foo::Bar<Baz>` => `Foo.Bar[Baz]`
fn typ(&mut self, typ: &syn::Type) {
if let syn::Type::Path(path) = typ {
for (i, typ) in path.path.segments.iter().enumerate() {
if i != 0 {
write!(self.code, ".");
}
self.typ_segment(typ);
}
}
}
/// Generates a Scala type with type arguments.
///
/// `Foo<Bar<Baz>>` => `Foo[Bar[Baz]]`
fn typ_segment(&mut self, typ: &syn::PathSegment) {
let boxed = typ.ident.to_string().as_str() == "Box";
if !boxed {
self.typ_name(&typ.ident);
}
if let syn::PathArguments::AngleBracketed(typ) = &typ.arguments {
if !boxed {
write!(self.code, "[");
}
for (i, typ) in typ.args.iter().enumerate() {
if i != 0 {
write!(self.code, ", ");
}
if let syn::GenericArgument::Type(typ) = typ {
self.typ(typ);
}
}
if !boxed {
write!(self.code, "]");
}
}
}
/// Generates a Scala variable name (camel case).
///
/// `foo_bar` => `fooBar`
fn var_name(&mut self, ident: &Ident) {
let mut underscore = false;
for char in ident.to_string().chars() {
if char == '_' {
underscore = true;
} else if underscore {
underscore = false;
for char in char.to_uppercase() {
self.code.push(char)
}
} else {
self.code.push(char);
}
}
}
/// Generates a Scala type name.
///
/// The following Rust types are automatically converted to Scala types:
/// ```code
/// u32 | i32 | u16 | i16 | i8 => Int,
/// usize | isize | u64 | i64 => Long,
/// u8 => Byte,
/// char => Char,
/// Vec => Vector,
/// Uuid => UUID,
/// ```
fn typ_name(&mut self, ident: &Ident) {
let name = match ident.to_string().as_str() {
"u32" | "i32" | "u16" | "i16" | "i8" => "Int",
"usize" | "isize" | "u64" | "i64" => "Long",
"u8" => "Byte",
"char" => "Char",
"Vec" => "Vector",
"Uuid" => "UUID",
name => {
let mut chars = name.chars();
if let Some(char) = chars.next() {
write!(self.code, "{}", char.to_uppercase().to_string() + chars.as_str());
}
""
}
};
write!(self.code, "{}", name);
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_file() {
let rust = syn::parse_quote! {
type A<X> = B<X,Y>;
pub enum FooBarBaz {
Foo(a::Foo),
Bar(a::Bar),
Baz(b::Baz),
}
mod a {
struct Foo {}
struct Bar {x:usize, y:u8, z:b::Type}
}
mod b {
type Type = Baz;
enum Baz {
Baz1 {},
Baz2 {foo_bar:Box<Vec<i32>>},
}
}
};
let scala = "\
package org.enso.ast
import java.util.UUID
object Ast {
type A[X] = B[X, Y]
sealed trait FooBarBaz
object A {
sealed trait A extends FooBarBaz
case class Foo() extends A
case class Bar(x: Long, y: Byte, z: B.Type) extends A
}
object B {
sealed trait B extends FooBarBaz
type Type = Baz
sealed trait Baz extends B
case class Baz1() extends Baz
case class Baz2(fooBar: Vector[Int]) extends Baz
}
}
";
assert_eq!(ScalaGenerator::file("ast", rust), scala);
}
}


@ -1,24 +0,0 @@
//! This module exports the implementation of the enso abstract syntax tree.
// === Features ===
#![feature(test)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod ast;
pub mod generation;
pub use crate::ast::*;


@ -1,24 +0,0 @@
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
use ast_new::generation::ScalaGenerator;
use std::fs::File;
use std::io::Write;
pub fn main() -> std::io::Result<()> {
let matches = clap::App::new("Enso AST")
.version("1.0")
.author("Enso Team <enso-dev@enso.org>")
.about("Enso AST generator.")
.args_from_usage("--generate-scala-ast [FILE] 'Generates a scala ast in specified file.'")
.get_matches();
if let Some(file) = matches.value_of("generate-scala-ast") {
File::create(file)?.write_all(ScalaGenerator::ast()?.as_bytes())?;
println!("Generated scala ast at path: {}", file);
}
Ok(())
}


@ -1,15 +0,0 @@
[package]
name = "flexer-test-definition"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
enso-flexer = { version = "^0.2.0", path = "../../flexer" }


@ -1,285 +0,0 @@
//! This file contains the code defining a lexer for the following small language. Due to the way in
//! which the code-generation from the flexer is used, it has to be defined in a separate crate from
//! the site at which it's used. For the actual tests of this code, please see
//! `flexer-testing/generation`.
//!
//! The language here is being defined as follows:
//!
//! a-word = 'a'+;
//! b-word = 'b'+;
//! word = a-word | b-word;
//! space = ' ';
//! spaced-word = space, word;
//! language = word, spaced-word*;
//!
//! Please note that there is a fair amount of duplicated code between this test and the
//! `lexer_generated_api_test` file. This is to present the full view of what each portion of the
//! process looks like.
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
use enso_flexer::prelude::*;
use enso_flexer::*;
use enso_flexer::automata::pattern::Pattern;
use enso_flexer::group::Registry;
use enso_flexer::prelude::logger::Disabled;
use enso_flexer::prelude::reader::BookmarkManager;
// ====================
// === Type Aliases ===
// ====================
type Logger = Disabled;
// ===========
// === AST ===
// ===========
/// A very simple AST, sufficient for the simple language being defined.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
/// A word from the input, consisting of a sequence of all `a` or all `b`.
Word(String),
/// A token that the lexer is unable to recognise.
Unrecognized(String),
}
impl Token {
/// Construct a new word token.
pub fn word(name: impl Into<String>) -> Token {
Token::Word(name.into())
}
/// Construct a new unrecognized token.
pub fn unrecognized(name: impl Into<String>) -> Token {
Token::Unrecognized(name.into())
}
}
/// A representation of a stream of tokens.
#[allow(missing_docs)]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct TokenStream {
tokens: Vec<Token>,
}
impl TokenStream {
/// Append the provided token to the token stream.
pub fn push(&mut self, token: Token) {
self.tokens.push(token);
}
}
// === Trait Impls ===
impl From<Vec<Token>> for TokenStream {
fn from(tokens: Vec<Token>) -> Self {
TokenStream { tokens }
}
}
// ==================
// === Test Lexer ===
// ==================
/// The definition of a test lexer for the above-described language.
#[derive(Debug)]
pub struct TestLexer {
lexer: Flexer<TestState, TokenStream, Logger>,
}
impl Deref for TestLexer {
type Target = Flexer<TestState, TokenStream, Logger>;
fn deref(&self) -> &Self::Target {
&self.lexer
}
}
impl DerefMut for TestLexer {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.lexer
}
}
impl TestLexer {
/// Creates a new instance of this lexer.
pub fn new() -> Self {
let logger = Logger::new("TestLexer");
let lexer = Flexer::new(logger);
TestLexer { lexer }
}
}
/// Rules for the root state.
#[allow(dead_code, missing_docs)]
impl TestLexer {
fn on_first_word<R: ReaderOps>(&mut self, _reader: &mut R) {
let str = self.current_match.clone();
let ast = Token::Word(str);
self.output.push(ast);
let id = self.seen_first_word_state;
self.push_state(id);
}
fn on_err_suffix_first_word<R: ReaderOps>(&mut self, _reader: &mut R) {
let ast = Token::Unrecognized(self.current_match.clone());
self.output.push(ast);
}
fn on_no_err_suffix_first_word<R: ReaderOps>(&mut self, _reader: &mut R) {}
fn rules_in_root(lexer: &mut TestLexer) {
let a_word = Pattern::char('a').many1();
let b_word = Pattern::char('b').many1();
let any = Pattern::any();
let end = Pattern::eof();
let root_group_id = lexer.initial_state;
let root_group = lexer.groups_mut().group_mut(root_group_id);
root_group.create_rule(&a_word, "self.on_first_word(reader)");
root_group.create_rule(&b_word, "self.on_first_word(reader)");
root_group.create_rule(&end, "self.on_no_err_suffix_first_word(reader)");
root_group.create_rule(&any, "self.on_err_suffix_first_word(reader)");
}
}
/// Rules for the "seen first word" state.
#[allow(dead_code, missing_docs)]
impl TestLexer {
fn on_spaced_word<R: ReaderOps>(&mut self, _reader: &mut R, _test_arg: bool) {
let str = self.current_match.clone();
let ast = Token::Word(String::from(str.trim()));
self.output.push(ast);
}
fn on_err_suffix<R: ReaderOps>(&mut self, reader: &mut R) {
self.on_err_suffix_first_word(reader);
self.pop_state();
}
fn on_no_err_suffix<R: ReaderOps>(&mut self, reader: &mut R) {
self.on_no_err_suffix_first_word(reader);
self.pop_state();
}
fn rules_in_seen_first_word(lexer: &mut TestLexer) {
let a_word = Pattern::char('a').many1();
let b_word = Pattern::char('b').many1();
let space = Pattern::char(' ');
let spaced_a_word = &space >> &a_word;
let spaced_b_word = &space >> &b_word;
let any = Pattern::any();
let end = Pattern::eof();
let seen_first_word_group_id = lexer.seen_first_word_state;
let seen_first_word_group = lexer.groups_mut().group_mut(seen_first_word_group_id);
seen_first_word_group.create_rule(&spaced_a_word, "self.on_spaced_word(reader,true)");
seen_first_word_group.create_rule(&spaced_b_word, "self.on_spaced_word(reader,false)");
seen_first_word_group.create_rule(&end, "self.on_no_err_suffix(reader)");
seen_first_word_group.create_rule(&any, "self.on_err_suffix(reader)");
}
}
// === Trait Impls ===
impl enso_flexer::Definition for TestLexer {
fn define() -> Self {
let mut lexer = TestLexer::new();
TestLexer::rules_in_seen_first_word(&mut lexer);
TestLexer::rules_in_root(&mut lexer);
lexer
}
fn groups(&self) -> &Registry {
self.lexer.groups()
}
fn set_up(&mut self) {}
fn tear_down(&mut self) {}
}
impl Default for TestLexer {
fn default() -> Self {
TestLexer::new()
}
}
// ===================
// === Lexer State ===
// ===================
/// The stateful components of the test lexer.
#[derive(Debug)]
pub struct TestState {
/// The registry for groups in the lexer.
lexer_states: group::Registry,
/// The initial state of the lexer.
initial_state: group::Identifier,
/// The state entered when the first word has been seen.
seen_first_word_state: group::Identifier,
/// The bookmarks for this lexer.
bookmarks: BookmarkManager,
}
// === Trait Impls ===
impl enso_flexer::State for TestState {
fn new(_logger: &impl AnyLogger) -> Self {
let mut lexer_states = group::Registry::default();
let initial_state = lexer_states.define_group("ROOT", None);
let seen_first_word_state = lexer_states.define_group("SEEN FIRST WORD", None);
let bookmarks = BookmarkManager::new();
Self { lexer_states, initial_state, seen_first_word_state, bookmarks }
}
fn initial_state(&self) -> group::Identifier {
self.initial_state
}
fn groups(&self) -> &group::Registry {
&self.lexer_states
}
fn groups_mut(&mut self) -> &mut group::Registry {
&mut self.lexer_states
}
fn bookmarks(&self) -> &BookmarkManager {
&self.bookmarks
}
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
&mut self.bookmarks
}
fn specialize(&self) -> Result<String, GenError> {
generate::specialize(self, "TestLexer", "TokenStream")
}
}


@ -1,20 +0,0 @@
[package]
name = "flexer-test-generation"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
enso-flexer = { version = "^0.2.0", path = "../../flexer" }
flexer-test-definition = { version = "0.1.0", path = "../definition" }
[build-dependencies]
enso-flexer = { version = "^0.2.0", path = "../../flexer" }
flexer-test-definition = { version = "0.1.0", path = "../definition" }


@ -1,32 +0,0 @@
use std::io::prelude::*;
use enso_flexer::Definition;
use enso_flexer::State;
use flexer_test_definition::TestLexer;
use std::fs::File;
/// Generates the lexer engine and saves the result into the file `src/engine.rs`.
///
/// The content of the generated file can be used with the `include!` macro.
fn generate_engine() {
let definition_path = "../definition/src/lib.rs";
let output_directory = "src/generated";
let _ = std::fs::create_dir(output_directory);
let output_path = "src/generated/engine.rs";
let mut lexer_def = File::open(definition_path)
.unwrap_or_else(|_| panic!("The lexer definition should exist at {}.", definition_path));
let mut contents = String::new();
let mut file = File::create(output_path)
.unwrap_or_else(|_| panic!("Cannot open output file at {}.", output_path));
let lexer = TestLexer::define();
let engine = lexer.specialize().unwrap();
lexer_def.read_to_string(&mut contents).expect("Unable to read lexer definition.");
file.write_all(contents.as_bytes()).expect("Unable to write lexer definition.");
file.write_all(engine.as_bytes()).expect("Unable to write lexer specialization.");
}
fn main() {
generate_engine()
}


@ -1,11 +0,0 @@
//! This module serves to re-export the generated lexer.
// ==============
// === Export ===
// ==============
pub mod engine;


@ -1,34 +0,0 @@
//! This library exposes the specialized version of the Enso lexer.
//!
//! Its sole purpose is to avoid the lexer definition getting out of sync with its implementation
//! (the generated engine), which requires the engine to live in a separate crate.
//!
//! This separation enables generation of the enso lexer source code with `build.rs` during
//! compilation. Its output is then stored in a new file `engine.rs` and exported by `lexer.rs`.
// === Features ===
#![feature(test)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
// ==============
// === Export ===
// ==============
#[rustfmt::skip]
pub mod generated;


@ -1,114 +0,0 @@
//! This file contains tests for the generated lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
use enso_flexer::prelude::*;
use enso_flexer::prelude::reader::decoder::DecoderUTF8;
use flexer_test_generation::generated::engine::TestLexer;
use flexer_test_generation::generated::engine::Token;
use flexer_test_generation::generated::engine::TokenStream;
// =============
// === Tests ===
// =============
/// Executes the test on the provided input string slice.
fn run_test_on(str: impl AsRef<str>) -> TokenStream {
// Hardcoded for ease of use here.
let reader = Reader::new(str.as_ref().as_bytes(), DecoderUTF8());
let mut lexer = TestLexer::new();
let run_result = lexer.run(reader);
match run_result.kind {
enso_flexer::ResultKind::Success => run_result.tokens,
_ => default(),
}
}
#[test]
fn test_single_a_word() {
let input = "aaaaa";
let expected_output = TokenStream::from(vec![Token::word(input)]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}
#[test]
fn test_single_b_word() {
let input = "bbbbb";
let expected_output = TokenStream::from(vec![Token::word(input)]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}
#[test]
fn test_two_word() {
let input = "aaaaa bbbbb";
let expected_output = TokenStream::from(vec![Token::word("aaaaa"), Token::word("bbbbb")]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}
#[test]
fn test_multi_word() {
let input = "bbb aa a b bbbbb aa";
let expected_output = TokenStream::from(vec![
Token::word("bbb"),
Token::word("aa"),
Token::word("a"),
Token::word("b"),
Token::word("bbbbb"),
Token::word("aa"),
]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}
#[test]
fn test_invalid_single_word() {
let input = "c";
let expected_output = TokenStream::from(vec![Token::unrecognized(input)]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}
#[test]
fn test_multi_word_invalid() {
let input = "aaaaaa c bbbbbb";
let expected_output = TokenStream::from(vec![
Token::word("aaaaaa"),
Token::unrecognized(" "),
Token::unrecognized("c"),
Token::unrecognized(" "),
Token::word("bbbbbb"),
]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}
#[test]
fn test_end_invalid() {
let input = "bbbbbb c";
let expected_output = TokenStream::from(vec![
Token::word("bbbbbb"),
Token::unrecognized(" "),
Token::unrecognized("c"),
]);
let result = run_test_on(input);
assert_eq!(result, expected_output);
}


@ -1,40 +0,0 @@
[package]
name = "enso-flexer"
version = "0.2.1"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "A finite-automata-based lexing engine."
readme = "README.md"
homepage = "https://github.com/enso-org/enso/lib/rust/flexer"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["lexer", "finite-automata"]
categories = ["parsing"]
publish = true
[lib]
name = "enso_flexer"
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
enso-automata = { version = "^0.2.0", path = "../../automata" }
enso-logger = { version = "^0.3.0", path = "../../logger" }
enso-prelude = { version = "^0.2.1", path = "../../prelude" }
enso-lazy-reader = { version = "^0.2.0", path = "../lazy-reader" }
enso-macro-utils = { version = "^0.2.0", path = "../../macro-utils" }
itertools = "0.8"
proc-macro2 = "1.0.19"
nonempty = "0.1.5"
quote = "1.0"
syn = { version = "1.0.12", features = ["full", "extra-traits", "visit-mut", "visit", "parsing", "printing"] }
unicode-segmentation = "1.6.0"
wasm-bindgen = "0.2"
[dev-dependencies]
wasm-bindgen-test = "0.3.8"


@ -1,4 +0,0 @@
# Flexer
This library provides a finite-automata-based lexing engine that can flexibly
tokenize an input stream.


@ -1,544 +0,0 @@
//! This file contains utilities for generating rust code from lexer definitions, allowing the
//! flexer to be specialised for a specific language.
use crate::prelude::*;
use quote::*;
use syn::*;
use crate as flexer;
use crate::automata::dfa;
use crate::automata::dfa::Dfa;
use crate::automata::nfa;
use crate::automata::state::State;
use crate::group;
use crate::group::AutomatonData;
use crate::group::Group;
use enso_macro_utils::repr;
use proc_macro2::Literal;
use std::fmt;
use std::hash::BuildHasher;
use std::result::Result;
// =======================
// === Code Generation ===
// =======================
/// Generate specialized code for the provided lexer `definition`.
///
/// This specialized code is a highly-optimised and tailored lexer that dispatches based on simple
/// code-point switches, with no dynamic lookup. This means that it is very fast, and very low
/// overhead.
pub fn specialize(
definition: &impl flexer::State,
state_type_name: impl Str,
output_type_name: impl Str,
) -> Result<String, GenError> {
let group_registry = definition.groups();
let mut body_items =
vec![run_function(output_type_name)?, run_current_state_function(), step(group_registry)];
for group in group_registry.all().iter() {
body_items.extend(automaton_for_group(group, group_registry)?)
}
let result = wrap_in_impl_for(state_type_name, body_items)?;
let code = show_code(&result);
Ok(code)
}
// === Whole-Lexer Codegen Utilities ===
/// Wrap the provided implementation items into an `impl` block for the provided `state_name` type.
pub fn wrap_in_impl_for(
state_name: impl Into<String>,
body: Vec<ImplItem>,
) -> Result<ItemImpl, GenError> {
let state_name: Ident = str_to_ident(state_name.into().as_str())?;
let mut tree: ItemImpl = parse_quote! {
#[allow(missing_docs,dead_code,clippy::all)]
impl #state_name {}
};
tree.items.extend(body);
Ok(tree)
}
/// Generate the `run` function for the specialized lexer.
///
/// This function is what the user of the lexer will call to begin execution.
pub fn run_function(output_type_name: impl Str) -> Result<ImplItem, GenError> {
let output_type_name = str_to_path(output_type_name)?;
let tree: ImplItem = parse_quote! {
pub fn run<R:ReaderOps>(&mut self, mut reader:R) -> LexingResult<#output_type_name> {
self.set_up();
reader.advance_char(&mut self.bookmarks);
while self.run_current_state(&mut reader) == StageStatus::ExitSuccess {}
let result = match self.status {
StageStatus::ExitFinished => LexingResult::success(
mem::take(&mut self.output)
),
StageStatus::ExitFail => LexingResult::failure(
mem::take(&mut self.output)
),
_ => LexingResult::partial(mem::take(&mut self.output))
};
self.tear_down();
result
}
};
Ok(tree)
}
/// Generate the function responsible for executing the lexer in its current state.
pub fn run_current_state_function() -> ImplItem {
let tree: ImplItem = parse_quote! {
fn run_current_state<R:ReaderOps>(&mut self, reader:&mut R) -> StageStatus {
self.status = StageStatus::Initial;
let mut finished = false;
// Runs until reaching a state that no longer says to continue.
while let Some(next_state) = self.status.continue_as() {
// debug!(self.logger,"Current character is {reader.character().char:?}.");
// debug!(self.logger,"Continuing in {next_state:?}.");
self.status = self.step(next_state,reader);
if finished && reader.finished(self.bookmarks()) {
// info!(self.logger,"Input finished.");
self.status = StageStatus::ExitFinished
}
finished = reader.character().is_eof();
if self.status.should_continue() {
match reader.character().char {
Ok(char) => {
reader.append_result(char);
// info!(self.logger,"Result is {reader.result():?}.");
},
Err(enso_flexer::prelude::reader::Error::EOF) => {
// info!(self.logger,"Reached EOF.");
},
Err(enso_flexer::prelude::reader::Error::EndOfGroup) => {
let current_state = self.current_state();
let group_name = self.groups().group(current_state).name.as_str();
panic!("Missing rules for state {}.", group_name)
}
Err(_) => {
// error!(self.logger,"Unexpected error!");
panic!("Unexpected error!")
}
}
reader.advance_char(&mut self.bookmarks);
}
}
self.status
}
};
tree
}
/// Generate the `step` function for the lexer.
///
/// This function is responsible for dispatching based on the current state, consuming a character,
/// and returning the state to transition to.
pub fn step(groups: &group::Registry) -> ImplItem {
let arms = groups.all().iter().map(|g| step_match_arm(g.id.into())).collect_vec();
parse_quote! {
fn step<R:ReaderOps>(&mut self, next_state:SubStateId, reader:&mut R) -> StageStatus {
let current_state:usize = self.current_state().into();
match current_state {
#(#arms)*
_ => unreachable_panic!("Unreachable state reached in lexer."),
}
}
}
}
/// Generate a match arm for the step function.
///
/// There is one match arm per lexer state.
pub fn step_match_arm(number: usize) -> Arm {
let literal = Literal::usize_unsuffixed(number);
let function_name_str = format!("dispatch_in_state_{}", number);
let func_name: Ident = parse_str(function_name_str.as_str()).unwrap();
let arm: Arm = parse_quote! {
#literal => self.#func_name(next_state,reader),
};
arm
}
// === Generation for a Specific Lexer State ===
/// Generate the functions that implement the lexer automaton for a given lexer state.
pub fn automaton_for_group(
group: &Group,
registry: &group::Registry,
) -> Result<Vec<ImplItem>, GenError> {
let mut nfa = registry.to_nfa_from(group.id);
let mut rules = Vec::with_capacity(nfa.states().len());
for state in nfa.public_states().iter() {
if nfa.name(*state).is_some() {
rules.push(rule_for_state(*state, &nfa)?);
}
}
let mut dfa = Dfa::from(nfa.automaton());
let dispatch_for_dfa = dispatch_in_state(&dfa, group.id.into())?;
let mut dfa_transitions = transitions_for_dfa(&mut dfa, &mut nfa, group.id.into())?;
dfa_transitions.push(dispatch_for_dfa);
dfa_transitions.extend(rules);
Ok(dfa_transitions)
}
/// Generate a set of transition functions for the provided `dfa`, with identifier `id`.
pub fn transitions_for_dfa(
dfa: &mut Dfa,
data: &mut AutomatonData,
id: usize,
) -> Result<Vec<ImplItem>, GenError> {
let mut state_has_overlapping_rules: HashMap<usize, bool> = HashMap::new();
state_has_overlapping_rules.insert(0, false);
let state_names: Vec<_> =
dfa.links.row_indices().map(|ix| (ix, name_for_step(id, ix))).collect();
let mut transitions = Vec::with_capacity(state_names.len());
for (ix, name) in state_names.into_iter() {
transitions.push(transition_for_dfa(dfa, name, data, ix, &mut state_has_overlapping_rules)?)
}
Ok(transitions)
}
/// Generate a specific transition function for
#[allow(clippy::implicit_hasher)]
pub fn transition_for_dfa<S: BuildHasher>(
dfa: &mut Dfa,
transition_name: Ident,
data: &mut AutomatonData,
state_ix: usize,
has_overlaps: &mut HashMap<usize, bool, S>,
) -> Result<ImplItem, GenError> {
let match_expr: Expr = match_for_transition(dfa, state_ix, data, has_overlaps)?;
let function: ImplItem = parse_quote! {
fn #transition_name<R:ReaderOps>(&mut self, reader:&mut R) -> StageStatus {
#match_expr
}
};
Ok(function)
}
/// Generate the pattern match for a given transition function.
pub fn match_for_transition<S: BuildHasher>(
dfa: &mut Dfa,
state_ix: usize,
data: &mut AutomatonData,
has_overlaps: &mut HashMap<usize, bool, S>,
) -> Result<Expr, GenError> {
let overlaps = *has_overlaps.get(&state_ix).unwrap_or(&false);
let mut trigger_state = dfa.links[(state_ix, 0)];
let mut range_start = enso_automata::symbol::SymbolIndex::min_value();
let divisions = dfa.alphabet.division_map.clone();
let mut branches = Vec::with_capacity(divisions.len());
for (sym, ix) in divisions.into_iter() {
let new_trigger_state = dfa.links[(state_ix, ix)];
if new_trigger_state != trigger_state {
let range_end = if sym.index != 0 { sym.index - 1 } else { sym.index };
let current_trigger_state = trigger_state;
let current_range_start = range_start;
trigger_state = new_trigger_state;
range_start = sym.index;
let body =
branch_body(dfa, current_trigger_state, state_ix, data, has_overlaps, overlaps)?;
branches.push(Branch::new(Some(current_range_start..=range_end), body));
} else {
}
}
let catch_all_branch_body =
branch_body(dfa, trigger_state, state_ix, data, has_overlaps, overlaps)?;
let catch_all_branch = Branch::new(None, catch_all_branch_body);
branches.push(catch_all_branch);
let arms: Vec<Arm> = branches.into_iter().map(Into::into).collect();
let mut match_expr: ExprMatch = parse_quote! {
match u64::from(reader.character()) {
#(#arms)*
}
};
match_expr.arms = arms;
Ok(Expr::Match(match_expr))
}
/// Generate the branch body for a transition in the DFA.
pub fn branch_body<S: BuildHasher>(
dfa: &mut Dfa,
target_state: State<Dfa>,
state_ix: usize,
data: &mut AutomatonData,
has_overlaps: &mut HashMap<usize, bool, S>,
rules_overlap: bool,
) -> Result<Block, GenError> {
let sources = dfa.sources.get(state_ix).expect("Internal error.");
let rule_name_for_state = data.name_for_dfa_state(sources);
if target_state == State::<Dfa>::INVALID {
match rule_name_for_state {
None => Ok(parse_quote! {{
StageStatus::ExitFail
}}),
Some(rule) => {
let rule: Expr = match parse_str(rule) {
Ok(rule) => rule,
Err(_) => return Err(GenError::BadExpression(rule.to_string())),
};
if rules_overlap {
Ok(parse_quote! {{
let rule_bookmark = self.bookmarks.rule_bookmark;
let matched_bookmark = self.bookmarks.matched_bookmark;
self.bookmarks.rewind(rule_bookmark,reader);
self.current_match = reader.pop_result();
self.#rule(reader);
self.bookmarks.bookmark(matched_bookmark,reader);
StageStatus::ExitSuccess
}})
} else {
Ok(parse_quote! {{
let matched_bookmark = self.bookmarks.matched_bookmark;
self.current_match = reader.pop_result();
self.#rule(reader);
self.bookmarks.bookmark(matched_bookmark,reader);
StageStatus::ExitSuccess
}})
}
}
}
} else {
let target_state_has_no_rule = match rule_name_for_state {
Some(_) =>
if !dfa_has_rule_name_for(data, dfa, target_state) {
dfa.sources[target_state.id()] = (*sources).clone();
has_overlaps.insert(target_state.id(), true);
true
} else {
false
},
None => false,
};
let state_id = Literal::usize_unsuffixed(target_state.id());
let ret: Expr = parse_quote! {
StageStatus::ContinueWith(#state_id.into())
};
if target_state_has_no_rule && !rules_overlap {
Ok(parse_quote! {{
let rule_bookmark = self.bookmarks.rule_bookmark;
self.bookmarks.bookmark(rule_bookmark,reader);
#ret
}})
} else {
Ok(parse_quote! {{
#ret
}})
}
}
}
/// Generate the dispatch function for a given lexer state.
///
/// This dispatch function is responsible for dispatching based on the sub-state of any given lexer
/// state, and is the main part of implementing the actual lexer transitions.
pub fn dispatch_in_state(dfa: &Dfa, id: usize) -> Result<ImplItem, GenError> {
let dispatch_name: Ident = str_to_ident(format!("dispatch_in_state_{}", id))?;
let state_names = dfa.links.row_indices().map(|ix| (ix, name_for_step(id, ix))).collect_vec();
let mut branches = Vec::with_capacity(state_names.len());
for (ix, name) in state_names.into_iter() {
let literal = Literal::usize_unsuffixed(ix);
let arm: Arm = parse_quote! {
#literal => self.#name(reader),
};
branches.push(arm);
}
let pattern_match: ExprMatch = parse_quote! {
match new_state_index.into() {
#(#branches)*
_ => unreachable_panic!("Unreachable state reached in lexer.")
}
};
let func: ImplItem = parse_quote! {
fn #dispatch_name<R:ReaderOps>
( &mut self
, new_state_index:SubStateId
, reader:&mut R
) -> StageStatus {
#pattern_match
}
};
Ok(func)
}
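// For illustration only (not part of the original source): for a lexer state with id `0` and two
// sub-states, the generated dispatch method looks roughly as follows. The sub-state method names
// follow `name_for_step`.
//
//     fn dispatch_in_state_0<R: ReaderOps>(
//         &mut self,
//         new_state_index: SubStateId,
//         reader: &mut R,
//     ) -> StageStatus {
//         match new_state_index.into() {
//             0 => self.state_0_to_0(reader),
//             1 => self.state_0_to_1(reader),
//             _ => unreachable_panic!("Unreachable state reached in lexer.")
//         }
//     }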
/// Generate a name for a given step function.
pub fn name_for_step(in_state: usize, to_state: usize) -> Ident {
let name_str = format!("state_{}_to_{}", in_state, to_state);
parse_str(name_str.as_str()).expect("The generated step name must be a valid identifier.")
}
/// Generate an executable rule function for a given lexer state.
pub fn rule_for_state(state: nfa::State, automaton: &AutomatonData) -> Result<ImplItem, GenError> {
let state_name = automaton.name(state);
match state_name {
None => unreachable_panic!("Rule for state requested, but state has none."),
Some(name) => {
let rule_name = str_to_ident(name)?;
let callback = automaton.code(state).expect("If it is named it has a callback.");
let code: Expr = match parse_str(callback) {
Ok(expr) => expr,
Err(_) => return Err(GenError::BadExpression(callback.into())),
};
if !has_reader_arg(&code) {
return Err(GenError::BadCallbackArgument);
}
let tree: ImplItem = parse_quote! {
fn #rule_name<R:ReaderOps>(&mut self, reader:&mut R) {
#code
}
};
Ok(tree)
}
}
}
/// Checks if the given `expr` is a call that receives `reader` as an argument.
#[allow(clippy::cmp_owned)]
pub fn has_reader_arg(expr: &Expr) -> bool {
match expr {
Expr::MethodCall(expr) => match expr.args.first() {
Some(Expr::Path(path)) => match path.path.segments.first() {
Some(segment) => segment.ident.to_string() == "reader",
_ => false,
},
_ => false,
},
Expr::Call(expr) => match expr.args.last() {
Some(Expr::Path(path)) => match path.path.segments.first() {
Some(segment) => segment.ident.to_string() == "reader",
_ => false,
},
_ => false,
},
_ => false,
}
}
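// Examples of what `has_reader_arg` accepts and rejects (illustrative, not from the original
// source):
//
//     self.on_ident(reader)     // true:  method call whose first argument is `reader`
//     on_ident(self, reader)    // true:  free call whose last argument is `reader`
//     self.on_ident()           // false: no arguments
//     self.on_ident(other)      // false: first argument is not `reader`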
// ================
// === GenError ===
// ================
/// Errors that arise during code generation.
#[derive(Clone, Debug, PartialEq)]
pub enum GenError {
/// The callback function does not take a single argument `reader`.
BadCallbackArgument,
/// The provided string is not a valid rust identifier.
BadIdentifier(String),
/// The provided expression isn't a valid rust expression.
BadExpression(String),
/// The provided string is not a valid rust literal.
BadLiteral(String),
/// The provided string is not a valid rust path.
BadPath(String),
}
// === Trait Impls ===
impl Display for GenError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
GenError::BadCallbackArgument => write!(
f,
"Bad argument to a callback function. It must take a single argument `reader`."
),
GenError::BadIdentifier(str) => write!(f, "`{}` is not a valid rust identifier.", str),
GenError::BadExpression(str) => write!(f, "`{}` is not a valid rust expression.", str),
GenError::BadLiteral(str) => write!(f, "`{}` is not a valid rust literal.", str),
GenError::BadPath(str) => write!(f, "`{}` is not a valid rust path.", str),
}
}
}
// ==============
// === Branch ===
// ==============
/// A representation of a dispatch branch for helping to generate pattern arms.
#[allow(missing_docs)]
#[derive(Clone, Debug, PartialEq)]
struct Branch {
pub range: Option<RangeInclusive<enso_automata::symbol::SymbolIndex>>,
pub body: Block,
}
impl Branch {
/// Create a new branch, from the provided `range` and with `body` as the code it executes.
pub fn new(
range: Option<RangeInclusive<enso_automata::symbol::SymbolIndex>>,
body: Block,
) -> Branch {
Branch { range, body }
}
}
// === Trait Impls ===
impl From<Branch> for Arm {
fn from(value: Branch) -> Self {
let body = value.body;
match value.range {
Some(range) => {
let range_start = Literal::u64_unsuffixed(*range.start());
let range_end = Literal::u64_unsuffixed(*range.end());
if range.start() == range.end() {
parse_quote! {
#range_start => #body,
}
} else {
parse_quote! {
#range_start..=#range_end => #body,
}
}
}
None => parse_quote! {
_ => #body,
},
}
}
}
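// For example (illustrative): `Branch::new(Some(10..=10), body)` lowers to the single-value arm
// `10 => { ... },`, a wider range lowers to `#start..=#end => { ... },`, and
// `Branch::new(None, body)` lowers to the catch-all arm `_ => { ... },`.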
// =================
// === Utilities ===
// =================
/// Check if the DFA has a rule name for the provided target `state`.
pub fn dfa_has_rule_name_for(nfa: &AutomatonData, dfa: &Dfa, state: dfa::State) -> bool {
nfa.name_for_dfa_state(&dfa.sources[state.id()]).is_some()
}
/// Convert a string to an identifier.
pub fn str_to_ident(str: impl Str) -> Result<Ident, GenError> {
parse_str(str.as_ref()).map_err(|_| GenError::BadIdentifier(str.into()))
}
/// Convert a string to a path.
pub fn str_to_path(str: impl Str) -> Result<Path, GenError> {
parse_str(str.as_ref()).map_err(|_| GenError::BadPath(str.into()))
}
/// Convert the syntax tree into a string.
pub fn show_code(tokens: &impl ToTokens) -> String {
repr(tokens)
}

View File

@ -1,458 +0,0 @@
//! This module provides an API for grouping multiple flexer rules.
use crate::prelude::*;
use crate::automata::nfa;
use crate::automata::nfa::Nfa;
use crate::automata::pattern::Pattern;
use crate::automata::state;
use crate::group::rule::Rule;
use crate::prelude::fmt::Formatter;
use crate::prelude::HashMap;
use itertools::Itertools;
use std::fmt::Display;
// ==============
// === Export ===
// ==============
pub mod rule;
// ================
// === Registry ===
// ================
/// The group Registry is a container for [`Group`]s in the flexer implementation.
///
/// It allows groups to contain associations between themselves, and also implements useful
/// conversions for groups.
#[derive(Clone, Debug, Default)]
pub struct Registry {
/// The groups defined for the lexer.
groups: Vec<Group>,
}
impl Registry {
/// Defines a new group of rules for the lexer with the specified `name` and `parent`.
///
/// It returns the identifier of the newly-created group.
pub fn define_group(
&mut self,
name: impl Into<String>,
parent_index: Option<Identifier>,
) -> Identifier {
let id = self.next_id();
let group = Group::new(id, name.into(), parent_index);
self.groups.push(group);
id
}
/// Adds an existing `group` to the registry, updating and returning its identifier.
pub fn add_group(&mut self, mut group: Group) -> Identifier {
let new_id = self.next_id();
group.id = new_id;
self.groups.push(group);
new_id
}
/// Creates a rule that matches `pattern` for the group identified by `group_id`.
///
/// Panics if `group_id` refers to a nonexistent group.
pub fn create_rule(&mut self, group: Identifier, pattern: &Pattern, callback: impl AsRef<str>) {
let group = self.group_mut(group);
group.create_rule(pattern, callback.as_ref());
}
/// Associates the provided `rule` with the group identified by `group_id`.
///
/// Panics if `group_id` refers to a nonexistent group.
pub fn add_rule(&mut self, group: Identifier, rule: Rule) {
let group = self.group_mut(group);
group.add_rule(rule);
}
/// Collates the entire set of rules that are matchable when the lexer has the group identified
/// by `group_id` as active.
///
/// This set of rules includes the rules inherited from any parent groups.
pub fn rules_for(&self, group: Identifier) -> Vec<&Rule> {
let group_handle = self.group(group);
let mut parent = group_handle.parent_index.map(|p| self.group(p));
let mut rules = group_handle.rules.iter().collect_vec();
while let Some(parent_group) = parent {
if parent_group.id == group_handle.id {
panic!("There should not be cycles in parent links for lexer groups.")
}
rules.extend(parent_group.rules.iter());
parent = parent_group.parent_index.map(|p| self.group(p));
}
rules
}
/// Obtains a reference to the group for the given `group_id`.
///
/// As group identifiers can only be created by use of this `Registry`, this will always
/// succeed.
pub fn group(&self, group: Identifier) -> &Group {
self.groups.get(group.0).expect("The group must exist.")
}
/// Obtains a mutable reference to the group for the given `group_id`.
///
/// As group identifiers can only be created by use of this `Registry`, this will always
/// succeed.
pub fn group_mut(&mut self, group: Identifier) -> &mut Group {
self.groups.get_mut(group.0).expect("The group should exist.")
}
/// Converts the group identified by `group_id` into an NFA.
///
/// Panics if `group_id` refers to a nonexistent group.
pub fn to_nfa_from(&self, group_id: Identifier) -> AutomatonData {
let group = self.group(group_id);
let mut nfa = AutomatonData::default();
let start = nfa.automaton.start;
nfa.add_public_state(start);
let build = |rule: &Rule| nfa.new_pattern(start, &rule.pattern);
let rules = self.rules_for(group.id);
let callbacks = rules.iter().map(|r| r.callback.clone()).collect_vec();
let states = rules.into_iter().map(build).collect_vec();
let end = nfa.new_state_exported();
for (ix, state) in states.into_iter().enumerate() {
nfa.add_public_state(state);
nfa.set_name(state, group.callback_name(ix));
nfa.set_code(state, callbacks.get(ix).unwrap().clone());
nfa.connect(state, end);
}
nfa.add_public_state(end);
nfa
}
/// Generates the next group identifier for this registry.
fn next_id(&self) -> Identifier {
let val = self.groups.len();
Identifier(val)
}
/// Get an immutable reference to the groups contained within the registry.
pub fn all(&self) -> &Vec<Group> {
&self.groups
}
}
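// A minimal end-to-end sketch (illustrative, not from the original source): define a group,
// attach a rule, and lower it to the automaton data used by the code generator. The callback
// string `self.on_foo(reader)` is a hypothetical lexer method.
//
//     let mut registry = Registry::default();
//     let root = registry.define_group("ROOT", None);
//     registry.create_rule(root, &Pattern::all_of("foo"), "self.on_foo(reader)");
//     let automaton = registry.to_nfa_from(root);
//     assert!(automaton.public_states().len() >= 2);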
// ====================
// === AutomataData ===
// ====================
/// Storage for the generated automaton and auxiliary data required for code generation.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct AutomatonData {
/// The non-deterministic finite automaton implementing the group of rules it was generated
/// from.
automaton: Nfa,
/// The states defined in the automaton.
states: Vec<nfa::State>,
/// The names of callbacks, where provided.
transition_names: HashMap<nfa::State, String>,
/// The code to execute on a callback, where available.
callback_code: HashMap<nfa::State, String>,
}
impl AutomatonData {
/// Set the name for the provided `state_id`.
pub fn set_name(&mut self, state_id: nfa::State, name: impl Str) {
self.transition_names.insert(state_id, name.into());
}
/// Set the callback code for the provided `state_id`.
pub fn set_code(&mut self, state_id: nfa::State, code: impl Str) {
self.callback_code.insert(state_id, code.into());
}
/// Add the provided `state` to the state registry.
pub fn add_public_state(&mut self, state: nfa::State) {
self.states.push(state);
}
/// Get the name for the provided `state_id`, if present.
pub fn name(&self, state_id: nfa::State) -> Option<&str> {
self.transition_names.get(&state_id).map(|s| s.as_str())
}
/// Get the callback code for the provided `state_id`, if present.
pub fn code(&self, state_id: nfa::State) -> Option<&str> {
self.callback_code.get(&state_id).map(|s| s.as_str())
}
/// Get a reference to the public states for this automaton.
///
/// A public state is one that was explicitly defined by the user.
pub fn public_states(&self) -> &Vec<nfa::State> {
&self.states
}
/// Get a reference to the states for this automaton.
pub fn states(&self) -> &Vec<state::Data> {
self.automaton.states()
}
/// Get a reference to the state names for this automaton.
pub fn names(&self) -> &HashMap<nfa::State, String> {
&self.transition_names
}
/// Get a reference to the callbacks for this automaton.
pub fn callbacks(&self) -> &HashMap<nfa::State, String> {
&self.callback_code
}
/// Get a reference to the automaton itself.
pub fn automaton(&self) -> &Nfa {
&self.automaton
}
/// Get the rule name for the provided state.
pub fn name_for_dfa_state(&self, sources: &[nfa::State]) -> Option<&str> {
let mut result = None;
for source in sources.iter() {
let name = self.name(*source);
if name.is_some() {
result = name;
break;
}
}
result
}
}
/// Errors that can occur when querying callbacks for a DFA state.
#[derive(Copy, Clone, Debug, Display, Eq, PartialEq)]
pub enum CallbackError {
/// There are no available callbacks for this state.
NoCallback,
/// There is more than one callback available for this state.
DuplicateCallbacks,
}
// === Trait Impls ===
impl Deref for AutomatonData {
type Target = Nfa;
fn deref(&self) -> &Self::Target {
&self.automaton
}
}
impl DerefMut for AutomatonData {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.automaton
}
}
// ==================
// === Identifier ===
// ==================
/// An identifier for a group.
#[allow(missing_docs)]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct Identifier(usize);
// === Trait Impls ===
impl From<usize> for Identifier {
fn from(id: usize) -> Self {
Identifier(id)
}
}
impl From<&usize> for Identifier {
fn from(id: &usize) -> Self {
Identifier(*id)
}
}
impl From<Identifier> for usize {
fn from(value: Identifier) -> Self {
value.0
}
}
// ===========
// == Group ==
// ===========
/// A group is a structure for associating multiple rules with each other, and is the basic building
/// block of the flexer.
///
/// A group consists of the following:
///
/// - A set of [`Rule`s](Rule), each containing a regex pattern and associated callback.
/// - Inherited rules from a parent group, if such a group exists.
///
/// Internally, the flexer maintains a stack of groups, where only one group can be active at any
/// given time. Rules are matched _in order_, and hence overlaps are handled by the order in which
/// the rules are matched, with the first callback being triggered.
///
/// Whenever a [`rule.pattern`](Rule::pattern) from the active group is matched against part of the
/// input, the associated [`rule.callback`](Rule::callback) is executed. This callback may exit the
/// current group or even enter a new one. As a result, groups allow us to elegantly model a
/// situation where certain parts of a program (e.g. within a string literal) have very different
/// lexing rules than other portions of a program (e.g. the body of a function).
#[derive(Clone, Debug, Default)]
pub struct Group {
/// A unique identifier for the group.
pub id: Identifier,
/// A name for the group (useful in debugging).
pub name: String,
/// The parent group from which rules are inherited.
///
/// It is ensured that the group is held mutably.
pub parent_index: Option<Identifier>,
/// A set of flexer rules.
pub rules: Vec<Rule>,
/// The names for the user-defined states.
pub state_names: HashMap<usize, String>,
/// The callback functions for the user-defined states.
pub state_callbacks: HashMap<usize, String>,
}
impl Group {
/// Creates a new group.
pub fn new(id: Identifier, name: impl Into<String>, parent_index: Option<Identifier>) -> Self {
let rules = default();
let state_names = default();
let state_callbacks = default();
Group { id, name: name.into(), parent_index, rules, state_names, state_callbacks }
}
/// Adds a new rule to the current group.
pub fn add_rule(&mut self, rule: Rule) {
self.rules.push(rule)
}
/// Creates a new rule.
pub fn create_rule(&mut self, pattern: &Pattern, code: &str) {
let pattern_clone = pattern.clone();
let rule = Rule::new(pattern_clone, code);
self.rules.push(rule)
}
/// The canonical name for a given rule.
pub fn callback_name(&self, rule_ix: usize) -> String {
format!("group_{}_rule_{}", self.id.0, rule_ix)
}
}
// === Trait Impls ===
impl From<Group> for Registry {
fn from(value: Group) -> Self {
let mut registry = Registry::default();
registry.add_group(value);
registry
}
}
impl Display for Group {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "Group {}", self.name)
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
pub mod tests {
use super::*;
#[test]
fn group_create_rule() {
let pattern = Pattern::all_of("abcde");
let mut group = Group::new(0.into(), "Test Name", None);
group.create_rule(&pattern, "code");
let rule = Rule::new(pattern, "code");
assert!(group.rules.contains(&rule));
assert_eq!(group.rules[0].callback, "code".to_string());
}
#[test]
fn group_callback_name() {
let pattern_1 = Pattern::all_of("abcde");
let pattern_2 = Pattern::all_of("abcde");
let mut group = Group::new(0.into(), "Test Name", None);
group.create_rule(&pattern_1, "code");
group.create_rule(&pattern_2, "code");
assert_eq!(group.callback_name(0), "group_0_rule_0");
assert_eq!(group.callback_name(1), "group_0_rule_1");
}
#[test]
fn group_registry_define_group() {
let mut registry = Registry::default();
registry.define_group("TEST_GROUP", None);
assert!(registry.all().iter().any(|g| g.name == *"TEST_GROUP"));
}
#[test]
fn group_registry_create_rule() {
let pattern = Pattern::none_of("abcde");
let mut registry = Registry::default();
let group_1_id = registry.define_group("GROUP_1", None);
let group_2_id = registry.define_group("GROUP_2", None);
let group_1 = registry.group_mut(group_1_id);
group_1.create_rule(&pattern, "rule_1");
let group_2 = registry.group_mut(group_2_id);
group_2.create_rule(&pattern, "rule_2");
let rules_1 = registry.rules_for(group_1_id);
let rules_2 = registry.rules_for(group_2_id);
assert!(rules_1.iter().any(|r| **r == Rule::new(pattern.clone(), "rule_1")));
assert!(rules_2.iter().any(|r| **r == Rule::new(pattern.clone(), "rule_2")));
}
#[test]
fn group_registry_group_parents() {
let pattern_1 = Pattern::char('a');
let pattern_2 = Pattern::char('b');
let pattern_3 = Pattern::char('c');
let mut registry = Registry::default();
let group_1_id = registry.define_group("GROUP_1", None);
let group_2_id = registry.define_group("GROUP_2", Some(group_1_id));
let group_3_id = registry.define_group("GROUP_3", Some(group_2_id));
let group_1 = registry.group_mut(group_1_id);
group_1.create_rule(&pattern_1, "rule_1");
let group_2 = registry.group_mut(group_2_id);
group_2.create_rule(&pattern_2, "rule_2");
let group_3 = registry.group_mut(group_3_id);
group_3.create_rule(&pattern_3, "rule_3");
let rules = registry.rules_for(group_3_id);
assert_eq!(rules.len(), 3);
assert!(rules.iter().any(|r| **r == Rule::new(pattern_1.clone(), "rule_1")));
assert!(rules.iter().any(|r| **r == Rule::new(pattern_2.clone(), "rule_2")));
assert!(rules.iter().any(|r| **r == Rule::new(pattern_3.clone(), "rule_3")));
}
}

View File

@ -1,34 +0,0 @@
//! An API for declaring rust-code callbacks to be executed when a given pattern is matched.
//!
//! A flexer rule is a [`crate::automata::pattern`] associated with rust code to be executed as a
//! callback.
use crate::automata::pattern::Pattern;
// ==========
// == Rule ==
// ==========
/// A flexer rule.
#[derive(Clone, Debug, PartialEq)]
pub struct Rule {
/// The pattern that triggers the callback.
pub pattern: Pattern,
/// The code to execute when [`Rule::pattern`] matches, containing rust code as a
/// [`std::string::String`].
///
/// This code will be called directly from a method defined on your Lexer (the one that
/// contains a [`crate::Flexer`] instance). To this end, the code you provide as a string
/// must be valid in that context.
pub callback: String,
}
impl Rule {
/// Creates a new rule.
pub fn new(pattern: Pattern, callback: impl Into<String>) -> Self {
Rule { pattern, callback: callback.into() }
}
}
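// For example (illustrative, not from the original source), a rule that matches the literal `foo`
// and forwards to a lexer-defined callback:
//
//     let rule = Rule::new(Pattern::all_of("foo"), "self.on_foo(reader)");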

File diff suppressed because it is too large

View File

@ -1,452 +0,0 @@
//! This file contains tests for the user-facing error-handling logic in the flexer code generator.
//!
//! This file includes quite a bit of duplicated code, but this is known and intentional as it
//! allows for increased clarity in the testing.
// === Non-Standard Linter Configuration ===
#![allow(missing_docs)]
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
#![allow(clippy::blacklisted_name)] // `foo` is fine here.
#![allow(clippy::new_without_default)] // No need for boilerplate in throwaway test code.
use enso_flexer::*;
use crate::prelude::logger::AnyLogger;
use crate::prelude::logger::Disabled;
use crate::prelude::reader::BookmarkManager;
use crate::prelude::ReaderOps;
use enso_flexer::automata::pattern::Pattern;
use enso_flexer::generate;
use enso_flexer::group;
use enso_flexer::group::Identifier;
use enso_flexer::group::Registry;
use enso_flexer::prelude::*;
use enso_flexer::Flexer;
use enso_flexer::State;
// ====================
// === Type Aliases ===
// ====================
type Logger = Disabled;
// ====================
// === Shared Setup ===
// ====================
/// A token type for these lexers.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Token {
Foo,
Bar,
}
/// An output type for these lexers.
#[allow(missing_docs)]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Output {
tokens: Vec<Token>,
}
/// A testing lexer state.
pub struct LexerState {
lexer_states: group::Registry,
initial_state: group::Identifier,
}
impl enso_flexer::State for LexerState {
fn new(_logger: &impl AnyLogger) -> Self {
let mut lexer_states = group::Registry::default();
let initial_state = lexer_states.define_group("ROOT", None);
LexerState { lexer_states, initial_state }
}
fn initial_state(&self) -> Identifier {
self.initial_state
}
fn groups(&self) -> &Registry {
&self.lexer_states
}
fn groups_mut(&mut self) -> &mut Registry {
&mut self.lexer_states
}
fn bookmarks(&self) -> &BookmarkManager {
unimplemented!()
}
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
unimplemented!()
}
fn specialize(&self) -> Result<String, GenError> {
// Note [Naming "Lexer"]
generate::specialize(self, "Lexer", "Output")
}
}
/* Note [Naming "Lexer"]
* ~~~~~~~~~~~~~~~~~~~~~
* In general, the name passed to `specialize` should match that of your lexer definition.
* However here, as we never compile the code, we set it to a generic constant that is a valid
* rust identifier so as to reduce testing boilerplate.
*/
// ====================
// === Definition 1 ===
// ====================
pub struct Lexer1 {
lexer: Flexer<LexerState, Output, Logger>,
}
impl Deref for Lexer1 {
type Target = Flexer<LexerState, Output, Logger>;
fn deref(&self) -> &Self::Target {
&self.lexer
}
}
impl DerefMut for Lexer1 {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.lexer
}
}
impl Lexer1 {
pub fn new() -> Lexer1 {
let logger = Logger::new("Lexer1");
let lexer = Flexer::new(logger);
Lexer1 { lexer }
}
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
unimplemented!()
}
}
impl enso_flexer::Definition for Lexer1 {
fn define() -> Self {
let mut lexer = Self::new();
let foo = Pattern::all_of("foo");
let root_group_id = lexer.initial_state();
let root_group = lexer.groups_mut().group_mut(root_group_id);
root_group.create_rule(&foo, "ETERNAL SCREAMING");
lexer
}
fn groups(&self) -> &Registry {
self.lexer.groups()
}
fn set_up(&mut self) {
unimplemented!()
}
fn tear_down(&mut self) {
unimplemented!()
}
}
#[test]
fn test_bad_rule_expression() {
let lexer = Lexer1::define();
let result = lexer.specialize();
assert!(result.is_err());
let message = result.unwrap_err().to_string();
assert_eq!(message, "`ETERNAL SCREAMING` is not a valid rust expression.");
}
// ====================
// === Definition 2 ===
// ====================
pub struct Lexer2 {
lexer: Flexer<LexerState, Output, Logger>,
}
impl Deref for Lexer2 {
type Target = Flexer<LexerState, Output, Logger>;
fn deref(&self) -> &Self::Target {
&self.lexer
}
}
impl DerefMut for Lexer2 {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.lexer
}
}
impl Lexer2 {
pub fn new() -> Lexer2 {
let logger = Logger::new("Lexer2");
let lexer = Flexer::new(logger);
Lexer2 { lexer }
}
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
unimplemented!()
}
}
impl enso_flexer::Definition for Lexer2 {
fn define() -> Self {
let mut lexer = Self::new();
let foo = Pattern::all_of("foo");
let root_group_id = lexer.initial_state();
let root_group = lexer.groups_mut().group_mut(root_group_id);
root_group.create_rule(&foo, "self.test_function_no_reader()");
lexer
}
fn groups(&self) -> &Registry {
self.lexer.groups()
}
fn set_up(&mut self) {
unimplemented!()
}
fn tear_down(&mut self) {
unimplemented!()
}
}
#[test]
pub fn test_no_reader_arg() {
let lexer = Lexer2::define();
let result = lexer.specialize();
let expected_message =
"Bad argument to a callback function. It must take a single argument `reader`.";
assert!(result.is_err());
let message = result.unwrap_err().to_string();
assert_eq!(message, expected_message);
}
// ====================
// === Definition 3 ===
// ====================
pub struct Lexer3 {
lexer: Flexer<LexerState1, Output, Logger>,
}
impl Deref for Lexer3 {
type Target = Flexer<LexerState1, Output, Logger>;
fn deref(&self) -> &Self::Target {
&self.lexer
}
}
impl DerefMut for Lexer3 {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.lexer
}
}
impl Lexer3 {
pub fn new() -> Lexer3 {
let logger = Logger::new("Lexer3");
let lexer = Flexer::new(logger);
Lexer3 { lexer }
}
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
unimplemented!()
}
}
impl enso_flexer::Definition for Lexer3 {
fn define() -> Self {
let mut lexer = Self::new();
let foo = Pattern::all_of("foo");
let root_group_id = lexer.initial_state();
let root_group = lexer.groups_mut().group_mut(root_group_id);
root_group.create_rule(&foo, "self.test_function_reader(reader)");
lexer
}
fn groups(&self) -> &Registry {
self.lexer.groups()
}
fn set_up(&mut self) {
unimplemented!()
}
fn tear_down(&mut self) {
unimplemented!()
}
}
pub struct LexerState1 {
lexer_states: group::Registry,
initial_state: group::Identifier,
}
impl enso_flexer::State for LexerState1 {
fn new(_logger: &impl AnyLogger) -> Self {
let mut lexer_states = group::Registry::default();
let initial_state = lexer_states.define_group("ROOT", None);
LexerState1 { lexer_states, initial_state }
}
fn initial_state(&self) -> Identifier {
self.initial_state
}
fn groups(&self) -> &Registry {
&self.lexer_states
}
fn groups_mut(&mut self) -> &mut Registry {
&mut self.lexer_states
}
fn bookmarks(&self) -> &BookmarkManager {
unimplemented!()
}
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
unimplemented!()
}
fn specialize(&self) -> Result<String, GenError> {
generate::specialize(self, "Bad Lexer Name", "Output")
}
}
#[test]
pub fn test_bad_state_name() {
let lexer = Lexer3::define();
let result = lexer.specialize();
assert!(result.is_err());
let message = result.unwrap_err().to_string();
assert_eq!(message, "`Bad Lexer Name` is not a valid rust identifier.");
}
// ====================
// === Definition 4 ===
// ====================
pub struct Lexer4 {
lexer: Flexer<LexerState2, Output, Logger>,
}
impl Deref for Lexer4 {
type Target = Flexer<LexerState2, Output, Logger>;
fn deref(&self) -> &Self::Target {
&self.lexer
}
}
impl DerefMut for Lexer4 {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.lexer
}
}
impl Lexer4 {
pub fn new() -> Lexer4 {
let logger = Logger::new("Lexer4");
let lexer = Flexer::new(logger);
Lexer4 { lexer }
}
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
unimplemented!()
}
}
impl enso_flexer::Definition for Lexer4 {
fn define() -> Self {
let mut lexer = Self::new();
let foo = Pattern::all_of("foo");
let root_group_id = lexer.initial_state();
let root_group = lexer.groups_mut().group_mut(root_group_id);
root_group.create_rule(&foo, "self.test_function_reader(reader)");
lexer
}
fn groups(&self) -> &Registry {
self.lexer.groups()
}
fn set_up(&mut self) {
unimplemented!()
}
fn tear_down(&mut self) {
unimplemented!()
}
}
pub struct LexerState2 {
lexer_states: group::Registry,
initial_state: group::Identifier,
}
impl enso_flexer::State for LexerState2 {
fn new(_logger: &impl AnyLogger) -> Self {
let mut lexer_states = group::Registry::default();
let initial_state = lexer_states.define_group("ROOT", None);
LexerState2 { lexer_states, initial_state }
}
fn initial_state(&self) -> Identifier {
self.initial_state
}
fn groups(&self) -> &Registry {
&self.lexer_states
}
fn groups_mut(&mut self) -> &mut Registry {
&mut self.lexer_states
}
fn bookmarks(&self) -> &BookmarkManager {
unimplemented!()
}
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
unimplemented!()
}
fn specialize(&self) -> Result<String, GenError> {
generate::specialize(self, "Lexer4", "Bad output name")
}
}
#[test]
pub fn test_bad_output_name() {
let lexer = Lexer4::define();
let result = lexer.specialize();
assert!(result.is_err());
let message = result.unwrap_err().to_string();
assert_eq!(message, "`Bad output name` is not a valid rust path.");
}

View File

@ -1,25 +0,0 @@
[package]
name = "enso-lazy-reader"
version = "0.2.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "An efficient buffered reader."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/lazy-reader"
repository = "https://github.com/enso-org/rust-lib"
license-file = "../../LICENSE"
keywords = ["read", "UTF"]
publish = true
[lib]
name = "lazy_reader"
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
itertools = "0.8"
enso-prelude = { version = "^0.2.1", path = "../../prelude" }

View File

@ -1,3 +0,0 @@
# Enso Lazy Reader
An efficient lazy reader.

View File

@ -1,197 +0,0 @@
//! This module exports various UTF decoders for decoding UTF32 characters.
// === Non-Standard Linter Configuration ===
#![allow(unsafe_code)]
use std::fmt::Debug;
// ===============
// === Decoder ===
// ===============
/// The error for an invalid character.
#[derive(Debug, Clone, Copy)]
pub struct InvalidChar();
/// Trait for decoding UTF32 characters.
pub trait Decoder {
/// The input of the decoder.
type Word: Default + Copy + Debug;
/// The maximum number of words needed to decode one symbol.
const MAX_CODEPOINT_LEN: usize;
/// Decodes the first symbol from the slice and returns it with its length (in words).
///
/// This function can panic if `words.len() < MAX_CODEPOINT_LEN`.
fn decode(words: &[Self::Word]) -> Char<InvalidChar>;
}
// === Char ===
/// The result of `decoder.decode`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Char<Error> {
/// The decoded character.
pub char: Result<char, Error>,
/// The number of words read.
pub size: usize,
}
impl Char<crate::Error> {
/// Check if the character represents the end of file.
pub fn is_eof(&self) -> bool {
match self.char {
Ok(_) => false,
Err(crate::Error::EOF) => true,
Err(_) => false,
}
}
}
// =====================
// === UTF-8 Decoder ===
// =====================
/// Decoder for UTF-8.
///
/// For more info on UTF-8 and the algorithm used see [UTF-8](https://en.wikipedia.org/wiki/UTF-8).
#[derive(Debug, Copy, Clone)]
pub struct DecoderUTF8();
// === Trait Impls ===
impl Decoder for DecoderUTF8 {
type Word = u8;
const MAX_CODEPOINT_LEN: usize = 4;
fn decode(words: &[u8]) -> Char<InvalidChar> {
let size = match !words[0] >> 4 {
0 => 4,
1 => 3,
2 | 3 => 2,
_ => 1,
};
let mut char = (words[0] << size >> size) as u32;
for word in &words[1..size] {
char = char << 6 | (word & 0b_0011_1111) as u32;
}
Char { char: std::char::from_u32(char).ok_or_else(InvalidChar), size }
}
}
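// Worked example (illustrative, not part of the original source): decoding 'é' (U+00E9), which
// UTF-8 encodes as the two bytes [0xC3, 0xA9].
//
//     !0xC3 >> 4 == 0b0011_1100 >> 4 == 3        -> the `2 | 3` arm, so size = 2
//     0xC3 << 2 >> 2 == 0b0000_0011              -> payload bits of the leading byte
//     (0b11 << 6) | (0xA9 & 0b0011_1111) == 0xE9 -> Ok('\u{E9}'), size 2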
// ======================
// === UTF-16 Decoder ===
// ======================
/// Decoder for UTF-16.
///
/// For more info on UTF-16 and the algorithm used see [UTF-16](https://en.wikipedia.org/wiki/UTF-16).
#[derive(Debug, Copy, Clone)]
pub struct DecoderUTF16();
// === Trait Impls ===
impl Decoder for DecoderUTF16 {
type Word = u16;
const MAX_CODEPOINT_LEN: usize = 2;
fn decode(words: &[u16]) -> Char<InvalidChar> {
if words[0] < 0xD800 || 0xDFFF < words[0] {
let char = Ok(unsafe { std::char::from_u32_unchecked(words[0] as u32) });
return Char { char, size: 1 };
}
let char = (((words[0] - 0xD800) as u32) << 10 | (words[1] - 0xDC00) as u32) + 0x1_0000;
Char { char: std::char::from_u32(char).ok_or_else(InvalidChar), size: 2 }
}
}
// ======================
// === UTF-32 Decoder ===
// ======================
/// Trivial decoder for UTF-32 (`char`).
#[derive(Debug, Copy, Clone)]
pub struct DecoderUTF32();
// === Trait Impls ===
impl Decoder for DecoderUTF32 {
type Word = char;
const MAX_CODEPOINT_LEN: usize = 1;
fn decode(words: &[char]) -> Char<InvalidChar> {
Char { char: Ok(words[0]), size: 1 }
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
mod tests {
use super::*;
use itertools::Itertools;
#[test]
fn test_utf8() {
let string = "a.b^c! #𤭢界んにち𤭢#𤭢";
let mut buf = string.as_bytes();
let mut str = String::from("");
while !buf.is_empty() {
let char = DecoderUTF8::decode(buf);
str.push(char.char.unwrap());
buf = &buf[char.size..];
}
assert_eq!(str, string);
}
#[test]
fn test_utf16() {
let string = "a.b^c! #𤭢界んにち𤭢#𤭢";
let buffer = string.encode_utf16().collect_vec();
let mut buf = &buffer[..];
let mut str = String::from("");
while !buf.is_empty() {
let char = DecoderUTF16::decode(buf);
str.push(char.char.unwrap());
buf = &buf[char.size..];
}
assert_eq!(str, string);
}
#[test]
fn test_utf32() {
let string = "a.b^c! #𤭢界んにち𤭢#𤭢".chars().collect_vec();
let mut buf = &string[..];
let mut str = vec![];
while !buf.is_empty() {
let char = DecoderUTF32::decode(buf);
str.push(char.char.unwrap());
buf = &buf[char.size..];
}
assert_eq!(str, string);
}
}

View File

@ -1,600 +0,0 @@
//! This module exports a reader that is able to process large textual inputs in constant memory.
// === Features ===
#![feature(test)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
use enso_prelude::*;
use crate::decoder::Char;
use crate::decoder::InvalidChar;
use decoder::Decoder;
// ==============
// === Export ===
// ==============
pub mod decoder;
// ============
// === Read ===
// ============
/// Trait for reading input data into a buffer.
///
/// Compared to `std::io::Read` this reader supports multiple input encodings.
pub trait Read {
/// The type of the data in the buffer.
type Item;
/// Fills the buffer and returns amount of elements read.
///
/// In case it isn't possible to fill the whole buffer (i.e. if an error like EOF is
/// encountered), the buffer will be filled with all the data read before encountering such an
/// error.
fn read(&mut self, buffer: &mut [Self::Item]) -> usize;
}
// === Trait Impls ===
impl<R: std::io::Read> Read for R {
type Item = u8;
fn read(&mut self, mut buffer: &mut [u8]) -> usize {
let length = buffer.len();
while !buffer.is_empty() {
match self.read(buffer) {
Err(_) => break,
Ok(0) => break,
Ok(n) => {
buffer = &mut buffer[n..];
}
}
}
length - buffer.len()
}
}
// =============
// === Error ===
// =============
/// Set of errors returned by lazy reader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
/// End Of Input.
EOF,
/// Couldn't decode character.
InvalidChar,
/// The lexer has found no matching rule in the current state.
EndOfGroup,
}
impl Error {
/// The `u32` value that corresponds to EOF.
pub const END_OF_FILE: u32 = u32::max_value();
/// The `u32` value that corresponds to an invalid unicode character.
pub const INVALID_CHAR: u32 = 0xFFFF;
/// The `u32` value corresponding to the end of group.
pub const END_OF_GROUP: u32 = u32::max_value() - 1;
/// The `u64` value that corresponds to EOF.
pub const END_OF_FILE_64: u64 = u64::max_value();
/// The `u64` value that corresponds to an invalid unicode character.
pub const INVALID_CHAR_64: u64 = 0xFFFF;
/// The `u64` value corresponding to the end of group.
pub const END_OF_GROUP_64: u64 = u64::max_value() - 1;
}
// === Trait Impls ===
impl From<decoder::Char<decoder::InvalidChar>> for decoder::Char<Error> {
fn from(char: Char<InvalidChar>) -> Self {
let size = char.size;
let char = match char.char {
Ok(char) => Ok(char),
Err(_) => Err(Error::InvalidChar),
};
decoder::Char { char, size }
}
}
impl From<decoder::Char<Error>> for u32 {
fn from(char: decoder::Char<Error>) -> Self {
match char.char {
Ok(char) => char as u32,
Err(Error::EOF) => Error::END_OF_FILE,
Err(Error::InvalidChar) => Error::INVALID_CHAR,
Err(Error::EndOfGroup) => Error::END_OF_GROUP,
}
}
}
impl From<decoder::Char<Error>> for u64 {
fn from(char: decoder::Char<Error>) -> Self {
match char.char {
Ok(char) => char as u64,
Err(Error::EOF) => Error::END_OF_FILE_64,
Err(Error::InvalidChar) => Error::INVALID_CHAR_64,
Err(Error::EndOfGroup) => Error::END_OF_GROUP_64,
}
}
}
// ==================
// === BookmarkId ===
// ==================
/// Strongly typed identifier of a `Bookmark`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct BookmarkId {
#[allow(missing_docs)]
id: usize,
}
impl BookmarkId {
/// Creates a new bookmark handle using the specified identifier.
pub fn new(id: usize) -> BookmarkId {
BookmarkId { id }
}
}
// =================
// === ReaderOps ===
// =================
/// The behaviour needed by the reader interface.
pub trait ReaderOps {
/// Read the next character from input.
fn next_char(&mut self, bookmarks: &mut BookmarkManager) -> Result<char, Error>;
/// Advance along the input without returning the character.
fn advance_char(&mut self, bookmarks: &mut BookmarkManager);
/// Get the current character from the reader.
fn character(&self) -> decoder::Char<Error>;
/// Check if the reader has finished reading.
///
/// A reader is finished when it has no further input left to read, and when it does not need to
/// rewind to any point.
fn finished(&self, bookmarks: &BookmarkManager) -> bool;
/// Check if the reader is empty.
fn empty(&self) -> bool;
/// Fill the buffer with words from the input.
fn fill(&mut self, bookmarks: &mut BookmarkManager);
/// Get the maximum possible rewind for the reader.
fn max_possible_rewind_len(&self, bookmarks: &BookmarkManager) -> usize;
/// Append the provided character to the reader's result.
fn append_result(&mut self, char: char);
/// Return `self.result` and sets the internal result to empty.
fn pop_result(&mut self) -> String;
/// Get the reader's current offset in the buffer.
fn offset(&self) -> usize;
/// Get an immutable reference to the reader's result.
fn result(&self) -> &String;
/// Get a mutable reference to the reader's result.
fn result_mut(&mut self) -> &mut String;
/// Get the current length of the reader's buffer.
fn buffer_len(&self) -> usize;
/// Set the buffer offset to the specified value.
fn set_offset(&mut self, off: usize);
/// Truncate the current match to the provided length.
fn truncate_match(&mut self, len: usize);
}
/// The default size of the buffer.
pub const BUFFER_SIZE: usize = 32768;
// ==============
// === Reader ===
// ==============
/// A buffered reader able to efficiently read big inputs in constant memory.
///
/// It supports various encodings via `Decoder` and also bookmarks which allow it to return
/// to a character at a specific offset.
#[derive(Debug, Clone, PartialEq)]
pub struct Reader<D: Decoder, Read> {
/// The reader that holds the input.
pub reader: Read,
/// The buffer that stores the input data.
pub buffer: Vec<D::Word>,
/// The string representation of data that has been read.
pub result: String,
/// The buffer offset of the current element read.
pub offset: usize,
/// The number of elements stored in buffer.
pub length: usize,
/// The last character read.
pub character: decoder::Char<Error>,
}
impl<D: Decoder, R: Read<Item = D::Word>> Reader<D, R> {
/// Creates a new instance of the reader.
pub fn new(reader: R, _decoder: D) -> Self {
let mut reader = Reader::<D, R> {
reader,
buffer: vec![D::Word::default(); BUFFER_SIZE],
result: String::from(""),
offset: 0,
length: 0,
character: decoder::Char { char: Err(Error::EOF), size: 0 },
};
reader.length = reader.reader.read(&mut reader.buffer[..]);
reader
}
}
// === Trait Impls ===
impl<D: Decoder, R: Read<Item = D::Word>> ReaderOps for Reader<D, R> {
fn next_char(&mut self, bookmarks: &mut BookmarkManager) -> Result<char, Error> {
if self.empty() {
self.character.char = Err(Error::EOF);
return Err(Error::EOF);
}
if self.offset >= self.buffer.len() - D::MAX_CODEPOINT_LEN {
self.fill(bookmarks);
}
self.character = D::decode(&self.buffer[self.offset..]).into();
self.offset += self.character.size;
self.character.char
}
fn advance_char(&mut self, bookmarks: &mut BookmarkManager) {
let _ = self.next_char(bookmarks);
}
fn character(&self) -> Char<Error> {
self.character
}
fn finished(&self, _bookmarks: &BookmarkManager) -> bool {
let rewinded = self.max_possible_rewind_len(_bookmarks) != 0;
self.empty() && rewinded
}
fn empty(&self) -> bool {
self.length < self.buffer.len() && self.length <= self.offset
}
fn fill(&mut self, bookmarks: &mut BookmarkManager) {
let len = self.buffer.len();
let words = len - self.offset;
self.offset = self.max_possible_rewind_len(bookmarks);
if self.offset == len {
panic!("Rewind won't be possible. Buffer is too small.")
}
bookmarks.decrease_bookmark_offsets(len - self.offset);
for i in 1..=self.offset {
self.buffer[self.offset - i] = self.buffer[len - i];
}
self.length = self.offset + self.reader.read(&mut self.buffer[self.offset..]);
self.offset -= words;
}
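// Worked example for `fill` above (illustrative, not from the original source): with a 10-word
// buffer, `offset == 10` (fully consumed) and the earliest bookmark at offset 7, the rewind
// window is 10 - 7 = 3 words. Those last 3 words are copied to the front of the buffer, all
// bookmark offsets are decreased by 10 - 3 = 7, fresh input is read into `buffer[3..]`, and
// reading resumes at offset 3.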
fn max_possible_rewind_len(&self, bookmarks: &BookmarkManager) -> usize {
if let Some(offset) = bookmarks.min_offset() {
return self.buffer_len() - offset;
}
D::MAX_CODEPOINT_LEN
}
fn append_result(&mut self, char: char) {
self.result.push(char);
}
fn pop_result(&mut self) -> String {
let str = self.result.clone();
self.result.truncate(0);
str
}
fn offset(&self) -> usize {
self.offset
}
fn result(&self) -> &String {
&self.result
}
fn result_mut(&mut self) -> &mut String {
&mut self.result
}
fn buffer_len(&self) -> usize {
self.buffer.len()
}
fn set_offset(&mut self, off: usize) {
self.offset = off;
}
fn truncate_match(&mut self, len: usize) {
self.result.truncate(len);
}
}
// ================
// === Bookmark ===
// ================
/// Bookmarks a specific character in the buffer, so that the `Reader` can return to it when needed.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct Bookmark {
/// The position of the bookmarked character in the `reader.buffer`.
offset: usize,
/// The length of `reader.result` up to the bookmarked character.
length: usize,
/// Whether or not the bookmark has been set by the user.
set: bool,
}
// =======================
// === BookmarkManager ===
// =======================
/// Contains and manages bookmarks for a running lexer.
///
/// Some of its operations take a specific [`Reader`]. It is undefined behaviour to pass
/// different readers to different calls on the same bookmark manager.
#[allow(missing_docs)]
#[derive(Clone, Debug, PartialEq)]
pub struct BookmarkManager {
bookmarks: Vec<Bookmark>,
/// The bookmark used by the flexer to mark the end of the last matched segment of the input.
pub matched_bookmark: BookmarkId,
/// A bookmark used by the flexer to deal with overlapping rules that may fail later.
pub rule_bookmark: BookmarkId,
}
#[allow(missing_docs)]
impl BookmarkManager {
/// Create a new bookmark manager, with no associated bookmarks.
pub fn new() -> BookmarkManager {
let mut bookmarks = Vec::new();
let matched_bookmark = BookmarkManager::make_bookmark(&mut bookmarks);
let rule_bookmark = BookmarkManager::make_bookmark(&mut bookmarks);
BookmarkManager { bookmarks, matched_bookmark, rule_bookmark }
}
/// Create a new bookmark in the manager, returning a handle to it.
fn make_bookmark(bookmarks: &mut Vec<Bookmark>) -> BookmarkId {
bookmarks.push(Bookmark::default());
BookmarkId::new(bookmarks.len() - 1)
}
/// Add a bookmark to the manager, returning a handle to that bookmark.
pub fn add_bookmark(&mut self) -> BookmarkId {
BookmarkManager::make_bookmark(&mut self.bookmarks)
}
/// Bookmarks the current position in `reader` using `bookmark`.
pub fn bookmark<R: ReaderOps>(&mut self, bookmark: BookmarkId, reader: &mut R) {
self.bookmarks[bookmark.id].offset = reader.offset() - reader.character().size;
self.bookmarks[bookmark.id].length = reader.result().len();
self.bookmarks[bookmark.id].set = true
}
/// Unsets a bookmark.
pub fn unset<R: ReaderOps>(&mut self, bookmark: BookmarkId) {
self.bookmarks[bookmark.id].offset = 0;
self.bookmarks[bookmark.id].length = 0;
self.bookmarks[bookmark.id].set = false
}
/// Decrease the offset for all bookmarks by the specified `amount` in preparation for
/// truncating the reader's buffer.
pub fn decrease_bookmark_offsets(&mut self, amount: usize) {
for bookmark in self.bookmarks.iter_mut() {
if bookmark.set {
bookmark.offset -= amount
}
}
}
/// Rewind the reader to the position marked by `bookmark`.
pub fn rewind<R: ReaderOps>(&mut self, bookmark: BookmarkId, reader: &mut R) {
let bookmark = self.bookmarks.get(bookmark.id).expect("Bookmark must exist.");
reader.set_offset(bookmark.offset);
reader.truncate_match(bookmark.length);
reader.advance_char(self);
}
/// Obtains the minimum offset from the start of the buffer for any bookmark.
pub fn min_offset(&self) -> Option<usize> {
self.bookmarks.iter().filter_map(|b| b.set.and_option(Some(b.offset))).min()
}
}
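// A minimal usage sketch (illustrative, not from the original source): remember a position before
// speculatively lexing ahead, then rewind to it if the longer match fails.
//
//     let mut bookmarks = BookmarkManager::new();
//     let rule_bookmark = bookmarks.rule_bookmark;
//     bookmarks.bookmark(rule_bookmark, &mut reader);
//     // ... try to match a longer rule ...
//     bookmarks.rewind(rule_bookmark, &mut reader);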
// === Trait Impls ===
impl Default for BookmarkManager {
fn default() -> Self {
BookmarkManager::new()
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
mod tests {
extern crate test;
use super::*;
use decoder::*;
use test::Bencher;
// ================
// === Repeater ===
// ================
/// A `Read` implementation that repeats an input n times.
#[derive(Debug, Clone)]
struct Repeat<T> {
/// The input to be repeated.
buffer: Vec<T>,
/// The current offset of the element being read from the buffer.
offset: usize,
/// How many more times the input should be repeated.
repeat: usize,
}
/// Creates a reader that repeats an input n times.
fn repeat<T: Copy>(input: Vec<T>, repeat: usize) -> impl Read<Item = T> {
Repeat { buffer: input, repeat, offset: 0 }
}
// === Trait Impls ===
impl<T: Copy> Read for Repeat<T> {
type Item = T;
fn read(&mut self, mut buffer: &mut [Self::Item]) -> usize {
if self.repeat == 0 {
return 0;
}
let len = self.buffer.len();
let read = buffer.len();
if read < len - self.offset {
buffer.copy_from_slice(&self.buffer[self.offset..self.offset + read]);
self.offset += read;
return read;
}
buffer[..len - self.offset].copy_from_slice(&self.buffer[self.offset..]);
buffer = &mut buffer[len - self.offset..];
let repeat = std::cmp::min(buffer.len() / len, self.repeat - 1);
self.repeat = self.repeat - repeat - 1;
for _ in 0..repeat {
buffer[..len].copy_from_slice(&self.buffer[..]);
buffer = &mut buffer[len..];
}
if self.repeat == 0 {
return len - self.offset + repeat * len;
}
buffer.copy_from_slice(&self.buffer[..buffer.len()]);
self.offset = buffer.len();
read
}
}
// =============
// === Utils ===
// =============
/// Constructs an _empty_ bookmark manager for testing purposes.
pub fn bookmark_manager() -> BookmarkManager {
BookmarkManager::new()
}
// =============
// === Tests ===
// =============
#[test]
fn test_repeater_with_small_buffer() {
let mut repeater = repeat(vec![1, 2, 3], 1);
let mut buffer = [0; 2];
assert_eq!(repeater.read(&mut buffer), 2);
assert_eq!(&buffer, &[1, 2]);
assert_eq!(repeater.read(&mut buffer), 1);
assert_eq!(&buffer, &[3, 2])
}
#[test]
fn test_repeater_with_big_buffer() {
let mut repeater = repeat(vec![1, 2], 3);
let mut buffer = [0; 5];
assert_eq!(repeater.read(&mut buffer), 5);
assert_eq!(&buffer, &[1, 2, 1, 2, 1]);
assert_eq!(repeater.read(&mut buffer), 1);
assert_eq!(&buffer, &[2, 2, 1, 2, 1])
}
#[test]
fn test_reader_small_input() {
let mut mgr = bookmark_manager();
let str = "a.b^c! #𤭢界んにち𤭢#𤭢";
let mut reader = Reader::new(str.as_bytes(), DecoderUTF8());
let mut result = String::from("");
while let Ok(char) = reader.next_char(&mut mgr) {
result.push(char);
}
assert_eq!(&result, str);
}
#[test]
fn test_reader_big_input() {
let mut mgr = bookmark_manager();
let str = "a.b^c! #𤭢界んにち𤭢#𤭢".repeat(10_000);
let mut reader = Reader::new(str.as_bytes(), DecoderUTF8());
let mut result = String::from("");
while let Ok(char) = reader.next_char(&mut mgr) {
mgr.bookmark(mgr.matched_bookmark, &mut reader);
result.push(char);
}
assert_eq!(&result, &str);
assert_eq!(reader.buffer.len(), BUFFER_SIZE);
}
#[bench]
fn bench_reader(bencher: &mut Bencher) {
let run = || {
let mut mgr = bookmark_manager();
let str = repeat("Hello, World!".as_bytes().to_vec(), 10_000_000);
let mut reader = Reader::new(str, DecoderUTF8());
let mut count = 0;
while reader.next_char(&mut mgr) != Err(Error::EOF) {
count += 1;
}
count
};
bencher.iter(run);
}
}

View File

@ -1,18 +0,0 @@
[package]
name = "lexer-definition"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
enso-flexer = { version = "0.2.0", path = "../../flexer" }
enso-prelude = { version = "0.2.0", path = "../../../prelude" }
uuid = { version = "0.8.1" , features = ["serde","v4","wasm-bindgen"] }

View File

@ -1,371 +0,0 @@
//! This crate describes valid escape sequences inside Enso text literals.
use crate::prelude::*;
use crate::lexeme;
use crate::library::token;
use crate::token::EscapeStyle;
use crate::token::Shape;
// =======================
// === EscapeCharacter ===
// =======================
/// A representation of an escape character.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct EscapeCharacter {
/// The lexing representation of the escape.
///
/// This is the literal string that must occur in the Enso source code to be interpreted as
/// this escape code.
pub pattern: String,
/// The literal representation of the escape.
///
/// This is the character-level encoding of this escape character in Rust, as the Rust escape
/// representation and the Enso escape representation may differ, or Rust may not support the
/// same literal escape code as Enso.
pub repr: String,
}
impl EscapeCharacter {
fn new(pattern: impl Str, repr: impl Str) -> EscapeCharacter {
let pattern = pattern.into();
let repr = repr.into();
Self { pattern, repr }
}
/// The set of character escape codes that Enso supports.
pub fn codes() -> Vec<EscapeCharacter> {
vec![
// === Null ===
Self::new(r"\0", "\0"),
// === Newlines ===
Self::new(r"\n", "\n"),
Self::new(r"\r", "\r"),
Self::new(r"\f", "\x0C"),
// === Tabs ===
Self::new(r"\t", "\t"),
Self::new(r"\v", "\x0B"),
// === Backspace ===
Self::new(r"\b", "\x08"),
// === Misc ===
Self::new(r"\a", "\x07"),
]
}
}
// =================
// === Utilities ===
// =================
/// Check if `c` is a hexadecimal digit.
fn is_hex_digit(c: char) -> bool {
let small_letters = 'a'..='f';
let large_letters = 'A'..='F';
let digits = '0'..='9';
small_letters.contains(&c) || large_letters.contains(&c) || digits.contains(&c)
}
// ======================
// === EscapeSequence ===
// ======================
/// A trait representing various kinds of escape sequence.
///
/// An escape sequence built using this trait will have its digits calculated by stripping the
/// [`Self::prefix_length()`] and [`Self::suffix_length()`] from the input string, and then
/// validated using [`Self::digits_min_length()`], [`Self::digits_max_length()`], and
/// [`Self::validator()`]. All digits must be valid hexadecimal digits as defined by
/// [`is_hex_digit`] above.
///
/// In addition, the implementation must define [`Self::style_on_success()`] and
/// [`Self::style_on_failure()`] to determine the type of escape output on success and failure.
pub trait EscapeSequence {
/// Create a token of the relevant escape sequence type.
///
/// This function should be passed the _full_ match for the escape sequence as `repr`, including
/// the delimiters. For example, if we have the escape sequence `\uAFAF`, we want to pass the
/// whole string `"\uAFAF"`, not just `"AFAF"` to this function..
fn build(repr: impl Str) -> Shape {
if let Some(digits) = Self::validate(repr.as_ref()) {
Shape::text_segment_escape(Self::style_on_success(), digits)
} else {
Shape::text_segment_escape(Self::style_on_failure(), repr)
}
}
/// Obtain the digits portion of the escape sequence.
fn get_digits(repr: &str) -> &str {
let start = Self::prefix_length();
let end = repr.len().saturating_sub(Self::suffix_length());
&repr[start..end]
}
/// Validate the provided unicode string for this type of escape sequence.
fn validate(repr: &str) -> Option<String> {
let digits = Self::get_digits(repr);
let ge_min = digits.len() >= Self::digits_min_length();
let le_max = digits.len() <= Self::digits_max_length();
let valid_length = ge_min && le_max;
let valid_escape = Self::validator(digits);
let valid_digits = digits.chars().all(is_hex_digit);
let is_valid = valid_length && valid_escape && valid_digits;
is_valid.as_some(digits.into())
}
/// Return the length of the escape prefix.
///
/// The prefix is the characters that need to be stripped from the front of the escape sequence
/// to get, in conjunction with [`EscapeSequence::suffix_length()`], the escape value itself.
fn prefix_length() -> usize;
/// Return the length of the escape suffix.
///
/// The suffix is the characters that need to be stripped from the end of the escape sequence to
/// get, in conjunction with [`EscapeSequence::prefix_length()`], the escape value itself.
///
/// This defaults to `0`.
fn suffix_length() -> usize {
0
}
/// Return the minimum number of digits accepted by the escape sequence type.
fn digits_min_length() -> usize;
/// Return the maximum number of digits accepted by the escape sequence type.
///
/// This defaults to `digits_min_length()`.
fn digits_max_length() -> usize {
Self::digits_min_length()
}
/// A validator for any additional properties of the escape sequence.
///
/// It will be passed the _digits_ of the escape sequence, as defined by
/// [`EscapeSequence::get_digits()`], and has a default implementation that always succeeds.
/// Please implement this validator yourself if you would like to assert _additional_ properties
/// on your escape sequence.
fn validator(_digits: &str) -> bool {
true
}
/// The style of escape after successful validation.
fn style_on_success() -> token::EscapeStyle;
/// The style of escape after unsuccessful validation.
fn style_on_failure() -> token::EscapeStyle;
}
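// Worked example (illustrative, not from the original source): `Byte::build(r"\x0F")`, using the
// `Byte` implementation defined below.
//
//     get_digits strips the two-character `\x` prefix (suffix length 0)  -> "0F"
//     validate: length 2 is within [2, 2] and both chars are hex digits  -> Some("0F")
//     build returns `Shape::text_segment_escape(EscapeStyle::Byte, "0F")`
//
// An input such as `\xz2` fails the hex-digit check, so the full repr is kept and reported with
// `EscapeStyle::Invalid` instead.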
// ==================
// === ByteEscape ===
// ==================
/// A validator for ASCII escapes.
///
/// An ASCII escape begins with the sequence `\x` and is followed by two hexadecimal digits (e.g.
/// `\x0F`).
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
pub struct Byte;
impl EscapeSequence for Byte {
fn prefix_length() -> usize {
lexeme::len(lexeme::literal::BYTE_ESCAPE_START)
}
fn digits_min_length() -> usize {
2
}
fn style_on_success() -> EscapeStyle {
token::EscapeStyle::Byte
}
fn style_on_failure() -> EscapeStyle {
token::EscapeStyle::Invalid
}
}
// ===========
// === U16 ===
// ===========
/// A validator for U16 unicode escapes.
///
/// A U16 unicode escape begins with the sequence `\u` and is followed by four hexadecimal digits,
/// e.g. `\u0F0F`.
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
pub struct U16;
impl EscapeSequence for U16 {
fn prefix_length() -> usize {
lexeme::len(lexeme::literal::U16_ESCAPE_START)
}
fn digits_min_length() -> usize {
4
}
fn style_on_success() -> EscapeStyle {
token::EscapeStyle::U16
}
fn style_on_failure() -> EscapeStyle {
token::EscapeStyle::InvalidUnicode
}
}
// ===========
// === U21 ===
// ===========
/// A validator for U21 unicode escapes.
///
/// A U21 unicode escape begins with the sequence `\u`, followed by a sequence of 1-6 hexadecimal
/// digits enclosed in braces (`{}`). Both `\u{F}` and `\u{AFAFAF}` are valid U21 escapes.
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
pub struct U21;
impl EscapeSequence for U21 {
fn prefix_length() -> usize {
lexeme::len(lexeme::literal::U21_ESCAPE_START)
}
fn suffix_length() -> usize {
lexeme::len(lexeme::literal::U21_ESCAPE_END)
}
fn digits_min_length() -> usize {
1
}
fn digits_max_length() -> usize {
6
}
fn style_on_success() -> EscapeStyle {
token::EscapeStyle::U21
}
fn style_on_failure() -> EscapeStyle {
token::EscapeStyle::InvalidUnicode
}
}
// ===========
// === U32 ===
// ===========
/// A validator for U32 unicode escapes.
///
/// A U32 unicode escape begins with the sequence `\U`, followed by 8 hexadecimal digits. Due to the
/// restrictions of unicode, the first two digits _must_ be zero (e.g. `\U00AFAFAF`).
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
pub struct U32;
impl EscapeSequence for U32 {
fn prefix_length() -> usize {
lexeme::len(lexeme::literal::U32_ESCAPE_START)
}
fn digits_min_length() -> usize {
8
}
fn validator(digits: &str) -> bool {
digits.starts_with("00")
}
fn style_on_success() -> EscapeStyle {
token::EscapeStyle::U32
}
fn style_on_failure() -> EscapeStyle {
token::EscapeStyle::InvalidUnicode
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
mod test {
use super::*;
// === Utilities ===
/// Tests a valid input to ensure that it succeeds.
fn test_valid<Esc: EscapeSequence>(escape: &str, out: &str, out_style: token::EscapeStyle) {
let shape = Shape::text_segment_escape(out_style, out);
assert_eq!(Esc::build(escape), shape);
}
/// Tests invalid inputs to ensure they fail for the provided escape type `Esc`.
fn test_invalid<Esc: EscapeSequence>(invalid_cases: Vec<&str>, fail_with: token::EscapeStyle) {
for escape in invalid_cases {
let shape = Shape::text_segment_escape(fail_with, escape);
assert_eq!(Esc::build(escape), shape)
}
}
// === Is Hex Digit ===
#[test]
fn test_is_hex_digit() {
for val in u8::min_value()..=u8::max_value() {
let char = char::from(val);
let is_in_small = ('a'..='f').contains(&char);
let is_in_large = ('A'..='F').contains(&char);
let is_in_dec_digit = ('0'..='9').contains(&char);
let expected_result = is_in_small || is_in_large || is_in_dec_digit;
assert_eq!(is_hex_digit(char), expected_result);
}
}
// === Build ===
#[test]
fn test_byte_build_valid() {
test_valid::<Byte>(r"\x05", "05", token::EscapeStyle::Byte);
}
#[test]
fn test_byte_build_invalid() {
test_invalid::<Byte>(vec![r"\x5", r"\x", r"\x033", r"\xz2"], token::EscapeStyle::Invalid);
}
#[test]
fn test_u16_build_valid() {
test_valid::<U16>(r"\u4fe3", "4fe3", token::EscapeStyle::U16);
}
#[test]
fn test_u16_build_invalid() {
test_invalid::<U16>(
vec![r"\u123", r"\u", r"\u123aff", r"\uazaz"],
token::EscapeStyle::InvalidUnicode,
);
}
#[test]
fn test_u21_build_valid() {
test_valid::<U21>(r"\u{fa4e}", "fa4e", token::EscapeStyle::U21);
}
#[test]
fn test_u21_build_invalid() {
test_invalid::<U21>(vec![r"\u{1234567}", r"\u{}"], token::EscapeStyle::InvalidUnicode);
}
#[test]
fn test_u32_build_valid() {
test_valid::<U32>(r"\U0014A890", "0014A890", token::EscapeStyle::U32);
}
#[test]
fn test_u32_build_invalid() {
test_invalid::<U32>(
vec![r"\U12121212", r"\U", r"\U001234", r"\U001234567"],
token::EscapeStyle::InvalidUnicode,
);
}
}

View File

@ -1,303 +0,0 @@
//! This module defines the base lexemes for the Enso language.
use crate::prelude::*;
use enso_flexer::automata::pattern::Pattern;
// =================================
// === Basic Pattern Definitions ===
// =================================
/// Basic lexemes as patterns.
///
/// These must _only_ be used as part of the lexer definition, not used at runtime as they are not
/// performant at all.
pub mod definition_pattern {
use super::*;
/// Match lower-case ASCII letters.
pub fn lower_ascii_letter() -> Pattern {
Pattern::range('a'..='z')
}
/// Match upper-case ASCII letters.
pub fn upper_ascii_letter() -> Pattern {
Pattern::range('A'..='Z')
}
/// Match ASCII digits.
pub fn ascii_digit() -> Pattern {
Pattern::range('0'..='9')
}
/// Match ASCII letters.
pub fn ascii_letter() -> Pattern {
lower_ascii_letter() | upper_ascii_letter()
}
/// Match ASCII alphanumeric characters.
pub fn ascii_alpha_num() -> Pattern {
ascii_digit() | ascii_letter()
}
/// Match at least one ASCII space character.
pub fn spaces() -> Pattern {
into_pattern(literal::SPACE).many1()
}
/// Match the end-of-file character.
pub fn eof() -> Pattern {
Pattern::eof()
}
/// Match a newline.
///
/// This matches both Unix (LF) and Windows (CRLF) styles of newlines. This is particularly
/// important so as not to result in incorrect spans on Windows clients.
pub fn newline() -> Pattern {
let lf = into_pattern(literal::LF);
let crlf = into_pattern(literal::CRLF);
lf | crlf
}
/// The characters that break tokens in Enso.
pub fn whitespace_break_chars() -> String {
[literal::TAB, literal::LF, literal::CR].concat()
}
/// The characters that break token lexing in Enso.
pub fn break_chars() -> String {
[
literal::INTERPOLATE_QUOTE,
literal::COMMENT,
literal::ANNOTATION_SYMBOL,
literal::SPACE,
literal::COMMA,
literal::DOT,
literal::OPERATOR_CHARS,
literal::GROUP_CHARS,
&whitespace_break_chars(),
]
.concat()
}
/// Adds the basic characters not allowed in a raw segment in a format text literal.
fn add_base_format_disallows(chars: &mut String) {
chars.push_str(literal::INTERPOLATE_QUOTE);
chars.push_str(literal::SLASH);
chars.push_str(literal::LF);
chars.push_str(literal::CR);
}
/// Characters allowable inside a raw segment in a format line.
pub fn format_line_raw_char() -> Pattern {
let mut chars = String::new();
chars.push_str(literal::FORMAT_QUOTE);
add_base_format_disallows(&mut chars);
Pattern::none_of(&chars)
}
/// Characters allowable inside a raw segment in a format block.
pub fn format_block_raw_char() -> Pattern {
let mut chars = String::new();
add_base_format_disallows(&mut chars);
Pattern::none_of(&chars)
}
/// Adds the basic characters not allowed in a raw segment in a raw text literal.
fn add_base_raw_disallows(chars: &mut String) {
chars.push_str(literal::SLASH);
chars.push_str(literal::LF);
chars.push_str(literal::CR);
}
/// Characters allowable inside a raw segment in a raw line.
pub fn raw_line_raw_char() -> Pattern {
let mut chars = String::new();
chars.push_str(literal::RAW_QUOTE);
add_base_raw_disallows(&mut chars);
Pattern::none_of(&chars)
}
/// Characters allowable inside a raw segment in a raw block.
pub fn raw_block_raw_char() -> Pattern {
let mut chars = String::new();
add_base_raw_disallows(&mut chars);
Pattern::none_of(&chars)
}
/// The characters allowed as digits in a unicode escape.
pub fn unicode_escape_digit() -> Pattern {
let chars = &[
literal::FORMAT_QUOTE,
literal::RAW_QUOTE,
literal::INTERPOLATE_QUOTE,
literal::SLASH,
literal::LF,
literal::CR,
"{}",
]
.concat();
Pattern::none_of(chars)
}
}
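// --- Illustrative sketch (not part of the original source) ---
// The helpers above are intended to be composed with the `Pattern` operators
// already used in this module. For example, an ASCII hex-digit pattern could
// be assembled from ranges and unions like so:
#[allow(dead_code)]
fn _example_hex_digit_pattern() -> Pattern {
    definition_pattern::ascii_digit() | Pattern::range('a'..='f') | Pattern::range('A'..='F')
}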
// ===============================
// === Enso Lexeme Definitions ===
// ===============================
/// The literal lexemes that make up the Enso language.
pub mod literal {
/// The type of a literal lexeme.
pub type Literal = &'static str;
// === The Lexemes ===
/// The space character.
pub const SPACE: Literal = " ";
/// The line-feed character.
pub const LF: Literal = "\n";
/// The carriage-return character.
pub const CR: Literal = "\r";
/// The crlf windows-style line ending.
pub const CRLF: Literal = "\r\n";
/// The tab character.
pub const TAB: Literal = "\t";
/// The comment character.
pub const COMMENT: Literal = "#";
/// The doc comment character.
pub const DOC_COMMENT: Literal = "##";
/// The symbol for beginning an annotation.
pub const ANNOTATION_SYMBOL: Literal = "@";
/// The dot symbol.
pub const DOT: Literal = ".";
/// Two dots.
pub const TWO_DOTS: Literal = "..";
/// Three dots.
pub const THREE_DOTS: Literal = "...";
/// The comma.
pub const COMMA: Literal = ",";
/// The `in` operator.
pub const OPERATOR_IN: Literal = "in";
/// The tick allowable at the end of an identifier.
pub const IDENTIFIER_TICK: Literal = "'";
/// The quote used to delimit interpolations in format text literals.
pub const INTERPOLATE_QUOTE: Literal = "`";
/// The quote used to delimit format text literals.
pub const FORMAT_QUOTE: Literal = "'";
/// The quote used to delimit format block literals.
pub const FORMAT_BLOCK_QUOTE: Literal = "'''";
/// The quote used to delimit raw text literals.
pub const RAW_QUOTE: Literal = "\"";
/// The quote used to delimit raw block literals.
pub const RAW_BLOCK_QUOTE: Literal = "\"\"\"";
/// The equals operator.
pub const EQUALS: Literal = "=";
/// The equality comparison operator.
pub const EQUALS_COMP: Literal = "==";
/// Greater-than or equal.
pub const GE_OPERATOR: Literal = ">=";
/// Less-than or equal.
pub const LE_OPERATOR: Literal = "<=";
/// Inequality comparison operator.
pub const NOT_EQUAL: Literal = "!=";
/// The hash eq operator.
pub const HASH_EQ: Literal = "#=";
/// The wide arrow operator.
pub const WIDE_ARROW: Literal = "=>";
/// The blank identifier.
pub const BLANK_IDENT: Literal = "_";
/// The identifier segment separator.
pub const IDENT_SEGMENT_SEPARATOR: Literal = "_";
/// The separator between a number literal's explicit base and the number itself.
pub const NUMBER_BASE_SEPARATOR: Literal = "_";
/// The separator between the integer and fractional parts of the number literal.
pub const DECIMAL_SEPARATOR: Literal = ".";
/// The backslash character.
pub const SLASH: Literal = r"\";
/// An escaped [`SLASH`].
pub const ESCAPED_SLASH: Literal = r"\\";
/// The beginning of a byte escape.
pub const BYTE_ESCAPE_START: Literal = r"\x";
/// The beginning of a u16 escape.
pub const U16_ESCAPE_START: Literal = r"\u";
/// The beginning of a u21 escape.
pub const U21_ESCAPE_START: Literal = r"\u{";
/// The end of a u21 escape.
pub const U21_ESCAPE_END: Literal = "}";
/// The beginning of a u32 escape.
pub const U32_ESCAPE_START: Literal = r"\U";
/// The allowable group characters in Enso.
pub const GROUP_CHARS: Literal = "()[]{}";
/// The allowable operator characters in Enso.
pub const OPERATOR_CHARS: Literal = ";!$%&*+-/<>?^~|:\\";
}
// =========================
// === Utility Functions ===
// =========================
/// Get the first character of the lexeme, if it exists.
pub fn char(literal: &'static str) -> Option<char> {
literal.chars().next()
}
/// Get the first character of the lexeme, assuming that it exists.
pub fn unsafe_char(literal: &'static str) -> char {
char(literal).expect("The first character of the literal exists.")
}
/// Convert the lexeme into a pattern.
pub fn into_pattern(literal: &'static str) -> Pattern {
literal.into()
}
/// The length of the `literal`, counted in characters rather than bytes.
pub fn len(literal: &'static str) -> usize {
literal.chars().count()
}
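// --- Illustrative sketch (not part of the original source) ---
// `len` counts characters rather than bytes, so the two-character CRLF lexeme
// reports 2, and `char` peels off the first character of a lexeme:
#[allow(dead_code)]
fn _lexeme_utilities_example() {
    assert_eq!(len(literal::CRLF), 2);
    assert_eq!(char(literal::DOT), Some('.'));
}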

File diff suppressed because it is too large

View File

@ -1,46 +0,0 @@
//! This library defines the lexer for the syntax of the Enso language.
// === Features ===
#![feature(test)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
// ==============
// === Export ===
// ==============
pub mod escape;
pub mod lexeme;
pub mod lexer;
pub mod rule;
pub mod token;
/// A module that can be re-exported under the same name in the generation crate.
///
/// This is necessary to avoid issues with paths getting wonky when the code is generated from the
/// Enso lexer definition. In this project, imports should _not_ be made from the crate root
/// _except_ through use of this `library` module.
pub mod library {
pub use crate::escape;
pub use crate::lexeme;
pub use crate::rules;
pub use crate::token;
}
/// A collection of functionality for working with the lexer definition.
pub mod prelude {
pub use enso_flexer::prelude::logger::*;
pub use enso_flexer::prelude::*;
}

View File

@ -1,26 +0,0 @@
//! This file contains a macro to simplify writing the lexer rules.
// ===================
// === Rules Macro ===
// ===================
/// Define a group of rules for the lexer.
///
/// All of the rules must be defined for the same `state_name`, which must be the in-scope name of
/// the state for which the rules are being defined. Each `pattern` is a non-reference pattern that
/// the rule is being defined to match, and `code` is the code that will be executed when the rule
/// matches, omitting the (first) `reader` argument.
///
/// Branches are matched _in order_, from top-to-bottom, much like a standard `match` statement.
///
/// Please see `lexer.rs` for myriad examples of this macro's use.
#[macro_export]
macro_rules! rules {
($state_name:ident with $($pattern:expr => $path_root:ident $(.$path:ident)* ($($arg:tt)*)),+ $(,)?) => {
$($state_name.create_rule(&$pattern,stringify!{
$path_root $(.$path)* (reader,$($arg)*)
});)*
};
}
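// --- Hypothetical usage sketch (not part of the original source) ---
// The state name and callback names below are illustrative assumptions only,
// not APIs from this commit. Each branch expands to
// `state.create_rule(&pattern, "<stringified call>")`, where the stringified
// call has `reader` prepended as its first argument:
//
// rules! { initial_state with
//     definition_pattern::spaces()  => self.on_spaces(),
//     definition_pattern::newline() => self.on_newline(),
// }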

View File

@ -1,778 +0,0 @@
//! This file defines the various tokens required by the Enso lexer.
//!
//! This file makes heavy use of terminology from the Enso design documentation, particularly the
//! [syntax](https://enso.org/docs/developer/docs/enso/syntax) documentation. For the sake of
//! brevity, many terms will _not_ be defined here.
use crate::prelude::*;
use crate::lexeme;
// =============
// === Token ===
// =============
/// A lexer token.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Token {
/// The shape of the token.
pub shape: Shape,
/// The length (in characters) of this token.
pub length: usize,
/// The number of trailing spaces after this token before the next.
pub offset: usize,
}
impl Token {
/// Constructor.
pub fn new(shape: Shape, length: usize, offset: usize) -> Token {
Token { shape, length, offset }
}
/// Get the length that the token takes up in the program source.
pub fn source_length(&self) -> usize {
self.length + self.offset
}
}
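// --- Illustrative sketch (not part of the original source) ---
// `length` measures the token's own text and `offset` the trailing spaces
// before the next token, so `source_length` is their sum:
#[allow(dead_code)]
fn _source_length_example() {
    let token = Token::variable("foo", 2); // "foo" followed by two spaces.
    assert_eq!(token.length, 3);
    assert_eq!(token.offset, 2);
    assert_eq!(token.source_length(), 5);
}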
/// Constructors for the various forms of token.
impl Token {
/// Construct a token representing a referent identifier.
pub fn referent(name: impl Str, offset: usize) -> Token {
let str = name.into();
let length = str.chars().count();
let shape = Shape::Referent(str);
Token { shape, length, offset }
}
/// Construct a token representing a variable identifier.
pub fn variable(name: impl Str, offset: usize) -> Token {
let str = name.into();
let length = str.chars().count();
let shape = Shape::Variable(str);
Token { shape, length, offset }
}
/// Construct a token representing an external identifier.
pub fn external(name: impl Str, offset: usize) -> Token {
let str = name.into();
let length = str.chars().count();
let shape = Shape::External(str);
Token { shape, length, offset }
}
/// Construct a token representing a blank identifier.
pub fn blank(offset: usize) -> Token {
let shape = Shape::Blank;
let length = lexeme::len(lexeme::literal::BLANK_IDENT);
Token { shape, length, offset }
}
/// Construct a token representing an operator.
pub fn operator(name: impl Str, offset: usize) -> Token {
let name = name.into();
let length = name.chars().count();
let shape = Shape::Operator(name);
Token { shape, length, offset }
}
/// Construct a token representing a modifier operator.
pub fn modifier(name: impl Str, offset: usize) -> Token {
let name = name.into();
let modifier_len = lexeme::len(lexeme::literal::EQUALS);
let length = name.chars().count() + modifier_len;
let shape = Shape::Modifier(name);
Token { shape, length, offset }
}
/// Construct a token representing an annotation.
pub fn annotation(name_str: impl Str, offset: usize) -> Token {
let name = name_str.into();
let annotation_len = lexeme::len(lexeme::literal::ANNOTATION_SYMBOL);
let length = name.chars().count() + annotation_len;
let shape = Shape::Annotation(name);
Token { shape, length, offset }
}
/// Construct a token representing a number literal.
pub fn number(base: impl Str, num: impl Into<String>, offset: usize) -> Token {
let number = num.into();
let base = base.into();
let length = if base.is_empty() {
number.chars().count()
} else {
let base_sep_len = lexeme::len(lexeme::literal::NUMBER_BASE_SEPARATOR);
base.chars().count() + base_sep_len + number.chars().count()
};
let shape = Shape::Number { base, number };
Token { shape, length, offset }
}
/// Construct a token representing a dangling number base.
pub fn dangling_base(base: impl Str, offset: usize) -> Token {
let base_str = base.into();
let base_sep_len = lexeme::len(lexeme::literal::NUMBER_BASE_SEPARATOR);
let length = base_str.chars().count() + base_sep_len;
let shape = Shape::DanglingBase(base_str);
Token { shape, length, offset }
}
/// Construct a token representing a line of text.
pub fn text_line(style: TextStyle, segments: Vec<Token>, offset: usize) -> Token {
let segments_len: usize = segments.iter().map(|s| s.source_length()).sum();
let length = style.length() + segments_len;
let shape = Shape::TextLine { style, segments };
Token { shape, length, offset }
}
/// Construct a token representing an inline block text literal.
pub fn text_inline_block(style: TextStyle, segments: Vec<Token>, offset: usize) -> Token {
let segments_length: usize = segments.iter().map(|s| s.source_length()).sum();
let length = style.length() + segments_length;
let shape = Shape::TextInlineBlock { style, segments };
Token { shape, length, offset }
}
/// Construct a token representing a block of text.
pub fn text_block(
start_line_ending: LineEnding,
style: TextStyle,
lines: Vec<Token>,
indent: usize,
offset: usize,
) -> Token {
let length = style.length()
+ start_line_ending.size()
+ lines.iter().fold(0, |l, r| {
l + match r.shape {
Shape::Line { .. } => indent + r.source_length(),
Shape::BlankLine(_) => r.source_length(),
_ => unreachable_panic!("Text blocks should only contain lines."),
}
});
let shape = Shape::TextBlock { start_line_ending, style, lines };
Token { shape, length, offset }
}
/// Construct a token representing an invalid quote.
pub fn invalid_quote(bad_quotes: impl Str, offset: usize) -> Token {
let bad_string = bad_quotes.into();
let length = bad_string.chars().count();
let shape = Shape::InvalidQuote(bad_string);
Token { shape, length, offset }
}
/// Construct a token representing a raw text segment.
pub fn text_segment_raw(str: impl Str, offset: usize) -> Token {
let string = str.into();
let length = string.chars().count();
let shape = Shape::TextSegmentRaw(string);
Token { shape, length, offset }
}
/// Construct a token representing an escape sequence.
pub fn text_segment_escape(style: EscapeStyle, repr_str: impl Str, offset: usize) -> Token {
let repr = repr_str.into();
let length = style.size() + repr.chars().count();
let shape = Shape::TextSegmentEscape { style, repr };
Token { shape, length, offset }
}
/// Construct a token representing an escape sequence using a literal `shape`.
pub fn text_segment_escape_from_shape(shape: Shape, offset: usize) -> Token {
match &shape {
Shape::TextSegmentEscape { style, repr } => {
let length = style.size() + repr.chars().count();
Token { shape, length, offset }
}
_ => unreachable_panic!("Shape must be a TextSegmentEscape."),
}
}
/// Construct a token representing an interpolated text segment.
pub fn text_segment_interpolate(tokens: Vec<Token>, offset: usize) -> Token {
let length_of_interpolation_ticks = 2;
let length =
length_of_interpolation_ticks + tokens.iter().fold(0, |l, r| l + r.source_length());
let shape = Shape::TextSegmentInterpolate { tokens };
Token { shape, length, offset }
}
/// Construct a token representing an unclosed interpolated text segment.
pub fn text_segment_unclosed_interpolate(tokens: Vec<Token>, offset: usize) -> Token {
let length_of_interpolation_tick = 1;
let length =
length_of_interpolation_tick + tokens.iter().fold(0, |l, r| l + r.source_length());
let shape = Shape::TextSegmentUnclosedInterpolate { tokens };
Token { shape, length, offset }
}
/// Construct a token representing a line of tokens.
pub fn line(tokens: Vec<Token>, offset: usize, trailing_line_ending: LineEnding) -> Token {
let line_ending_len = trailing_line_ending.size();
let length = tokens.iter().fold(line_ending_len, |l, r| l + r.source_length());
let shape = Shape::Line { tokens, trailing_line_ending };
Token { shape, length, offset }
}
/// Construct a token representing a blank line.
///
/// The `offset` for blank lines is from the leftmost column, not from the parent block's
/// indentation.
pub fn blank_line(offset: usize, trailing_line_ending: LineEnding) -> Token {
let length = trailing_line_ending.size();
let shape = Shape::BlankLine(trailing_line_ending);
Token { shape, length, offset }
}
/// Construct a token representing a block.
pub fn block(block_type: BlockType, indent: usize, lines: Vec<Token>, offset: usize) -> Token {
let length = lines
.iter()
.map(|line| match line.shape {
Shape::Line { .. } => indent + line.source_length(),
Shape::BlankLine(_) => line.source_length(),
_ => unreachable_panic!("Tokens in a blocks should always be lines."),
})
.sum();
let shape = Shape::Block { block_type, indent, lines };
Token { shape, length, offset }
}
/// Construct a token representing an invalid suffix.
pub fn invalid_suffix(text: impl Str, offset: usize) -> Token {
let text = text.into();
let length = text.chars().count();
let shape = Shape::InvalidSuffix(text);
Token { shape, length, offset }
}
/// Construct a token representing an unrecognised lexeme.
pub fn unrecognized(text: impl Str, offset: usize) -> Token {
let text = text.into();
let length = text.chars().count();
let shape = Shape::Unrecognized(text);
Token { shape, length, offset }
}
/// Construct a token representing a disable comment.
pub fn disable_comment(text: impl Str, offset: usize) -> Token {
let text = text.into();
let comment_len = lexeme::len(lexeme::literal::COMMENT);
let length = text.chars().count() + comment_len;
let shape = Shape::DisableComment(text);
Token { shape, length, offset }
}
/// Construct a token representing a documentation comment.
pub fn doc_comment(lines: Vec<Token>, indent: usize, offset: usize) -> Token {
let length = lines
.iter()
.map(|line| match line.shape {
Shape::Line { .. } => indent + line.source_length(),
Shape::BlankLine(_) => line.source_length(),
_ => unreachable_panic!("Tokens in a doc comment should always be lines."),
})
.sum();
let shape = Shape::DocComment { lines, indent };
Token { shape, length, offset }
}
}
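// --- Illustrative sketch (not part of the original source) ---
// The constructors above derive `length` from the lexemes involved; for an
// explicit-base number literal the base, the `_` separator and the digits all
// count towards it:
#[allow(dead_code)]
fn _number_length_example() {
    let token = Token::number("16", "ff", 0);
    assert_eq!(token.length, 5); // "16" + "_" + "ff"
}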
// =================
// === BlockType ===
// =================
/// The type for an Enso Block token.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum BlockType {
/// A block made up of arguments to a function.
Continuous,
/// A block made up of separate lines.
Discontinuous,
}
// ==================
// === LineEnding ===
// ==================
/// The type of newline associated with the line.
#[derive(Copy, Clone, Debug, Display, Eq, Hash, PartialEq)]
pub enum LineEnding {
/// There is no newline.
None,
/// The unix-style line-feed (`'\n'`).
LF,
/// The windows-style carriage-return, line-feed (`"\r\n"`).
CRLF,
}
impl LineEnding {
const NO_LENGTH: usize = 0;
/// Get the number of rust `char`s that the newline type takes up.
pub fn size(self) -> usize {
match self {
Self::None => Self::NO_LENGTH,
Self::LF => lexeme::len(lexeme::literal::LF),
Self::CRLF => lexeme::len(lexeme::literal::CRLF),
}
}
}
// === Trait Impls ===
impl Default for LineEnding {
fn default() -> Self {
LineEnding::None
}
}
// =================
// === TextStyle ===
// =================
/// The style of the text literal.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum TextStyle {
// === Line ===
/// An interpolated text line literal.
FormatLine,
/// A raw text line literal.
RawLine,
/// An unclosed text line literal.
UnclosedLine,
// === Inline Block ===
/// A format inline block text literal.
FormatInlineBlock,
/// A raw inline block text literal.
RawInlineBlock,
// === Block ===
/// An interpolated text block literal.
FormatBlock,
/// A raw text block literal.
RawBlock,
}
impl TextStyle {
/// Calculate the length of the delimiters for a particular style of text literal.
pub fn length(self) -> usize {
match self {
TextStyle::FormatLine => lexeme::len(lexeme::literal::FORMAT_QUOTE) * 2,
TextStyle::RawLine => lexeme::len(lexeme::literal::RAW_QUOTE) * 2,
TextStyle::FormatInlineBlock => lexeme::len(lexeme::literal::FORMAT_BLOCK_QUOTE),
TextStyle::RawInlineBlock => lexeme::len(lexeme::literal::RAW_BLOCK_QUOTE),
TextStyle::UnclosedLine => lexeme::len(lexeme::literal::FORMAT_QUOTE),
TextStyle::FormatBlock => lexeme::len(lexeme::literal::FORMAT_BLOCK_QUOTE),
TextStyle::RawBlock => lexeme::len(lexeme::literal::RAW_BLOCK_QUOTE),
}
}
/// Check if the text literal is a line literal.
pub fn is_line_literal(self) -> bool {
matches!(self, TextStyle::RawLine | TextStyle::FormatLine | TextStyle::UnclosedLine)
}
/// Check if the text literal is an inline block literal.
pub fn is_inline_block_literal(self) -> bool {
matches!(self, TextStyle::FormatInlineBlock | TextStyle::RawInlineBlock)
}
/// Check if the text literal is a block literal.
pub fn is_block_literal(self) -> bool {
matches!(self, TextStyle::FormatBlock | TextStyle::RawBlock)
}
}
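// --- Illustrative sketch (not part of the original source) ---
// `length` counts only the delimiters of the literal: a format line is wrapped
// in two single quotes, while a format block is introduced by the three-quote
// lexeme:
#[allow(dead_code)]
fn _text_style_length_example() {
    assert_eq!(TextStyle::FormatLine.length(), 2);
    assert_eq!(TextStyle::FormatBlock.length(), 3);
}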
// ===================
// === EscapeStyle ===
// ===================
/// A description of the style of escape sequence seen.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum EscapeStyle {
/// A \xNN-style byte escape.
Byte,
/// Unicode 16-bit escape sequence.
U16,
/// Unicode 21-bit escape sequence.
U21,
/// Unicode 32-bit escape sequence.
U32,
/// A literal escape character.
Literal,
/// An invalid unicode escape.
InvalidUnicode,
/// An invalid escape.
Invalid,
/// An escape slash without any following escape.
Unfinished,
}
impl EscapeStyle {
const NO_ADDITIONAL_LENGTH: usize = 0;
/// Get the length taken up in source by the delimiters to an escape type.
pub fn size(self) -> usize {
match self {
EscapeStyle::Byte => lexeme::len(lexeme::literal::BYTE_ESCAPE_START),
EscapeStyle::Literal => lexeme::len(lexeme::literal::SLASH),
EscapeStyle::U16 => lexeme::len(lexeme::literal::U16_ESCAPE_START),
EscapeStyle::U32 => lexeme::len(lexeme::literal::U32_ESCAPE_START),
EscapeStyle::U21 => {
let start_len = lexeme::len(lexeme::literal::U21_ESCAPE_START);
let end_len = lexeme::len(lexeme::literal::U21_ESCAPE_END);
start_len + end_len
}
_ => Self::NO_ADDITIONAL_LENGTH,
}
}
}
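// --- Illustrative sketch (not part of the original source) ---
// `size` accounts for the escape's delimiters but not its digits: a U21 escape
// such as `\u{AB}` contributes the three-character `\u{` start plus the `}`
// end, while an unfinished escape adds nothing:
#[allow(dead_code)]
fn _escape_style_size_example() {
    assert_eq!(EscapeStyle::U21.size(), 4);
    assert_eq!(EscapeStyle::Unfinished.size(), 0);
}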
// =============
// === Shape ===
// =============
/// The shapes of tokens needed by the Enso lexer.
///
/// This is a very small set of shapes, because the [`Token`] type only deals with the tokens that
/// the lexer works with, not the full complexity of Enso's syntax.
#[allow(missing_docs)]
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Shape {
// === Identifiers ===
/// An identifier in referent form.
Referent(String),
/// An identifier in variable form.
Variable(String),
/// An identifier not conforming to the Enso identifier rules (e.g. a Java identifier).
External(String),
/// A blank identifier (`_`).
Blank,
/// An operator identifier.
Operator(String),
/// A modifier identifier.
Modifier(String),
/// An annotation.
Annotation(String),
// === Literals ===
/// A literal number.
Number {
/// The (optional) base for the number to be interpreted in.
base: String,
/// The number itself, possibly with a decimal point.
number: String,
},
/// A dangling base from a number literal.
DanglingBase(String),
/// A text line literal.
TextLine {
/// The type of literal being encoded.
style: TextStyle,
/// The segments that make up the line of text.
segments: Vec<Token>,
},
/// An inline block text literal.
TextInlineBlock {
/// The type of literal being encoded.
style: TextStyle,
/// The segments that make up the line of text.
segments: Vec<Token>,
},
/// A text block literal.
TextBlock {
/// The line ending that occurs directly after the opening quote marks.
start_line_ending: LineEnding,
/// The type of literal being encoded.
style: TextStyle,
/// The lines in the text block literal.
lines: Vec<Token>,
},
/// An invalid quote for a text literal.
InvalidQuote(String),
/// A segment of a line of text containing only literal text.
TextSegmentRaw(String),
/// A segment of a line of text that represents an escape sequence.
TextSegmentEscape {
/// The type of escape being represented.
style: EscapeStyle,
/// The literal escape sequence.
repr: String,
},
/// A segment of a line of text that contains an interpolated expression.
TextSegmentInterpolate {
/// The tokens making up the interpolated expression.
tokens: Vec<Token>,
},
/// An interpolated expression that hasn't been closed.
TextSegmentUnclosedInterpolate {
/// The tokens making up the interpolated expression.
tokens: Vec<Token>,
},
/// An invalid text segment (e.g. unclosed interpolate segment).
TextSegmentInvalid(String),
// === Lines ===
/// A line containing tokens.
///
/// The offset for a line is always zero, as it is contained in a block with a defined
/// indentation.
Line {
/// The tokens on the line.
tokens: Vec<Token>,
/// The line ending that _ends_ the line.
///
/// Please note that the concept of 'ending' the line is a bit strange, as blocks are
/// treated as tokens in their own right, and hence are included in lines.
trailing_line_ending: LineEnding,
},
/// A blank line.
///
/// The offset for a blank line is measured from the leftmost column, as its indentation may be
/// smaller than the enclosing block's indentation level.
BlankLine(LineEnding),
// === Block ===
/// A block of tokens.
Block {
/// The type of the block.
block_type: BlockType,
/// The leading indentation of the block.
indent: usize,
/// The lines in the block.
lines: Vec<Token>,
},
// === Errors ===
/// An invalid suffix.
InvalidSuffix(String),
/// An unrecognized token.
Unrecognized(String),
// === Comments ===
/// A disable comment (`# ...`).
DisableComment(String),
/// An Enso documentation comment (`## ...`).
DocComment {
/// The lines in the doc comment body. Each line must contain raw text segments only.
lines: Vec<Token>,
/// The indentation of the doc comment's body from the baseline.
indent: usize,
},
}
impl Shape {
/// Construct an identifier in referent form.
pub fn referent(name: impl Into<String>) -> Shape {
Shape::Referent(name.into())
}
/// Construct an identifier in variable form.
pub fn variable(name: impl Into<String>) -> Shape {
Shape::Variable(name.into())
}
/// Construct an identifier in external form.
pub fn external(name: impl Into<String>) -> Shape {
Shape::External(name.into())
}
/// Construct a blank identifier.
///
/// This is provided as a function for completeness.
pub fn blank() -> Shape {
Shape::Blank
}
/// Construct an operator identifier.
pub fn operator(opr: impl Into<String>) -> Shape {
Shape::Operator(opr.into())
}
/// Construct a modifier identifier.
pub fn modifier(opr: impl Into<String>) -> Shape {
Shape::Modifier(opr.into())
}
/// Construct an annotation identifier.
pub fn annotation(name: impl Into<String>) -> Shape {
Shape::Annotation(name.into())
}
/// Construct a number literal.
pub fn number(base: impl Into<String>, num: impl Into<String>) -> Shape {
let base = base.into();
let number = num.into();
Shape::Number { base, number }
}
/// Construct a dangling base literal.
pub fn dangling_base(base: impl Into<String>) -> Shape {
Shape::DanglingBase(base.into())
}
/// Construct a text line literal.
pub fn text_line(style: TextStyle, segments: Vec<Token>) -> Shape {
Shape::TextLine { style, segments }
}
/// Construct an inline block text literal.
pub fn text_inline_block(style: TextStyle, segments: Vec<Token>) -> Shape {
Shape::TextInlineBlock { style, segments }
}
/// Construct a text block literal.
pub fn text_block(start_line_ending: LineEnding, style: TextStyle, lines: Vec<Token>) -> Shape {
Shape::TextBlock { start_line_ending, style, lines }
}
/// Construct an invalid quote literal.
pub fn invalid_quote(bad_quotes: impl Str) -> Shape {
Shape::InvalidQuote(bad_quotes.into())
}
/// Construct a raw text segment.
pub fn text_segment_raw(text: impl Str) -> Shape {
Shape::TextSegmentRaw(text.into())
}
/// Construct a text segment containing an escape sequence.
pub fn text_segment_escape(style: EscapeStyle, repr_str: impl Str) -> Shape {
let repr = repr_str.into();
Shape::TextSegmentEscape { style, repr }
}
/// Construct a text segment containing an interpolated expression.
pub fn text_segment_interpolate(tokens: Vec<Token>) -> Shape {
Shape::TextSegmentInterpolate { tokens }
}
/// Construct a text segment containing an unclosed interpolated expression.
pub fn text_segment_unclosed_interpolate(tokens: Vec<Token>) -> Shape {
Shape::TextSegmentUnclosedInterpolate { tokens }
}
/// Construct an invalid text segment.
pub fn text_segment_invalid(str: impl Str) -> Shape {
Shape::TextSegmentInvalid(str.into())
}
/// Construct a line that contains tokens.
pub fn line(tokens: Vec<Token>, trailing_line_ending: LineEnding) -> Shape {
Shape::Line { tokens, trailing_line_ending }
}
/// Construct a line that is blank.
pub fn blank_line(trailing_line_ending: LineEnding) -> Shape {
Shape::BlankLine(trailing_line_ending)
}
/// Construct a block containing lines.
pub fn block(block_type: BlockType, indent: usize, lines: Vec<Token>) -> Shape {
Shape::Block { block_type, indent, lines }
}
/// Construct an invalid suffix.
pub fn invalid_suffix(text: impl Into<String>) -> Shape {
Shape::InvalidSuffix(text.into())
}
/// Construct an unrecognised token.
pub fn unrecognized(text: impl Into<String>) -> Shape {
Shape::Unrecognized(text.into())
}
/// Construct a disable comment shape.
pub fn disable_comment(text: impl Str) -> Shape {
Shape::DisableComment(text.into())
}
/// Construct a doc comment shape.
pub fn doc_comment(lines: Vec<Token>, indent: usize) -> Shape {
Shape::DocComment { lines, indent }
}
}
// ==============
// === Stream ===
// ==============
/// A representation of the Enso token stream.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Stream {
/// The tokens in the token stream.
tokens: Vec<Token>,
}
impl Stream {
/// Append the provided `token` to the token stream.
pub fn append(&mut self, token: Token) {
self.tokens.push(token)
}
/// Get a reference to the tokens in the stream.
pub fn tokens(&self) -> &Vec<Token> {
&self.tokens
}
/// Get the length of the elements in the token stream.
pub fn tokens_len(&self) -> usize {
self.tokens.iter().map(|token| token.length + token.offset).sum()
}
}
/// Get a consuming iterator over the token stream.
impl std::iter::IntoIterator for Stream {
type Item = Token;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.tokens.into_iter()
}
}
impl Deref for Stream {
type Target = Vec<Token>;
fn deref(&self) -> &Self::Target {
&self.tokens
}
}
impl DerefMut for Stream {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.tokens
}
}
// === Trait Impls ===
impl From<Vec<Token>> for Stream {
fn from(tokens: Vec<Token>) -> Self {
Stream { tokens }
}
}
impl From<Stream> for Vec<Token> {
fn from(stream: Stream) -> Self {
stream.tokens
}
}
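// --- Illustrative sketch (not part of the original source) ---
// `tokens_len` sums each token's text length together with its trailing
// offset, mirroring `Token::source_length`:
#[allow(dead_code)]
fn _stream_length_example() {
    let stream = Stream::from(vec![Token::variable("x", 1), Token::operator("=", 1)]);
    assert_eq!(stream.tokens_len(), 4);
}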

View File

@ -1,28 +0,0 @@
[package]
name = "lexer"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
enso-flexer = { version = "0.2.0", path = "../../flexer" }
enso-prelude = { version = "0.2.0", path = "../../../prelude" }
lexer-definition = { path = "../definition", version = "0.1.0" }
[build-dependencies]
enso-flexer = { version = "0.2.0", path = "../../flexer" }
lexer-definition = { path = "../definition", version = "0.1.0" }
[dev-dependencies]
criterion = "0.3"
[[bench]]
name = "lexer_time_bench"
harness = false

View File

@ -1,328 +0,0 @@
//! This file contains the sources that are replicated many times over for the purposes of
//! benchmarking the Enso lexer.
use criterion::black_box;
use criterion::Criterion;
use criterion::Throughput;
use std::time::Duration;
// ===============================
// === Benchmark Configuration ===
// ===============================
/// Configures the benchmarking process.
pub fn bench_config() -> Criterion {
Criterion::default()
.measurement_time(Duration::from_secs(60))
.warm_up_time(Duration::from_secs(3))
.sample_size(25)
.retain_baseline("EnsoLexer".to_string())
}
// =======================
// === Benchmark Setup ===
// =======================
/// The sizes of text to run the benchmarks over.
pub const SIZES: [(usize, &str); 4] =
[(1024, "1KB"), (1024 * 100, "100KB"), (1024 * 1024, "1MB"), (1024 * 1024 * 10, "10MB")];
// ==============================
// === Benchmarking Utilities ===
// ==============================
/// Execute the provided benchmark for each of the [`SIZES`] above.
pub fn run_bench_sizes(name: &str, input: &str, add_newline: bool, c: &mut Criterion) {
let mut group = c.benchmark_group(name);
SIZES.iter().for_each(|(size, size_name)| {
group.throughput(Throughput::Bytes(*size as u64));
let input = replicate_to_size(input, *size, add_newline);
group.bench_function(*size_name, |b| {
b.iter(|| {
lexer::run(black_box(input.as_str()));
})
});
})
}
/// This function replicates `input` until it reaches `size` (in bytes).
///
/// If this cannot be done exactly, it will err on the side of over-replication,
/// meaning that the output will be _larger_ than `size` bytes. If the size of
/// the input already exceeds `size`, a single copy is returned, with the
/// separator character appended.
pub fn replicate_to_size(input: &str, size: usize, add_newline: bool) -> String {
let input_size = input.len();
let times = 1 + (size / input_size);
let mut input_newline = input.to_string();
let to_add = if add_newline { '\n' } else { ' ' };
input_newline.push(to_add);
input_newline.repeat(times)
}
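// --- Illustrative sketch (not part of the original source) ---
// Over-replication in practice: a 26-byte alphabet replicated towards 1KB is
// copied 1 + 1024 / 26 = 40 times (each copy carrying the separator), so the
// result is slightly larger than the requested size:
#[allow(dead_code)]
fn _replicate_example() {
    let out = replicate_to_size("abcdefghijklmnopqrstuvwxyz", 1024, false);
    assert!(out.len() >= 1024);
}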
/// Replace any windows-style line-endings in `input` with unix-style line-endings.
fn preprocess(input: &str) -> String {
input.replace("\r\n", "\n")
}
// ==============
// === Macros ===
// ==============
#[macro_export]
macro_rules! bench {
(bench_name = $bench_name:literal; fun_name = $fun_name:ident; bench_input = $bench_input:expr;) => {
pub fn $fun_name(c: &mut Criterion) {
src::run_bench_sizes($bench_name, $bench_input.as_str(), true, c)
}
};
}
// =================================
// === Literal Benchmark Sources ===
// =================================
#[allow(missing_docs)]
pub mod literal {
use super::*;
pub mod number {
use super::*;
pub fn integer() -> String {
preprocess("12345")
}
pub fn integer_explicit_base() -> String {
preprocess("16_a4fd31")
}
pub fn decimal() -> String {
preprocess("1.3141")
}
pub fn decimal_explicit_base() -> String {
preprocess("10_1.000999")
}
pub fn error_base() -> String {
preprocess("10.2_2")
}
}
pub mod text {
use super::*;
pub fn format_line() -> String {
preprocess(r"'dearest creature in \n creation studying english pronunciation'")
}
pub fn format_inline_block() -> String {
preprocess(r"''' An inline block. It's a very good inline block carl \u{AB}")
}
pub fn format_block() -> String {
preprocess(
r#"''' Here is my block of format text. I can `interpolate + things` like that.
It goes on and on and on for `times` times because I feel like it.
Complex interpolated expression `x -> y ~> x | y` woo!
"#,
)
}
pub fn raw_line() -> String {
preprocess(r#""dearest creature in '''' creation studying english pronunciation""#)
}
pub fn raw_inline_block() -> String {
preprocess(r#"""" An inline block. It's a very good inline block carl ""#)
}
pub fn raw_block() -> String {
preprocess(
r#"""" Here is my block of raw text. `Interpolations` are nothing special here.
It goes on and on and on for I can escape \" though.
It also supports blank lines!
"#,
)
}
}
}
// ==============================
// === Name Benchmark Sources ===
// ==============================
#[allow(missing_docs)]
pub mod name {
use super::*;
pub fn line_of() -> String {
preprocess("Referent_Ident var_ident JavaType _ @annotation ticked_ident' number_1")
}
pub fn invalid_suffix() -> String {
preprocess("some_var'iable some_varД")
}
}
// ===================================
// === Operator Benchmarks Sources ===
// ===================================
#[allow(missing_docs)]
pub mod operator {
use super::*;
pub fn line_of() -> String {
preprocess("+ - * -> ~> <~ <- ! & | /")
}
pub fn dot_call() -> String {
preprocess(".== . != .<*> .*> .|>")
}
pub fn invalid_suffix() -> String {
preprocess(".... +==")
}
}
// ================================
// === Block Benchmarks Sources ===
// ================================
#[allow(missing_docs)]
pub mod block {
use super::*;
pub fn top_level() -> String {
preprocess("foo\nbar\nbaz")
}
pub fn nested() -> String {
preprocess("foo\nbar\n baz\n quux")
}
pub fn deeply_nested() -> String {
preprocess(
r#"foo
bar
baz
quux
bim
bam
oh
no
"#,
)
}
}
// ===================================
// === Comments Benchmarks Sources ===
// ===================================
#[allow(missing_docs)]
pub mod comment {
use super::*;
pub fn line() -> String {
preprocess("# foo bar baz I have a really long line comment here that goes on and on")
}
pub fn in_line() -> String {
preprocess("a + b # A useless comment: add a to b")
}
pub fn doc() -> String {
preprocess(
r#"## I have a really big doc comment here
That just keeps prattling on and on and on.
With blank lines
Forever
and
ever
and
ever
documented
"#,
)
}
}
// ===========================
// === Combined Benchmarks ===
// ===========================
pub mod combined {
use super::*;
pub fn simple() -> String {
preprocess(
r#"
import Base.Meta
## Decompose the value using runtime reflection and print its decomposition.
Main.print_decomp a b =
y = a + b
decomp = Meta.decompose y
Io.println decomp
"#,
)
}
pub fn complex() -> String {
preprocess(
r#"
import Base.Meta
## Frobnicate the doodads by constructing a new type operator through runtime reflection such that
it can be passed to another language.
! WARNING
Type-checking code like this is virtually impossible, and it is treated as `Dynamic` inside
Enso code.
Main.foo a b =
y = x -> z ->
ty = a.gen_type (~>) (<-) b
ty (z x)
decomp = Meta.decompose (y a b)
Io.println decomp
## Execute the main function of this project.
main =
func = Meta.reify (here.foo "My_Name" "my_field")
Io.println(func)
"#,
)
}
}

View File

@ -1,300 +0,0 @@
//! This file contains the time-based benchmarks for the Enso lexer.
mod lexer_bench_sources;
use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Criterion;
use criterion::Throughput;
use lexer_bench_sources as src;
// ==========================
// === Literal Benchmarks ===
// ==========================
bench! {
bench_name = "Integer";
fun_name = bench_literal_number_integer;
bench_input = src::literal::number::integer();
}
bench! {
bench_name = "Integer Explicit Base";
fun_name = bench_literal_number_integer_explicit_base;
bench_input = src::literal::number::integer_explicit_base();
}
bench! {
bench_name = "Decimal";
fun_name = bench_literal_number_decimal;
bench_input = src::literal::number::decimal();
}
bench! {
bench_name = "Decimal Explicit Base";
fun_name = bench_literal_number_decimal_explicit_base;
bench_input = src::literal::number::decimal_explicit_base();
}
bench! {
bench_name = "Number Error Base";
fun_name = bench_literal_number_error_base;
bench_input = src::literal::number::error_base();
}
bench! {
bench_name = "Text Format Line";
fun_name = bench_literal_text_format_line;
bench_input = src::literal::text::format_line();
}
bench! {
bench_name = "Text Format Inline Block";
fun_name = bench_literal_text_format_inline_block;
bench_input = src::literal::text::format_inline_block();
}
bench! {
bench_name = "Text Format Block";
fun_name = bench_literal_text_format_block;
bench_input = src::literal::text::format_block();
}
bench! {
bench_name = "Text Raw Line";
fun_name = bench_literal_text_raw_line;
bench_input = src::literal::text::raw_line();
}
bench! {
bench_name = "Text Raw Inline Block";
fun_name = bench_literal_text_raw_inline_block;
bench_input = src::literal::text::raw_inline_block();
}
bench! {
bench_name = "Text Raw Block";
fun_name = bench_literal_text_raw_block;
bench_input = src::literal::text::raw_block();
}
criterion_group! {
name = literal_benchmarks;
config = src::bench_config();
targets =
bench_literal_number_integer,
bench_literal_number_integer_explicit_base,
bench_literal_number_decimal,
bench_literal_number_decimal_explicit_base,
bench_literal_number_error_base,
bench_literal_text_format_line,
bench_literal_text_format_inline_block,
bench_literal_text_format_block,
bench_literal_text_raw_line,
bench_literal_text_raw_inline_block,
bench_literal_text_raw_block,
}
// ========================
// === Names Benchmarks ===
// ========================
bench! {
bench_name = "Line of Names";
fun_name = bench_names_line_of;
bench_input = src::name::line_of();
}
bench! {
bench_name = "Names with invalid Suffixes";
fun_name = bench_names_invalid_suffix;
bench_input = src::name::invalid_suffix();
}
criterion_group! {
name = name_benchmarks;
config = src::bench_config();
targets =
bench_names_line_of,
bench_names_invalid_suffix,
}
// ===========================
// === Operator Benchmarks ===
// ===========================
bench! {
bench_name = "Line of Operators";
fun_name = bench_operator_line_of;
bench_input = src::operator::line_of();
}
bench! {
bench_name = "Dot Call Operators";
fun_name = bench_operator_dot_call;
bench_input = src::operator::dot_call();
}
bench! {
bench_name = "Operators with Invalid Suffixes";
fun_name = bench_operator_invalid_suffix;
bench_input = src::operator::invalid_suffix();
}
criterion_group! {
name = operator_benchmarks;
config = src::bench_config();
targets =
bench_operator_line_of,
bench_operator_dot_call,
bench_operator_invalid_suffix
}
// ========================
// === Block Benchmarks ===
// ========================
bench! {
bench_name = "Top Level Block";
fun_name = bench_block_top_level;
bench_input = src::block::top_level();
}
bench! {
bench_name = "Nested Block";
fun_name = bench_block_nested;
bench_input = src::block::nested();
}
bench! {
bench_name = "Deeply Nested Blocks";
fun_name = bench_block_deeply_nested;
bench_input = src::block::deeply_nested();
}
criterion_group! {
name = block_benchmarks;
config = src::bench_config();
targets =
bench_block_top_level,
bench_block_nested,
bench_block_deeply_nested,
}
// ==========================
// === Comment Benchmarks ===
// ==========================
bench! {
bench_name = "Line Comment";
fun_name = bench_comment_line;
bench_input = src::comment::line();
}
bench! {
bench_name = "Comment in Line";
fun_name = bench_comment_in_line;
bench_input = src::comment::in_line();
}
bench! {
bench_name = "Doc Comment";
fun_name = bench_comment_doc;
bench_input = src::comment::doc();
}
criterion_group! {
name = comment_benchmarks;
config = src::bench_config();
targets =
bench_comment_line,
bench_comment_in_line,
bench_comment_doc,
}
// ===========================
// === Combined Benchmarks ===
// ===========================
bench! {
bench_name = "Simple Combined Example";
fun_name = bench_combined_simple;
bench_input = src::combined::simple();
}
bench! {
bench_name = "Complex Combined Example";
fun_name = bench_combined_complex;
bench_input = src::combined::complex();
}
criterion_group! {
name = combined_benchmarks;
config = src::bench_config();
targets =
bench_combined_simple,
bench_combined_complex,
}
// ===================
// === Comparisons ===
// ===================
fn bench_rust_reader(c: &mut Criterion) {
let mut group = c.benchmark_group("Rust Vector");
src::SIZES.iter().for_each(|(size, name)| {
group.throughput(Throughput::Bytes(*size as u64));
let input = "abcdefghijklmnopqrstuvwxyz".repeat(1 + size / 26);
group.bench_function(*name, |b| {
b.iter(|| {
let mut counter = 0usize;
for c in black_box(input.as_str()).chars() {
if c == 'f' {
counter += 1;
}
}
counter
})
});
})
}
criterion_group! {
name = rust_comparison;
config = src::bench_config();
targets =
bench_rust_reader,
}
// ===================
// === The Harness ===
// ===================
criterion_main!(
literal_benchmarks,
name_benchmarks,
operator_benchmarks,
block_benchmarks,
comment_benchmarks,
combined_benchmarks,
rust_comparison,
);

View File

@ -1,34 +0,0 @@
use std::io::prelude::*;
use enso_flexer::Definition;
use enso_flexer::State;
use lexer_definition::lexer::EnsoLexer;
use std::fs::File;
/// Generates the lexer engine and saves the result into the file `src/engine.rs`.
///
/// The content of the generated file can be used with the `include!` macro.
fn generate_engine() -> std::io::Result<()> {
let definition_path = "../definition/src/lexer.rs";
let output_directory = "src/generated";
let _ = std::fs::create_dir(output_directory);
let output_path = "src/generated/engine.rs";
let definition_error = format!("The lexer definition should exist at {}.", definition_path);
let output_error = format!("Cannot open output file at {}.", output_path);
let mut lexer_def = File::open(definition_path).expect(&definition_error);
let mut contents = String::new();
let mut file = File::create(output_path).expect(&output_error);
let lexer = EnsoLexer::define();
let engine = lexer.specialize().unwrap();
lexer_def.read_to_string(&mut contents).expect("Unable to read lexer definition.");
file.write_all(contents.as_bytes()).expect("Unable to write lexer definition.");
file.write_all("\n".as_bytes())?;
file.write_all(engine.as_bytes()).expect("Unable to write lexer specialization.");
Ok(())
}
fn main() -> std::io::Result<()> {
generate_engine()
}

View File

@ -1,11 +0,0 @@
//! This module re-exports the generated lexer sources.
// ==============
// === Export ===
// ==============
pub mod engine;

View File

@ -1,22 +0,0 @@
//! A driver for the Enso lexer.
use crate::prelude::*;
use crate::generated::engine::EnsoLexer;
use crate::library::token;
use crate::prelude::reader::decoder::DecoderUTF8;
use enso_flexer::LexingResult;
// ====================
// === Lexer Driver ===
// ====================
/// Execute the lexer on the provided `input`, assuming utf-8 encoding.
pub fn run(input: impl AsRef<str>) -> LexingResult<token::Stream> {
let mut lexer = EnsoLexer::new();
let reader = Reader::new(input.as_ref().as_bytes(), DecoderUTF8());
lexer.run(reader)
}
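// --- Illustrative sketch (not part of the original source) ---
// Driving the lexer over a small input; only the signature shown above is
// assumed here:
#[allow(dead_code)]
fn _run_example() {
    let _result: LexingResult<token::Stream> = run("foo = 10");
}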

View File

@ -1,41 +0,0 @@
//! This module exports the interface to the generated Enso lexer.
// === Features ===
#![feature(test)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
// ==============
// === Export ===
// ==============
#[rustfmt::skip]
pub mod generated;
pub mod lexer;
pub use crate::lexer::*;
/// Support libraries for the lexer definition.
///
/// This is an intentional re-export in this crate's namespace.
pub mod library {
pub use lexer_definition::library::*;
}
/// A library of commonly useful functionality.
mod prelude {
pub use lexer_definition::prelude::*;
}

View File

@ -1,277 +0,0 @@
//! This file contains tests for lexing blocks in the Enso lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod test_utils;
use lexer_definition::library::*;
use test_utils::*;
use lexer_definition::library::token::Token;
use lexer_definition::token::BlockType;
use lexer_definition::token::LineEnding;
// ==============
// === Blocks ===
// ==============
#[test]
fn function_call() {
let input = make_unix_line_endings(
r#"f
argument_1
argument_2
fn a1 a2 a3
argument_4
argument_5"#,
);
let block_fn_args = Token::block(
BlockType::Continuous,
4,
vec![
Token::line(vec![Token::variable("argument_1", 0)], 0, LineEnding::LF),
Token::line(vec![Token::variable("argument_2", 0)], 0, LineEnding::LF),
Token::line(
vec![
Token::variable("fn", 0),
Token::variable("a1", 1),
Token::variable("a2", 1),
Token::variable("a3", 1),
],
0,
LineEnding::LF,
),
Token::line(vec![Token::variable("argument_4", 0)], 0, LineEnding::LF),
Token::line(vec![Token::variable("argument_5", 0)], 0, LineEnding::None),
],
0,
);
let top_level_first_line =
Token::line(vec![Token::variable("f", 0), block_fn_args], 0, LineEnding::LF);
let top_level_block = token::Stream::from(vec![Token::block(
BlockType::Continuous,
0,
vec![top_level_first_line],
0,
)]);
assert_lexes(input, top_level_block);
}
#[test]
fn empty_lines() {
let input = "f\r\n a\n\n b\n";
let nested_block = Token::block(
BlockType::Continuous,
4,
vec![
Token::line(vec![Token::variable("a", 0)], 0, LineEnding::LF),
Token::blank_line(0, LineEnding::LF),
Token::line(vec![Token::variable("b", 0)], 0, LineEnding::LF),
],
0,
);
let top_line = Token::line(vec![Token::variable("f", 0), nested_block], 0, LineEnding::CRLF);
let expected =
token::Stream::from(vec![Token::block(BlockType::Continuous, 0, vec![top_line], 0)]);
assert_lexes(input, expected);
}
#[test]
fn top_level() {
let input = make_unix_line_endings(
r#"
foo
bar
baz
"#,
);
let expected = token::Stream::from(vec![Token::block(
BlockType::Continuous,
0,
vec![
Token::blank_line(0, LineEnding::LF),
Token::blank_line(0, LineEnding::LF),
Token::line(vec![Token::variable("foo", 0)], 0, LineEnding::LF),
Token::line(vec![Token::variable("bar", 0)], 0, LineEnding::LF),
Token::line(vec![Token::variable("baz", 0)], 0, LineEnding::LF),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn with_operator() {
let input = make_unix_line_endings(
r#"x ->
foo x 1
"#,
);
let nested_block = Token::block(
BlockType::Discontinuous,
4,
vec![Token::line(
vec![Token::variable("foo", 0), Token::variable("x", 1), Token::number("", "1", 1)],
0,
LineEnding::LF,
)],
0,
);
let expected = token::Stream::from(vec![Token::block(
BlockType::Continuous,
0,
vec![Token::line(
vec![Token::variable("x", 0), Token::operator("->", 1), nested_block],
0,
LineEnding::LF,
)],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn with_nesting() {
let input = make_unix_line_endings(
r#"
some_long_thing
foo ->
Bar
baz
quux
"#,
);
let function_block = Token::block(
BlockType::Discontinuous,
8,
vec![
Token::line(vec![Token::referent("Bar", 0)], 0, LineEnding::LF),
Token::line(vec![Token::variable("baz", 0)], 0, LineEnding::LF),
Token::blank_line(0, LineEnding::LF),
],
0,
);
let foo_block = Token::block(
BlockType::Continuous,
4,
vec![
Token::line(
vec![Token::variable("foo", 0), Token::operator("->", 1), function_block],
0,
LineEnding::LF,
),
Token::line(vec![Token::variable("quux", 0)], 0, LineEnding::LF),
],
0,
);
let expected = token::Stream::from(vec![Token::block(
BlockType::Continuous,
0,
vec![
Token::blank_line(0, LineEnding::LF),
Token::line(vec![Token::variable("some_long_thing", 0), foo_block], 0, LineEnding::LF),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn multiple_dedent() {
let input = make_unix_line_endings(
r#"
some_long_thing
foo ->
Bar
baz
quux
"#,
);
let function_block = Token::block(
BlockType::Discontinuous,
8,
vec![
Token::line(vec![Token::referent("Bar", 0)], 0, LineEnding::LF),
Token::line(vec![Token::variable("baz", 0)], 0, LineEnding::LF),
],
0,
);
let foo_block = Token::block(
BlockType::Continuous,
4,
vec![Token::line(
vec![Token::variable("foo", 0), Token::operator("->", 1), function_block],
0,
LineEnding::LF,
)],
0,
);
let expected = token::Stream::from(vec![Token::block(
BlockType::Continuous,
0,
vec![
Token::blank_line(0, LineEnding::LF),
Token::line(vec![Token::variable("some_long_thing", 0), foo_block], 0, LineEnding::LF),
Token::line(vec![Token::variable("quux", 0)], 0, LineEnding::LF),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn extra_indented_blank_lines() {
let input = "a\n b\n \n \n c";
let indented_block = Token::block(
BlockType::Continuous,
4,
vec![
Token::line(vec![Token::variable("b", 0)], 0, LineEnding::LF),
Token::blank_line(8, LineEnding::LF),
Token::blank_line(2, LineEnding::LF),
Token::line(vec![Token::variable("c", 0)], 0, LineEnding::None),
],
0,
);
let top_level_line =
Token::line(vec![Token::variable("a", 0), indented_block], 0, LineEnding::LF);
let expected =
token::Stream::from(vec![Token::block(BlockType::Continuous, 0, vec![top_level_line], 0)]);
assert_lexes(input, expected);
}
#[test]
fn length_unix() {
let input = "a\n b\n c";
assert_block_has_length(input, 13);
}
#[test]
fn length_windows() {
let input = "a\r\n b\r\n c";
assert_block_has_length(input, 15);
}
#[test]
fn length_mixed() {
let input = "a\r\n b\n c\n d";
assert_block_has_length(input, 20);
}

View File

@ -1,630 +0,0 @@
//! This file contains tests for lexing full-on Enso with the lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod test_utils;
use lexer_definition::library::*;
use test_utils::*;
use lexer_definition::library::token::Token;
// ================
// === Combined ===
// ================
#[test]
fn method_definition() {
let input = make_unix_line_endings(
r#"## Traverse the heterogeneous list, applying the provided polymorphic function
wherever it matches.
@Tail_Call
map : forall ts ts' => (this : H_List ts) -> (exists a b . a ~> b) -> H_List ts'
map this fn -> case this.types of
Cons x xs ->
x' = fn x
x.Cons (map xs)
x -> fn x
"#,
);
let doc_comment = Token::line(
vec![Token::doc_comment(
vec![
Token::line(
vec![Token::text_segment_raw(
"Traverse the heterogeneous list, applying the provided polymorphic \
function",
0,
)],
0,
token::LineEnding::LF,
),
Token::line(
vec![Token::text_segment_raw("wherever it matches.", 0)],
0,
token::LineEnding::LF,
),
],
4,
0,
)],
0,
token::LineEnding::None,
);
let annotation = Token::line(vec![Token::annotation("Tail_Call", 0)], 0, token::LineEnding::LF);
let signature = Token::line(
vec![
Token::variable("map", 0),
Token::operator(":", 1),
Token::variable("forall", 1),
Token::variable("ts", 1),
Token::variable("ts'", 1),
Token::operator("=>", 1),
Token::operator("(", 1),
Token::variable("this", 0),
Token::operator(":", 1),
Token::referent("H_List", 1),
Token::variable("ts", 1),
Token::operator(")", 0),
Token::operator("->", 1),
Token::operator("(", 1),
Token::variable("exists", 0),
Token::variable("a", 1),
Token::variable("b", 1),
Token::operator(".", 1),
Token::variable("a", 1),
Token::operator("~>", 1),
Token::variable("b", 1),
Token::operator(")", 0),
Token::operator("->", 1),
Token::referent("H_List", 1),
Token::variable("ts'", 1),
],
0,
token::LineEnding::LF,
);
let cons_branch_body = Token::block(
token::BlockType::Discontinuous,
8,
vec![
Token::line(
vec![
Token::variable("x'", 0),
Token::operator("=", 1),
Token::variable("fn", 1),
Token::variable("x", 1),
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::variable("x", 0),
Token::operator(".", 0),
Token::referent("Cons", 0),
Token::operator("(", 1),
Token::variable("map", 0),
Token::variable("xs", 1),
Token::operator(")", 0),
],
0,
token::LineEnding::LF,
),
],
0,
);
let case_body = Token::block(
token::BlockType::Continuous,
4,
vec![
Token::line(
vec![
Token::referent("Cons", 0),
Token::variable("x", 1),
Token::variable("xs", 1),
Token::operator("->", 1),
cons_branch_body,
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::variable("x", 0),
Token::operator("->", 1),
Token::variable("fn", 1),
Token::variable("x", 1),
],
0,
token::LineEnding::LF,
),
],
0,
);
let function = Token::line(
vec![
Token::variable("map", 0),
Token::variable("this", 1),
Token::variable("fn", 1),
Token::operator("->", 1),
Token::variable("case", 1),
Token::variable("this", 1),
Token::operator(".", 0),
Token::variable("types", 0),
Token::variable("of", 1),
case_body,
],
0,
token::LineEnding::LF,
);
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![doc_comment, annotation, signature, function],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn complex_type() {
let input = make_unix_line_endings(
r#"
type Maybe a
type Just item:a
Nothing
is_just = case this of
Just _ -> True
Nothing -> False
"#,
);
let case_block = Token::block(
token::BlockType::Continuous,
8,
vec![
Token::line(
vec![
Token::referent("Just", 0),
Token::blank(1),
Token::operator("->", 2),
Token::referent("True", 1),
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::referent("Nothing", 0),
Token::operator("->", 1),
Token::referent("False", 1),
],
0,
token::LineEnding::LF,
),
],
0,
);
let type_body = Token::block(
token::BlockType::Continuous,
4,
vec![
Token::line(
vec![
Token::variable("type", 0),
Token::referent("Just", 1),
Token::variable("item", 1),
Token::operator(":", 0),
Token::variable("a", 0),
],
0,
token::LineEnding::LF,
),
Token::line(vec![Token::referent("Nothing", 0)], 0, token::LineEnding::LF),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![
Token::variable("is_just", 0),
Token::operator("=", 1),
Token::variable("case", 1),
Token::variable("this", 1),
Token::variable("of", 1),
case_block,
],
0,
token::LineEnding::LF,
),
],
0,
);
let complex_type = Token::line(
vec![
Token::variable("type", 0),
Token::referent("Maybe", 1),
Token::variable("a", 1),
type_body,
],
0,
token::LineEnding::LF,
);
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![Token::blank_line(0, token::LineEnding::LF), complex_type],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn imports_exports() {
let input = make_unix_line_endings(
r#"import Base.List
import Base.Number.Extensions
from Standard.Builtins import Unit, Number, Integer, Any, True, False
from Standard.Builtins export all
from Base.List export Nil, Cons
from Base.Number.Extensions export all hiding Math
polyglot java import com.ibm.icu.text.BreakIterator
polyglot java import org.enso.base.Text_Utils
"#,
);
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![
Token::line(
vec![
Token::variable("import", 0),
Token::referent("Base", 1),
Token::operator(".", 0),
Token::referent("List", 0),
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::variable("import", 0),
Token::referent("Base", 1),
Token::operator(".", 0),
Token::referent("Number", 0),
Token::operator(".", 0),
Token::referent("Extensions", 0),
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::variable("from", 0),
Token::referent("Standard", 1),
Token::operator(".", 0),
Token::referent("Builtins", 0),
Token::variable("import", 1),
Token::referent("Unit", 1),
Token::operator(",", 0),
Token::referent("Number", 1),
Token::operator(",", 0),
Token::referent("Integer", 1),
Token::operator(",", 0),
Token::referent("Any", 1),
Token::operator(",", 0),
Token::referent("True", 1),
Token::operator(",", 0),
Token::referent("False", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![
Token::variable("from", 0),
Token::referent("Standard", 1),
Token::operator(".", 0),
Token::referent("Builtins", 0),
Token::variable("export", 1),
Token::variable("all", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![
Token::variable("from", 0),
Token::referent("Base", 1),
Token::operator(".", 0),
Token::referent("List", 0),
Token::variable("export", 1),
Token::referent("Nil", 1),
Token::operator(",", 0),
Token::referent("Cons", 1),
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::variable("from", 0),
Token::referent("Base", 1),
Token::operator(".", 0),
Token::referent("Number", 0),
Token::operator(".", 0),
Token::referent("Extensions", 0),
Token::variable("export", 1),
Token::variable("all", 1),
Token::variable("hiding", 1),
Token::referent("Math", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![
Token::variable("polyglot", 0),
Token::variable("java", 1),
Token::variable("import", 1),
Token::variable("com", 1),
Token::operator(".", 0),
Token::variable("ibm", 0),
Token::operator(".", 0),
Token::variable("icu", 0),
Token::operator(".", 0),
Token::variable("text", 0),
Token::operator(".", 0),
Token::external("BreakIterator", 0),
],
0,
token::LineEnding::LF,
),
Token::line(
vec![
Token::variable("polyglot", 0),
Token::variable("java", 1),
Token::variable("import", 1),
Token::variable("org", 1),
Token::operator(".", 0),
Token::variable("enso", 0),
Token::operator(".", 0),
Token::variable("base", 0),
Token::operator(".", 0),
Token::referent("Text_Utils", 0),
],
0,
token::LineEnding::LF,
),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn some_stdlib() {
let input = make_unix_line_endings(
r#"from Standard.Base import all
## The top-level entry point for a test suite.
type Suite specs
## PRIVATE
type Spec name behaviors
## PRIVATE
type Behavior name result
## PRIVATE
Behavior.is_fail = this.result.is_fail
## PRIVATE
Spec.is_fail = this.behaviors.any is_fail
## PRIVATE
Suite.is_fail = this.specs.any is_fail
"#,
);
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![
Token::line(
vec![
Token::variable("from", 0),
Token::referent("Standard", 1),
Token::operator(".", 0),
Token::referent("Base", 0),
Token::variable("import", 1),
Token::variable("all", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw(
"The top-level entry point for a test suite.",
0,
)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::line(
vec![
Token::variable("type", 0),
Token::referent("Suite", 1),
Token::variable("specs", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("PRIVATE", 0)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::line(
vec![
Token::variable("type", 0),
Token::referent("Spec", 1),
Token::variable("name", 1),
Token::variable("behaviors", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("PRIVATE", 0)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::line(
vec![
Token::variable("type", 0),
Token::referent("Behavior", 1),
Token::variable("name", 1),
Token::variable("result", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("PRIVATE", 0)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::line(
vec![
Token::referent("Behavior", 0),
Token::operator(".", 0),
Token::variable("is_fail", 0),
Token::operator("=", 1),
Token::variable("this", 1),
Token::operator(".", 0),
Token::variable("result", 0),
Token::operator(".", 0),
Token::variable("is_fail", 0),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("PRIVATE", 0)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::line(
vec![
Token::referent("Spec", 0),
Token::operator(".", 0),
Token::variable("is_fail", 0),
Token::operator("=", 1),
Token::variable("this", 1),
Token::operator(".", 0),
Token::variable("behaviors", 0),
Token::operator(".", 0),
Token::variable("any", 0),
Token::variable("is_fail", 1),
],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("PRIVATE", 0)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::line(
vec![
Token::referent("Suite", 0),
Token::operator(".", 0),
Token::variable("is_fail", 0),
Token::operator("=", 1),
Token::variable("this", 1),
Token::operator(".", 0),
Token::variable("specs", 0),
Token::operator(".", 0),
Token::variable("any", 0),
Token::variable("is_fail", 1),
],
0,
token::LineEnding::LF,
),
],
0,
)]);
assert_lexes(input, expected);
}


@ -1,297 +0,0 @@
//! This file contains tests for lexing comments in the Enso lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod test_utils;
use lexer_definition::library::*;
use test_utils::*;
use lexer_definition::library::token::Token;
// ================
// === Comments ===
// ================
#[test]
fn disable_eof() {
let input = "# Here is a nice long comment string.";
let expected = token::Stream::from(vec![Token::disable_comment(
" Here is a nice long comment string.",
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn disable_lf() {
let input = "# Here is a nice long comment string.\n";
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![Token::line(
vec![Token::disable_comment(" Here is a nice long comment string.", 0)],
0,
token::LineEnding::LF,
)],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn disable_crlf() {
let input = "# Here is a nice long comment string.\r\n";
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![Token::line(
vec![Token::disable_comment(" Here is a nice long comment string.", 0)],
0,
token::LineEnding::CRLF,
)],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn disable_in_line() {
let input = "a + b <*> N # Compare the frobnicators.";
let expected = token::Stream::from(vec![
Token::variable("a", 0),
Token::operator("+", 1),
Token::variable("b", 1),
Token::operator("<*>", 1),
Token::referent("N", 1),
Token::disable_comment(" Compare the frobnicators.", 1),
]);
assert_lexes(input, expected)
}
#[test]
fn disable_in_interpolate() {
let input = "'String `1 + 1 # add` stuff.'";
let expected = token::Stream::from(vec![Token::text_line(
token::TextStyle::FormatLine,
vec![
Token::text_segment_raw("String ", 0),
Token::text_segment_interpolate(
vec![
Token::number("", "1", 0),
Token::operator("+", 1),
Token::number("", "1", 1),
Token::unrecognized("#", 1),
Token::variable("add", 1),
],
0,
),
Token::text_segment_raw(" stuff.", 0),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn doc_single_line_eof() {
let input = "## Foo bar baz";
let expected = token::Stream::from(vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("Foo bar baz", 0)],
0,
token::LineEnding::None,
)],
3,
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn doc_single_line_lf() {
let input = "## Foo bar baz\n";
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("Foo bar baz", 0)],
0,
token::LineEnding::LF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::blank_line(0, token::LineEnding::None),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn doc_single_line_crlf() {
let input = "## Foo bar baz\r\n";
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![
Token::line(
vec![Token::doc_comment(
vec![Token::line(
vec![Token::text_segment_raw("Foo bar baz", 0)],
0,
token::LineEnding::CRLF,
)],
3,
0,
)],
0,
token::LineEnding::None,
),
Token::blank_line(0, token::LineEnding::None),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn doc_in_interpolate() {
let input = "'String `1 + 1 ## add` stuff.'";
let expected = token::Stream::from(vec![Token::text_line(
token::TextStyle::FormatLine,
vec![
Token::text_segment_raw("String ", 0),
Token::text_segment_interpolate(
vec![
Token::number("", "1", 0),
Token::operator("+", 1),
Token::number("", "1", 1),
Token::unrecognized("##", 1),
Token::variable("add", 1),
],
0,
),
Token::text_segment_raw(" stuff.", 0),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn doc_multi_line() {
let input = make_unix_line_endings(
r#"## Here is a doc comment.
It spans multiple lines.
Some are indented much further.
And this is okay.
It keeps going, even with blank lines.
Until the indentation decreases back.
trailing_blanks_not_part_of_comment"#,
);
let doc_comment = Token::doc_comment(
vec![
Token::line(
vec![Token::text_segment_raw("Here is a doc comment.", 0)],
0,
token::LineEnding::LF,
),
Token::line(
vec![Token::text_segment_raw("It spans multiple lines.", 0)],
0,
token::LineEnding::LF,
),
Token::line(
vec![Token::text_segment_raw(" Some are indented much further.", 0)],
0,
token::LineEnding::LF,
),
Token::line(
vec![Token::text_segment_raw(" And this is okay.", 0)],
0,
token::LineEnding::LF,
),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::text_segment_raw("It keeps going, even with blank lines.", 0)],
0,
token::LineEnding::LF,
),
Token::line(
vec![Token::text_segment_raw("Until the indentation decreases back.", 0)],
0,
token::LineEnding::LF,
),
],
4,
0,
);
let expected = token::Stream::from(vec![Token::block(
token::BlockType::Continuous,
0,
vec![
Token::line(vec![doc_comment], 0, token::LineEnding::None),
Token::blank_line(0, token::LineEnding::LF),
Token::line(
vec![Token::variable("trailing_blanks_not_part_of_comment", 0)],
0,
token::LineEnding::None,
),
],
0,
)]);
assert_lexes(input, expected);
}
#[test]
fn doc_mixed_line_endings() {
let input = "## Start a doc comment\n It has indent 3.\r\n \n An indented blank too.";
let expected = token::Stream::from(vec![Token::doc_comment(
vec![
Token::line(
vec![Token::text_segment_raw("Start a doc comment", 0)],
0,
token::LineEnding::LF,
),
Token::line(
vec![Token::text_segment_raw("It has indent 3.", 0)],
0,
token::LineEnding::CRLF,
),
Token::blank_line(4, token::LineEnding::LF),
Token::line(
vec![Token::text_segment_raw(" An indented blank too.", 0)],
0,
token::LineEnding::None,
),
],
3,
0,
)]);
assert_lexes(input, expected);
}


@ -1,179 +0,0 @@
//! This file contains tests for lexing identifiers in the Enso lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod test_utils;
use lexer_definition::library::*;
use test_utils::*;
use lexer_definition::library::token::Token;
// ===================
// === Identifiers ===
// ===================
#[test]
fn variable_ident() {
let input = "some_variable_name";
let expected = token::Stream::from(vec![Token::variable("some_variable_name", 0)]);
assert_lexes(input, expected)
}
#[test]
fn referent_ident() {
let input = "Some_Referent_Name";
let expected = token::Stream::from(vec![Token::referent("Some_Referent_Name", 0)]);
assert_lexes(input, expected)
}
#[test]
fn external_ident() {
let input = "__camelCaseIdentifier";
let expected = token::Stream::from(vec![Token::external("__camelCaseIdentifier", 0)]);
assert_lexes(input, expected)
}
#[test]
fn blank_ident() {
let input = "_";
let expected = token::Stream::from(vec![Token::blank(0)]);
assert_lexes(input, expected)
}
#[test]
fn annotation() {
let input = "@debug";
let expected = token::Stream::from(vec![Token::annotation("debug", 0)]);
assert_lexes(input, expected);
}
#[test]
fn ticked_variable_ident() {
let input = "some_variable_name'";
let expected = token::Stream::from(vec![Token::variable("some_variable_name'", 0)]);
assert_lexes(input, expected)
}
#[test]
fn ticked_referent_ident() {
let input = "Some_Referent_Name'";
let expected = token::Stream::from(vec![Token::referent("Some_Referent_Name'", 0)]);
assert_lexes(input, expected)
}
#[test]
fn ticked_annotation() {
let input = "@debug'";
let expected = token::Stream::from(vec![Token::annotation("debug'", 0)]);
assert_lexes(input, expected);
}
#[test]
fn multi_ticked_variable_ident() {
let input = "some_variable_name'''";
let expected = token::Stream::from(vec![Token::variable("some_variable_name'''", 0)]);
assert_lexes(input, expected)
}
#[test]
fn multi_ticked_referent_ident() {
let input = "Some_Referent_Name'''";
let expected = token::Stream::from(vec![Token::referent("Some_Referent_Name'''", 0)]);
assert_lexes(input, expected)
}
#[test]
fn multi_ticked_annotation() {
let input = "@debug''";
let expected = token::Stream::from(vec![Token::annotation("debug''", 0)]);
assert_lexes(input, expected);
}
#[test]
fn variable_with_numbers() {
let input = "some0_1";
let expected = token::Stream::from(vec![Token::variable("some0_1", 0)]);
assert_lexes(input, expected)
}
#[test]
fn referent_with_numbers() {
let input = "Some_1821";
let expected = token::Stream::from(vec![Token::referent("Some_1821", 0)]);
assert_lexes(input, expected)
}
#[test]
fn annotation_with_numbers() {
let input = "@debug_1";
let expected = token::Stream::from(vec![Token::annotation("debug_1", 0)]);
assert_lexes(input, expected);
}
#[test]
fn tick_not_at_end_variable() {
let input = "some_var'iable";
let expected = token::Stream::from(vec![
Token::variable("some_var'", 0),
Token::invalid_suffix("iable", 0),
]);
assert_lexes(input, expected)
}
#[test]
fn trailing_underscore() {
let input = "some_var_";
let expected = token::Stream::from(vec![Token::external("some_var_", 0)]);
assert_lexes(input, expected)
}
#[test]
fn trailing_underscore_with_tick() {
let input = "some_var_'";
let expected = token::Stream::from(vec![Token::external("some_var_'", 0)]);
assert_lexes(input, expected)
}
#[test]
fn invalid_suffix() {
let input = "some_varД";
let expected =
token::Stream::from(vec![Token::variable("some_var", 0), Token::invalid_suffix("Д", 0)]);
assert_lexes(input, expected)
}
#[test]
fn unrecognized_token() {
let input = "some_var@";
let expected =
token::Stream::from(vec![Token::variable("some_var", 0), Token::unrecognized("@", 0)]);
assert_lexes(input, expected)
}
#[test]
fn chained_identifiers() {
let input = "my_func A' someJavaValue some_python_value";
let expected = token::Stream::from(vec![
Token::variable("my_func", 0),
Token::referent("A'", 1),
Token::external("someJavaValue", 1),
Token::variable("some_python_value", 1),
]);
assert_lexes(input, expected)
}


@ -1,86 +0,0 @@
//! This file contains tests for lexing number literals in the Enso lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod test_utils;
use lexer_definition::library::*;
use test_utils::*;
use lexer_definition::library::token::Token;
// ===============
// === Numbers ===
// ===============
#[test]
fn integer() {
let input = "13831";
let expected = token::Stream::from(vec![Token::number("", "13831", 0)]);
assert_lexes(input, expected);
}
#[test]
fn integer_with_explicit_base() {
let input = "10_13831";
let expected = token::Stream::from(vec![Token::number("10", "13831", 0)]);
assert_lexes(input, expected);
}
#[test]
fn dangling_base() {
let input = "10_";
let expected = token::Stream::from(vec![Token::dangling_base("10", 0)]);
assert_lexes(input, expected);
}
#[test]
fn hex_number() {
let input = "16_ff";
let expected = token::Stream::from(vec![Token::number("16", "ff", 0)]);
assert_lexes(input, expected);
}
#[test]
fn decimal() {
let input = "2.71828";
let expected = token::Stream::from(vec![Token::number("", "2.71828", 0)]);
assert_lexes(input, expected);
}
#[test]
fn decimal_with_explicit_base() {
let input = "10_2.71828";
let expected = token::Stream::from(vec![Token::number("10", "2.71828", 0)]);
assert_lexes(input, expected);
}
#[test]
fn error_base() {
let input = "10.2_2";
let expected =
token::Stream::from(vec![Token::number("", "10.2", 0), Token::invalid_suffix("_2", 0)]);
assert_lexes(input, expected);
}
#[test]
fn offset_number() {
let input = " 10.2";
let expected = token::Stream::from(vec![Token::number("", "10.2", 4)]);
assert_lexes(input, expected);
}


@ -1,238 +0,0 @@
//! This file contains tests for lexing operators in the Enso lexer.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![deny(unconditional_recursion)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod test_utils;
use lexer_definition::library::*;
use test_utils::*;
use lexer_definition::library::token::Token;
// =================
// === Operators ===
// =================
#[test]
fn function_operator() {
let input = "->";
let expected = token::Stream::from(vec![Token::operator("->", 0)]);
assert_lexes(input, expected);
}
#[test]
fn bind_operator() {
let input = "<-";
let expected = token::Stream::from(vec![Token::operator("<-", 0)]);
assert_lexes(input, expected);
}
#[test]
fn left_pipe_operator() {
let input = "<|";
let expected = token::Stream::from(vec![Token::operator("<|", 0)]);
assert_lexes(input, expected);
}
#[test]
fn right_pipe_operator() {
let input = "|>";
let expected = token::Stream::from(vec![Token::operator("|>", 0)]);
assert_lexes(input, expected);
}
#[test]
fn eq_operator() {
let input = "=";
let expected = token::Stream::from(vec![Token::operator("=", 0)]);
assert_lexes(input, expected);
}
#[test]
fn eq_compare_operator() {
let input = "==";
let expected = token::Stream::from(vec![Token::operator("==", 0)]);
assert_lexes(input, expected);
}
#[test]
fn geq_operator() {
let input = ">=";
let expected = token::Stream::from(vec![Token::operator(">=", 0)]);
assert_lexes(input, expected);
}
#[test]
fn neq_operator() {
let input = "!=";
let expected = token::Stream::from(vec![Token::operator("!=", 0)]);
assert_lexes(input, expected);
}
#[test]
fn dot_operator() {
let input = ".";
let expected = token::Stream::from(vec![Token::operator(".", 0)]);
assert_lexes(input, expected);
}
#[test]
fn comma_operator() {
let input = ",";
let expected = token::Stream::from(vec![Token::operator(",", 0)]);
assert_lexes(input, expected);
}
#[test]
fn double_dot_operator() {
let input = "..";
let expected = token::Stream::from(vec![Token::operator("..", 0)]);
assert_lexes(input, expected);
}
#[test]
fn triple_dot_operator() {
let input = "...";
let expected = token::Stream::from(vec![Token::operator("...", 0)]);
assert_lexes(input, expected);
}
#[test]
fn error_operator() {
let input = "!";
let expected = token::Stream::from(vec![Token::operator("!", 0)]);
assert_lexes(input, expected);
}
#[test]
fn type_ascription_operator() {
let input = ":";
let expected = token::Stream::from(vec![Token::operator(":", 0)]);
assert_lexes(input, expected);
}
#[test]
fn in_operator() {
let input = "in";
let expected = token::Stream::from(vec![Token::operator("in", 0)]);
assert_lexes(input, expected);
}
#[test]
fn typeset_union_operator() {
let input = "|";
let expected = token::Stream::from(vec![Token::operator("|", 0)]);
assert_lexes(input, expected);
}
#[test]
fn typeset_intersection_operator() {
let input = "&";
let expected = token::Stream::from(vec![Token::operator("&", 0)]);
assert_lexes(input, expected);
}
#[test]
fn typeset_subtraction_operator() {
let input = "\\";
let expected = token::Stream::from(vec![Token::operator("\\", 0)]);
assert_lexes(input, expected);
}
#[test]
fn arbitrary_left_operator() {
let input = "<!!-";
let expected = token::Stream::from(vec![Token::operator("<!!-", 0)]);
assert_lexes(input, expected);
}
#[test]
fn arbitrary_right_operator() {
let input = "-->>";
let expected = token::Stream::from(vec![Token::operator("-->>", 0)]);
assert_lexes(input, expected);
}
#[test]
fn modifier_plus() {
let input = "+=";
let expected = token::Stream::from(vec![Token::modifier("+", 0)]);
assert_lexes(input, expected);
}
#[test]
fn modifier_minus() {
let input = "-=";
let expected = token::Stream::from(vec![Token::modifier("-", 0)]);
assert_lexes(input, expected);
}
#[test]
fn arbitrary_modifier() {
let input = "<%=";
let expected = token::Stream::from(vec![Token::modifier("<%", 0)]);
assert_lexes(input, expected);
}
#[test]
fn invalid_eq_suffix() {
let input = "===";
let expected =
token::Stream::from(vec![Token::operator("==", 0), Token::invalid_suffix("=", 0)]);
assert_lexes(input, expected);
}
#[test]
fn invalid_dots_suffix() {
let input = "....";
let expected =
token::Stream::from(vec![Token::operator("...", 0), Token::invalid_suffix(".", 0)]);
assert_lexes(input, expected);
}
#[test]
fn invalid_modifier_suffix() {
let input = "+==";
let expected =
token::Stream::from(vec![Token::operator("+", 0), Token::invalid_suffix("==", 0)]);
assert_lexes(input, expected);
}
#[test]
fn dot_call_operator() {
let input = ".+ .<*>";
let expected = token::Stream::from(vec![
Token::operator(".", 0),
Token::operator("+", 0),
Token::operator(".", 1),
Token::operator("<*>", 0),
]);
assert_lexes(input, expected)
}
#[test]
fn dot_eq_operator() {
let input = ".== . !=";
let expected = token::Stream::from(vec![
Token::operator(".", 0),
Token::operator("==", 0),
Token::operator(".", 1),
Token::operator("!=", 2),
]);
assert_lexes(input, expected);
}


@ -1,62 +0,0 @@
//! Utilities for testing the Enso lexer.
// === Non-Standard Linter Configuration ===
#![allow(dead_code)]
#![warn(unsafe_code)]
use enso_flexer::*;
use lexer_definition::library::*;
use lexer_definition::library::token::Token;
// =================
// === Utilities ===
// =================
/// Assert that `result` is a success with tokens `expected`.
pub fn assert_succeeds_as(result: &LexingResult<token::Stream>, expected: token::Stream) {
match result.kind {
ResultKind::Success => assert_eq!(result.tokens, expected),
_ => panic!("Lexing failed."),
}
}
/// Assert that the provided input lexes as `expected`.
pub fn assert_lexes(input: impl AsRef<str>, expected: token::Stream) {
let input_len = input.as_ref().chars().count();
let result = lex(input);
assert_succeeds_as(&result, expected);
let tokens_vec: Vec<_> = result.tokens.into();
let total_length: usize = tokens_vec.iter().map(|token| token.offset + token.length).sum();
assert_eq!(total_length, input_len);
}
/// Lex the provided string.
pub fn lex(input: impl AsRef<str>) -> LexingResult<token::Stream> {
lexer::run(input)
}
/// Asserts that the input is a block and has a length equal to `length`.
pub fn assert_block_has_length(input: impl AsRef<str>, expected_length: usize) {
let result = lex(input);
match result.kind {
ResultKind::Success => {
let tokens = result.tokens.tokens();
match tokens.first().expect("Token should be present.") {
Token { shape: token::Shape::Block { .. }, length, .. } =>
assert_eq!(*length, expected_length),
_ => panic!("Token not a block."),
}
}
_ => panic!("Lexing failed"),
}
}
/// Makes the test text have unix line endings to ensure consistency regardless of git checkout
/// style.
pub fn make_unix_line_endings(input: &str) -> String {
let string = String::from(input);
string.chars().filter(|c| *c != '\r').collect()
}


@ -1,25 +0,0 @@
[package]
name = "parser-jni"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "A parser for the Enso language"
readme = "README.md"
homepage = "https://github.com/enso-org/enso/lib/rust/parser"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
keywords = ["parser"]
categories = ["parsing"]
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
jni = { version = "0.19.0" }
ast-new = { version = "0.1.0", path = "../ast" }


@ -1,125 +0,0 @@
//! This module exports JNI interface for parser methods implemented in Rust.
//!
//! The basic steps to add a new method are as follows:
//! 1. Add the new method in Scala (in `org.enso.parser.Parser`).
//! 2. (Optional) Run `scalac Parser.scala; javah Parser` to generate the C API in `Parser.h`.
//! Note that you can skip this step. It is merely guidance, as it generates
//! the correct function names and type signatures of all `Parser` native methods.
//! Generally, the method interface is going to have the following shape:
//! ```c
//! JNIEXPORT $returnType JNICALL Java_$package_$className_$methodName
//! (JNIEnv* env, jobject this, $argType1 $arg1, $argType2 $arg2)
//! ```
//! For example if the definition is:
//! ```scala
//! package org.enso.parser
//!
//! class Parser {
//! @native def newMethod(string: String, array: Array[Int])
//! }
//! ```
//! Then the JNI API is going to be:
//! ```c
//! JNIEXPORT jobject JNICALL Java_org_enso_parser_Parser_newMethod
//! (JNIEnv* env, jobject this, jstring string, jintArray array)
//! ```
//! The list of all available types can be found in
//! [oracle documentation](https://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html).
//! 3. Implement the new parser method in this file.
//! For the above definition the implementation is going to be:
//! ```rust
//! use jni::JNIEnv;
//! use jni::objects::*;
//! use jni::sys::*;
//!
//! #[no_mangle]
//! pub extern "system" fn Java_org_enso_parser_Parser_newMethod(
//! env : JNIEnv, // the JVM environment, used for calling methods and constructors
//! this : JClass, // the instance of `Parser`
//! string : JString,
//! array : jintArray,
//! ) -> jweak { unimplemented!() }
//! ```
//! 4. (Optional) Generate a shared library from the Rust definition by `cargo build`.
//! It will be generated into `target/rust/debug/`.
//! This step is done automatically by `sbt`.
use jni::objects::*;
use jni::sys::*;
use jni::JNIEnv;
// ======================
// === Parser JNI API ===
// ======================
/// Parses the content of a single source file.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_parser_Parser_parseStr(
env: JNIEnv,
_this: JClass,
input: JString,
) -> jweak {
let txt = env
.new_object(
env.find_class("org/enso/ast/Ast$Txt$Text").unwrap(),
"(Ljava/lang/String;)V",
&[input.into()],
)
.unwrap();
let non = env
.get_static_field(env.find_class("scala/None$").unwrap(), "MODULE$", "Lscala/None$;")
.unwrap()
.l()
.unwrap();
let ast = env
.new_object(
env.find_class("org/enso/ast/Ast$Ast").unwrap(),
"(Lscala/Option;JJLjava/lang/Object;)V",
&[non.into(), 0i64.into(), 0i64.into(), txt.into()],
)
.unwrap();
ast.into_inner()
}
/// Parses a single source file.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_parser_Parser_parseFile(
env: JNIEnv,
this: JClass,
filename: JString,
) -> jweak {
Java_org_enso_parser_Parser_parseStr(env, this, filename)
}
// === Tokens ===
/// Parses the content of a single source file into a stream of tokens.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_parser_Parser_lexStr(
env: JNIEnv,
this: JClass,
input: JString,
) -> jweak {
Java_org_enso_parser_Parser_parseStr(env, this, input)
}
/// Parses a single source file into a stream of tokens.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_parser_Parser_lexFile(
env: JNIEnv,
this: JClass,
filename: JString,
) -> jweak {
Java_org_enso_parser_Parser_parseStr(env, this, filename)
}


@ -1,55 +0,0 @@
//! This module exports the implementation of parser for the Enso language.
// === Features ===
#![feature(test)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
mod jni;
pub use crate::jni::*;
use ast_new::AnyAst;
use ast_new::Ast;
// =======================
// === Parser Rust API ===
// =======================
/// Parse the content of a single source file.
pub fn parse_str(input: String) -> AnyAst {
Ast::new(ast_new::txt::Text { text: input })
}
/// Parse a single source file.
pub fn parse_file(filename: String) -> AnyAst {
parse_str(filename)
}
// === Tokens ===
/// Parse the content of a single source file.
pub fn lexe_str(input: String) -> AnyAst {
parse_str(input)
}
/// Parse a single source file.
pub fn lexe_file(filename: String) -> AnyAst {
parse_str(filename)
}

1090
lib/rust/parser/src/lexer.rs

File diff suppressed because it is too large.


@ -1,40 +0,0 @@
//! This library contains the implementation of the Enso parser.
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// ==============
// === Export ===
// ==============
pub mod macros;
pub mod operator;
pub mod parser;
pub use crate::parser::*;
/// The prelude for the parser.
pub mod prelude {
pub use enso_logger::AnyLogger;
pub use enso_prelude::*;
/// The Enso logging library.
pub mod logger {
pub use enso_logger::Logger;
pub use enso_logger::*;
}
/// The lexer types.
pub mod lexer {
pub use ::lexer::*;
/// The lexer tokens.
pub mod token {
pub use lexer::library::token::*;
}
}
}


@ -1,55 +1,110 @@
//! The macro system for the Enso parser.
//! Enso macro utilities. The parser contains a powerful macro resolution engine, and many of the
//! language constructs are defined as macros. This module contains macro definition structs and
//! utilities for managing macros.
//! Read the docs of the main module of this crate to learn more about the parsing process.
//
use crate::prelude::logger::*;
use crate::prelude::*;
use crate::macros::definition::Definition;
use crate::macros::registry::Registry;
use crate::syntax;
use crate::syntax::token::Token;
use enso_data_structures::im_list;
use pattern::Pattern;
// ==============
// === Export ===
// ==============
pub mod definition;
pub mod literal;
pub mod registry;
pub mod pattern;
// ====================
// === Type Aliases ===
// ====================
// ==================
// === Definition ===
// ==================
type DebugLevel = crate::prelude::logger::entry::level::Debug;
// ================
// === Resolver ===
// ================
/// The Enso macro resolver.
#[derive(Clone, Debug, PartialEq)]
/// Macro definition. It contains a list of macro segments and an optional macro prefix.
///
/// For example, the macro `if ... then ... else ...` contains three segments and no prefix. On the
/// other hand, the macro `... -> ...` contains one segment (starting with the `->` token) and a
/// prefix (it consumes tokens on the left of its first segment).
///
/// If you want to create a macro definition in Rust, use the [`macro_definition`] macro instead,
/// which allows for nice and concise definitions.
#[derive(Derivative)]
#[derivative(Debug)]
#[allow(missing_docs)]
pub struct Resolver<Logger> {
registry: Registry,
logger: Logger,
pub struct Definition<'a> {
/// The pattern in this field will be matched from right to left, unlike patterns in segments.
pub rev_prefix_pattern: Option<Pattern>,
pub segments: im_list::NonEmpty<SegmentDefinition<'a>>,
#[derivative(Debug = "ignore")]
pub body: Rc<Body>,
}
impl<Logger> Resolver<Logger>
where Logger: AnyLogger<Owned = Logger> + LoggerOps<DebugLevel>
{
/// All the tokens matched as prefix of the resolved macro.
pub type PrefixTokens<'s> = Option<Vec<syntax::Item<'s>>>;
/// All the sections of the resolved macro.
pub type MatchedSections<'s> = NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>;
/// A function that transforms matched macro tokens into [`syntax::Tree`].
pub type Body = dyn for<'s> Fn(PrefixTokens<'s>, MatchedSections<'s>) -> syntax::Tree<'s>;
// =========================
// === SegmentDefinition ===
// =========================
/// Definition of a macro segment. Contains a header, such as `if` or `->`, and a pattern that the
/// following tokens have to match.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct SegmentDefinition<'a> {
pub header: &'a str,
pub pattern: Pattern,
}
impl<'a> SegmentDefinition<'a> {
/// Constructor.
pub fn new(macros: Vec<Definition>, parent_logger: &Logger) -> Self {
let logger = <Logger>::sub(parent_logger, "Resolver");
let registry = Registry::from(macros);
Self { registry, logger }
}
/// Define the macro described by `definition` in the macro resolver `self`.
pub fn define_macro(&mut self, definition: Definition) {
debug!(self.logger, "Define Macro: {&definition:?}.");
self.registry.insert(definition)
pub fn new(header: &'a str, pattern: Pattern) -> Self {
Self { header, pattern }
}
}
// ===================
// === Rust Macros ===
// ===================
/// Macro allowing for nice and concise macro [`Definition`] generation. For example, the following
/// code defines the `if ... then ... else ...` macro:
///
/// ```text
/// macro_definition! {
// ("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
// body_handler_fn
// }
/// ```
#[macro_export]
macro_rules! macro_definition {
( ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
$crate::macro_definition!{[None] ($($section, $pattern),*) $body}
};
( ($prefix:expr, $($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
$crate::macro_definition!{[Some($prefix)] ($($section, $pattern),*) $body}
};
( [$prefix:expr] ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
macros::Definition {
rev_prefix_pattern: $prefix,
segments: im_list::NonEmpty::try_from(vec![
$(macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(),
body: Rc::new($body),
}
};
}
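
For a concrete picture of what the rules above produce, the `if ... then ... else ...` example from the doc comment expands to roughly the following. This is only a sketch of the expansion: `body_handler_fn` stands for a caller-supplied handler, and `macros`, `im_list`, `Pattern`, and `Rc` are assumed to be in scope as in this module.

```rust
// Approximate expansion of:
//     macro_definition! {
//         ("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
//         body_handler_fn
//     }
// The first rule inserts `[None]` (no prefix pattern); the last rule then builds the struct.
macros::Definition {
    rev_prefix_pattern: None,
    segments: im_list::NonEmpty::try_from(vec![
        macros::SegmentDefinition::new("if", Pattern::Everything),
        macros::SegmentDefinition::new("then", Pattern::Everything),
        macros::SegmentDefinition::new("else", Pattern::Everything),
    ]).unwrap(),
    body: Rc::new(body_handler_fn),
}
```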


@ -1,70 +0,0 @@
//! Macro definitions in Enso.
use crate::prelude::*;
use crate::macros::literal::Literal;
use itertools::Itertools;
// ==================
// === Definition ===
// ==================
/// A macro definition.
///
/// A macro definition consists of a name, which identifies the macro to users, and a list of
/// [sections](`Section`). The sections are the most important portion of the macro definition, as
/// they define the literal portions of the token stream on which the macro will match.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Definition {
pub name: String,
pub sections: Vec<Section>,
}
impl Definition {
/// Constructor.
pub fn new(name: impl Str, sections: Vec<Section>) -> Self {
let name = name.into();
Self { name, sections }
}
/// Get the path for the definition.
///
/// The definition's path consists of the headers of each of the sections that make it up, and
/// describes the literals that must be matched for the macro to match.
pub fn path(&self) -> Vec<Literal> {
self.sections.iter().map(|s| s.start_symbol.clone()).collect_vec()
}
}
// ===============
// === Section ===
// ===============
/// A section in a macro, representing both a literal section header to match against, and the
/// tokens that the section contains.
///
/// The literal is the _most_ important portion of a section, as they are constants that allow the
/// macro resolver to divide up the input token stream based on these constants.
#[derive(Clone, Debug, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Section {
start_symbol: Literal, // TODO Pattern
}
impl Section {
/// Constructor.
pub fn new(symbol: Literal) -> Self {
Self { start_symbol: symbol }
}
/// Get a reference to the literal that heads the section.
pub fn start_symbol(&self) -> &Literal {
&self.start_symbol
}
}


@ -1,95 +0,0 @@
//! This file contains the literal matchers that are used to head up macro sections.
use crate::prelude::*;
use crate::prelude::lexer::token;
// ===============
// === Literal ===
// ===============
/// The kinds of literal that can be the head of a macro section.
///
/// For more detailed descriptions of the various literal types, please see the documentation of the
/// tokens in the Lexer.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Literal {
Referent(String),
Variable(String),
External(String),
Blank,
Operator(String),
Annotation(String),
}
impl Literal {
/// Construct a referent identifier literal.
pub fn referent(lit: impl Str) -> Literal {
Literal::Referent(lit.into())
}
/// Construct a variable identifier literal.
pub fn variable(lit: impl Str) -> Literal {
Literal::Variable(lit.into())
}
/// Construct an external identifier literal.
pub fn external(lit: impl Str) -> Literal {
Literal::External(lit.into())
}
/// Construct a blank identifier literal.
pub fn blank() -> Literal {
Literal::Blank
}
/// Construct an operator identifier literal.
pub fn operator(lit: impl Str) -> Literal {
Literal::Operator(lit.into())
}
/// Construct an annotation identifier literal.
pub fn annotation(lit: impl Str) -> Literal {
Literal::Annotation(lit.into())
}
}
// === Trait Impls ===
impl From<&Literal> for Literal {
fn from(lit: &Literal) -> Self {
lit.clone()
}
}
impl From<Literal> for token::Shape {
fn from(lit: Literal) -> Self {
match lit {
Literal::Referent(str) => token::Shape::Referent(str),
Literal::Variable(str) => token::Shape::Variable(str),
Literal::External(str) => token::Shape::External(str),
Literal::Blank => token::Shape::Blank,
Literal::Operator(str) => token::Shape::Operator(str),
Literal::Annotation(str) => token::Shape::Annotation(str),
}
}
}
impl TryFrom<token::Shape> for Literal {
type Error = token::Shape;
fn try_from(shape: token::Shape) -> Result<Self, Self::Error> {
match shape {
token::Shape::Referent(name) => Ok(Literal::Referent(name)),
token::Shape::Variable(name) => Ok(Literal::Variable(name)),
token::Shape::External(name) => Ok(Literal::External(name)),
token::Shape::Blank => Ok(Literal::Blank),
token::Shape::Operator(name) => Ok(Literal::Operator(name)),
token::Shape::Annotation(name) => Ok(Literal::Annotation(name)),
_ => Err(shape),
}
}
}


@ -0,0 +1,126 @@
//! This module defines the [`Pattern`] type, used to validate an incoming token stream against the
//! expected macro input.
use crate::prelude::*;
use crate::syntax;
// ===============
// === Pattern ===
// ===============
/// Pattern used to validate an incoming token stream against the expected macro input.
///
/// The idea is similar to patterns used in `macro_rules` definitions in Rust. There are a few
/// differences though:
/// 1. This pattern implementation exposes different matchers and operations.
/// 2. This macro implementation never attaches types to tokens, which means that every defined
/// pattern behaves like a TT-muncher in Rust.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub enum Pattern {
/// Consume all items, till the end of the token stream.
Everything,
/// Consume nothing.
Nothing,
/// Consume items matching the first pattern. If the match was unsuccessful, the second match
/// will be tried.
Or(Box<Pattern>, Box<Pattern>),
/// Consume a single item if it matches the configuration.
Item(Item),
}
/// Item pattern configuration.
#[derive(Clone, Copy, Debug)]
#[allow(missing_docs)]
pub struct Item {
/// Check whether the token has spaces on its right-hand side. The [`None`] value means that the
/// condition is not checked.
pub has_rhs_spacing: Option<bool>,
}
// =======================
// === ResolutionError ===
// =======================
/// Pattern resolution error.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct ResolutionError<T> {
/// All the incoming tokens. The resolver consumes a vector of tokens and returns it back in case
/// an error happens.
pub tokens: Vec<T>,
pub message: String,
}
impl<T> ResolutionError<T> {
/// Constructor.
pub fn new(tokens: Vec<T>, message: impl Into<String>) -> Self {
let message = message.into();
Self { tokens, message }
}
}
// ==================
// === Resolution ===
// ==================
/// Successful pattern match result.
#[derive(Debug, Clone)]
#[allow(missing_docs)]
pub struct Match<T> {
/// All the matched tokens.
pub matched: Vec<T>,
/// The rest of the token stream that was not needed for the successful pattern match.
pub rest: Vec<T>,
}
impl<T> Match<T> {
/// Constructor.
pub fn new(matched: Vec<T>, rest: Vec<T>) -> Self {
Self { matched, rest }
}
}
impl Pattern {
/// Match the token stream with this pattern.
pub fn resolve<'s, T: TryAsRef<syntax::Item<'s>>>(
&self,
mut input: Vec<T>,
has_spacing_at_end: bool,
right_to_left_mode: bool,
) -> Result<Match<T>, ResolutionError<T>> {
match self {
Self::Everything => Ok(Match::new(input, default())),
Self::Nothing => Ok(Match::new(default(), input)),
Self::Or(fst, snd) => fst
.resolve(input, has_spacing_at_end, right_to_left_mode)
.or_else(|err| snd.resolve(err.tokens, has_spacing_at_end, right_to_left_mode)),
Self::Item(item) => match input.first() {
None => Err(ResolutionError::new(input, "Expected an item.")),
Some(first) => match first.try_as_ref() {
None => Err(ResolutionError::new(input, "Expected an item.")),
Some(_) => match item.has_rhs_spacing {
Some(spacing) =>
if right_to_left_mode {
if spacing == has_spacing_at_end {
Ok(Match::new(vec![input.pop_front().unwrap()], input))
} else {
Err(ResolutionError::new(input, "Expected an item."))
}
} else {
todo!()
},
None => Ok(Match::new(vec![input.pop_front().unwrap()], input)),
},
},
},
}
}
}
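
The `Or` arm above relies on a failed branch handing the unconsumed tokens back inside the error, so the alternative branch can be retried on exactly the same input. Below is a small, self-contained sketch of that backtracking idea over plain string tokens; it is an illustration only, not the crate's API, since the real resolver works on `syntax::Item`s and additionally checks spacing via `Item`.

```rust
/// Simplified stand-in for the pattern kinds above, operating on plain `&str` tokens.
enum MiniPattern {
    /// Consume every remaining token.
    Everything,
    /// Consume nothing.
    Nothing,
    /// Consume exactly one token; fail on empty input.
    Single,
    /// Try the first pattern; fall back to the second one on failure.
    Or(Box<MiniPattern>, Box<MiniPattern>),
}

/// Successful match: the consumed tokens plus the untouched remainder.
struct MiniMatch<'a> {
    matched: Vec<&'a str>,
    rest:    Vec<&'a str>,
}

impl MiniPattern {
    fn resolve<'a>(&self, mut input: Vec<&'a str>) -> Result<MiniMatch<'a>, Vec<&'a str>> {
        match self {
            Self::Everything => Ok(MiniMatch { matched: input, rest: Vec::new() }),
            Self::Nothing => Ok(MiniMatch { matched: Vec::new(), rest: input }),
            Self::Single =>
                if input.is_empty() {
                    // The error hands the tokens back so an alternative can be tried.
                    Err(input)
                } else {
                    let first = input.remove(0);
                    Ok(MiniMatch { matched: vec![first], rest: input })
                },
            // Try `fst`; on failure, resolve `snd` against the returned tokens.
            Self::Or(fst, snd) => fst.resolve(input).or_else(|tokens| snd.resolve(tokens)),
        }
    }
}

fn main() {
    // `Everything` consumes the whole stream.
    let all = MiniPattern::Everything.resolve(vec!["a", "b"]).unwrap();
    assert_eq!(all.matched, vec!["a", "b"]);
    assert!(all.rest.is_empty());

    // `Single` fails on empty input, so `Or` falls back to `Nothing`.
    let pattern = MiniPattern::Or(Box::new(MiniPattern::Single), Box::new(MiniPattern::Nothing));
    let fallback = pattern.resolve(Vec::new()).unwrap();
    assert!(fallback.matched.is_empty() && fallback.rest.is_empty());

    // On non-empty input the first branch wins and consumes one token.
    let hit = pattern.resolve(vec!["print", "content"]).unwrap();
    assert_eq!(hit.matched, vec!["print"]);
    assert_eq!(hit.rest, vec!["content"]);
}
```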


@ -1,155 +0,0 @@
//! The macro registry that can be queried during the process of macro resolution.
use crate::prelude::*;
use enso_data_structures::hash_map_tree::*;
use crate::macros::definition::Definition;
use crate::macros::literal::Literal;
// ================
// === Registry ===
// ================
/// The type of the tree that underlies the registry.
pub type Tree = HashMapTree<Literal, Option<Definition>>;
/// The registry is responsible for the registration of macro definitions, and the querying of said
/// definitions.
#[derive(Clone, Debug, Default, PartialEq)]
#[allow(missing_docs)]
pub struct Registry {
tree: Tree,
}
impl Registry {
/// Insert `definition` into the macro registry.
pub fn insert(&mut self, definition: Definition) {
self.tree.set(definition.path(), Some(definition));
}
/// Get a reference to the root of the registry.
pub fn root(&self) -> &Tree {
&self.tree
}
/// Query the registry for a tree.
pub fn subtree<P>(&self, path: P) -> Option<&Tree>
where
P: IntoIterator,
P::Item: Into<Literal>, {
self.tree.get_node(path)
}
/// Query the registry for a tree, assuming such a tree is present.
///
/// # Panics
/// If no tree exists at `path`.
pub fn unsafe_subtree<P>(&self, path: P) -> &Tree
where
P: IntoIterator,
P::Item: Into<Literal>, {
self.subtree(path).expect("A tree exists at the input path.")
}
/// Query the registry for a definition.
pub fn definition<P>(&self, path: P) -> Option<&Definition>
where
P: IntoIterator,
P::Item: Into<Literal>, {
match self.tree.get(path) {
Some(Some(def)) => Some(def),
_ => None,
}
}
/// Query the registry for a definition, assuming such a definition is present.
///
/// # Panics
/// If no definition exists at `path`.
pub fn unsafe_definition<P>(&self, path: P) -> &Definition
where
P: IntoIterator,
P::Item: Into<Literal>, {
self.definition(path).expect("A definition exists at the input path.")
}
}
// === Trait Impls ===
impl From<Vec<Definition>> for Registry {
fn from(defs: Vec<Definition>) -> Self {
let mut registry: Registry = default();
defs.into_iter().for_each(|def| registry.insert(def));
registry
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
mod tests {
use super::*;
use crate::macros::definition::Section;
#[test]
fn insert_query() {
let mut registry = Registry::default();
let definition = Definition::new("Test", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("then")),
Section::new(Literal::variable("else")),
]);
let path_1 =
&[Literal::variable("if"), Literal::variable("then"), Literal::variable("else")];
let path_2 = &[Literal::variable("if"), Literal::variable("then")];
registry.insert(definition.clone());
let result_1 = registry.definition(path_1);
let result_2 = registry.definition(path_2);
assert!(result_1.is_some());
assert_eq!(result_1.unwrap(), &definition);
assert_eq!(result_2, None);
}
#[test]
fn from_defs() {
let definitions = vec![
Definition::new("if_then_else", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("then")),
Section::new(Literal::variable("else")),
]),
Definition::new("if_then", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("then")),
]),
Definition::new("if_let", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("let")),
]),
];
let registry = Registry::from(definitions.clone());
let path_1 =
&[Literal::variable("if"), Literal::variable("then"), Literal::variable("else")];
let path_2 = &[Literal::variable("if"), Literal::variable("then")];
let path_3 = &[Literal::variable("if"), Literal::variable("let")];
let path_4 = &[Literal::variable("if")];
let result_1 = registry.definition(path_1);
let result_2 = registry.definition(path_2);
let result_3 = registry.definition(path_3);
let result_4 = registry.definition(path_4);
assert!(result_1.is_some());
assert!(result_2.is_some());
assert!(result_3.is_some());
assert!(result_4.is_none());
assert_eq!(result_1, definitions.get(0));
assert_eq!(result_2, definitions.get(1));
assert_eq!(result_3, definitions.get(2));
}
}

811
lib/rust/parser/src/main.rs

@ -0,0 +1,811 @@
//! The Enso parser. Parsing is a multi-stage process:
//!
//! # Lexing.
//! First, the source code is fed to [`lexer::Lexer`], which consumes it and outputs a stream of
//! [`Token`]. Tokens are chunks of the input with a generic description attached, like "operator",
//! or "identifier".
//!
//! # Building macro registry.
//! Macros in Enso are a very powerful mechanism and are used to transform groups of tokens into
//! almost any statement. First, macros need to be discovered and registered. Currently, there is no
//! real macro discovery process, as there is no support for user-defined macros. Instead, there is
//! a set of hardcoded macros defined in the compiler.
//!
//! Each macro defines one or more segments. Every segment starts with a predefined token and can
//! contain any number of other tokens. For example, the macro `if ... then ... else ...` contains
//! three segments. Macros can also accept prefix tokens, a set of tokens on the left of the first
//! segment. A good example is the lambda macro `... -> ...`.
//!
//! In this step, a [`MacroMatchTree`] is built. Basically, it is a map from the possible next
//! segment name to information about which other segments are required and what the macro definition
//! is in case these segments are found. For example, let's consider two macros: `if ... then ...`,
//! and `if ... then ... else ...`. In such a case, the macro registry will contain only one entry,
//! "if", and two sets of possible resolution paths: ["then"], and ["then", "else"], each associated
//! with the corresponding macro definition.
//!
//! # Splitting the token stream by the macro segments.
//! The input token stream is iterated and split based on the segments of the
//! registered macros. For example, for the input `if a b then c d else e f`, the token stream will
//! be split into three segments, `a b`, `c d`, and `e f`, which will be associated with the
//! `if ... then ... else ...` macro definition.
//!
//! The splitting process is hierarchical. It means that a new macro can start being resolved during
//! resolution of a parent macro. For example, `if if a then b then c else d` is a correct
//! expression. After finding the first `if` token, the token stream will be split. The next `if`
//! token starts a new token stream splitting. The first `then` token belongs to the nested macro,
//! however, as soon as the resolver sees the second `then` token, it will consider the nested macro
//! to be finished, and will come back to parent macro resolution.
//!
//! # Resolving right-hand-side patterns of macro segments.
//! In the next steps, each macro is analyzed, starting from the most nested ones. For each
//! macro, the [`Pattern`] of its last segment is run to check which tokens belong to that macro,
//! and which tokens should be transferred to the parent macro definition. For example, consider the
//! following code `process (read file) content-> print content`. The `(...)` is a macro with two
//! sections `(` and `)`. Let's mark the token splitting with `[` and `]` characters. The previous
//! macro resolution steps would produce the following split of the token stream:
//! `process [(read file][) content[-> print content]]`. In this step, the most inner macro will be
//! analyzed first. The pattern of the last segment of the inner macro (`->`) defines that it
//! consumes all tokens, so all the tokens `print content` are left as they are. Now, the resolution
//! moves to the parent macro. Its last segment starts with the `)` token, whose pattern defines
//! that it does not consume any tokens, so all of its current tokens (`content[-> print content]]`)
//! are popped to a parent definition, forming `process [(read file][)] content[-> print content]`.
//!
//! Please note that the root of the expression is considered a special macro as well. This is done
//! to unify the algorithm.
//!
//! # Resolving left-hand-side patterns of macro segments.
//! In this step, each macro is analyzed, starting from the most nested ones. For each macro,
//! the [`Pattern`] of the macro prefix is run to check which tokens belong to the prefix of
//! the macro (in case the macro defines the prefix). In the example above, the macro `->` defines
//! complex prefix rules: if the token on the left of the arrow used no space, then only a single
//! token will be consumed. As a result of this step, the following token split will occur:
//! `[process [(read file][)] [content-> print content]`, which is exactly what we wanted.
//!
//! # Resolving patterns of macro segments.
//! In this step, all macro segment patterns are resolved, and errors are reported where
//! resolution is not possible. If the tokens in a segment match the segment pattern, they are sent
//! to the operator precedence resolver for the final transformation.
//!
//! # Operator precedence resolution.
//! Each token stream sent to the operator resolver is processed by a modified Shunting Yard
//! algorithm, which handles such situations as multiple operators placed next to each other,
//! multiple identifiers placed next to each other, and also takes spacing into consideration in
//! order to implement spacing-aware precedence rules. After all segments are resolved, the macro
//! is treated as a single token in one of the segments of the parent macro and is processed by the
//! operator precedence resolver as well. In the end, a single [`syntax::Tree`] is produced,
//! containing the parsed expression.
#![recursion_limit = "256"]
// === Features ===
#![allow(incomplete_features)]
#![feature(allocator_api)]
#![feature(test)]
#![feature(specialization)]
#![feature(let_chains)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
use crate::prelude::*;
use crate::source::VisibleOffset;
use enso_data_structures::im_list;
use enso_data_structures::im_list::List;
use lexer::Lexer;
use macros::pattern::Pattern;
use syntax::token;
use syntax::token::Token;
// ==============
// === Export ===
// ==============
pub mod lexer;
pub mod macros;
pub mod source;
pub mod syntax;
/// Popular utilities, imported by most modules of this crate.
pub mod prelude {
pub use enso_prelude::*;
pub use enso_types::traits::*;
pub use enso_types::unit2::Bytes;
}
// =================================
// === SyntaxItemOrMacroResolver ===
// =================================
/// One of [`syntax::Item`] or [`MacroResolver`].
#[derive(Debug)]
#[allow(missing_docs)]
pub enum SyntaxItemOrMacroResolver<'s> {
SyntaxItem(syntax::Item<'s>),
MacroResolver(MacroResolver<'s>),
}
impl<'s> From<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
fn from(t: syntax::Item<'s>) -> Self {
Self::SyntaxItem(t)
}
}
impl<'s> From<MacroResolver<'s>> for SyntaxItemOrMacroResolver<'s> {
fn from(t: MacroResolver<'s>) -> Self {
Self::MacroResolver(t)
}
}
impl<'s> TryAsRef<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
fn try_as_ref(&self) -> Option<&syntax::Item<'s>> {
match self {
Self::SyntaxItem(t) => Some(t),
_ => None,
}
}
}
// ======================
// === MacroMatchTree ===
// ======================
/// A tree-like structure encoding potential macro matches. The keys are representations of tokens
/// that can be matched. For example, the key could be "if" or "->". Each key is associated with one
/// or more [`PartiallyMatchedMacro`], which stores a list of required segments and a macro
/// definition in case all the segments were matched. For example, for the "if" key, there can be
/// two required segment lists, one for "then" and "else" segments, and one for the "then" segment
/// only.
#[derive(Default, Debug, Deref, DerefMut)]
pub struct MacroMatchTree<'s> {
map: HashMap<&'s str, NonEmptyVec<PartiallyMatchedMacro<'s>>>,
}
/// Partially matched macro info. See docs of [`MacroMatchTree`] to learn more.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct PartiallyMatchedMacro<'s> {
pub required_segments: List<macros::SegmentDefinition<'s>>,
pub definition: Rc<macros::Definition<'s>>,
}
impl<'a> MacroMatchTree<'a> {
/// Register a new macro definition in this macro tree.
pub fn register(&mut self, definition: macros::Definition<'a>) {
let header = definition.segments.head.header;
let entry = PartiallyMatchedMacro {
required_segments: definition.segments.tail.clone(),
definition: Rc::new(definition),
};
if let Some(node) = self.get_mut(header) {
node.push(entry);
} else {
self.insert(header, NonEmptyVec::singleton(entry));
}
}
}
// =====================
// === MacroResolver ===
// =====================
/// Enso macro resolver. See the docs of the main module to learn more about the macro resolution
/// steps.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct MacroResolver<'s> {
pub current_segment: MatchedSegment<'s>,
pub resolved_segments: Vec<MatchedSegment<'s>>,
pub possible_next_segments: MacroMatchTree<'s>,
pub matched_macro_def: Option<Rc<macros::Definition<'s>>>,
}
impl<'a> MacroResolver<'a> {
/// A new macro resolver with a special "root" segment definition. The "root" segment does not
/// exist in the source code; it is simply the whole expression being parsed. It is treated
/// as a macro in order to unify the algorithms.
pub fn new_root() -> Self {
let current_segment =
MatchedSegment { header: Token("", "", token::Variant::newline()), body: default() };
let resolved_segments = default();
let possible_next_segments = default();
let matched_macro_def = Some(Rc::new(macros::Definition {
rev_prefix_pattern: None,
segments: im_list::NonEmpty::singleton(macros::SegmentDefinition {
header: "__ROOT__",
pattern: Pattern::Everything,
}),
body: Rc::new(|_, v| {
if v.len() != 1 {
panic!()
}
let t = v.into_vec().pop().unwrap().1;
resolve_operator_precedence(t)
}),
}));
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
}
}
/// A matched macro segment. Partial macro resolution product.
#[derive(Debug)]
pub struct MatchedSegment<'s> {
header: Token<'s>,
body: Vec<SyntaxItemOrMacroResolver<'s>>,
}
impl<'s> MatchedSegment<'s> {
/// Constructor.
pub fn new(header: Token<'s>) -> Self {
let body = default();
Self { header, body }
}
}
/// Main macro resolver capable of resolving nested macro usages. See the docs of the main module to
/// learn more about the macro resolution steps.
#[derive(Debug)]
pub struct Resolver<'s> {
current_macro: MacroResolver<'s>,
macro_stack: Vec<MacroResolver<'s>>,
}
/// Result of the macro resolution step.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ResolverStep {
NormalToken,
NewSegmentStarted,
MacroStackPop,
}
impl<'s> Resolver<'s> {
fn new_root() -> Self {
let current_macro = MacroResolver::new_root();
let macro_stack = default();
Self { current_macro, macro_stack }
}
fn run(
mut self,
lexer: &Lexer<'s>,
root_macro_map: &MacroMatchTree<'s>,
tokens: Vec<syntax::Item<'s>>,
) -> syntax::Tree<'s> {
let mut stream = tokens.into_iter();
let mut opt_token: Option<syntax::Item<'s>>;
macro_rules! next_token {
() => {{
opt_token = stream.next();
if let Some(token) = opt_token.as_ref() {
event!(TRACE, "New token {:#?}", token);
}
}};
}
macro_rules! trace_state {
() => {
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
};
}
next_token!();
while let Some(token) = opt_token {
let step_result = match &token {
// FIXME: clone?
syntax::Item::Token(token) => self.process_token(root_macro_map, token.clone()),
_ => ResolverStep::NormalToken,
};
match step_result {
ResolverStep::MacroStackPop => {
trace_state!();
opt_token = Some(token)
}
ResolverStep::NewSegmentStarted => {
trace_state!();
next_token!()
}
ResolverStep::NormalToken => {
self.current_macro.current_segment.body.push(token.into());
trace_state!();
next_token!();
}
}
}
while let Some(parent_macro) = self.macro_stack.pop() {
self.replace_current_with_parent_macro(parent_macro);
}
trace_state!();
Self::resolve(lexer, self.current_macro, None)
}
fn replace_current_with_parent_macro(&mut self, mut parent_macro: MacroResolver<'s>) {
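// The current macro is finished. Resolve the pattern of its last segment, keep the matched
// tokens in the child, hand the remaining tokens back to the parent, and make the parent the
// current macro again.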
mem::swap(&mut parent_macro, &mut self.current_macro);
let mut child_macro = parent_macro;
if let Some(def) = &child_macro.matched_macro_def {
let pattern = &def.segments.last().pattern;
let child_tokens = mem::take(&mut child_macro.current_segment.body);
// FIXME: the first [`false`] below is invalid.
let match_result = pattern.resolve(child_tokens, false, false).unwrap();
let mut new_child_tokens = match_result.matched;
let new_parent_tokens = match_result.rest;
mem::swap(&mut child_macro.current_segment.body, &mut new_child_tokens);
self.current_macro.current_segment.body.push(child_macro.into());
self.current_macro.current_segment.body.extend(new_parent_tokens);
} else {
panic!()
}
}
fn resolve(
lexer: &Lexer<'s>,
m: MacroResolver<'s>,
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
) -> syntax::Tree<'s> {
let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment);
let sss: NonEmptyVec<(Token, Vec<syntax::Item<'s>>)> = segments.mapped(|segment| {
let mut ss: Vec<syntax::Item<'s>> = vec![];
for item in segment.body {
let resolved_token = match item {
SyntaxItemOrMacroResolver::MacroResolver(m2) => {
if let Some(macro_def) = &m2.matched_macro_def
&& let Some(pfx_pattern) = &macro_def.rev_prefix_pattern {
ss.reverse();
let spacing = m2.current_segment.header.left_offset.visible > VisibleOffset(0);
let mut match_result = pfx_pattern.resolve(ss,spacing,true).unwrap();
match_result.matched.reverse();
ss = match_result.rest;
ss.reverse();
Self::resolve(lexer, m2, Some(match_result.matched)).into()
} else {
Self::resolve(lexer, m2, None).into()
}
},
SyntaxItemOrMacroResolver::SyntaxItem(t) => t,
};
ss.push(resolved_token);
}
(segment.header, ss)
});
if let Some(macro_def) = m.matched_macro_def {
(macro_def.body)(prefix_tokens, sss)
} else {
todo!("Handling non-fully-resolved macros")
}
}
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<MacroResolver<'s>> {
let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr));
if reserved {
self.macro_stack.pop()
} else {
None
}
}
fn process_token(
&mut self,
root_macro_map: &MacroMatchTree<'s>,
token: Token<'s>,
) -> ResolverStep {
let repr = &**token.code;
if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) {
event!(TRACE, "Entering next segment of the current macro.");
let mut new_match_tree =
Self::enter(&mut self.current_macro.matched_macro_def, subsegments);
let mut current_segment = MatchedSegment::new(token);
mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments);
mem::swap(&mut self.current_macro.current_segment, &mut current_segment);
self.current_macro.resolved_segments.push(current_segment);
ResolverStep::NewSegmentStarted
} else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) {
event!(TRACE, "Next token reserved by parent macro. Resolving current macro.");
self.replace_current_with_parent_macro(parent_macro);
ResolverStep::MacroStackPop
} else if let Some(segments) = root_macro_map.get(repr) {
event!(TRACE, "Starting a new nested macro resolution.");
let mut matched_macro_def = default();
let mut current_macro = MacroResolver {
current_segment: MatchedSegment { header: token, body: default() },
resolved_segments: default(),
possible_next_segments: Self::enter(&mut matched_macro_def, segments),
matched_macro_def,
};
mem::swap(&mut self.current_macro, &mut current_macro);
self.macro_stack.push(current_macro);
ResolverStep::NewSegmentStarted
} else {
event!(TRACE, "Consuming token as current segment body.");
ResolverStep::NormalToken
}
}
fn enter(
matched_macro_def: &mut Option<Rc<macros::Definition<'s>>>,
path: &[PartiallyMatchedMacro<'s>],
) -> MacroMatchTree<'s> {
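// Step into the next segment of each candidate macro. Candidates that still require segments
// are re-registered under their next expected header; a candidate with no remaining segments
// becomes the fully matched macro definition.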
*matched_macro_def = None;
let mut new_section_tree = MacroMatchTree::default();
for v in path {
if let Some(first) = v.required_segments.head() {
let tail = v.required_segments.tail().cloned().unwrap_or_default();
let definition = v.definition.clone_ref();
let x = PartiallyMatchedMacro { required_segments: tail, definition };
if let Some(node) = new_section_tree.get_mut(&first.header) {
node.push(x);
} else {
new_section_tree.insert(first.header, NonEmptyVec::singleton(x));
}
} else {
if matched_macro_def.is_some() {
event!(ERROR, "Internal error. Duplicate macro definition.");
}
*matched_macro_def = Some(v.definition.clone_ref());
}
}
new_section_tree
}
}
// FIXME: hardcoded values + not finished implementation.
fn precedence_of(operator: &str) -> usize {
match operator {
"+" => 3,
"-" => 3,
"*" => 7,
_ => panic!("Operator not supported: {}", operator),
}
}
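// A minimal worked example of how these precedences drive the resolver below: for the input
// `a + b * c`, `*` (precedence 7) binds tighter than `+` (precedence 3), so the expression is
// grouped as `a + (b * c)`.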
//
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
struct WithPrecedence<T> {
#[deref]
#[deref_mut]
elem: T,
precedence: usize,
}
impl<T> WithPrecedence<T> {
pub fn new(precedence: usize, elem: T) -> Self {
Self { elem, precedence }
}
}
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
items
.into_iter()
.map(|item| match item {
syntax::Item::Token(_) => item,
syntax::Item::Tree(ast) =>
match &*ast.variant {
syntax::tree::Variant::MultiSegmentApp(data) => {
if data.segments.first().header.variant.marker()
!= token::variant::VariantMarker::Symbol
{
syntax::Item::Tree(ast.with_error(
"This expression cannot be used in a non-spaced equation.",
))
} else {
syntax::Item::Tree(ast)
}
}
_ => syntax::Item::Tree(ast),
},
})
.collect()
}
fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
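// Spacing-aware precedence: consecutive items that are not separated by spaces are collected
// into a group and resolved on their own first (e.g. in `a+b * c`, the unspaced `a+b` is
// resolved before the spaced `*` is applied).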
type Tokens<'s> = Vec<syntax::Item<'s>>;
let mut flattened: Tokens<'s> = default();
let mut no_space_group: Tokens<'s> = default();
let process_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
let tokens = mem::take(no_space_group);
if tokens.len() == 1 {
flattened.extend(tokens);
} else {
let tokens = annotate_tokens_that_need_spacing(tokens);
let ast = resolve_operator_precedence_internal(tokens);
flattened.push(ast.into());
}
};
for item in items {
if item.span().left_offset.visible.width_in_spaces == 0 || no_space_group.is_empty() {
no_space_group.push(item)
} else if !no_space_group.is_empty() {
process_no_space_group(&mut flattened, &mut no_space_group);
no_space_group.push(item);
} else {
// FIXME: this is unreachable.
flattened.push(item);
}
}
if !no_space_group.is_empty() {
process_no_space_group(&mut flattened, &mut no_space_group);
}
resolve_operator_precedence_internal(flattened)
}
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
// Reverse-polish notation encoding.
let mut output: Vec<syntax::Item> = default();
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
let mut last_token_was_ast = false;
let mut last_token_was_opr = false;
for item in items {
let i2 = item.clone(); // FIXME
if let syntax::Item::Token(token) = i2 && let token::Variant::Operator(opr) = token.variant {
// Item is an operator.
let last_token_was_opr_copy = last_token_was_opr;
last_token_was_ast = false;
last_token_was_opr = true;
let prec = precedence_of(&token.code);
let opr = Token(token.left_offset, token.code, opr);
// let opr = item.span().with(opr);
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
// Error. Multiple operators next to each other.
match &mut prev_opr.elem {
Err(err) => err.operators.push(opr),
Ok(prev) => {
let operators = NonEmptyVec::new(prev.clone(),vec![opr]); // FIXME: clone?
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
}
}
} else {
while let Some(prev_opr) = operator_stack.last()
&& prev_opr.precedence >= prec
&& let Some(prev_opr) = operator_stack.pop()
&& let Some(rhs) = output.pop()
{
// Prev operator in the [`operator_stack`] has a higher precedence.
let lhs = output.pop().map(token_to_ast);
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(token_to_ast(rhs)));
output.push(ast.into());
}
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
}
} else if last_token_was_ast && let Some(lhs) = output.pop() {
// Multiple non-operators next to each other.
let lhs = token_to_ast(lhs);
let rhs = token_to_ast(item);
let ast = syntax::Tree::app(lhs, rhs);
output.push(ast.into());
} else {
// Non-operator that follows previously consumed operator.
last_token_was_ast = true;
last_token_was_opr = false;
output.push(item);
}
}
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(token_to_ast));
while let Some(opr) = operator_stack.pop() {
let opt_lhs = output.pop().map(token_to_ast);
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
}
if !output.is_empty() {
panic!(
"Internal error. Not all tokens were consumed while constructing the expression."
);
}
syntax::Tree::opr_section_boundary(opt_rhs.unwrap()) // fixme
}
fn token_to_ast(elem: syntax::Item) -> syntax::Tree {
match elem {
syntax::Item::Token(token) => match token.variant {
token::Variant::Ident(ident) => {
let ii2 = token.with_variant(ident);
syntax::tree::Tree::ident(ii2)
}
_ => panic!(),
},
syntax::Item::Tree(ast) => ast,
}
}
fn matched_segments_into_multi_segment_app<'s>(
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
matched_segments: NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>,
) -> syntax::Tree<'s> {
// FIXME: remove into_vec and use NonEmptyVec::mapped
let segments = matched_segments
.into_vec()
.into_iter()
.map(|segment| {
let header = segment.0;
let body =
(!segment.1.is_empty()).as_some_from(|| resolve_operator_precedence(segment.1));
syntax::tree::MultiSegmentAppSegment { header, body }
})
.collect_vec();
if let Ok(segments) = NonEmptyVec::try_from(segments) {
let prefix = prefix_tokens.map(resolve_operator_precedence);
syntax::Tree::multi_segment_app(prefix, segments)
} else {
panic!()
}
}
// =========================
// === Macro Definitions ===
// =========================
fn macro_if_then_else<'s>() -> macros::Definition<'s> {
macro_definition! {
("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
matched_segments_into_multi_segment_app
}
}
fn macro_if_then<'s>() -> macros::Definition<'s> {
macro_definition! {
("if", Pattern::Everything, "then", Pattern::Everything)
matched_segments_into_multi_segment_app
}
}
fn macro_group<'s>() -> macros::Definition<'s> {
macro_definition! {
("(", Pattern::Everything, ")", Pattern::Nothing)
matched_segments_into_multi_segment_app
}
}
fn macro_lambda<'s>() -> macros::Definition<'s> {
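// As described in the module docs, the lambda prefix pattern consumes only a single item when
// the token directly preceding the arrow is not spaced, and everything on the left otherwise.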
let prefix = Pattern::Or(
Box::new(Pattern::Item(macros::pattern::Item { has_rhs_spacing: Some(false) })),
Box::new(Pattern::Everything),
);
macro_definition! {
(prefix, "->", Pattern::Everything)
matched_segments_into_multi_segment_app
}
}
fn builtin_macros() -> MacroMatchTree<'static> {
let mut macro_map = MacroMatchTree::default();
macro_map.register(macro_if_then());
macro_map.register(macro_if_then_else());
macro_map.register(macro_group());
macro_map.register(macro_lambda());
macro_map
}
// ============
// === Main ===
// ============
// fn main() {
// lexer::lexer_main();
// }
fn main() {
init_tracing(TRACE);
// let str = "if a then b else c";
// let str = "if if * a + b * then y then b";
// let str = "* a + b *";
// let str = "* a + * b";
// let str = "(a) (b) c";
// let str = "if (a) then b";
// let str = "foo a-> b";
// let str = "a+b * c";
// let str = "foo if a then b";
// let str = "foo *(a)";
let str = "foo if a then b else c";
let mut lexer = Lexer::new(str);
lexer.run();
let root_macro_map = builtin_macros();
event!(TRACE, "Registered macros:\n{:#?}", root_macro_map);
let resolver = Resolver::new_root();
let ast = resolver.run(
&lexer,
&root_macro_map,
lexer.output.iter().map(|t| t.clone().into()).collect_vec(),
);
println!("{:#?}", ast);
println!("\n\n{}", ast.code());
println!("\n\n==================\n\n");
lexer::main();
}
//
//
//
// // =============
// // === Tests ===
// // =============
//
// #[cfg(test)]
// mod test {
// use super::*;
//
// pub fn ident(repr: &str) -> syntax::Tree {
// match token::Variant::to_ident_unchecked(repr) {
// token::Variant::Ident(ident) => span::With::new_no_left_offset_no_start(
// Bytes::from(repr.len()),
// syntax::tree::Type::from(syntax::tree::Ident(ident)),
// ),
// _ => panic!(),
// }
// }
//
// pub fn app_segment(
// header: Token,
// body: Option<syntax::Tree>,
// ) -> syntax::tree::MultiSegmentAppSegment {
// syntax::tree::MultiSegmentAppSegment { header, body }
// }
// }
//
//
//
// #[cfg(test)]
// mod tests {
// use super::*;
// use enso_parser_syntax_tree_builder::ast_builder;
//
// fn one_shot(input: &str) -> syntax::Tree {
// let mut lexer = Lexer::new(input);
// lexer.run();
// let root_macro_map = builtin_macros();
// let resolver = Resolver::new_root();
// let ast = resolver.run(
// &lexer,
// &root_macro_map,
// lexer.output.borrow_vec().iter().map(|t| (*t).into()).collect_vec(),
// );
// ast
// }
//
// macro_rules! test_parse {
// ($input:tt = {$($def:tt)*}) => {
// assert_eq!(
// one_shot($input).with_removed_span_info(),
// ast_builder! { $($def)* }.with_removed_span_info()
// )
// };
// }
//
// #[test]
// fn test_expressions() {
// test_parse!("if a then b" = { {if} a {then} b });
// test_parse!("if a then b else c" = { {if} a {then} b {else} c });
// test_parse!("if a b then c d else e f" = { {if} a b {then} c d {else} e f });
// }
// }


@ -1,9 +0,0 @@
//! The logic for working with operators in the Enso parser.
// ==============
// === Export ===
// ==============
pub mod associativity;
pub mod precedence;


@ -1 +0,0 @@
//! Associativity inference for Enso.


@ -1 +0,0 @@
//! Operator precedence levels.


@ -1,18 +0,0 @@
//! The driver for the Enso parser.
// ==============
// === Parser ===
// ==============
/// The Enso parser itself.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Parser;
impl Parser {
/// Constructor.
pub fn new() -> Self {
Self
}
}


@ -0,0 +1,25 @@
//! Enso language source code related utilities, including a structure attaching source code to
//! other types and an abstraction for getting the representation of an entity, such as
//! [`Token`] (tokens remember only their location; to get their representation, the source code
//! needs to be sampled).
// ==============
// === Export ===
// ==============
pub mod code;
pub mod span;
pub use code::Code;
pub use span::Offset;
pub use span::Span;
pub use span::VisibleOffset;
/// Popular traits.
pub mod traits {
pub use super::span::traits::*;
}
pub use traits::*;


@ -0,0 +1,66 @@
//! Source code abstraction.
use crate::prelude::*;
// ============
// === Code ===
// ============
/// A code representation. It can be either borrowed source code or modified owned code.
#[derive(Clone, Default, Eq, PartialEq, From, Into, Shrinkwrap)]
#[shrinkwrap(mutable)]
#[allow(missing_docs)]
pub struct Code<'s> {
pub repr: Cow<'s, str>,
}
impl<'s> Code<'s> {
/// Length of the code in bytes.
#[inline(always)]
pub fn len(&self) -> Bytes {
Bytes(self.repr.len())
}
}
impl<'a> From<&'a str> for Code<'a> {
#[inline(always)]
fn from(str: &'a str) -> Self {
let repr = str.into();
Self { repr }
}
}
impl<'s> Display for Code<'s> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Display::fmt(&self.repr, f)
}
}
impl<'s> Debug for Code<'s> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Debug::fmt(&self.repr, f)
}
}
impl<'a, 'b> PartialEq<&'b str> for Code<'a> {
#[inline(always)]
fn eq(&self, other: &&'b str) -> bool {
self.repr.eq(other)
}
}
impl AsRef<str> for Code<'_> {
#[inline(always)]
fn as_ref(&self) -> &str {
&self.repr
}
}
impl std::borrow::Borrow<str> for Code<'_> {
#[inline(always)]
fn borrow(&self) -> &str {
&self.repr
}
}


@ -0,0 +1,486 @@
//! Source code location. Every token and AST node use [`Offset`] to remember their location in
//! the source code.
use crate::prelude::*;
use crate::source::*;
use crate::syntax::*;
use crate::lexer;
/// Common traits.
pub mod traits {
pub use super::FirstChildTrim;
}
// =====================
// === VisibleOffset ===
// =====================
/// A strongly typed visible offset size. For example, a space character has a value of 1, while a
/// tab character has a value of 4. For other space-like character sizes, refer to the lexer
/// implementation.
#[derive(
Clone, Copy, Debug, Default, From, Into, Add, AddAssign, Sub, PartialEq, Eq, Hash, PartialOrd,
Ord
)]
#[allow(missing_docs)]
pub struct VisibleOffset {
pub width_in_spaces: usize,
}
/// Constructor.
#[allow(non_snake_case)]
pub const fn VisibleOffset(width_in_spaces: usize) -> VisibleOffset {
VisibleOffset { width_in_spaces }
}
impl Display for VisibleOffset {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Display::fmt(&self.width_in_spaces, f)
}
}
impl From<&str> for VisibleOffset {
fn from(code: &str) -> Self {
code.chars()
.map(|char| lexer::space_char_visible_size(char).unwrap_or(VisibleOffset(1)))
.fold(default(), Add::add)
}
}
// ==============
// === Offset ===
// ==============
/// Offset information. In most cases it is used to express the left-hand-side whitespace offset
/// for tokens and AST nodes.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[allow(missing_docs)]
pub struct Offset<'s> {
pub visible: VisibleOffset,
pub code: Code<'s>,
}
/// Constructor.
#[allow(non_snake_case)]
pub fn Offset<'s>(visible: VisibleOffset, code: impl Into<Code<'s>>) -> Offset<'s> {
let code = code.into();
Offset { visible, code }
}
impl<'s> Offset<'s> {
/// Length of the offset.
pub fn len(&self) -> Bytes {
self.code.len()
}
}
impl<'s> AsRef<Offset<'s>> for Offset<'s> {
fn as_ref(&self) -> &Offset<'s> {
self
}
}
impl<'s> From<&'s str> for Offset<'s> {
#[inline(always)]
fn from(code: &'s str) -> Self {
Offset(code.into(), code)
}
}
// ============
// === Span ===
// ============
/// A span of a given syntactic element (token or AST). It contains the left offset code and the
/// information about the length of the element. It does not contain the code of the element. This
/// is done in order to not duplicate the data. For example, some AST nodes contain a lot of tokens.
/// They need to remember their span, but they do not need to remember their code, because it is
/// already stored in the tokens.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Span<'s> {
pub left_offset: Offset<'s>,
/// The length of the code, excluding [`left_offset`].
pub code_length: Bytes,
}
impl<'s> Span<'s> {
/// Extend the span with another one. The other span has to be the immediate neighbor of the
/// current span.
#[inline(always)]
pub fn extend<'a, T>(&mut self, other: T)
where
T: Into<Ref<'s, 'a>>,
's: 'a, {
let other = other.into();
self.code_length += other.left_offset.len() + other.code_length;
}
/// Self consuming version of [`extend`].
pub fn extended<'a, T>(mut self, other: T) -> Self
where
T: Into<Ref<'s, 'a>>,
's: 'a, {
self.extend(other);
self
}
/// Get the [`Ref`] of the current span.
pub fn as_ref(&self) -> Ref<'_, 's> {
Ref { left_offset: &self.left_offset, code_length: self.code_length }
}
}
impl<'s> AsRef<Span<'s>> for Span<'s> {
fn as_ref(&self) -> &Span<'s> {
self
}
}
// ===========
// === Ref ===
// ===========
/// A borrowed version of [`Span`]. Used mostly by AST visitors.
///
/// One may wonder why this struct is needed, as it looks like we could use [`&Span<'s>`]
/// instead. The problem is that some structs, such as [`Token`], do not contain a [`Span<'s>`],
/// but they contain information from which a [`Ref`] can be constructed.
#[derive(Debug, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Ref<'s, 'a> {
pub left_offset: &'a Offset<'s>,
/// The length of the code, excluding [`left_offset`].
pub code_length: Bytes,
}
impl<'s, 'a> From<&'a Span<'s>> for Ref<'s, 'a> {
#[inline(always)]
fn from(span: &'a Span<'s>) -> Self {
let left_offset = &span.left_offset;
let code_length = span.code_length;
Self { left_offset, code_length }
}
}
// ==============
// === RefMut ===
// ==============
/// A mutably borrowed version of [`Span`]. Used mostly by AST visitors.
///
/// Please note that the [`code_length`] field does not provide mutable access. Each AST node can
/// contain other AST nodes and tokens. The span of an AST node is computed based on the spans of
/// the tokens it contains. Thus, you should never modify the [`code_length`] property; modify the
/// AST structure instead, and this field will be recomputed automatically.
#[derive(Debug, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct RefMut<'s, 'a> {
pub left_offset: &'a mut Offset<'s>,
/// The length of the code, excluding [`left_offset`].
pub code_length: Bytes,
}
// ======================
// === FirstChildTrim ===
// ======================
/// Trim the left offset and return a new [`Span`] containing the trimmed offset and the length of
/// the code.
///
/// It is used to prepare this element for insertion into a parent AST node. Left offsets are kept
/// in a hierarchical way in the AST. For example, the expression ` a b` will be represented as two
/// tokens `a` and `b`, each having a left offset of 1. However, after constructing the [`App`] AST
/// node, the left offset of the `a` token will be removed and moved to the AST node instead. This
/// function is responsible for exactly this operation.
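///
/// A rough illustration (offsets shown as a number of spaces):
///
/// ```text
/// " a b"  =>  App node: span.left_offset = 1, func = "a" (offset trimmed to 0), arg = "b" (offset 1)
/// ```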
#[allow(missing_docs)]
pub trait FirstChildTrim<'s> {
fn trim_as_first_child(&mut self) -> Span<'s>;
}
impl<'s> FirstChildTrim<'s> for Span<'s> {
#[inline(always)]
fn trim_as_first_child(&mut self) -> Span<'s> {
let left_offset = mem::take(&mut self.left_offset);
let code_length = self.code_length;
Span { left_offset, code_length }
}
}
// ===============
// === Builder ===
// ===============
/// A span builder. You can provide it with any elements that contain spans, and it will compute
/// the total span of the provided elements.
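///
/// For example, the generated [`Tree`] constructors use it like this (illustrative):
///
/// ```text
/// let span = span_builder![func, arg];
/// Tree(span, App(func, arg))
/// ```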
#[macro_export]
macro_rules! span_builder {
($($arg:ident),* $(,)?) => {
$crate::source::span::Builder::new() $(.add(&mut $arg))* .span
};
}
/// A marker struct for span building. The [`T`] parameter can be one of:
/// - [`()`], which means that the structure was not used yet.
/// - [`Option<Span<'s>>`], which means that the struct was used to build the span; however, it is
/// not guaranteed that the span is known in all cases.
/// - [`Span<'s>`], which means that the total span can always be computed for the provided
/// parameters.
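///
/// An illustrative builder chain (mirroring the [`span_builder`] macro above):
///
/// ```text
/// Builder::new().add(&mut header).add(&mut body).span
/// ```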
#[derive(Default, Debug)]
#[allow(missing_docs)]
pub struct Builder<T = ()> {
pub span: T,
}
/// Constructor.
#[allow(non_snake_case)]
pub fn Builder<T>(span: T) -> Builder<T> {
Builder { span }
}
impl Builder<()> {
/// Constructor.
pub fn new() -> Self {
default()
}
}
impl<T> Builder<T> {
/// Add a new span to the builder.
#[inline(always)]
#[allow(clippy::should_implement_trait)]
pub fn add<S>(self, elem: &mut S) -> Builder<S::Output>
where S: Build<T> {
Builder(elem.build(self))
}
}
/// A trait defining the behavior of [`Builder`] for different types containing spans.
///
/// The trait definition is a little strange, consuming the builder as a parameter instead of
/// consuming it as `self`. This is done because otherwise the Rust type checker goes into an
/// infinite loop.
#[allow(missing_docs)]
pub trait Build<T> {
type Output;
fn build(&mut self, builder: Builder<T>) -> Self::Output;
}
// === Instances ===
impl<'s> Build<()> for Span<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
self.trim_as_first_child()
}
}
impl<'s> Build<Span<'s>> for Span<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
builder.span.extended(&*self)
}
}
impl<'s> Build<Option<Span<'s>>> for Span<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
match builder.span {
Some(span) => span.extended(&*self),
None => self.trim_as_first_child(),
}
}
}
impl<'s> Build<()> for Tree<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<()>) -> Self::Output {
Build::build(&mut self.span, builder)
}
}
impl<'s> Build<Span<'s>> for Tree<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
builder.span.extended(&self.span)
}
}
impl<'s> Build<Option<Span<'s>>> for Tree<'s> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
Build::build(&mut self.span, builder)
}
}
impl<'s, T> Build<()> for Token<'s, T> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
self.trim_as_first_child()
}
}
impl<'s, T> Build<Span<'s>> for Token<'s, T> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
builder.span.extended(self.span())
}
}
impl<'s, T> Build<Option<Span<'s>>> for Token<'s, T> {
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
match builder.span {
Some(span) => span.extended(self.span()),
None => self.trim_as_first_child(),
}
}
}
impl<T> Build<()> for Option<T>
where T: Build<()>
{
type Output = Option<<T as Build<()>>::Output>;
#[inline(always)]
fn build(&mut self, builder: Builder<()>) -> Self::Output {
self.as_mut().map(|t| Build::build(t, builder))
}
}
impl<'s, T> Build<Option<Span<'s>>> for Option<T>
where T: Build<Option<Span<'s>>>
{
type Output = Option<<T as Build<Option<Span<'s>>>>::Output>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
self.as_mut().map(|t| Build::build(t, builder))
}
}
impl<'s, T> Build<Span<'s>> for Option<T>
where T: Build<Span<'s>, Output = Span<'s>>
{
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
match self.as_mut() {
None => builder.span,
Some(t) => Build::build(t, builder),
}
}
}
impl<S, T, E> Build<S> for Result<T, E>
where
T: Build<S>,
E: Build<S, Output = <T as Build<S>>::Output>,
{
type Output = <T as Build<S>>::Output;
#[inline(always)]
fn build(&mut self, builder: Builder<S>) -> Self::Output {
match self {
Ok(t) => Build::build(t, builder),
Err(t) => Build::build(t, builder),
}
}
}
impl<S, T> Build<S> for NonEmptyVec<T>
where
T: Build<S>,
[T]: Build<<T as Build<S>>::Output>,
{
type Output = <[T] as Build<T::Output>>::Output;
#[inline(always)]
fn build(&mut self, builder: Builder<S>) -> Self::Output {
let b = Build::build(self.first_mut(), builder);
Build::build(self.tail_mut(), Builder(b))
}
}
impl<'s, T> Build<Span<'s>> for Vec<T>
where T: Build<Span<'s>, Output = Span<'s>>
{
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out))
}
out
}
}
impl<'s, T> Build<Option<Span<'s>>> for Vec<T>
where
T: Build<Option<Span<'s>>>,
T::Output: Into<Option<Span<'s>>>,
{
type Output = Option<Span<'s>>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out)).into();
}
out
}
}
impl<'s, T> Build<Span<'s>> for [T]
where T: Build<Span<'s>, Output = Span<'s>>
{
type Output = Span<'s>;
#[inline(always)]
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out));
}
out
}
}
impl<'s, T> Build<Option<Span<'s>>> for [T]
where
T: Build<Option<Span<'s>>>,
T::Output: Into<Option<Span<'s>>>,
{
type Output = Option<Span<'s>>;
#[inline(always)]
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
let mut out = builder.span;
for elem in self {
out = Build::build(elem, Builder(out)).into();
}
out
}
}


@ -0,0 +1,15 @@
//! Syntactic structures, including [`Token`] and [`Tree`], also known as the Abstract Syntax
//! Tree, or AST.
// ==============
// === Export ===
// ==============
pub mod item;
pub mod token;
pub mod tree;
pub use item::Item;
pub use token::Token;
pub use tree::Tree;


@ -0,0 +1,83 @@
//! Syntactic structures, including [`Token`] and [`Tree`], also known as the Abstract Syntax
//! Tree, or AST.
use crate::prelude::*;
use crate::source::*;
use crate::syntax::*;
// ============
// === Item ===
// ============
/// Abstraction over [`Token`] and [`Tree`]. Some functions, such as the macro resolver, need to
/// distinguish between the two cases and handle both incoming tokens and already constructed
/// [`Tree`] nodes. This structure provides handy utilities for working with such cases.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub enum Item<'s> {
Token(Token<'s>),
Tree(Tree<'s>),
}
impl<'s> Item<'s> {
/// Check whether the element is the provided token variant. Returns [`false`] if it is a
/// [`Tree`] node.
pub fn is_variant(&self, variant: token::variant::VariantMarker) -> bool {
match self {
Item::Token(token) => token.is(variant),
_ => false,
}
}
/// The span of the element.
pub fn span(&self) -> span::Ref<'_, 's> {
match self {
Self::Token(t) => t.span(),
Self::Tree(t) => t.span.as_ref(),
}
}
}
impl<'s> FirstChildTrim<'s> for Item<'s> {
#[inline(always)]
fn trim_as_first_child(&mut self) -> Span<'s> {
match self {
Self::Token(t) => t.trim_as_first_child(),
Self::Tree(t) => t.span.trim_as_first_child(),
}
}
}
impl<'s> From<Token<'s>> for Item<'s> {
fn from(t: Token<'s>) -> Self {
Item::Token(t)
}
}
impl<'s> From<Tree<'s>> for Item<'s> {
fn from(t: Tree<'s>) -> Self {
Item::Tree(t)
}
}
impl<'s> TryAsRef<Item<'s>> for Item<'s> {
fn try_as_ref(&self) -> Option<&Item<'s>> {
Some(self)
}
}
// ===========
// === Ref ===
// ===========
/// A borrowed version of [`Item`]. Used mostly by AST visitors.
#[derive(Clone, Copy, Debug)]
#[allow(missing_docs)]
pub enum Ref<'s, 'a> {
Token(token::Ref<'s, 'a>),
Tree(&'a Tree<'s>),
}


@ -0,0 +1,323 @@
//! A lexical token is a string with an assigned and thus identified meaning. Each token remembers
//! its source code and can be printed back. It also contains information about the offset to the
//! previous token if any.
//!
//! The [`Token`] structure has a signature of [`Token<'s, T>`], where [`T`] is the variant type.
//!
//!
//! # Variants
//! Each token contains a variant, a structure defining the meaning of the token. All variants are
//! defined in the [`variant`] module. Every variant is associated with a constructor of the same
//! name (tuple-struct like). For example, the [`variant::Ident`] is defined as:
//!
//! ```text
//! pub mod variant {
//! pub struct Ident {
//! pub is_free: bool,
//! pub lift_level: usize
//! }
//! pub fn Ident(is_free: bool, lift_level: usize) -> Ident { ... }
//! // ... many more variants
//! }
//! ```
//!
//!
//! # Variants as tokens
//! The [`Token`] structure can be parametrized with a variant type to form a token variant. This
//! module defines type aliases for every such combination. For example, the [`Ident`] token
//! variant is defined as:
//!
//! ```text
//! pub type Ident<'s> = Token<'s, variant::Ident>;
//! ```
//!
//! There is a [`From`] conversion defined from any [`Token<'s, T>`] to [`Token<'s>`], for [`T`]
//! being one of the variant structs. Moreover, every such type is accompanied by two constructor utils,
//! one creating a token variant and one creating a generic token instance. For example, the
//! [`Ident`] token variant constructors are defined as:
//!
//! ```text
//! pub fn ident <'s> (is_free: bool, lift_level: usize) -> Ident<'s> { ... }
//! pub fn ident_ <'s> (is_free: bool, lift_level: usize) -> Token<'s> { ... }
//! ```
//!
//!
//! # The [`Variant`] type.
//! There are many variants of tokens. Some places in the code need to distinguish between them,
//! while others need to store several variants in the same collection. The [`Variant`] enum
//! generalizes the variant types:
//!
//! ```text
//! pub enum Variant {
//! Newline (variant::Newline),
//! Symbol (variant::Symbol),
//! Wildcard (variant::Wildcard),
//! Ident (variant::Ident),
//! // ... many more
//! }
//! ```
//!
//! There is a [`From`] conversion defined between each variant and the [`Variant`] struct.
//! Moreover, the [`Variant`] struct defines a constructor function for each of its variants. For
//! example, the identifier variant constructor is defined as:
//!
//! ```text
//! impl Variant {
//! pub fn ident(is_free: bool, lift_level: usize) -> Self {
//! Self::Ident(variant::Ident(is_free, lift_level))
//! }
//! }
//! ```
//!
//! # Generic token type
//! The [`Token`] structure has a default parametrization of [`Token<'s, Variant>`], which is a
//! token that can contain any of the defined variants.
//!
//!
//!
//! # Variant markers
//! There is also a special [`VariantMarker`] enum, which can be used to mark which token variant
//! is used without keeping any of the variant data. It is defined as:
//!
//! ```text
//! pub enum VariantMarker {
//! Newline,
//! Symbol,
//! Wildcard,
//! Ident,
//! // ... many more
//! }
//! ```
//!
//! See the definitions and macros below to learn more.
use crate::prelude::*;
use crate::source::*;
use enso_shapely_macros::tagged_enum;
// =============
// === Token ===
// =============
/// The lexical token definition. See the module docs to learn more about its usage scenarios.
#[derive(Clone, Deref, DerefMut, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Token<'s, T = Variant> {
#[deref]
#[deref_mut]
pub variant: T,
pub left_offset: Offset<'s>,
pub code: Code<'s>,
}
/// Constructor.
#[inline(always)]
#[allow(non_snake_case)]
pub fn Token<'s, T>(
left_offset: impl Into<Offset<'s>>,
code: impl Into<Code<'s>>,
variant: T,
) -> Token<'s, T> {
let left_offset = left_offset.into();
let code = code.into();
Token { variant, left_offset, code }
}
impl<'s, T> Token<'s, T> {
/// Split the token at the provided byte offset. The offset is counted from the [`code`] start
/// position, which does not include the [`left_offset`]. It means that `split_at(Bytes(0))`
/// will split the token into left offset only and a left-trimmed token.
#[inline(always)]
pub fn split_at(self, offset: Bytes) -> (Token<'s, ()>, Token<'s, ()>, T) {
let left_lexeme_offset = self.left_offset;
let right_lexeme_offset = Offset::default();
let left = Token(left_lexeme_offset, self.code.slice(Bytes(0)..offset), ());
let right = Token(right_lexeme_offset, self.code.slice(offset..), ());
(left, right, self.variant)
}
/// A version of [`split_at`] that discards the associated variant.
#[inline(always)]
pub fn split_at_(self, offset: Bytes) -> (Token<'s, ()>, Token<'s, ()>) {
let (left, right, _) = self.split_at(offset);
(left, right)
}
/// Modify the associated variant of this token with the provided function.
#[inline(always)]
pub fn map_variant<S>(self, f: impl FnOnce(T) -> S) -> Token<'s, S> {
Token(self.left_offset, self.code, f(self.variant))
}
/// Replace the associated variant in this token.
#[inline(always)]
pub fn with_variant<S>(self, data: S) -> Token<'s, S> {
self.map_variant(|_| data)
}
/// Span of this token.
pub fn span<'a>(&'a self) -> span::Ref<'s, 'a> {
let code_length = self.code.len();
span::Ref { left_offset: &self.left_offset, code_length }
}
}
impl<'s, T: Debug> Debug for Token<'s, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "[{}:\"{}\"] ", self.left_offset.visible, self.code)?;
Debug::fmt(&self.variant, f)
}
}
impl<'s, T: PartialEq> PartialEq<Token<'s, T>> for &Token<'s, T> {
fn eq(&self, other: &Token<'s, T>) -> bool {
<Token<'s, T> as PartialEq<Token<'s, T>>>::eq(*self, other)
}
}
impl<'s, T> FirstChildTrim<'s> for Token<'s, T> {
#[inline(always)]
fn trim_as_first_child(&mut self) -> Span<'s> {
let left_offset = mem::take(&mut self.left_offset);
let code_length = self.code.len();
Span { left_offset, code_length }
}
}
// ===========
// === Ref ===
// ===========
/// A reference to a [`Token`]. It is used mostly by AST visitors.
///
/// One may wonder why we don't just use [`&Token<'s, T>`] instead. The reason is that sometimes
/// AST nodes contain [`Token<'s, T>`] for a specific [`T`], and we want to traverse them for any
/// possible variant, converting [`T`] to [`token::Variant`] first. However, we do not want to
/// clone the code during such an operation. This struct allows viewing any [`Token<'s, T>`] as
/// [`Ref<'s, '_, token::Variant>`].
#[derive(Clone, Copy, Deref, DerefMut, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Ref<'s, 'a, T = Variant> {
#[deref]
#[deref_mut]
pub data: T,
pub left_offset: &'a Offset<'s>,
pub code: &'a Code<'s>,
}
impl<'s, 'a, T, S> From<&'a Token<'s, T>> for Ref<'s, 'a, S>
where T: Copy + Into<S>
{
fn from(token: &'a Token<'s, T>) -> Self {
Ref {
data: token.variant.into(),
left_offset: &token.left_offset,
code: &token.code,
}
}
}
impl<'s, 'a, T: Debug> Debug for Ref<'s, 'a, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "[off: {}, repr: \"{}\"] ", self.left_offset.visible, self.code)?;
Debug::fmt(&self.data, f)
}
}
// ===============
// === Variant ===
// ===============
/// Macro providing the [`Token`] type definition. It is used both to define the token [`Variant`]
/// and to define impls for every token type in other modules.
#[macro_export]
macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)*
/// Elements that can be found in the source code.
#[tagged_enum]
#[derive(Clone, Copy, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum Variant {
Newline,
Symbol,
BlockStart,
BlockEnd,
Wildcard {
pub lift_level: usize
},
Ident {
pub is_free: bool,
pub lift_level: usize
},
Operator,
Modifier,
Comment,
DocComment,
Number,
TextStart,
TextEnd,
TextSection,
TextEscape,
}
}}}
macro_rules! generate_token_aliases {
(
$(#$enum_meta:tt)*
pub enum $enum:ident {
$(
$(#$variant_meta:tt)*
$variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })?
),* $(,)?
}
) => { paste!{
$(
/// Token variant alias.
pub type $variant<'s> = Token<'s, variant::$variant>;
/// Constructor.
pub fn [<$variant:snake:lower>]<'s> (
left_offset: impl Into<Offset<'s>>,
code: impl Into<Code<'s>>,
$($($field : $field_ty),*)?
) -> $variant<'s> {
Token(left_offset, code, variant::$variant($($($field),*)?))
}
/// Constructor.
pub fn [<$variant:snake:lower _>]<'s> (
left_offset: impl Into<Offset<'s>>,
code: impl Into<Code<'s>>,
$($($field : $field_ty),*)?
) -> Token<'s> {
Token(left_offset, code, variant::$variant($($($field),*)?)).into()
}
impl<'s> From<Token<'s, variant::$variant>> for Token<'s, Variant> {
fn from(token: Token<'s, variant::$variant>) -> Self {
token.map_variant(|t| t.into())
}
}
)*
}};
}
macro_rules! define_token_type {
($($ts:tt)*) => {
/// All token variants.
pub mod variant {
use super::*;
$($ts)*
}
generate_token_aliases! { $($ts)* }
};
}
with_token_definition!(define_token_type());
pub use variant::Variant;


@ -0,0 +1,574 @@
//! Implementation of the Syntax Tree, also known as the Abstract Syntax Tree, or AST.
use crate::prelude::*;
use crate::source::*;
use crate::syntax::*;
use crate::span_builder;
use enso_parser_syntax_tree_visitor::Visitor;
use enso_shapely_macros::tagged_enum;
// ============
// === Tree ===
// ============
/// The Abstract Syntax Tree of the language.
#[derive(Clone, Deref, DerefMut, Eq, PartialEq)]
#[allow(missing_docs)]
pub struct Tree<'s> {
#[deref]
#[deref_mut]
pub variant: Box<Variant<'s>>,
pub span: Span<'s>,
}
/// Constructor.
#[allow(non_snake_case)]
pub fn Tree<'s>(span: Span<'s>, variant: impl Into<Variant<'s>>) -> Tree<'s> {
let variant = Box::new(variant.into());
Tree { variant, span }
}
impl<'s> Debug for Tree<'s> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let max_code_len = 30;
let ellipsis = "...";
let mut code = self.code();
if code.len() > max_code_len {
code = format!("{}{}", &code[..max_code_len - ellipsis.len()], ellipsis);
}
write!(f, "[{}:{}:\"{}\"] ", self.span.left_offset.visible, self.span.code_length, code)?;
Debug::fmt(&self.variant, f)
}
}
impl<'s> AsRef<Span<'s>> for Tree<'s> {
fn as_ref(&self) -> &Span<'s> {
&self.span
}
}
/// Macro providing the [`Tree`] type definition. It is used both to define the AST [`Variant`] and
/// to define impls for every AST node type in other modules.
#[macro_export]
macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)*
/// [`Tree`] variants definition. See its docs to learn more.
#[tagged_enum]
#[derive(Clone, Eq, PartialEq, Visitor)]
pub enum Variant<'s> {
/// Invalid [`Tree`] fragment with an attached [`Error`].
Invalid {
pub error: Error,
pub ast: Tree<'s>,
},
/// A simple identifier, like `foo` or `bar`.
Ident {
pub token: token::Ident<'s>,
},
/// A simple application, like `print "hello"`.
App {
pub func: Tree<'s>,
pub arg: Tree<'s>,
},
/// Application of an operator, like `a + b`. The left or right operands might be missing,
/// thus creating an operator section like `a +`, `+ b`, or simply `+`. See the
/// [`OprSectionBoundary`] variant to learn more about operator section scope.
OprApp {
pub lhs: Option<Tree<'s>>,
pub opr: OperatorOrError<'s>,
pub rhs: Option<Tree<'s>>,
},
/// Defines the point where operator sections should be expanded to lambdas. Let's consider
/// the expression `map (.sum 1)`. It should be desugared to `map (x -> x.sum 1)`, not to
/// `map ((x -> x.sum) 1)`. The expression `.sum` will be parsed as operator section
/// ([`OprApp`] with left operand missing), and the [`OprSectionBoundary`] will be placed
/// around the whole `.sum 1` expression.
OprSectionBoundary {
pub ast: Tree<'s>,
},
/// An application of a multi-segment function, such as `if ... then ... else ...`. Each
/// segment starts with a token and contains an expression. Some multi-segment functions can
/// have a prefix: an expression that is an argument of the function but is placed before the
/// first token. Lambda is a good example of that. In the expression
/// `Vector x y z -> x + y + z`, the `->` token is the beginning of the segment,
/// `x + y + z` is the segment body, and `Vector x y z` is the prefix of this function
/// application.
MultiSegmentApp {
pub prefix: Option<Tree<'s>>,
pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
}
}
}};}
macro_rules! generate_variant_constructors {
(
$(#$enum_meta:tt)*
pub enum $enum:ident<'s> {
$(
$(#$variant_meta:tt)*
$variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })?
),* $(,)?
}
) => { paste! {
impl<'s> Tree<'s> {
$(
/// Constructor.
pub fn [<$variant:snake:lower>]($($(mut $field : $field_ty),*)?) -> Self {
let span = span_builder![$($($field),*)?];
Tree(span, $variant($($($field),*)?))
}
)*
}
}};
}
macro_rules! generate_ast_definition {
($($ts:tt)*) => {
$($ts)*
generate_variant_constructors!{$($ts)*}
};
}
with_ast_definition!(generate_ast_definition());
// === Invalid ===
/// A parsing error attached to a [`Tree`] node.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor)]
#[allow(missing_docs)]
pub struct Error {
pub message: &'static str,
}
impl Error {
/// Constructor.
pub fn new(message: &'static str) -> Self {
Self { message }
}
}
impl<'s> Tree<'s> {
/// Constructor.
pub fn with_error(self, message: &'static str) -> Self {
Tree::invalid(Error::new(message), self)
}
}
impl<S> span::Build<S> for Error {
type Output = S;
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
builder.span
}
}
// === OprApp ===
/// Operator or [`MultipleOperatorError`].
pub type OperatorOrError<'s> = Result<token::Operator<'s>, MultipleOperatorError<'s>>;
/// Error indicating multiple operators found next to each other, like `a + * b`.
#[derive(Clone, Debug, Eq, PartialEq, Visitor)]
#[allow(missing_docs)]
pub struct MultipleOperatorError<'s> {
pub operators: NonEmptyVec<token::Operator<'s>>,
}
impl<'s, S> span::Build<S> for MultipleOperatorError<'s>
where NonEmptyVec<token::Operator<'s>>: span::Build<S>
{
type Output = <NonEmptyVec<token::Operator<'s>> as span::Build<S>>::Output;
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
self.operators.build(builder)
}
}
// === MultiSegmentApp ===
/// A segment of [`MultiSegmentApp`], like `if cond` in the `if cond then ok else fail` expression.
#[derive(Clone, Debug, Eq, PartialEq, Visitor)]
#[allow(missing_docs)]
pub struct MultiSegmentAppSegment<'s> {
pub header: Token<'s>,
pub body: Option<Tree<'s>>,
}
impl<'s, S> span::Build<S> for MultiSegmentAppSegment<'s>
where Token<'s>: span::Build<S, Output = Span<'s>>
{
type Output = Span<'s>;
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
builder.add(&mut self.header).add(&mut self.body).span
}
}
// ================
// === Visitors ===
// ================
/// The visitor pattern for [`AST`].
///
/// # Visitor traits
/// There are several visitor traits defined allowing for traversal of specific AST elements, such
/// as AST nodes ([`TreeVisitor`]), span information ([`SpanVisitor`]), and AST nodes or tokens
/// altogether ([`ItemVisitor`]). A visitor is a struct that is modified when traversing the target
/// elements. Visitors are also capable of tracking when they entered or exited a nested
/// [`Tree`] structure, and they can control how deep the traversal should be performed. To learn
/// more, see the [`RefCollectorVisitor`] implementation, which traverses [`Tree`] and collects
/// references to all [`Tree`] nodes in a vector.
///
/// # Visitable traits
/// The [`define_visitor`] macro below also defines visitable traits, such as [`TreeVisitable`] or [`SpanVisitable`], which
/// provide [`Tree`] elements with such functions as [`visit`], [`visit_mut`], [`visit_span`], or
/// [`visit_span_mut`]. These functions let you run visitors. However, as defining a visitor is
/// relatively complex, a set of traversal functions are provided, such as [`map`], [`map_mut`],
/// [`map_span`], or [`map_span_mut`].
///
/// # Generalization of the implementation
/// The current implementation is based on a few non-generic traits. One might define a much better
/// implementation (causing much less boilerplate), such as:
/// ```text
/// pub trait Visitor<T> {
/// fn visit(&mut self, elem: &T);
/// }
/// ```
/// Such a definition could be implemented for every [`Tree`] node (the [`T`] parameter).
/// Unfortunately, due to a Rust compiler issue, such a definition does not currently compile. We
/// could move to it as soon as this issue gets resolved:
/// https://github.com/rust-lang/rust/issues/96634.
#[allow(missing_docs)]
pub trait Visitor {
fn before_visiting_children(&mut self) {}
fn after_visiting_children(&mut self) {}
}
/// The visitor trait allowing for [`Tree`] nodes traversal.
#[allow(missing_docs)]
pub trait TreeVisitor<'s, 'a>: Visitor {
fn visit(&mut self, ast: &'a Tree<'s>) -> bool;
}
/// The visitor trait allowing for [`Tree`] nodes mutable traversal.
#[allow(missing_docs)]
pub trait TreeVisitorMut<'s>: Visitor {
fn visit_mut(&mut self, ast: &mut Tree<'s>) -> bool;
}
/// The visitor trait allowing for [`Span`] traversal.
#[allow(missing_docs)]
pub trait SpanVisitor<'s, 'a>: Visitor {
fn visit(&mut self, ast: span::Ref<'s, 'a>) -> bool;
}
/// The visitor trait allowing for [`Span`] mutable traversal.
#[allow(missing_docs)]
pub trait SpanVisitorMut<'s>: Visitor {
fn visit_mut(&mut self, ast: span::RefMut<'s, '_>) -> bool;
}
/// The visitor trait allowing for [`Item`] traversal.
#[allow(missing_docs)]
pub trait ItemVisitor<'s, 'a>: Visitor {
fn visit_item(&mut self, ast: item::Ref<'s, 'a>) -> bool;
}
macro_rules! define_visitor {
($name:ident, $visit:ident) => {
define_visitor_no_mut! {$name, $visit}
define_visitor_mut! {$name, $visit}
};
}
macro_rules! define_visitor_no_mut {
($name:ident, $visit:ident) => {
paste! {
define_visitor_internal! {
$name,
$visit,
[[<$name Visitor>]<'s, 'a>],
[<$name Visitable>],
}
}
};
}
macro_rules! define_visitor_mut {
($name:ident, $visit:ident) => {
paste! {
define_visitor_internal! {
[_mut mut]
$name,
[<$visit _mut>],
[[<$name VisitorMut>]<'s>],
[<$name VisitableMut>],
}
}
};
}
macro_rules! define_visitor_internal {
(
$([$pfx_mod:ident $mod:ident])?
$name:ident,
$visit:ident,
[$($visitor:tt)*],
$visitable:ident,
) => { paste! {
/// The visitable trait. See documentation of [`define_visitor`] to learn more.
#[allow(missing_docs)]
pub trait $visitable<'s, 'a> {
fn $visit<V: $($visitor)*>(&'a $($mod)? self, _visitor: &mut V) {}
}
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for Box<T> {
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
$visitable::$visit(& $($mod)? **self, visitor)
}
}
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for Option<T> {
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
if let Some(elem) = self {
$visitable::$visit(elem, visitor)
}
}
}
impl<'s, 'a, T: $visitable<'s, 'a>, E: $visitable<'s, 'a>> $visitable<'s, 'a>
for Result<T, E>
{
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
match self {
Ok(elem) => $visitable::$visit(elem, visitor),
Err(elem) => $visitable::$visit(elem, visitor),
}
}
}
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for Vec<T> {
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
self.[<iter $($pfx_mod)?>]().map(|t| $visitable::$visit(t, visitor)).for_each(drop);
}
}
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for NonEmptyVec<T> {
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
self.[<iter $($pfx_mod)?>]().map(|t| $visitable::$visit(t, visitor)).for_each(drop);
}
}
impl<'s, 'a> $visitable<'s, 'a> for &str {}
impl<'s, 'a> $visitable<'s, 'a> for str {}
}};
}
macro_rules! define_visitor_for_tokens {
(
$(#$kind_meta:tt)*
pub enum $kind:ident {
$( $variant:ident $({$($args:tt)*})? ),* $(,)?
}
) => {
impl<'s, 'a> TreeVisitable<'s, 'a> for token::$kind {}
impl<'s, 'a> TreeVisitableMut<'s, 'a> for token::$kind {}
};
}
define_visitor!(Tree, visit);
define_visitor!(Span, visit_span);
define_visitor_no_mut!(Item, visit_item);
crate::with_token_definition!(define_visitor_for_tokens());
// === TreeVisitable special cases ===
impl<'s, 'a> TreeVisitable<'s, 'a> for Tree<'s> {
fn visit<V: TreeVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
if visitor.visit(self) {
self.variant.visit(visitor)
}
}
}
impl<'s, 'a> TreeVisitableMut<'s, 'a> for Tree<'s> {
fn visit_mut<V: TreeVisitorMut<'s>>(&'a mut self, visitor: &mut V) {
if visitor.visit_mut(self) {
self.variant.visit_mut(visitor)
}
}
}
impl<'s, 'a, T> TreeVisitable<'s, 'a> for Token<'s, T> {}
impl<'s, 'a, T> TreeVisitableMut<'s, 'a> for Token<'s, T> {}
// === SpanVisitable special cases ===
impl<'s, 'a> SpanVisitable<'s, 'a> for Tree<'s> {
fn visit_span<V: SpanVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
if visitor.visit(span::Ref {
left_offset: &self.span.left_offset,
code_length: self.span.code_length,
}) {
self.variant.visit_span(visitor)
}
}
}
impl<'s, 'a> SpanVisitableMut<'s, 'a> for Tree<'s> {
fn visit_span_mut<V: SpanVisitorMut<'s>>(&'a mut self, visitor: &mut V) {
if visitor.visit_mut(span::RefMut {
left_offset: &mut self.span.left_offset,
code_length: self.span.code_length,
}) {
self.variant.visit_span_mut(visitor)
}
}
}
impl<'a, 't, 's, T> SpanVisitable<'s, 'a> for Token<'s, T> {
fn visit_span<V: SpanVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
let code_length = self.code.len();
visitor.visit(span::Ref { left_offset: &self.left_offset, code_length });
}
}
impl<'a, 't, 's, T> SpanVisitableMut<'s, 'a> for Token<'s, T> {
fn visit_span_mut<V: SpanVisitorMut<'s>>(&'a mut self, visitor: &mut V) {
let code_length = self.code.len();
visitor.visit_mut(span::RefMut { left_offset: &mut self.left_offset, code_length });
}
}
// === ItemVisitable special cases ===
impl<'s, 'a> ItemVisitable<'s, 'a> for Tree<'s> {
fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
if visitor.visit_item(item::Ref::Tree(self)) {
self.variant.visit_item(visitor)
}
}
}
impl<'s: 'a, 'a, T: 'a> ItemVisitable<'s, 'a> for Token<'s, T>
where &'a Token<'s, T>: Into<token::Ref<'s, 'a>>
{
fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
visitor.visit_item(item::Ref::Token(self.into()));
}
}
// ==========================
// === CodePrinterVisitor ===
// ==========================
/// A visitor collecting the code representation of AST nodes.
#[derive(Debug, Default)]
#[allow(missing_docs)]
struct CodePrinterVisitor {
pub code: String,
}
impl Visitor for CodePrinterVisitor {}
impl<'s, 'a> ItemVisitor<'s, 'a> for CodePrinterVisitor {
fn visit_item(&mut self, item: item::Ref<'s, 'a>) -> bool {
match item {
item::Ref::Tree(tree) => self.code.push_str(&tree.span.left_offset.code),
item::Ref::Token(token) => {
self.code.push_str(&token.left_offset.code);
self.code.push_str(token.code);
}
}
true
}
}
impl<'s> Tree<'s> {
/// Generate the code representation of this AST.
pub fn code(&self) -> String {
let mut visitor = CodePrinterVisitor::default();
self.visit_item(&mut visitor);
visitor.code
}
}
// ===========================
// === RefCollectorVisitor ===
// ===========================
/// A visitor collecting references to all [`Tree`] nodes.
#[derive(Debug, Default)]
#[allow(missing_docs)]
struct RefCollectorVisitor<'s, 'a> {
pub vec: Vec<&'a Tree<'s>>,
}
impl<'s, 'a> Visitor for RefCollectorVisitor<'s, 'a> {}
impl<'s, 'a> TreeVisitor<'s, 'a> for RefCollectorVisitor<'s, 'a> {
fn visit(&mut self, ast: &'a Tree<'s>) -> bool {
self.vec.push(ast);
true
}
}
impl<'s> Tree<'s> {
/// Collect references to all [`Tree`] nodes and return them in a vector.
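///
/// A minimal usage sketch (assuming `tree` is a parsed [`Tree`]):
///
/// ```text
/// let nodes = tree.collect_vec_ref();
/// assert!(!nodes.is_empty());
/// ```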
pub fn collect_vec_ref(&self) -> Vec<&Tree<'s>> {
let mut visitor = RefCollectorVisitor::default();
self.visit(&mut visitor);
visitor.vec
}
}
// =================
// === FnVisitor ===
// =================
/// A visitor that runs the provided function on every [`Tree`] node.
#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct FnVisitor<F>(pub F);
impl<F> Visitor for FnVisitor<F> {}
impl<'s: 'a, 'a, T, F: Fn(&'a Tree<'s>) -> T> TreeVisitor<'s, 'a> for FnVisitor<F> {
fn visit(&mut self, ast: &'a Tree<'s>) -> bool {
(self.0)(ast);
true
}
}
impl<'s, T, F: Fn(&mut Tree<'s>) -> T> TreeVisitorMut<'s> for FnVisitor<F> {
fn visit_mut(&mut self, ast: &mut Tree<'s>) -> bool {
(self.0)(ast);
true
}
}
impl<'s> Tree<'s> {
/// Map the provided function over each [`Tree`] node. The function results will be discarded.
pub fn map<T>(&self, f: impl Fn(&Tree<'s>) -> T) {
let mut visitor = FnVisitor(f);
self.visit(&mut visitor);
}
/// Map the provided function over each [`Tree`] node. The function results will be discarded.
pub fn map_mut<T>(&mut self, f: impl Fn(&mut Tree<'s>) -> T) {
let mut visitor = FnVisitor(f);
self.visit_mut(&mut visitor);
}
}

View File

@ -0,0 +1,25 @@
[package]
name = "enso-parser-syntax-tree-builder"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "Enso Parser AST Builder."
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0"
enso-macro-utils = { path = "../../../../../macro-utils" }
quote = "1.0"
[dependencies.syn]
version = "1.0"
features = [
'extra-traits', 'visit', 'full'
]

View File

@ -0,0 +1,124 @@
//! Definition of a macro allowing building mock AST structures, mostly useful for testing.
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
use proc_macro2::TokenStream;
use quote::quote;
use std::mem;
/// A macro allowing building mock AST structures, mostly useful for testing.
///
/// Currently supported syntax:
///
/// - `a b c` Application of arguments. Arguments are applied in-order, from left to right. Here,
/// this expression would be the same as `[[a b] c]`.
///
/// - `a [b c] d` Grouping syntax that does not produce AST group expression. Here, `b c` is just
/// the first argument passed to `a`.
///
/// - `{if} a {then} b {else} c` Multi-segment application. Every segment header must be enclosed
/// in curly braces. Headers can also be quoted, like `{"("} a {")"}`.
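///
/// A minimal usage sketch; the `syntax`, `test`, and `Token` items referenced by the generated
/// code are assumed to be in scope at the call site:
///
/// ```text
/// let mock = ast_builder! { {if} a {then} b {else} c };
/// ```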
#[proc_macro]
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
let output = expr(tokens);
let output = quote!(syntax::Tree::opr_section_boundary(#output));
output.into()
}
struct Segment {
header: TokenStream,
body: TokenStream,
}
impl Segment {
fn new(header: TokenStream) -> Self {
let body = quote!();
Self { header, body }
}
}
fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
use proc_macro::TokenTree::*;
let mut output = quote! {};
let mut prefix: Option<TokenStream> = None;
let mut segments: Vec<Segment> = vec![];
let mut current_segment: Option<Segment> = None;
let app_to_output = |output: &mut TokenStream, tok| {
if output.is_empty() {
*output = tok;
} else {
*output = quote! {syntax::Tree::app(#output,#tok)};
}
};
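// For example, the token stream `a b c` folds left-to-right into
// `syntax::Tree::app(syntax::Tree::app(test::ident("a"), test::ident("b")), test::ident("c"))`.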
for token in tokens {
match token {
// a b c ...
Ident(ident) => {
let ident = ident.to_string();
app_to_output(&mut output, quote! {test::ident(#ident)});
}
// {if} a {then} b {else} c
// {"("} a {")"}
Group(group) if group.delimiter() == proc_macro::Delimiter::Brace => {
if let Some(mut current_segment) = mem::take(&mut current_segment) {
current_segment.body = mem::take(&mut output);
segments.push(current_segment);
} else if !output.is_empty() {
prefix = Some(mem::take(&mut output));
}
let body = group.stream().to_string();
current_segment = Some(Segment::new(quote! {Token::ident(#body)})); // Token::symbol
}
// a [b c] d
Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => {
app_to_output(&mut output, expr(group.stream()));
}
_ => panic!("Unsupported token {:?}", token),
}
}
if let Some(mut current_segment) = current_segment {
current_segment.body = mem::take(&mut output);
segments.push(current_segment);
let segments: Vec<TokenStream> = segments
.into_iter()
.map(|t| {
let header = t.header;
let body = t.body;
let body = if !body.is_empty() {
quote!(Some(syntax::Tree::opr_section_boundary(#body)))
} else {
quote!(None)
};
quote! { syntax::tree::MultiSegmentAppSegment { header: #header, body: #body } }
})
.collect();
let pfx = prefix
.map(|t| quote! {Some(Box::new(syntax::Tree::opr_section_boundary(#t)))})
.unwrap_or_else(|| quote! {None});
let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()};
output = quote! {
span::With::new_no_left_offset_no_start(
Bytes::from(0),
syntax::tree::Type::MultiSegmentApp(Box::new(syntax::tree::MultiSegmentApp {prefix: #pfx, segments: #segments}))
)
}
}
output
}

View File

@ -0,0 +1,25 @@
[package]
name = "enso-parser-syntax-tree-visitor"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "Enso Parser AST Visitor."
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0"
enso-macro-utils = { path = "../../../../../macro-utils" }
quote = "1.0"
[dependencies.syn]
version = "1.0"
features = [
'extra-traits', 'visit', 'full'
]

View File

@ -0,0 +1,179 @@
//! Definition of the [`Visitor`] derive macro. It implements the visitor pattern for [`Ast`].
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
extern crate proc_macro;
use enso_macro_utils::field_names;
use enso_macro_utils::identifier_sequence;
use enso_macro_utils::index_sequence;
use proc_macro2::TokenStream;
use quote::quote;
use syn::Data;
use syn::DataEnum;
use syn::DataStruct;
use syn::DeriveInput;
use syn::Fields;
use syn::Variant;
// ======================
// === Derive Visitor ===
// ======================
use quote::ToTokens;
/// Implements [`TreeVisitable`], [`TreeVisitableMut`], [`SpanVisitable`], and [`SpanVisitableMut`].
/// These traits are defined in the [`crate::ast`] module. This macro hardcodes the trait names and
/// is not implemented generically because the current Rust compiler is unable to compile the
/// generic definition. See the [`crate::ast`] module to learn more about the design and the
/// relevant Rust compiler issue.
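///
/// A minimal usage sketch (the struct shape is illustrative only):
///
/// ```text
/// #[derive(Visitor)]
/// pub struct App<'s> {
/// pub func: Tree<'s>,
/// pub arg: Tree<'s>,
/// }
/// ```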
#[proc_macro_derive(Visitor)]
pub fn derive_visitor(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let decl = syn::parse_macro_input!(input as DeriveInput);
let ident = &decl.ident;
let (impl_generics, ty_generics, _inherent_where_clause_opt) = &decl.generics.split_for_impl();
let body = gen_body(quote!(TreeVisitable::visit), &decl.data, false);
let body_mut = gen_body(quote!(TreeVisitableMut::visit_mut), &decl.data, true);
let body_span = gen_body(quote!(SpanVisitable::visit_span), &decl.data, false);
let body_span_mut = gen_body(quote!(SpanVisitableMut::visit_span_mut), &decl.data, true);
let body_item = gen_body(quote!(ItemVisitable::visit_item), &decl.data, false);
let impl_generics_vec: Vec<_> = impl_generics.to_token_stream().into_iter().collect();
let impl_generics_len = impl_generics_vec.len();
let mut impl_generics;
if impl_generics_len > 0 {
let v: Vec<_> = impl_generics_vec.into_iter().take(impl_generics_len - 1).skip(1).collect();
impl_generics = quote!(#(#v)*);
if !v.is_empty() {
impl_generics = quote!(#impl_generics,);
}
} else {
impl_generics = quote!('s,);
}
let impl_generics = quote!(<#impl_generics 'a>);
let output = quote! {
impl #impl_generics TreeVisitable #impl_generics for #ident #ty_generics {
fn visit<T: TreeVisitor #impl_generics>(&'a self, visitor:&mut T) {
visitor.before_visiting_children();
#body
visitor.after_visiting_children();
}
}
impl #impl_generics TreeVisitableMut #impl_generics for #ident #ty_generics {
fn visit_mut<T: TreeVisitorMut<'s>>(&'a mut self, visitor:&mut T) {
visitor.before_visiting_children();
#body_mut
visitor.after_visiting_children();
}
}
impl #impl_generics SpanVisitable #impl_generics for #ident #ty_generics {
fn visit_span<T: SpanVisitor #impl_generics>(&'a self, visitor:&mut T) {
visitor.before_visiting_children();
#body_span
visitor.after_visiting_children();
}
}
impl #impl_generics SpanVisitableMut #impl_generics for #ident #ty_generics {
fn visit_span_mut<T: SpanVisitorMut<'s>>(&'a mut self, visitor:&mut T) {
visitor.before_visiting_children();
#body_span_mut
visitor.after_visiting_children();
}
}
impl #impl_generics ItemVisitable #impl_generics for #ident #ty_generics {
fn visit_item<T: ItemVisitor #impl_generics>(&'a self, visitor:&mut T) {
visitor.before_visiting_children();
#body_item
visitor.after_visiting_children();
}
}
};
// #[allow(missing_docs)]
// pub trait ItemVisitable<'s, 'a> {
// fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, _visitor: &mut V) {}
// }
output.into()
}
fn gen_body(f: TokenStream, data: &Data, is_mut: bool) -> TokenStream {
match data {
Data::Struct(t) => body_for_struct(&f, t, is_mut),
Data::Enum(t) => body_for_enum(&f, t),
Data::Union(_) => panic!("Untagged union types not supported."),
}
}
fn body_for_struct(f: &TokenStream, data: &DataStruct, is_mut: bool) -> TokenStream {
match &data.fields {
Fields::Unit => quote!({}),
Fields::Unnamed(fields) => {
let indices = index_sequence(fields.unnamed.len());
if is_mut {
quote!(#( #f(&mut self.#indices, visitor); )*)
} else {
quote!(#( #f(&self.#indices, visitor); )*)
}
}
Fields::Named(fields) => {
let names = field_names(fields);
if is_mut {
quote!(#( #f(&mut self.#names, visitor); )*)
} else {
quote!(#( #f(&self.#names, visitor); )*)
}
}
}
}
/// Prepares a match arm for a single variant, calling the visitor function on each of the
/// variant's fields.
fn arm_for_variant(f: &TokenStream, variant: &Variant) -> TokenStream {
let variant_ident = &variant.ident;
match &variant.fields {
Fields::Unit => {
quote!(Self::#variant_ident => {})
}
Fields::Named(fields) => {
let names = field_names(fields);
quote!(Self::#variant_ident { #(#names),* } => {
#( #f(#names, visitor); )*
})
}
Fields::Unnamed(fields) => {
let names = identifier_sequence(fields.unnamed.len());
quote!(Self::#variant_ident(#(#names),*) => {
#( #f(#names, visitor); )*
})
}
}
}
fn body_for_enum(f: &TokenStream, data: &DataEnum) -> TokenStream {
let make_arm = |variant| arm_for_variant(f, variant);
let arms = data.variants.iter().map(make_arm);
let body = quote!(match self { #(#arms)* });
body
}

View File

@ -6,8 +6,8 @@ edition = "2021"
description = "An augmented standard library in the vein of Haskell's prelude."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/-prelude"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
keywords = ["prelude", "standard-library"]
@ -24,7 +24,6 @@ enso-shapely = { version = "^0.2.0", path = "../shapely" }
anyhow = "1.0.37"
backtrace = "0.3.53"
boolinator = "2.4.0"
bumpalo = "3.4.0"
cfg-if = "1.0.0"
colored = "2.0.0"
derivative = "2.2.0"
@ -42,6 +41,9 @@ shrinkwraprs = "0.3.0"
serde = { version = "1.0.126", features = ["derive", "rc"], optional = true }
serde_json = { version = "1.0", optional = true }
smallvec = "1.0.0"
tracing = "0.1"
tracing-subscriber = "0.3"
tracing-wasm = "0.2"
wasm-bindgen = { version = "0.2.78" , features = ["nightly"], optional = true }
weak-table = "0.3.0"
nalgebra = { version = "0.26.2", optional = true }

View File

@ -14,24 +14,14 @@ use std::vec::Splice;
/// A version of [`std::vec::Vec`] that can't be empty.
#[allow(missing_docs)]
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq, Deref, DerefMut)]
pub struct NonEmptyVec<T> {
elems: Vec<T>,
}
impl<T> Deref for NonEmptyVec<T> {
type Target = Vec<T>;
fn deref(&self) -> &Self::Target {
&self.elems
}
pub elems: Vec<T>,
}
impl<T> NonEmptyVec<T> {
/// Construct a new non-empty vector.
///
/// The vector will not allocate more than the space required to contain `first` and `rest`.
///
/// # Examples
///
/// ```
@ -45,6 +35,20 @@ impl<T> NonEmptyVec<T> {
NonEmptyVec { elems }
}
/// Construct a new non-empty vector.
///
/// # Examples
///
/// ```
/// #![allow(unused_mut)]
/// use enso_prelude::NonEmptyVec;
/// let mut vec: NonEmptyVec<usize> = NonEmptyVec::new_with_last(vec![], 0);
/// ```
pub fn new_with_last(mut elems: Vec<T>, last: T) -> NonEmptyVec<T> {
elems.push(last);
NonEmptyVec { elems }
}
/// Construct a `NonEmptyVec` containing a single element.
///
/// # Examples
@ -56,7 +60,8 @@ impl<T> NonEmptyVec<T> {
/// assert_eq!(vec.len(), 1);
/// ```
pub fn singleton(first: T) -> NonEmptyVec<T> {
NonEmptyVec::new(first, vec![])
let elems = vec![first];
Self { elems }
}
/// Construct a new, `NonEmptyVec<T>` containing the provided element and with the provided
@ -157,21 +162,26 @@ impl<T> NonEmptyVec<T> {
/// Remove an element from the back of the collection, returning it.
///
/// Will not pop any item if there is only one item left in the vector.
///
/// # Examples
///
/// ```
/// use enso_prelude::NonEmptyVec;
/// let mut vec = NonEmptyVec::new(0, vec![1]);
/// assert!(vec.pop().is_some());
/// assert!(vec.pop().is_none());
/// assert!(vec.pop_if_has_more_than_1_elem().is_some());
/// assert!(vec.pop_if_has_more_than_1_elem().is_none());
/// assert_eq!(vec.len(), 1);
/// ```
pub fn pop(&mut self) -> Option<T> {
pub fn pop_if_has_more_than_1_elem(&mut self) -> Option<T> {
(self.len() > 1).and_option_from(|| self.elems.pop())
}
/// Remove the last element, consuming this collection and returning the element together with
/// the remaining, possibly empty, vector.
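///
/// A minimal usage sketch:
///
/// ```text
/// let vec = NonEmptyVec::new(0, vec![1, 2]);
/// let (last, rest) = vec.pop();
/// assert_eq!(last, 2);
/// assert_eq!(rest, vec![0, 1]);
/// ```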
pub fn pop(mut self) -> (T, Vec<T>) {
let last = self.elems.pop().unwrap();
(last, self.elems)
}
/// Obtain a mutable reference to the element in the vector at the specified `index`.
///
/// # Examples
@ -213,6 +223,16 @@ impl<T> NonEmptyVec<T> {
self.elems.first_mut().expect("The NonEmptyVec always has an item in it.")
}
/// Get the tail (all elements except the first) as a slice.
pub fn tail(&mut self) -> &[T] {
&self.elems[1..]
}
/// Get the tail (all elements except the first) as a mutable slice.
pub fn tail_mut(&mut self) -> &mut [T] {
&mut self.elems[1..]
}
/// Obtain an immutable reference to the last element in the `NonEmptyVec`.
///
/// # Examples
@ -307,6 +327,17 @@ impl<T> NonEmptyVec<T> {
{
self.elems.splice(range, replace_with)
}
/// Convert this non-empty vector into a [`Vec`].
pub fn into_vec(self) -> Vec<T> {
self.elems
}
/// Consume this non-empty vector, map each element with the provided function, and produce a new
/// [`NonEmptyVec`].
pub fn mapped<S>(self, f: impl FnMut(T) -> S) -> NonEmptyVec<S> {
let elems = self.elems.into_iter().map(f).collect();
NonEmptyVec { elems }
}
}
@ -317,3 +348,40 @@ impl<T: Default> Default for NonEmptyVec<T> {
Self::singleton(default())
}
}
impl<T> TryFrom<Vec<T>> for NonEmptyVec<T> {
type Error = ();
fn try_from(elems: Vec<T>) -> Result<Self, Self::Error> {
(!elems.is_empty()).as_result_from(|| NonEmptyVec { elems }, || ())
}
}
impl<T> From<NonEmptyVec<T>> for Vec<T> {
fn from(v: NonEmptyVec<T>) -> Self {
v.elems
}
}
impl<T> IntoIterator for NonEmptyVec<T> {
type Item = T;
type IntoIter = std::vec::IntoIter<T>;
fn into_iter(self) -> Self::IntoIter {
self.elems.into_iter()
}
}
impl<'a, T> IntoIterator for &'a NonEmptyVec<T> {
type Item = &'a T;
type IntoIter = slice::Iter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.elems.iter()
}
}
impl<'a, T> IntoIterator for &'a mut NonEmptyVec<T> {
type Item = &'a mut T;
type IntoIter = slice::IterMut<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.elems.iter_mut()
}
}

View File

@ -103,6 +103,33 @@ pub mod serde_reexports {
pub use serde::Serialize;
}
// ===============
// === Tracing ===
// ===============
pub mod tracing {
pub use tracing::*;
pub use tracing_subscriber::*;
}
pub use ::tracing::event;
pub use ::tracing::span as log_span;
pub const ERROR: tracing::Level = tracing::Level::ERROR;
pub const WARN: tracing::Level = tracing::Level::WARN;
pub const INFO: tracing::Level = tracing::Level::INFO;
pub const DEBUG: tracing::Level = tracing::Level::DEBUG;
pub const TRACE: tracing::Level = tracing::Level::TRACE;
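/// Initialize the global tracing subscriber with a compact, terminal-friendly formatter. A minimal
/// usage sketch: `init_tracing(WARN)`, with one of the level constants defined above.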
pub fn init_tracing(level: tracing::Level) {
let subscriber =
tracing::fmt().compact().with_target(false).with_max_level(level).without_time().finish();
tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize logger.");
}
// =================
// === Immutable ===
// =================

View File

@ -36,6 +36,18 @@ pub fn with<T, F: FnOnce(T) -> Out, Out>(t: T, f: F) -> Out {
// ================
// === TryAsRef ===
// ================
/// Just like [`AsRef`], but might return [`None`] for some data.
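///
/// A hypothetical example impl (the `Shape`, `Circle`, and `Rect` types are illustrative only):
///
/// ```text
/// enum Shape { Circle(Circle), Rect(Rect) }
/// impl TryAsRef<Circle> for Shape {
/// fn try_as_ref(&self) -> Option<&Circle> {
/// match self { Shape::Circle(t) => Some(t), _ => None }
/// }
/// }
/// ```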
#[allow(missing_docs)]
pub trait TryAsRef<T> {
fn try_as_ref(&self) -> Option<&T>;
}
// =============
// === ToRef ===
// =============

View File

@ -6,8 +6,8 @@ edition = "2021"
description = "Automated typeclass derivation."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/shapely"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/shapely"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
keywords = ["typeclass", "deriving"]

View File

@ -6,8 +6,8 @@ edition = "2021"
description = "Automated typeclass derivation."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/shapely/macros"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/shapely/macros"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
keywords = ["typeclass", "deriving", "macro"]
@ -22,7 +22,7 @@ proc-macro = true
default = []
[dependencies]
enso-macro-utils = { version = "^0.2.0", path = "../../macro-utils" }
enso-macro-utils = { path = "../../macro-utils" }
proc-macro2 = "1.0"
quote = "1.0"
Inflector = "0.11.4"

View File

@ -26,6 +26,7 @@ mod derive_entry_point;
mod derive_iterator;
mod derive_no_clone;
mod overlappable;
mod tagged_enum;
mod prelude {
pub use enso_macro_utils::repr;
@ -131,3 +132,14 @@ pub fn overlappable(
) -> proc_macro::TokenStream {
overlappable::overlappable(attrs, input)
}
/// Transforms Rust enums into enums where each variant is a separate type. It also implements
/// several traits (such as conversions between variants and the enum type) and defines utility
/// functions, such as constructors. See [`tagged_enum::run`] to learn more.
#[proc_macro_attribute]
pub fn tagged_enum(
attr: proc_macro::TokenStream,
input: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
tagged_enum::run(attr, input)
}

View File

@ -0,0 +1,320 @@
use crate::prelude::*;
use inflector::cases::snakecase::to_snake_case;
use syn::Data;
use syn::DeriveInput;
use syn::Fields;
// ===================
// === Entry Point ===
// ===================
/// Transforms Rust enums into enums where each variant is a separate type. It also implements
/// several traits (such as conversions between variants and the enum type) and defines utility
/// functions, such as constructors.
///
/// To see what code is generated, parts of the code generation below are annotated with comments
/// showing the output of applying this macro to the following structure:
///
/// ```text
/// #[tagged_enum(boxed)]
/// pub enum Ast<'s> {
/// Ident {
/// token: Token<'s>
/// },
/// App {
/// func: Ast<'s>,
/// arg: Ast<'s>,
/// }
/// }
/// ```
///
/// # Attributes
/// All attributes defined before the `#[tagged_enum]` one will be applied to the enum only, while
/// all other attributes will be applied to both the enum and all the variant structs.
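///
/// # Generated API sketch
/// A minimal, illustrative sketch of how the generated items could be used (assuming the `Ast`
/// definition above):
///
/// ```text
/// let ident = Ast::ident(token);
/// assert!(ident.is_ident());
/// assert_eq!(ident.marker(), AstMarker::Ident);
/// let ast: Ast = Ident(other_token).into();
/// ```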
pub fn run(
attr: proc_macro::TokenStream,
input: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let mut is_boxed = false;
let attrs: Vec<_> = attr.into_iter().collect();
if attrs.len() == 1 && &attrs[0].to_string() == "boxed" {
is_boxed = true;
} else if !attrs.is_empty() {
panic!("Unsupported attributes: {:?}", attrs);
}
let decl = syn::parse_macro_input!(input as DeriveInput);
let (impl_generics, ty_generics, inherent_where_clause_opt) = &decl.generics.split_for_impl();
let mut where_clause = enso_macro_utils::new_where_clause(vec![]);
for inherent_where_clause in inherent_where_clause_opt {
where_clause.predicates.extend(inherent_where_clause.predicates.iter().cloned())
}
let data = match &decl.data {
Data::Enum(data) => data,
_ => panic!("This macro is meant for enum structs only."),
};
let mut output = vec![];
// ========================
// === Main Enum Struct ===
// ========================
// pub enum Ast<'s> {
// Ident(Box<Ident<'s>>),
// App(Box<App<'s>>)
// }
//
// impl<'s> Debug for Ast<'s> {
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// match self {
// Self::Ident(t) => Debug::fmt(&t,f),
// Self::App(t) => Debug::fmt(&t,f),
// }
// }
// }
let vis = &decl.vis;
let enum_name = &decl.ident;
let enum_attrs = &decl.attrs;
let variant_names: Vec<_> = data.variants.iter().map(|v| &v.ident).collect();
let variant_bodies = variant_names.iter().map(|v| {
if is_boxed {
quote!(Box<#v #ty_generics>)
} else {
quote!(#v #ty_generics)
}
});
output.push(quote! {
#(#enum_attrs)*
#[allow(missing_docs)]
#vis enum #enum_name #ty_generics #where_clause {
#(#variant_names(#variant_bodies)),*
}
impl #impl_generics Debug for #enum_name #ty_generics #where_clause {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
#(Self::#variant_names(t) => Debug::fmt(&t,f)),*
}
}
}
});
// ==========================
// === Marker Enum Struct ===
// ==========================
// #[derive(Clone, Copy, Debug, PartialEq, Eq)]
// pub enum AstMarker {
// Ident,
// App
// }
//
// impl<'s> From<&Ast<'s>> for AstMarker {
// fn from(t:&Ast<'s>) -> Self {
// match t {
// Ast::Ident(_) => AstMarker::Ident,
// Ast::App(_) => AstMarker::App,
// }
// }
// }
//
// impl<'s> Ast<'s> {
// pub fn marker(&self) -> AstMarker {
// self.into()
// }
//
// pub fn is(&self, marker: AstMarker) -> bool {
// self.marker() == marker
// }
// }
let enum_marker_name = quote::format_ident!("{}Marker", enum_name);
output.push(quote! {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(missing_docs)]
#vis enum #enum_marker_name {
#(#variant_names),*
}
impl #impl_generics From<&#enum_name #ty_generics> for #enum_marker_name #where_clause {
fn from(t:&#enum_name #ty_generics) -> Self {
match t {
#(#enum_name::#variant_names(_) => Self::#variant_names),*
}
}
}
impl #impl_generics #enum_name #ty_generics #where_clause {
/// Abstract variant representation of this struct.
#[inline(always)]
pub fn marker(&self) -> #enum_marker_name {
self.into()
}
/// Check whether this struct is the given variant.
#[inline(always)]
pub fn is(&self, marker: #enum_marker_name) -> bool {
self.marker() == marker
}
}
});
for variant in &data.variants {
// =======================
// === Variant Structs ===
// =======================
// pub struct Ident<'s> {
// pub token: Token<'s>
// }
// pub struct App<'s> {
// pub func: Ast<'s>,
// pub args: Ast<'s>,
// }
let variant_attrs = &variant.attrs;
let variant_name = &variant.ident;
let fields = &variant.fields;
let fields = if fields.is_empty() { quote!({}) } else { quote!(#fields) };
output.push(quote! {
#(#enum_attrs)*
#(#variant_attrs)*
#[derive(Debug)]
#[allow(missing_docs)]
#vis struct #variant_name #ty_generics #fields #where_clause
});
// ====================
// === Constructors ===
// ====================
// impl<'s> Ast<'s> {
// pub fn ident(token: Token<'s>) -> Self {
// Self::Ident(Box::new(Ident{token}))
// }
// pub fn app(func: Ast, args: Ast) -> Self {
// Self::App(Box::new(App{func, args}))
// }
// }
let variant_snake_name = to_snake_case(&variant_name.to_string());
let variant_snake_ident = quote::format_ident!("{}", variant_snake_name);
let (names, types) = match &variant.fields {
Fields::Unit => (vec![], vec![]),
Fields::Named(fields) => {
let names = fields.named.iter().map(|f| f.ident.as_ref().unwrap()).collect();
let types = fields.named.iter().map(|f| &f.ty).collect();
(names, types)
}
_ => panic!(),
};
let cons = if is_boxed {
quote!(Box::new(#variant_name { #(#names),* }))
} else {
quote!(#variant_name { #(#names),* })
};
output.push(quote! {
impl #impl_generics #enum_name #ty_generics #where_clause {
/// Constructor.
#[inline(always)]
pub fn #variant_snake_ident(#(#names: #types),*) -> Self {
Self::#variant_name (#cons)
}
}
});
// ========================================
// === Unnamed Struct Like Constructors ===
// ========================================
// pub fn Ident<'s>(token: Token<'s>) -> Ident<'s> {
// Ident {token}
// }
// pub fn App<'s>(func: Ast<'s>, args: Ast<'s>) -> App<'s> {
// App {func, args}
// }
output.push(quote! {
/// Constructor.
#[inline(always)]
#[allow(non_snake_case)]
pub fn #variant_name #impl_generics (#(#names: #types),*)
-> #variant_name #ty_generics #where_clause {
#variant_name { #(#names),* }
}
});
// ======================
// === Variant Checks ===
// ======================
// impl<'s> Ast<'s> {
// pub fn is_ident(&self) -> bool {
// self.is(AstMarker::Ident)
// }
//
// pub fn is_app(&self) -> bool {
// self.is(AstMarker::App)
// }
// }
let variant_check_ident = quote::format_ident!("is_{}", variant_snake_name);
output.push(quote! {
impl #impl_generics #enum_name #ty_generics #where_clause {
/// Check if this struct is the given variant.
#[inline(always)]
pub fn #variant_check_ident(&self) -> bool {
self.is(#enum_marker_name::#variant_name)
}
}
});
// ===================
// === Conversions ===
// ===================
// impl<'s> From<Ident<'s>> for Ast<'s> {
// fn from(variant: Ident<'s>) -> Self {
// Self::Ident(Box::new(variant))
// }
// }
//
// impl<'s> From<App<'s>> for Ast<'s> {
// fn from(variant: App<'s>) -> Self {
// Self::App(Box::new(variant))
// }
// }
let cons = if is_boxed { quote!(Box::new(variant)) } else { quote!(variant) };
output.push(quote! {
impl #impl_generics From<#variant_name #ty_generics> for #enum_name #ty_generics
#where_clause {
#[inline(always)]
fn from(variant: #variant_name #ty_generics) -> Self {
Self::#variant_name(#cons)
}
}
});
}
// =============================
// === Final Code Generation ===
// =============================
let output = quote! {
#(#output)*
};
output.into()
}

View File

@ -40,4 +40,5 @@ pub use topology::*;
/// Common traits.
pub mod traits {
pub use super::topology::traits::*;
pub use super::unit2::traits::*;
}

View File

@ -7,14 +7,20 @@
//! and rules of how the result inference should be performed.
use paste::paste;
use std::borrow::Cow;
use std::marker::PhantomData;
/// Common traits for built-in units.
pub mod traits {
pub use super::BytesCowOps;
pub use super::BytesOps;
pub use super::BytesStrOps;
pub use super::DurationNumberOps;
pub use super::DurationOps;
pub use super::IntoUncheckedRawRange;
pub use super::UncheckedFrom;
}
mod ops {
@ -25,27 +31,40 @@ mod ops {
// =====================
// === UncheckedInto ===
// === UncheckedFrom ===
// =====================
/// Unchecked unit conversion. Use it only when defining unit conversions, never in code that
/// merely uses units.
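///
/// A minimal usage sketch (assuming the [`Bytes`] unit defined later in this file):
///
/// ```text
/// let size: Bytes = Bytes::unchecked_from(10);
/// ```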
#[allow(missing_docs)]
pub trait UncheckedFrom<T> {
fn unchecked_from(t: T) -> Self;
}
impl<T> const UncheckedFrom<T> for T {
fn unchecked_from(t: T) -> Self {
t
}
}
impl<V, R> const UncheckedFrom<R> for UnitData<V, R> {
fn unchecked_from(repr: R) -> Self {
let variant = PhantomData;
UnitData { repr, variant }
}
}
/// Unchecked unit conversion. See [`UncheckedFrom`] to learn more.
#[allow(missing_docs)]
pub trait UncheckedInto<T> {
fn unchecked_into(self) -> T;
}
impl<T> const UncheckedInto<T> for T {
impl<T, S> const UncheckedInto<T> for S
where T: ~const UncheckedFrom<S>
{
fn unchecked_into(self) -> T {
self
}
}
impl<V, R> const UncheckedInto<UnitData<V, R>> for R {
fn unchecked_into(self) -> UnitData<V, R> {
let repr = self;
let variant = PhantomData;
UnitData { repr, variant }
T::unchecked_from(self)
}
}
@ -74,6 +93,7 @@ pub trait Variant {
}
/// Internal representation of every unit.
#[repr(transparent)]
pub struct UnitData<V, R> {
repr: R,
variant: PhantomData<V>,
@ -165,6 +185,8 @@ impl<V, R: PartialEq> PartialEq for UnitData<V, R> {
}
}
impl<V> Eq for UnitData<V, usize> {}
// ===========
@ -177,6 +199,12 @@ impl<V, R: PartialOrd> PartialOrd for UnitData<V, R> {
}
}
impl<V> Ord for UnitData<V, usize> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.repr.cmp(&other.repr)
}
}
// ============
@ -193,6 +221,33 @@ where R: Copy
// =============================
// === IntoUncheckedRawRange ===
// =============================
/// Allows transmuting [`Range<UnitData<V,R>>`] to [`Range<R>`].
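///
/// A minimal usage sketch (assuming the [`Bytes`] unit defined later in this file):
///
/// ```text
/// let range = Bytes(0)..Bytes(4);
/// let raw: ops::Range<usize> = range.into_unchecked_raw_range();
/// ```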
#[allow(missing_docs)]
pub trait IntoUncheckedRawRange {
type Output;
fn into_unchecked_raw_range(self) -> Self::Output;
}
impl<V, R> IntoUncheckedRawRange for ops::Range<UnitData<V, R>> {
type Output = ops::Range<R>;
fn into_unchecked_raw_range(self) -> Self::Output {
self.start.repr..self.end.repr
}
}
impl<V, R> IntoUncheckedRawRange for ops::RangeFrom<UnitData<V, R>> {
type Output = ops::RangeFrom<R>;
fn into_unchecked_raw_range(self) -> Self::Output {
self.start.repr..
}
}
// ===============
// === gen_ops ===
// ===============
@ -288,6 +343,34 @@ macro_rules! gen_ops {
}
}
// Please note that this impl is not as generic as the following ones because the Rust compiler
// is unable to compile the more generic version.
impl<V, R> const ops::$trait<UnitData<V, R>> for f64
where
R: Copy,
V: $rev_trait<f64>,
f64: ~const ops::$trait<R>,
{
type Output = UnitData<<V as $rev_trait<f64>>::Output, <f64 as ops::$trait<R>>::Output>;
fn $op(self, rhs: UnitData<V, R>) -> Self::Output {
self.$op(rhs.repr).unchecked_into()
}
}
// Please note that this impl is not as generic as the following ones because the Rust compiler
// is unable to compile the more generic version.
impl<V> const ops::$trait<UnitData<V, usize>> for usize
where
V: $rev_trait<usize>,
usize: ~const ops::$trait<usize>,
{
type Output =
UnitData<<V as $rev_trait<usize>>::Output, <usize as ops::$trait<usize>>::Output>;
fn $op(self, rhs: UnitData<V, usize>) -> Self::Output {
self.$op(rhs.repr).unchecked_into()
}
}
impl<V, R, T> const ops::$trait<T> for UnitData<V, R>
where
UnitData<V, R>: $trait<T>,
@ -331,6 +414,29 @@ macro_rules! gen_ops_mut {
self.$op(rhs.repr)
}
}
impl<V, R> const ops::$trait_mut<UnitData<V, R>> for f64
where
f64: ~const ops::$trait_mut<R>,
R: Copy,
UnitData<V, R>: $rev_trait<f32>,
{
fn $op(&mut self, rhs: UnitData<V, R>) {
self.$op(rhs.repr)
}
}
impl<V, R> const ops::$trait_mut<UnitData<V, R>> for usize
where
usize: ~const ops::$trait_mut<R>,
R: Copy,
UnitData<V, R>: $rev_trait<f32>,
{
fn $op(&mut self, rhs: UnitData<V, R>) {
self.$op(rhs.repr)
}
}
impl<V, R, T> const ops::$trait_mut<T> for UnitData<V, R>
where
T: IsNotUnit,
@ -341,6 +447,7 @@ macro_rules! gen_ops_mut {
self.repr.$op(rhs)
}
}
impl<V1, V2, R1, R2> const ops::$trait_mut<UnitData<V2, R2>> for UnitData<V1, R1>
where
R1: ~const ops::$trait_mut<R2>,
@ -479,6 +586,9 @@ macro_rules! define_single_op_switch {
(f64 $op:tt $rhs:ident = $out:ident) => {
$crate::define_single_rev_op! {f64 $op $rhs = $out}
};
(usize $op:tt $rhs:ident = $out:ident) => {
$crate::define_single_rev_op! {usize $op $rhs = $out}
};
($lhs:ident $op:tt $rhs:ident = $out:ident) => {
$crate::define_single_op! {$lhs $op $rhs = $out}
};
@ -488,25 +598,25 @@ macro_rules! define_single_op_switch {
#[macro_export]
macro_rules! define_single_op {
($lhs:ident + $rhs:ident = $out:ident) => {
impl Add<$rhs> for $lhs {
impl $crate::unit2::Add<$rhs> for $lhs {
type Output = $out;
}
};
($lhs:ident - $rhs:ident = $out:ident) => {
impl Sub<$rhs> for $lhs {
impl $crate::unit2::Sub<$rhs> for $lhs {
type Output = $out;
}
};
($lhs:ident * $rhs:ident = $out:ident) => {
impl Mul<$rhs> for $lhs {
impl $crate::unit2::Mul<$rhs> for $lhs {
type Output = $out;
}
};
($lhs:ident / $rhs:ident = $out:ident) => {
impl Div<$rhs> for $lhs {
impl $crate::unit2::Div<$rhs> for $lhs {
type Output = $out;
}
};
@ -522,25 +632,25 @@ macro_rules! define_single_op {
#[macro_export]
macro_rules! define_single_rev_op {
($lhs:ident + $rhs:ident = $out:ident) => {
impl RevAdd<$rhs> for $lhs {
impl $crate::unit2::RevAdd<$rhs> for $lhs {
type Output = $out;
}
};
($lhs:ident - $rhs:ident = $out:ident) => {
impl RevSub<$rhs> for $lhs {
impl $crate::unit2::RevSub<$rhs> for $lhs {
type Output = $out;
}
};
($lhs:ident * $rhs:ident = $out:ident) => {
impl RevMul<$rhs> for $lhs {
impl $crate::unit2::RevMul<$rhs> for $lhs {
type Output = $out;
}
};
($lhs:ident / $rhs:ident = $out:ident) => {
impl RevDiv<$rhs> for $lhs {
impl $crate::unit2::RevDiv<$rhs> for $lhs {
type Output = $out;
}
};
@ -574,7 +684,7 @@ define_ops![
f32 * Duration = Duration,
];
/// Methods for the [`Duration`] unit.
/// Methods of the [`Duration`] unit.
#[allow(missing_docs)]
pub trait DurationOps {
fn ms(t: f32) -> Duration;
@ -653,3 +763,106 @@ impl From<Duration> for std::time::Duration {
std::time::Duration::from_millis(duration.as_ms() as u64)
}
}
// =============
// === Bytes ===
// =============
define! {
/// Number of bytes.
Bytes: usize = 0
}
define_ops![
Bytes [+,-] Bytes = Bytes,
Bytes * usize = Bytes,
usize * Bytes = Bytes,
];
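// A minimal usage sketch of the operators generated above (illustrative only):
//
// let size = Bytes(2) + Bytes(3) * 4; // `Bytes * usize` and `Bytes + Bytes` from the defs above.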
/// Constructor.
#[allow(non_snake_case)]
pub fn Bytes(size: usize) -> Bytes {
Bytes::from(size)
}
impl From<usize> for Bytes {
fn from(t: usize) -> Self {
Bytes::unchecked_from(t)
}
}
/// Additional methods for [`Bytes`].
pub trait BytesOps {
/// Check whether this bytes value is zero.
fn is_zero(&self) -> bool;
/// Check whether this bytes value is positive.
fn is_positive(&self) -> bool;
/// Check whether this bytes value is negative.
fn is_negative(&self) -> bool;
}
impl BytesOps for Bytes {
fn is_zero(&self) -> bool {
*self == Bytes::from(0)
}
fn is_positive(&self) -> bool {
*self > Bytes::from(0)
}
fn is_negative(&self) -> bool {
*self < Bytes::from(0)
}
}
/// Methods of the [`Bytes`] unit as extensions for the [`str`] type.
#[allow(missing_docs)]
pub trait BytesStrOps<Range> {
/// Slice the provided string.
///
/// # Panics
/// Panics if the range start or end is not on a UTF-8 code point boundary, or if it is past the
/// end of the last code point of the string slice.
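///
/// A minimal usage sketch:
///
/// ```text
/// assert_eq!("hello".slice(Bytes(1)..Bytes(3)), "el");
/// ```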
fn slice(&self, range: Range) -> &str;
}
impl BytesStrOps<ops::Range<Bytes>> for str {
#[inline(always)]
fn slice(&self, range: ops::Range<Bytes>) -> &str {
&self[range.into_unchecked_raw_range()]
}
}
impl BytesStrOps<ops::RangeFrom<Bytes>> for str {
#[inline(always)]
fn slice(&self, range: ops::RangeFrom<Bytes>) -> &str {
&self[range.into_unchecked_raw_range()]
}
}
/// Methods of the [`Bytes`] unit as extensions for the [`Cow`] type.
#[allow(missing_docs)]
pub trait BytesCowOps<'t, Range> {
fn slice(&self, range: Range) -> Cow<'t, str>;
}
impl<'t> BytesCowOps<'t, ops::Range<Bytes>> for Cow<'t, str> {
fn slice(&self, range: ops::Range<Bytes>) -> Cow<'t, str> {
match self {
Cow::Borrowed(t) => Cow::Borrowed(t.slice(range)),
Cow::Owned(t) => Cow::Owned(t.slice(range).to_owned()),
}
}
}
impl<'t> BytesCowOps<'t, ops::RangeFrom<Bytes>> for Cow<'t, str> {
fn slice(&self, range: ops::RangeFrom<Bytes>) -> Cow<'t, str> {
match self {
Cow::Borrowed(t) => Cow::Borrowed(t.slice(range)),
Cow::Owned(t) => Cow::Owned(t.slice(range).to_owned()),
}
}
}