mirror of https://github.com/enso-org/enso.git (synced 2025-01-08 22:56:16 +03:00)

Initial parser implementation in Rust (#3341)

This commit is contained in:
parent 0b34346c19
commit 9e219d698c
Cargo.lock (generated, 183 changed lines)
@@ -44,6 +44,15 @@ dependencies = [
 "wasm-bindgen",
]

[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
 "winapi 0.3.9",
]

[[package]]
name = "anyhow"
version = "1.0.57"

@@ -948,6 +957,7 @@ version = "0.2.0"
dependencies = [
 "criterion",
 "enso-prelude",
 "failure",
 "itertools 0.9.0",
 "rustversion",
 "serde",

@@ -973,25 +983,6 @@ dependencies = [
 "debug-scene-visualization",
]

[[package]]
name = "enso-flexer"
version = "0.2.1"
dependencies = [
 "enso-automata",
 "enso-lazy-reader",
 "enso-logger",
 "enso-macro-utils",
 "enso-prelude",
 "itertools 0.8.2",
 "nonempty",
 "proc-macro2",
 "quote",
 "syn",
 "unicode-segmentation",
 "wasm-bindgen",
 "wasm-bindgen-test",
]

[[package]]
name = "enso-formatter"
version = "0.1.0"

@@ -1096,14 +1087,6 @@ dependencies = [
 "wasm-bindgen-test",
]

[[package]]
name = "enso-lazy-reader"
version = "0.2.0"
dependencies = [
 "enso-prelude",
 "itertools 0.8.2",
]

[[package]]
name = "enso-logger"
version = "0.3.1"

@@ -1132,6 +1115,38 @@ dependencies = [
 "enso-prelude",
]

[[package]]
name = "enso-parser"
version = "0.1.0"
dependencies = [
 "enso-data-structures",
 "enso-parser-syntax-tree-builder",
 "enso-parser-syntax-tree-visitor",
 "enso-prelude",
 "enso-shapely-macros",
 "enso-types",
]

[[package]]
name = "enso-parser-syntax-tree-builder"
version = "0.1.0"
dependencies = [
 "enso-macro-utils",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "enso-parser-syntax-tree-visitor"
version = "0.1.0"
dependencies = [
 "enso-macro-utils",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "enso-prelude"
version = "0.2.6"

@@ -1139,7 +1154,6 @@ dependencies = [
 "anyhow",
 "backtrace",
 "boolinator",
 "bumpalo",
 "cfg-if 1.0.0",
 "colored",
 "derivative",

@@ -1159,6 +1173,9 @@ dependencies = [
 "serde_json",
 "shrinkwraprs 0.3.0",
 "smallvec 1.8.0",
 "tracing",
 "tracing-subscriber",
 "tracing-wasm",
 "wasm-bindgen",
 "wasm-bindgen-test",
 "weak-table",

@@ -2576,25 +2593,6 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "lexer"
version = "0.1.0"
dependencies = [
 "criterion",
 "enso-flexer",
 "enso-prelude",
 "lexer-definition",
]

[[package]]
name = "lexer-definition"
version = "0.1.0"
dependencies = [
 "enso-flexer",
 "enso-prelude",
 "uuid",
]

[[package]]
name = "libc"
version = "0.2.125"

@@ -2849,12 +2847,6 @@ dependencies = [
 "winapi 0.3.9",
]

[[package]]
name = "nonempty"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f962080273ac958f790079cfc886b5b9d722969dbd7b03f473902bdfe5c69b1"

[[package]]
name = "normalize-line-endings"
version = "0.3.0"

@@ -3170,18 +3162,6 @@ dependencies = [
 "websocket",
]

[[package]]
name = "parser-new"
version = "0.1.0"
dependencies = [
 "criterion",
 "enso-data-structures",
 "enso-logger",
 "enso-prelude",
 "itertools 0.10.3",
 "lexer",
]

[[package]]
name = "paste"
version = "0.1.18"

@@ -3884,6 +3864,15 @@ dependencies = [
 "opaque-debug",
]

[[package]]
name = "sharded-slab"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
dependencies = [
 "lazy_static",
]

[[package]]
name = "shrinkwraprs"
version = "0.2.3"

@@ -4097,6 +4086,15 @@ dependencies = [
 "syn",
]

[[package]]
name = "thread_local"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
dependencies = [
 "once_cell",
]

[[package]]
name = "time"
version = "0.1.44"

@@ -4297,9 +4295,21 @@ dependencies = [
 "cfg-if 1.0.0",
 "log 0.4.17",
 "pin-project-lite 0.2.9",
 "tracing-attributes",
 "tracing-core",
]

[[package]]
name = "tracing-attributes"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "tracing-core"
version = "0.1.26"

@@ -4307,6 +4317,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f"
dependencies = [
 "lazy_static",
 "valuable",
]

[[package]]

@@ -4319,6 +4330,42 @@ dependencies = [
 "tracing",
]

[[package]]
name = "tracing-log"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922"
dependencies = [
 "lazy_static",
 "log 0.4.17",
 "tracing-core",
]

[[package]]
name = "tracing-subscriber"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bc28f93baff38037f64e6f43d34cfa1605f27a49c34e8a04c5e78b0babf2596"
dependencies = [
 "ansi_term",
 "sharded-slab",
 "smallvec 1.8.0",
 "thread_local",
 "tracing-core",
 "tracing-log",
]

[[package]]
name = "tracing-wasm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4575c663a174420fa2d78f4108ff68f65bf2fbb7dd89f33749b6e826b3626e07"
dependencies = [
 "tracing",
 "tracing-subscriber",
 "wasm-bindgen",
]

[[package]]
name = "traitobject"
version = "0.1.0"

@@ -4428,6 +4475,12 @@ dependencies = [
 "sha1",
]

[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"

[[package]]
name = "value-bag"
version = "1.0.0-alpha.9"
@@ -9,6 +9,8 @@ members = [
    "build/enso-formatter",
    "build/rust-scripts",
    "lib/rust/*",
    "lib/rust/parser/src/syntax/tree/visitor",
    "lib/rust/parser/src/syntax/tree/builder",
    "lib/rust/profiler/data",
    "integration-test"
]
@@ -8,6 +8,7 @@ use crate::config::InitialView;
use crate::executor::web::EventLoopExecutor;
use crate::initializer::setup_global_executor;
use crate::Ide;

use enso_frp::future::EventOutputExt;
use enso_web::Closure;
use enso_web::HtmlDivElement;
@@ -59,7 +59,6 @@
use wasm_bindgen::prelude::*;



// ==============
// === Export ===
// ==============
@@ -38,15 +38,9 @@ use ensogl_text as text;
// === Export ===
// ==============

pub mod entry;
pub mod wide;



// ==============
// === Export ===
// ==============

pub mod entry;
pub use entry::View as Entry;
@@ -207,7 +207,7 @@ commands.build.rust = async function (argv) {
    console.log('Minimizing the WASM binary.')
    await gzip(paths.wasm.main, paths.wasm.mainGz)

    const releaseLimitMb = 4.36
    const releaseLimitMb = 4.37
    let limitMb = releaseLimitMb + allowExtraMb
    await checkWasmSize(paths.wasm.mainGz, limitMb)
}
@@ -6,8 +6,8 @@ edition = "2021"

description = "A finite-automata-based lexing engine."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/automata"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/automata"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

keywords = ["lexer", "finite-automata"]
@@ -6,8 +6,8 @@ edition = "2021"

description = "A collection of useful data structures."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/data"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/data"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

keywords = []

@@ -23,7 +23,7 @@ enso-prelude = { version = "^0.2.1", path = "../prelude" }
serde = { version = "1.0" , features = ["derive"] }
typenum = { version = "1.11.2" }
rustversion = { version = "1.0" }

failure = { version = "0.1.6" }

[dev-dependencies]
itertools = "0.9.0"
@@ -21,10 +21,10 @@ pub type Branches<K, V, S> = HashMap<K, HashMapTree<K, V, S>, S>;
/// more branches accessible by the given key type.
#[derive(Derivative)]
#[derivative(Clone)]
#[derivative(Debug(bound = "K:Eq+Hash+Debug , V:Debug , S:BuildHasher"))]
#[derivative(Default(bound = "K:Eq+Hash , V:Default , S:BuildHasher+Default"))]
#[derivative(PartialEq(bound = "K:Eq+Hash , V:PartialEq , S:BuildHasher"))]
#[derivative(Eq(bound = "K:Eq+Hash , V:Eq , S:BuildHasher"))]
#[derivative(Debug(bound = "K:Eq+Hash+Debug, V:Debug, S:BuildHasher"))]
#[derivative(Default(bound = "K:Eq+Hash, V:Default, S:BuildHasher+Default"))]
#[derivative(PartialEq(bound = "K:Eq+Hash, V:PartialEq, S:BuildHasher"))]
#[derivative(Eq(bound = "K:Eq+Hash, V:Eq, S:BuildHasher"))]
pub struct HashMapTree<K, V, S = RandomState> {
    /// Value of the current tree node.
    pub value: V,
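Only the whitespace inside the bound strings changes in the hunk above, but the pattern is worth spelling out: the `derivative` crate lets each derive carry hand-written bounds instead of the uniform `T: Trait` bounds a built-in `#[derive(...)]` would generate for every type parameter. A minimal sketch of the same idea, using a hypothetical `Shared` wrapper rather than anything from this commit:

use std::rc::Rc;

use derivative::Derivative;

// A plain `#[derive(Clone)]` would require `T: Clone`, even though cloning
// `Shared` only bumps the `Rc` reference count. The empty bound string lifts
// that requirement.
#[derive(Derivative)]
#[derivative(Clone(bound = ""))]
pub struct Shared<T> {
    pub data: Rc<T>,
}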
lib/rust/data-structures/src/im_list.rs (new file, 207 lines)
@@ -0,0 +1,207 @@
//! An immutable linked list implementation.

use crate::prelude::*;



// ============
// === List ===
// ============

/// Immutable linked list containing values of type [`T`]. As every node of the list is kept in
/// [`Rc`], cloning of any subsection of this list is very fast.
#[derive(Derivative, Deref)]
#[derivative(Clone(bound = ""))]
#[derivative(Default(bound = ""))]
pub struct List<T> {
    #[allow(missing_docs)]
    pub data: Option<NonEmpty<T>>,
}

/// Non-empty list. It is guaranteed to have at least one element. See [`List`] to learn more.
#[derive(Derivative, Deref, Debug)]
#[derivative(Clone(bound = ""))]
pub struct NonEmpty<T> {
    #[allow(missing_docs)]
    pub node: Rc<Node<T>>,
}

/// A node of the [`List`]. Contains the current value and link to list [`tail`].
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct Node<T> {
    pub head: T,
    pub tail: List<T>,
}

impl<T> Node<T> {
    /// Constructor.
    pub fn singleton(head: T) -> Self {
        let tail = default();
        Self { head, tail }
    }
}

impl<T> NonEmpty<T> {
    /// Constructor.
    pub fn singleton(head: T) -> Self {
        let node = Rc::new(Node::singleton(head));
        Self { node }
    }

    /// Convert this non-empty list to list of unknown length.
    pub fn into_list(self) -> List<T> {
        let data = Some(self);
        List { data }
    }

    /// Prepend the element to this list.
    pub fn prepend(self, head: T) -> Self {
        self.into_list().prepend(head)
    }

    /// Get the head element of this list.
    pub fn head(&self) -> &T {
        &self.head
    }

    /// Get tail of this list.
    pub fn tail(&self) -> &List<T> {
        &self.tail
    }

    /// Get the last element of this list.
    pub fn last(&self) -> &T {
        self.tail.last().unwrap_or_else(|| self.head())
    }

    /// Check whether this list is empty.
    pub fn is_empty(&self) -> bool {
        false
    }

    /// Convert this list to a vector.
    fn to_vec(&self) -> Vec<&T> {
        let mut out = vec![&self.head];
        let mut list = self.tail();
        loop {
            match list.head() {
                None => break,
                Some(head) => {
                    out.push(head);
                    match list.tail() {
                        None => break,
                        Some(tail) => list = tail,
                    }
                }
            }
        }
        out
    }
}

impl<T> List<T> {
    /// Prepend the element to the list.
    pub fn prepend(self, head: T) -> NonEmpty<T> {
        let tail = self;
        let node = Rc::new(Node { head, tail });
        NonEmpty { node }
    }

    /// Get the head element.
    pub fn head(&self) -> Option<&T> {
        self.as_ref().map(|t| t.head())
    }

    /// Get the tail of this list.
    pub fn tail(&self) -> Option<&List<T>> {
        self.as_ref().map(|t| t.tail())
    }

    /// Get the last element of this list.
    pub fn last(&self) -> Option<&T> {
        self.data.as_ref().map(|t| t.last())
    }

    /// Check whether this list is empty.
    pub fn is_empty(&self) -> bool {
        self.is_none()
    }

    /// Convert this list to a vector.
    fn to_vec(&self) -> Vec<&T> {
        self.data.as_ref().map(|t| t.to_vec()).unwrap_or_default()
    }

    /// Convert this list to a non-empty list. Return [`None`] if the list is empty.
    pub fn as_non_empty(&self) -> &Option<NonEmpty<T>> {
        &self.data
    }

    /// Convert this list to a non-empty list. Return [`None`] if the list is empty.
    pub fn into_non_empty(self) -> Option<NonEmpty<T>> {
        self.data
    }
}

impl<T> From<NonEmpty<T>> for List<T> {
    fn from(list: NonEmpty<T>) -> Self {
        list.into_list()
    }
}

impl<T: Debug> Debug for List<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        Debug::fmt(&self.to_vec(), f)
    }
}

impl<'a, T> IntoIterator for &'a List<T> {
    type Item = &'a T;
    type IntoIter = std::vec::IntoIter<&'a T>;

    fn into_iter(self) -> Self::IntoIter {
        self.to_vec().into_iter()
    }
}

impl<'a, T> IntoIterator for &'a NonEmpty<T> {
    type Item = &'a T;
    type IntoIter = std::vec::IntoIter<&'a T>;

    fn into_iter(self) -> Self::IntoIter {
        self.to_vec().into_iter()
    }
}

impl<T> FromIterator<T> for List<T> {
    // Clippy reports false warning here as we cannot add a bound to `I` that it needs to be a
    // double-ended iterator.
    #[allow(clippy::needless_collect)]
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        let vec: Vec<T> = iter.into_iter().collect();
        let mut list = List::default();
        for item in vec.into_iter().rev() {
            list = list.prepend(item).into()
        }
        list
    }
}

impl<T> From<Vec<T>> for List<T> {
    fn from(v: Vec<T>) -> Self {
        let mut out = List::default();
        for item in v.into_iter().rev() {
            out = out.prepend(item).into_list();
        }
        out
    }
}

impl<T> TryFrom<Vec<T>> for NonEmpty<T> {
    type Error = failure::Error;
    fn try_from(v: Vec<T>) -> Result<Self, Self::Error> {
        let err = "Cannot convert empty Vec to NonEmpty one.";
        List::<T>::from(v).into_non_empty().ok_or_else(|| failure::err_msg(err))
    }
}
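The new list type is built for cheap structural sharing: prepending allocates a single `Rc` node and reuses the existing tail, so cloning a list or any of its suffixes is O(1). A minimal usage sketch, not part of the commit, assuming the crate is consumed as `enso_data_structures` (its name in the workspace manifest above):

#[test]
fn im_list_usage_sketch() {
    use enso_data_structures::im_list::{List, NonEmpty};

    // Building from a `Vec` preserves element order.
    let list: List<usize> = vec![2, 3, 4].into();
    assert_eq!(list.head(), Some(&2));
    assert_eq!(list.last(), Some(&4));

    // Prepending yields a `NonEmpty` list; the old nodes are shared, not copied.
    let extended: NonEmpty<usize> = list.clone().prepend(1);
    assert_eq!(extended.head(), &1);
    assert_eq!(extended.tail().head(), Some(&2));

    // Borrowed iteration walks the shared nodes from front to back.
    let items: Vec<&usize> = (&extended).into_iter().collect();
    assert_eq!(items, [&1, &2, &3, &4]);
}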
@@ -24,6 +24,7 @@
pub mod dependency_graph;
pub mod diet;
pub mod hash_map_tree;
pub mod im_list;
pub mod index;
pub mod opt_vec;
@@ -1,5 +1,6 @@
//! Functionality for producing debug information.

// === Features ===
#![feature(extern_types)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
@@ -6,8 +6,8 @@ edition = "2021"

description = "A library for supporting generic programming."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/generics"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/generics"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

keywords = ["generic"]
@@ -6,8 +6,8 @@ edition = "2021"

description = "An efficient logger for writing applications in Rust."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/logger"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/logger"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

keywords = ["logging"]
@@ -6,8 +6,8 @@ edition = "2021"

description = "Utilities for writing macros."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/macro-utils"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/macro-utils"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

keywords = ["macro", "utility"]
@@ -6,8 +6,8 @@ edition = "2021"

description = "An implementation of functional optics."
readme = "README.md"
homepage = "https://github.com/enso-org/rust-lib/src/optics"
repository = "https://github.com/enso-org/rust-lib"
homepage = "https://github.com/enso-org/enso/lib/rust/optics"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

keywords = ["optics","lenses"]
@@ -1,24 +1,20 @@
[package]
name = "parser-new"
name = "enso-parser"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"

publish = false

[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
description = "Enso Parser."
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"

[dependencies]
enso-data-structures = { version = "0.2.0", path = "../data-structures" }
enso-logger = { version = "0.3.0", path = "../logger" }
enso-prelude = { version = "0.2.0", path = "../prelude" }
lexer = { version = "0.1.0", path = "lexer/generation" }
itertools = { version = "0.10.0" }
enso-prelude = { path = "../prelude" }
enso-data-structures = { path = "../data-structures" }
enso-types = { path = "../types" }
enso-shapely-macros = { path = "../shapely/macros" }
enso-parser-syntax-tree-visitor = { path = "src/syntax/tree/visitor" }
enso-parser-syntax-tree-builder = { path = "src/syntax/tree/builder" }

[build-dependencies]

[dev-dependencies]
criterion = "0.3"
@@ -1,28 +0,0 @@
[package]
name = "ast-new"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"

description = "An abstract syntax tree for the Enso language."
readme = "README.md"
homepage = "https://github.com/enso-org/enso/lib/rust/ast"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"

keywords = ["ast"]
categories = ["parsing"]

publish = false

[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true

[dependencies]
clap = { version = "2.33.3" }
itertools = { version = "0.10.0" }
proc-macro2 = { version = "1.0.26" }
syn = { version = "1.0.72", features = ["full", "extra-traits", "visit-mut", "visit"] }
uuid = { version = "0.8.1" , features = ["serde","v4","wasm-bindgen"] }
@ -1,366 +0,0 @@
|
||||
//! This module exports the implementation of the enso abstract syntax tree.
|
||||
|
||||
use app::*;
|
||||
use def::*;
|
||||
use invalid::*;
|
||||
use lines::*;
|
||||
use name::*;
|
||||
use num::*;
|
||||
use txt::*;
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
|
||||
|
||||
// ===================================
|
||||
// === Abstract Syntax Tree (Stub) ===
|
||||
// ===================================
|
||||
|
||||
/// An ast node of unknown shape.
|
||||
pub type AnyAst = Ast<Shape>;
|
||||
|
||||
/// An ast node with an unique id and length.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Ast<T> {
|
||||
/// A unique identifier.
|
||||
pub uid: Option<Uuid>,
|
||||
/// Length in number of chars of this ast node.
|
||||
pub len: usize,
|
||||
/// The number of trailing spaces.
|
||||
pub off: usize,
|
||||
/// The ast node itself.
|
||||
pub ast: T,
|
||||
}
|
||||
|
||||
// The set of all ast nodes.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Shape {
|
||||
Unrecognized(invalid::Unrecognized),
|
||||
Blank(name::Blank),
|
||||
Var(name::Var),
|
||||
Cons(name::Cons),
|
||||
Opr(name::Opr),
|
||||
Number(num::Number),
|
||||
Text(txt::Text),
|
||||
Prefix(app::Prefix),
|
||||
Infix(app::Infix),
|
||||
Module(lines::Module),
|
||||
Block(lines::Block),
|
||||
FunDef(def::FunDef),
|
||||
OprDef(def::OprDef),
|
||||
VarDef(def::VarDef),
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Application ===
|
||||
// ===================
|
||||
|
||||
/// This module exports ast shapes that represent function application.
|
||||
pub mod app {
|
||||
use super::*;
|
||||
|
||||
|
||||
|
||||
/// The ast node for application.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Prefix {
|
||||
pub func: Box<AnyAst>,
|
||||
pub arg: Box<AnyAst>,
|
||||
}
|
||||
|
||||
/// The ast node for an infix operator application.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Infix {
|
||||
pub larg: Box<AnyAst>,
|
||||
pub opr: Box<Ast<name::Opr>>,
|
||||
pub rarg: Box<AnyAst>,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === Block & Module ===
|
||||
// ======================
|
||||
|
||||
/// This module exports ast shapes that are represented as sequence of equally indented lines.
|
||||
pub mod lines {
|
||||
use super::*;
|
||||
|
||||
|
||||
|
||||
/// The ast node for a module that represents the file's root block.
|
||||
///
|
||||
/// The module consists of a sequence of possibly empty lines with no leading indentation.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Module {
|
||||
pub lines: Vec<Option<AnyAst>>,
|
||||
}
|
||||
|
||||
/// The ast node for a block that represents a sequence of equally indented lines.
|
||||
///
|
||||
/// Lines may contain some child ast or be empty. Block is used for all code blocks except
|
||||
/// for the root one, which uses `Module`.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Block {
|
||||
/// Absolute's block indent, counting from the module's root.
|
||||
pub indent: usize,
|
||||
/// Leading empty lines. Each line is represented by absolute count of spaces
|
||||
/// it contains, counting from the root.
|
||||
pub empty_lines: Vec<usize>,
|
||||
/// First line with non-empty item.
|
||||
pub first_line: Box<AnyAst>,
|
||||
/// Rest of lines, each of them optionally having contents.
|
||||
pub lines: Vec<Option<AnyAst>>,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Definition ===
|
||||
// ==================
|
||||
|
||||
/// This module exports ast shapes that represent definition of variable, function etc.
|
||||
pub mod def {
|
||||
use super::*;
|
||||
|
||||
|
||||
|
||||
/// The ast node for a method definition.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FunDef {
|
||||
pub name: Box<Ast<name::Var>>,
|
||||
pub args: Vec<AnyAst>,
|
||||
pub body: Box<AnyAst>,
|
||||
}
|
||||
|
||||
/// The ast node for an operator definition.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OprDef {
|
||||
pub name: Box<Ast<name::Opr>>,
|
||||
pub args: Vec<AnyAst>,
|
||||
pub body: Box<AnyAst>,
|
||||
}
|
||||
|
||||
/// The ast node for a variable definition.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct VarDef {
|
||||
pub name: Box<Ast<name::Var>>,
|
||||
pub value: Box<AnyAst>,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Identifiers ===
|
||||
// ===================
|
||||
|
||||
/// This module exports ast shapes for basic identifiers.
|
||||
pub mod name {
|
||||
/// The ast node for the underscore `_`.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Blank {}
|
||||
|
||||
/// The ast node for a variable.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Var {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
/// The ast node for a constructor.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Cons {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
/// The ast node for an operator.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Opr {
|
||||
pub name: String,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Invalid ===
|
||||
// ===============
|
||||
|
||||
/// This module exports invalid ast shapes.
|
||||
pub mod invalid {
|
||||
/// Unrecognized token.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Unrecognized {
|
||||
pub str: String,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Number ===
|
||||
// ==============
|
||||
|
||||
/// This module exports ast shapes that represent numbers.
|
||||
pub mod num {
|
||||
/// The ast node for a number.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Number {
|
||||
pub number: String,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Text ===
|
||||
// ============
|
||||
|
||||
|
||||
/// This module exports ast shapes that represent text (strings).
|
||||
pub mod txt {
|
||||
/// The ast node for a string of text.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Text {
|
||||
pub text: String,
|
||||
}
|
||||
}
|
||||
|
||||
// === Into<Shape> ===
|
||||
|
||||
impl From<Unrecognized> for Shape {
|
||||
fn from(val: Unrecognized) -> Self {
|
||||
Self::Unrecognized(val)
|
||||
}
|
||||
}
|
||||
impl From<Blank> for Shape {
|
||||
fn from(val: Blank) -> Self {
|
||||
Self::Blank(val)
|
||||
}
|
||||
}
|
||||
impl From<Var> for Shape {
|
||||
fn from(val: Var) -> Self {
|
||||
Self::Var(val)
|
||||
}
|
||||
}
|
||||
impl From<Cons> for Shape {
|
||||
fn from(val: Cons) -> Self {
|
||||
Self::Cons(val)
|
||||
}
|
||||
}
|
||||
impl From<Opr> for Shape {
|
||||
fn from(val: Opr) -> Self {
|
||||
Self::Opr(val)
|
||||
}
|
||||
}
|
||||
impl From<Number> for Shape {
|
||||
fn from(val: Number) -> Self {
|
||||
Self::Number(val)
|
||||
}
|
||||
}
|
||||
impl From<Text> for Shape {
|
||||
fn from(val: Text) -> Self {
|
||||
Self::Text(val)
|
||||
}
|
||||
}
|
||||
impl From<Prefix> for Shape {
|
||||
fn from(val: Prefix) -> Self {
|
||||
Self::Prefix(val)
|
||||
}
|
||||
}
|
||||
impl From<Infix> for Shape {
|
||||
fn from(val: Infix) -> Self {
|
||||
Self::Infix(val)
|
||||
}
|
||||
}
|
||||
impl From<Module> for Shape {
|
||||
fn from(val: Module) -> Self {
|
||||
Self::Module(val)
|
||||
}
|
||||
}
|
||||
impl From<Block> for Shape {
|
||||
fn from(val: Block) -> Self {
|
||||
Self::Block(val)
|
||||
}
|
||||
}
|
||||
impl From<FunDef> for Shape {
|
||||
fn from(val: FunDef) -> Self {
|
||||
Self::FunDef(val)
|
||||
}
|
||||
}
|
||||
impl From<OprDef> for Shape {
|
||||
fn from(val: OprDef) -> Self {
|
||||
Self::OprDef(val)
|
||||
}
|
||||
}
|
||||
impl From<VarDef> for Shape {
|
||||
fn from(val: VarDef) -> Self {
|
||||
Self::VarDef(val)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Constructors ===
|
||||
// ====================
|
||||
|
||||
impl AnyAst {
|
||||
/// Creates a new ast node with random `Uuid` from `Shape`.
|
||||
pub fn new(ast: impl Into<Shape>) -> Self {
|
||||
Self { ast: ast.into(), uid: Some(Uuid::new_v4()), len: 0, off: 0 }
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Unrecognized`.
|
||||
pub fn unrecognized(str: String) -> Self {
|
||||
Self::new(Unrecognized { str })
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Blank`.
|
||||
pub fn blank() -> Self {
|
||||
Self::new(Blank {})
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Var`.
|
||||
pub fn var(name: String) -> Self {
|
||||
Self::new(Var { name })
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Cons`.
|
||||
pub fn cons(name: String) -> Self {
|
||||
Self::new(Cons { name })
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Opr`.
|
||||
pub fn opr(name: String) -> Self {
|
||||
Self::new(Opr { name })
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Number`.
|
||||
pub fn num(number: i64) -> Self {
|
||||
Self::new(Number { number: number.to_string() })
|
||||
}
|
||||
|
||||
/// Creates a new ast node with `Shape::Text`.
|
||||
pub fn text(text: String) -> Self {
|
||||
Self::new(Text { text })
|
||||
}
|
||||
}
|
@ -1,367 +0,0 @@
|
||||
//! This module exports scala ast generator.
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(unused_must_use)]
|
||||
|
||||
use std::io::prelude::*;
|
||||
|
||||
use itertools::Itertools;
|
||||
use proc_macro2::Span;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
use std::fs::File;
|
||||
use syn;
|
||||
use syn::Ident;
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === Scala Generator ===
|
||||
// =======================
|
||||
|
||||
/// A Scala ast generator.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ScalaGenerator {
|
||||
/// The content of the file.
|
||||
code: String,
|
||||
/// Current indentation.
|
||||
indent: usize,
|
||||
/// Inheritance hierarchy.
|
||||
extends: HashMap<Ident, Ident>,
|
||||
}
|
||||
|
||||
impl ScalaGenerator {
|
||||
/// Generates a Scala ast from `lib/rust/ast/src/lib.rs`.
|
||||
pub fn ast() -> std::io::Result<String> {
|
||||
let mut content = String::new();
|
||||
let mut file = File::open("lib/rust/ast/src/ast.rs")?;
|
||||
file.read_to_string(&mut content);
|
||||
|
||||
Ok(Self::file("ast", syn::parse_file(content.as_str()).unwrap()))
|
||||
}
|
||||
|
||||
/// Generates a Scala ast definition from a parsed Rust ast definition.
|
||||
pub fn file(name: &str, file: syn::File) -> String {
|
||||
let mut this = Self::default();
|
||||
writeln!(this.code, "package org.enso.ast\n");
|
||||
writeln!(this.code, "import java.util.UUID\n\n");
|
||||
this.block(&Ident::new(name, Span::call_site()), &file.items[..]);
|
||||
this.code
|
||||
}
|
||||
|
||||
/// Generates a block of Scala code.
|
||||
fn block(&mut self, ident: &Ident, lines: &[syn::Item]) {
|
||||
write!(self.code, "\n{:i$}object ", "", i = self.indent);
|
||||
self.typ_name(ident);
|
||||
writeln!(self.code, " {{");
|
||||
self.indent += 2;
|
||||
if self.extends.contains_key(ident) {
|
||||
write!(self.code, "{:i$}sealed trait ", "", i = self.indent);
|
||||
self.typ_name(ident);
|
||||
self.extends(ident);
|
||||
}
|
||||
|
||||
for item in lines {
|
||||
match item {
|
||||
syn::Item::Enum(val) => self.adt(val),
|
||||
syn::Item::Type(val) => {
|
||||
write!(self.code, "\n{:i$}type ", "", i = self.indent);
|
||||
self.typ_name(&val.ident);
|
||||
self.generics(&val.generics);
|
||||
write!(self.code, " = ");
|
||||
self.typ(val.ty.as_ref());
|
||||
writeln!(self.code);
|
||||
}
|
||||
syn::Item::Struct(val) =>
|
||||
if let syn::Fields::Named(fields) = &val.fields {
|
||||
self.class(&val.ident, &val.generics, fields);
|
||||
} else {
|
||||
panic!("All struct fields must be named!");
|
||||
},
|
||||
syn::Item::Mod(val) => {
|
||||
if let Some(content) = &val.content {
|
||||
self.block(&val.ident, &content.1[..]);
|
||||
};
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
self.indent -= 2;
|
||||
writeln!(self.code, "{:i$}}}", "", i = self.indent);
|
||||
}
|
||||
|
||||
/// Generates a Scala case class.
|
||||
///
|
||||
/// `struct Foo { bar:Bar, baz:Baz }` => `case class Foo(bar:Bar, baz:Baz)`
|
||||
fn class(&mut self, ident: &Ident, generics: &syn::Generics, fields: &syn::FieldsNamed) {
|
||||
write!(self.code, "{:i$}case class ", "", i = self.indent);
|
||||
self.typ_name(ident);
|
||||
self.generics(generics);
|
||||
write!(self.code, "(");
|
||||
for (i, field) in fields.named.iter().enumerate() {
|
||||
if i != 0 {
|
||||
write!(self.code, ", ");
|
||||
}
|
||||
if let Some(ident) = &field.ident {
|
||||
self.var_name(ident);
|
||||
}
|
||||
write!(self.code, ": ");
|
||||
self.typ(&field.ty);
|
||||
}
|
||||
write!(self.code, ")");
|
||||
self.extends(ident);
|
||||
}
|
||||
|
||||
/// Generates Scala ADT - case classes extending a sealed trait.
|
||||
///
|
||||
/// There are two modes of conversion:
|
||||
///
|
||||
/// 1) When the Rust enum variant has named fields:
|
||||
/// ```
|
||||
/// enum Foo {
|
||||
/// Bar { x: isize },
|
||||
/// Baz { y: isize },
|
||||
/// }
|
||||
/// ```
|
||||
/// ===>
|
||||
/// ```scala
|
||||
/// sealed trait Foo
|
||||
/// case class Bar(x:Int) extends Foo
|
||||
/// case class Baz(y:Int) extends Foo
|
||||
/// ```
|
||||
///
|
||||
/// 2) When the Rust enum variant has one unnamed field with qualified type:
|
||||
/// ```
|
||||
/// enum Foo {
|
||||
/// Bar(barz::Bar),
|
||||
/// Baz(barz::Baz),
|
||||
/// }
|
||||
/// mod barz {
|
||||
/// pub struct Bar {}
|
||||
/// pub struct Baz {
|
||||
/// y: isize,
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
/// ===>
|
||||
/// ```scala
|
||||
/// sealed trait Foo
|
||||
/// object barz {
|
||||
/// sealed trait Barz extends Foo
|
||||
/// case class Bar() extends Barz
|
||||
/// case class Baz(y:size) extends Barz
|
||||
/// }
|
||||
/// ```
|
||||
fn adt(&mut self, adt: &syn::ItemEnum) {
|
||||
write!(self.code, "\n{:i$}sealed trait {}", "", adt.ident, i = self.indent);
|
||||
self.generics(&adt.generics);
|
||||
self.extends(&adt.ident);
|
||||
for variant in &adt.variants {
|
||||
match &variant.fields {
|
||||
syn::Fields::Named(fields) => {
|
||||
self.extends.insert(variant.ident.clone(), adt.ident.clone());
|
||||
self.class(&variant.ident, &adt.generics, fields);
|
||||
}
|
||||
syn::Fields::Unnamed(fields) => {
|
||||
if let Some(syn::Type::Path(path)) = fields.unnamed.first().map(|f| &f.ty) {
|
||||
let path = path.path.segments.iter().rev().take(2).collect_tuple();
|
||||
if let Some((class, object)) = path {
|
||||
self.extends.insert(object.ident.clone(), adt.ident.clone());
|
||||
self.extends.insert(class.ident.clone(), object.ident.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates Scala class extension.
|
||||
///
|
||||
/// `foo` => `extends Foo`
|
||||
fn extends(&mut self, ident: &Ident) {
|
||||
if let Some(name) = self.extends.get(ident).cloned() {
|
||||
write!(self.code, " extends ");
|
||||
self.typ_name(&name);
|
||||
}
|
||||
writeln!(self.code);
|
||||
}
|
||||
|
||||
/// Generates Scala type parameters.
|
||||
///
|
||||
/// `<Foo, Bar>` = `[Foo, Bar]`
|
||||
fn generics(&mut self, generics: &syn::Generics) {
|
||||
if generics.params.is_empty() {
|
||||
return;
|
||||
}
|
||||
write!(self.code, "[");
|
||||
for (i, param) in generics.params.iter().enumerate() {
|
||||
if i != 0 {
|
||||
write!(self.code, ", ");
|
||||
}
|
||||
if let syn::GenericParam::Type(typ) = param {
|
||||
self.typ_name(&typ.ident)
|
||||
}
|
||||
}
|
||||
write!(self.code, "]");
|
||||
}
|
||||
|
||||
/// Generates a qualified scala type with type arguments.
|
||||
///
|
||||
/// `foo::Bar<Baz>` => `Foo.Bar[Baz]`
|
||||
fn typ(&mut self, typ: &syn::Type) {
|
||||
if let syn::Type::Path(path) = typ {
|
||||
for (i, typ) in path.path.segments.iter().enumerate() {
|
||||
if i != 0 {
|
||||
write!(self.code, ".");
|
||||
}
|
||||
self.typ_segment(typ);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a Scala type with type arguments.
|
||||
///
|
||||
/// `Foo<Bar<Baz>>` => `Foo[Bar[Baz]]`
|
||||
fn typ_segment(&mut self, typ: &syn::PathSegment) {
|
||||
let boxed = typ.ident.to_string().as_str() == "Box";
|
||||
if !boxed {
|
||||
self.typ_name(&typ.ident);
|
||||
}
|
||||
if let syn::PathArguments::AngleBracketed(typ) = &typ.arguments {
|
||||
if !boxed {
|
||||
write!(self.code, "[");
|
||||
}
|
||||
for (i, typ) in typ.args.iter().enumerate() {
|
||||
if i != 0 {
|
||||
write!(self.code, ", ");
|
||||
}
|
||||
if let syn::GenericArgument::Type(typ) = typ {
|
||||
self.typ(typ);
|
||||
}
|
||||
}
|
||||
if !boxed {
|
||||
write!(self.code, "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a Scala variable name (camel case).
|
||||
///
|
||||
/// `foo_bar` => `fooBar`
|
||||
fn var_name(&mut self, ident: &Ident) {
|
||||
let mut underscore = false;
|
||||
for char in ident.to_string().chars() {
|
||||
if char == '_' {
|
||||
underscore = true;
|
||||
} else if underscore {
|
||||
underscore = false;
|
||||
for char in char.to_uppercase() {
|
||||
self.code.push(char)
|
||||
}
|
||||
} else {
|
||||
self.code.push(char);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a Scala type name.
|
||||
///
|
||||
/// The following Rust types are automatically converted to Scala types:
|
||||
/// ```code
|
||||
/// u32 | i32 | u16 | i16 | i8 => Int,
|
||||
/// usize | isize | u64 | i64 => Long,
|
||||
/// u8 => Byte,
|
||||
/// char => Char,
|
||||
/// Vec => Vector,
|
||||
/// Uuid => UUID,
|
||||
/// ```
|
||||
fn typ_name(&mut self, ident: &Ident) {
|
||||
let name = match ident.to_string().as_str() {
|
||||
"u32" | "i32" | "u16" | "i16" | "i8" => "Int",
|
||||
"usize" | "isize" | "u64" | "i64" => "Long",
|
||||
"u8" => "Byte",
|
||||
"char" => "Char",
|
||||
"Vec" => "Vector",
|
||||
"Uuid" => "UUID",
|
||||
name => {
|
||||
let mut chars = name.chars();
|
||||
if let Some(char) = chars.next() {
|
||||
write!(self.code, "{}", char.to_uppercase().to_string() + chars.as_str());
|
||||
}
|
||||
""
|
||||
}
|
||||
};
|
||||
write!(self.code, "{}", name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_file() {
|
||||
let rust = syn::parse_quote! {
|
||||
type A<X> = B<X,Y>;
|
||||
|
||||
pub enum FooBarBaz {
|
||||
Foo(a::Foo),
|
||||
Bar(a::Bar),
|
||||
Baz(b::Baz),
|
||||
}
|
||||
mod a {
|
||||
struct Foo {}
|
||||
struct Bar {x:usize, y:u8, z:b::Type}
|
||||
}
|
||||
mod b {
|
||||
type Type = Baz;
|
||||
|
||||
enum Baz {
|
||||
Baz1 {},
|
||||
Baz2 {foo_bar:Box<Vec<i32>>},
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let scala = "\
|
||||
package org.enso.ast
|
||||
|
||||
import java.util.UUID
|
||||
|
||||
|
||||
|
||||
object Ast {
|
||||
|
||||
type A[X] = B[X, Y]
|
||||
|
||||
sealed trait FooBarBaz
|
||||
|
||||
object A {
|
||||
sealed trait A extends FooBarBaz
|
||||
case class Foo() extends A
|
||||
case class Bar(x: Long, y: Byte, z: B.Type) extends A
|
||||
}
|
||||
|
||||
object B {
|
||||
sealed trait B extends FooBarBaz
|
||||
|
||||
type Type = Baz
|
||||
|
||||
sealed trait Baz extends B
|
||||
case class Baz1() extends Baz
|
||||
case class Baz2(fooBar: Vector[Int]) extends Baz
|
||||
}
|
||||
}
|
||||
";
|
||||
assert_eq!(ScalaGenerator::file("ast", rust), scala);
|
||||
}
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
//! This module exports the implementation of the enso abstract syntax tree.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod ast;
|
||||
pub mod generation;
|
||||
|
||||
pub use crate::ast::*;
|
@ -1,24 +0,0 @@
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
use ast_new::generation::ScalaGenerator;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
|
||||
|
||||
|
||||
pub fn main() -> std::io::Result<()> {
|
||||
let matches = clap::App::new("Enso AST")
|
||||
.version("1.0")
|
||||
.author("Enso Team <enso-dev@enso.org>")
|
||||
.about("Enso AST generator.")
|
||||
.args_from_usage("--generate-scala-ast [FILE] 'Generates a scala ast in specified file.'")
|
||||
.get_matches();
|
||||
|
||||
if let Some(file) = matches.value_of("generate-scala-ast") {
|
||||
File::create(file)?.write_all(ScalaGenerator::ast()?.as_bytes())?;
|
||||
println!("Generated scala ast at path: {}", file);
|
||||
}
|
||||
Ok(())
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
[package]
|
||||
name = "flexer-test-definition"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
enso-flexer = { version = "^0.2.0", path = "../../flexer" }
|
@ -1,285 +0,0 @@
|
||||
//! This file contains the code defining a lexer for the following small language. Due to the way in
|
||||
//! which the code-generation from the flexer is used, it has to be defined in a separate crate from
|
||||
//! the site at which it's used. For the actual tests of this code, please see
|
||||
//! `flexer-testing/generation`.
|
||||
//!
|
||||
//! The language here is being defined as follows:
|
||||
//!
|
||||
//! a-word = 'a'+;
|
||||
//! b-word = 'b'+;
|
||||
//! word = a-word | b-word;
|
||||
//! space = ' ';
|
||||
//! spaced-word = space, word;
|
||||
//! language = word, spaced-word*;
|
||||
//!
|
||||
//! Please note that there is a fair amount of duplicated code between this test and the
|
||||
//! `lexer_generated_api_test` file. This is to present the full view of what each portion of the
|
||||
//! process looks like.
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
use enso_flexer::prelude::*;
|
||||
use enso_flexer::*;
|
||||
|
||||
use enso_flexer::automata::pattern::Pattern;
|
||||
use enso_flexer::group::Registry;
|
||||
use enso_flexer::prelude::logger::Disabled;
|
||||
use enso_flexer::prelude::reader::BookmarkManager;
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Type Aliases ===
|
||||
// ====================
|
||||
|
||||
type Logger = Disabled;
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === AST ===
|
||||
// ===========
|
||||
|
||||
/// A very simple AST, sufficient for the simple language being defined.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Token {
|
||||
/// A word from the input, consisting of a sequence of all `a` or all `b`.
|
||||
Word(String),
|
||||
/// A token that the lexer is unable to recognise.
|
||||
Unrecognized(String),
|
||||
}
|
||||
impl Token {
|
||||
/// Construct a new word token.
|
||||
pub fn word(name: impl Into<String>) -> Token {
|
||||
Token::Word(name.into())
|
||||
}
|
||||
|
||||
/// Construct a new unrecognized token.
|
||||
pub fn unrecognized(name: impl Into<String>) -> Token {
|
||||
Token::Unrecognized(name.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// A representation of a stream of tokens.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone, Debug, Default, PartialEq)]
|
||||
pub struct TokenStream {
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl TokenStream {
|
||||
/// Append the provided token to the token stream.
|
||||
pub fn push(&mut self, token: Token) {
|
||||
self.tokens.push(token);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<Vec<Token>> for TokenStream {
|
||||
fn from(tokens: Vec<Token>) -> Self {
|
||||
TokenStream { tokens }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Test Lexer ===
|
||||
// ==================
|
||||
|
||||
/// The definition of a test lexer for the above-described language.
|
||||
#[derive(Debug)]
|
||||
pub struct TestLexer {
|
||||
lexer: Flexer<TestState, TokenStream, Logger>,
|
||||
}
|
||||
|
||||
impl Deref for TestLexer {
|
||||
type Target = Flexer<TestState, TokenStream, Logger>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for TestLexer {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl TestLexer {
|
||||
/// Creates a new instance of this lexer.
|
||||
pub fn new() -> Self {
|
||||
let logger = Logger::new("TestLexer");
|
||||
let lexer = Flexer::new(logger);
|
||||
TestLexer { lexer }
|
||||
}
|
||||
}
|
||||
|
||||
/// Rules for the root state.
|
||||
#[allow(dead_code, missing_docs)]
|
||||
impl TestLexer {
|
||||
fn on_first_word<R: ReaderOps>(&mut self, _reader: &mut R) {
|
||||
let str = self.current_match.clone();
|
||||
let ast = Token::Word(str);
|
||||
self.output.push(ast);
|
||||
let id = self.seen_first_word_state;
|
||||
self.push_state(id);
|
||||
}
|
||||
|
||||
fn on_err_suffix_first_word<R: ReaderOps>(&mut self, _reader: &mut R) {
|
||||
let ast = Token::Unrecognized(self.current_match.clone());
|
||||
self.output.push(ast);
|
||||
}
|
||||
|
||||
fn on_no_err_suffix_first_word<R: ReaderOps>(&mut self, _reader: &mut R) {}
|
||||
|
||||
fn rules_in_root(lexer: &mut TestLexer) {
|
||||
let a_word = Pattern::char('a').many1();
|
||||
let b_word = Pattern::char('b').many1();
|
||||
let any = Pattern::any();
|
||||
let end = Pattern::eof();
|
||||
|
||||
let root_group_id = lexer.initial_state;
|
||||
let root_group = lexer.groups_mut().group_mut(root_group_id);
|
||||
|
||||
root_group.create_rule(&a_word, "self.on_first_word(reader)");
|
||||
root_group.create_rule(&b_word, "self.on_first_word(reader)");
|
||||
root_group.create_rule(&end, "self.on_no_err_suffix_first_word(reader)");
|
||||
root_group.create_rule(&any, "self.on_err_suffix_first_word(reader)");
|
||||
}
|
||||
}
|
||||
|
||||
/// Rules for the "seen first word" state.
|
||||
#[allow(dead_code, missing_docs)]
|
||||
impl TestLexer {
|
||||
fn on_spaced_word<R: ReaderOps>(&mut self, _reader: &mut R, _test_arg: bool) {
|
||||
let str = self.current_match.clone();
|
||||
let ast = Token::Word(String::from(str.trim()));
|
||||
self.output.push(ast);
|
||||
}
|
||||
|
||||
fn on_err_suffix<R: ReaderOps>(&mut self, reader: &mut R) {
|
||||
self.on_err_suffix_first_word(reader);
|
||||
self.pop_state();
|
||||
}
|
||||
|
||||
fn on_no_err_suffix<R: ReaderOps>(&mut self, reader: &mut R) {
|
||||
self.on_no_err_suffix_first_word(reader);
|
||||
self.pop_state();
|
||||
}
|
||||
|
||||
fn rules_in_seen_first_word(lexer: &mut TestLexer) {
|
||||
let a_word = Pattern::char('a').many1();
|
||||
let b_word = Pattern::char('b').many1();
|
||||
let space = Pattern::char(' ');
|
||||
let spaced_a_word = &space >> &a_word;
|
||||
let spaced_b_word = &space >> &b_word;
|
||||
let any = Pattern::any();
|
||||
let end = Pattern::eof();
|
||||
|
||||
let seen_first_word_group_id = lexer.seen_first_word_state;
|
||||
let seen_first_word_group = lexer.groups_mut().group_mut(seen_first_word_group_id);
|
||||
|
||||
seen_first_word_group.create_rule(&spaced_a_word, "self.on_spaced_word(reader,true)");
|
||||
seen_first_word_group.create_rule(&spaced_b_word, "self.on_spaced_word(reader,false)");
|
||||
seen_first_word_group.create_rule(&end, "self.on_no_err_suffix(reader)");
|
||||
seen_first_word_group.create_rule(&any, "self.on_err_suffix(reader)");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl enso_flexer::Definition for TestLexer {
|
||||
fn define() -> Self {
|
||||
let mut lexer = TestLexer::new();
|
||||
|
||||
TestLexer::rules_in_seen_first_word(&mut lexer);
|
||||
TestLexer::rules_in_root(&mut lexer);
|
||||
|
||||
lexer
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
self.lexer.groups()
|
||||
}
|
||||
|
||||
fn set_up(&mut self) {}
|
||||
|
||||
fn tear_down(&mut self) {}
|
||||
}
|
||||
|
||||
impl Default for TestLexer {
|
||||
fn default() -> Self {
|
||||
TestLexer::new()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Lexer State ===
|
||||
// ===================
|
||||
|
||||
/// The stateful components of the test lexer.
|
||||
#[derive(Debug)]
|
||||
pub struct TestState {
|
||||
/// The registry for groups in the lexer.
|
||||
lexer_states: group::Registry,
|
||||
/// The initial state of the lexer.
|
||||
initial_state: group::Identifier,
|
||||
/// The state entered when the first word has been seen.
|
||||
seen_first_word_state: group::Identifier,
|
||||
/// The bookmarks for this lexer.
|
||||
bookmarks: BookmarkManager,
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl enso_flexer::State for TestState {
|
||||
fn new(_logger: &impl AnyLogger) -> Self {
|
||||
let mut lexer_states = group::Registry::default();
|
||||
let initial_state = lexer_states.define_group("ROOT", None);
|
||||
let seen_first_word_state = lexer_states.define_group("SEEN FIRST WORD", None);
|
||||
let bookmarks = BookmarkManager::new();
|
||||
Self { lexer_states, initial_state, seen_first_word_state, bookmarks }
|
||||
}
|
||||
|
||||
fn initial_state(&self) -> group::Identifier {
|
||||
self.initial_state
|
||||
}
|
||||
|
||||
fn groups(&self) -> &group::Registry {
|
||||
&self.lexer_states
|
||||
}
|
||||
|
||||
fn groups_mut(&mut self) -> &mut group::Registry {
|
||||
&mut self.lexer_states
|
||||
}
|
||||
|
||||
fn bookmarks(&self) -> &BookmarkManager {
|
||||
&self.bookmarks
|
||||
}
|
||||
|
||||
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
|
||||
&mut self.bookmarks
|
||||
}
|
||||
|
||||
fn specialize(&self) -> Result<String, GenError> {
|
||||
generate::specialize(self, "TestLexer", "TokenStream")
|
||||
}
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
[package]
|
||||
name = "flexer-test-generation"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
enso-flexer = { version = "^0.2.0", path = "../../flexer" }
|
||||
flexer-test-definition = { version = "0.1.0", path = "../definition" }
|
||||
|
||||
[build-dependencies]
|
||||
enso-flexer = { version = "^0.2.0", path = "../../flexer" }
|
||||
flexer-test-definition = { version = "0.1.0", path = "../definition" }
|
@ -1,32 +0,0 @@
|
||||
use std::io::prelude::*;
|
||||
|
||||
use enso_flexer::Definition;
|
||||
use enso_flexer::State;
|
||||
use flexer_test_definition::TestLexer;
|
||||
use std::fs::File;
|
||||
|
||||
|
||||
|
||||
/// Generates the lexer engine and saves the result into the file `src/engine.rs`.
|
||||
///
|
||||
/// The content of the generated file can be used with the `include!` macro.
|
||||
fn generate_engine() {
|
||||
let definition_path = "../definition/src/lib.rs";
|
||||
let output_directory = "src/generated";
|
||||
let _ = std::fs::create_dir(output_directory);
|
||||
let output_path = "src/generated/engine.rs";
|
||||
let mut lexer_def = File::open(definition_path)
|
||||
.unwrap_or_else(|_| panic!("The lexer definition should exist at {}.", definition_path));
|
||||
let mut contents = String::new();
|
||||
let mut file = File::create(output_path)
|
||||
.unwrap_or_else(|_| panic!("Cannot open output file at {}.", output_path));
|
||||
let lexer = TestLexer::define();
|
||||
let engine = lexer.specialize().unwrap();
|
||||
lexer_def.read_to_string(&mut contents).expect("Unable to read lexer definition.");
|
||||
file.write_all(contents.as_bytes()).expect("Unable to write lexer definition.");
|
||||
file.write_all(engine.as_bytes()).expect("Unable to write lexer specialization.");
|
||||
}
|
||||
|
||||
fn main() {
|
||||
generate_engine()
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
//! This module serves to re-export the generated lexer.
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod engine;
|
||||
|
||||
|
||||
|
@ -1,34 +0,0 @@
|
||||
//! This library exposes the specialized version of the Enso lexer.
|
||||
//!
|
||||
//! Its sole purpose is to avoid the lexer definition getting out of sync with its implementation
|
||||
//! (the generated engine), which requires the engine to live in a separate crate.
|
||||
//!
|
||||
//! This separation enables generation of the enso lexer source code with `build.rs` during
|
||||
//! compilation. Its output is then stored in a new file `engine.rs`and exported by `lexer.rs`.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod generated;
|
||||
|
||||
|
||||
|
@ -1,114 +0,0 @@
|
||||
//! This file contains tests for the generated lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
use enso_flexer::prelude::*;
|
||||
|
||||
use enso_flexer::prelude::reader::decoder::DecoderUTF8;
|
||||
use flexer_test_generation::generated::engine::TestLexer;
|
||||
use flexer_test_generation::generated::engine::Token;
|
||||
use flexer_test_generation::generated::engine::TokenStream;
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
/// Executes the test on the provided input string slice.
|
||||
fn run_test_on(str: impl AsRef<str>) -> TokenStream {
|
||||
// Hardcoded for ease of use here.
|
||||
let reader = Reader::new(str.as_ref().as_bytes(), DecoderUTF8());
|
||||
let mut lexer = TestLexer::new();
|
||||
let run_result = lexer.run(reader);
|
||||
|
||||
match run_result.kind {
|
||||
enso_flexer::ResultKind::Success => run_result.tokens,
|
||||
_ => default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_a_word() {
|
||||
let input = "aaaaa";
|
||||
let expected_output = TokenStream::from(vec![Token::word(input)]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_b_word() {
|
||||
let input = "bbbbb";
|
||||
let expected_output = TokenStream::from(vec![Token::word(input)]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_two_word() {
|
||||
let input = "aaaaa bbbbb";
|
||||
let expected_output = TokenStream::from(vec![Token::word("aaaaa"), Token::word("bbbbb")]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multi_word() {
|
||||
let input = "bbb aa a b bbbbb aa";
|
||||
let expected_output = TokenStream::from(vec![
|
||||
Token::word("bbb"),
|
||||
Token::word("aa"),
|
||||
Token::word("a"),
|
||||
Token::word("b"),
|
||||
Token::word("bbbbb"),
|
||||
Token::word("aa"),
|
||||
]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_single_word() {
|
||||
let input = "c";
|
||||
let expected_output = TokenStream::from(vec![Token::unrecognized(input)]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multi_word_invalid() {
|
||||
let input = "aaaaaa c bbbbbb";
|
||||
let expected_output = TokenStream::from(vec![
|
||||
Token::word("aaaaaa"),
|
||||
Token::unrecognized(" "),
|
||||
Token::unrecognized("c"),
|
||||
Token::unrecognized(" "),
|
||||
Token::word("bbbbbb"),
|
||||
]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_end_invalid() {
|
||||
let input = "bbbbbb c";
|
||||
let expected_output = TokenStream::from(vec![
|
||||
Token::word("bbbbbb"),
|
||||
Token::unrecognized(" "),
|
||||
Token::unrecognized("c"),
|
||||
]);
|
||||
let result = run_test_on(input);
|
||||
assert_eq!(result, expected_output);
|
||||
}
|
@@ -1,40 +0,0 @@
|
||||
[package]
|
||||
name = "enso-flexer"
|
||||
version = "0.2.1"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
description = "A finite-automata-based lexing engine."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/enso/lib/rust/flexer"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../LICENSE"
|
||||
|
||||
keywords = ["lexer", "finite-automata"]
|
||||
categories = ["parsing"]
|
||||
|
||||
publish = true
|
||||
|
||||
[lib]
|
||||
name = "enso_flexer"
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
enso-automata = { version = "^0.2.0", path = "../../automata" }
|
||||
enso-logger = { version = "^0.3.0", path = "../../logger" }
|
||||
enso-prelude = { version = "^0.2.1", path = "../../prelude" }
|
||||
enso-lazy-reader = { version = "^0.2.0", path = "../lazy-reader" }
|
||||
enso-macro-utils = { version = "^0.2.0", path = "../../macro-utils" }
|
||||
|
||||
itertools = "0.8"
|
||||
proc-macro2 = "1.0.19"
|
||||
nonempty = "0.1.5"
|
||||
quote = "1.0"
|
||||
syn = { version = "1.0.12", features = ["full", "extra-traits", "visit-mut", "visit", "parsing", "printing"] }
|
||||
unicode-segmentation = "1.6.0"
|
||||
wasm-bindgen = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
wasm-bindgen-test = "0.3.8"
|
@@ -1,4 +0,0 @@
|
||||
# Flexer
|
||||
|
||||
This library provides a finite-automata-based lexing engine that can flexibly
|
||||
tokenize an input stream.
|
@@ -1,544 +0,0 @@
|
||||
//! This file contains utilities for generating rust code from lexer definitions, allowing the
|
||||
//! flexer to be specialised for a specific language.
|
||||
|
||||
use crate::prelude::*;
|
||||
use quote::*;
|
||||
use syn::*;
|
||||
|
||||
use crate as flexer;
|
||||
use crate::automata::dfa;
|
||||
use crate::automata::dfa::Dfa;
|
||||
use crate::automata::nfa;
|
||||
use crate::automata::state::State;
|
||||
use crate::group;
|
||||
use crate::group::AutomatonData;
|
||||
use crate::group::Group;
|
||||
|
||||
use enso_macro_utils::repr;
|
||||
use proc_macro2::Literal;
|
||||
use std::fmt;
|
||||
use std::hash::BuildHasher;
|
||||
use std::result::Result;
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === Code Generation ===
|
||||
// =======================
|
||||
|
||||
/// Generate specialized code for the provided lexer `definition`.
|
||||
///
|
||||
/// This specialized code is a highly-optimised and tailored lexer that dispatches based on simple
|
||||
/// code-point switches, with no dynamic lookup. This means that it is very fast and has very low
|
||||
/// overhead.
|
||||
pub fn specialize(
|
||||
definition: &impl flexer::State,
|
||||
state_type_name: impl Str,
|
||||
output_type_name: impl Str,
|
||||
) -> Result<String, GenError> {
|
||||
let group_registry = definition.groups();
|
||||
let mut body_items =
|
||||
vec![run_function(output_type_name)?, run_current_state_function(), step(group_registry)];
|
||||
for group in group_registry.all().iter() {
|
||||
body_items.extend(automaton_for_group(group, group_registry)?)
|
||||
}
|
||||
let result = wrap_in_impl_for(state_type_name, body_items)?;
|
||||
let code = show_code(&result);
|
||||
Ok(code)
|
||||
}
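// Usage sketch (names here are illustrative): a lexer's `State::specialize` implementation
// typically just forwards to this function with the user-facing lexer and output type
// names, as the test definitions later in this diff do:
//
//     fn specialize(&self) -> Result<String, GenError> {
//         generate::specialize(self, "TestLexer", "TokenStream")
//     }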
|
||||
|
||||
|
||||
// === Whole-Lexer Codegen Utilities ===
|
||||
|
||||
/// Wrap the provided implementation items into an `impl` block for the provided `state_name` type.
|
||||
pub fn wrap_in_impl_for(
|
||||
state_name: impl Into<String>,
|
||||
body: Vec<ImplItem>,
|
||||
) -> Result<ItemImpl, GenError> {
|
||||
let state_name: Ident = str_to_ident(state_name.into().as_str())?;
|
||||
let mut tree: ItemImpl = parse_quote! {
|
||||
#[allow(missing_docs,dead_code,clippy::all)]
|
||||
impl #state_name {}
|
||||
};
|
||||
tree.items.extend(body);
|
||||
Ok(tree)
|
||||
}
|
||||
|
||||
/// Generate the `run` function for the specialized lexer.
|
||||
///
|
||||
/// This function is what the user of the lexer will call to begin execution.
|
||||
pub fn run_function(output_type_name: impl Str) -> Result<ImplItem, GenError> {
|
||||
let output_type_name = str_to_path(output_type_name)?;
|
||||
let tree: ImplItem = parse_quote! {
|
||||
pub fn run<R:ReaderOps>(&mut self, mut reader:R) -> LexingResult<#output_type_name> {
|
||||
self.set_up();
|
||||
reader.advance_char(&mut self.bookmarks);
|
||||
while self.run_current_state(&mut reader) == StageStatus::ExitSuccess {}
|
||||
let result = match self.status {
|
||||
StageStatus::ExitFinished => LexingResult::success(
|
||||
mem::take(&mut self.output)
|
||||
),
|
||||
StageStatus::ExitFail => LexingResult::failure(
|
||||
mem::take(&mut self.output)
|
||||
),
|
||||
_ => LexingResult::partial(mem::take(&mut self.output))
|
||||
};
|
||||
self.tear_down();
|
||||
result
|
||||
}
|
||||
};
|
||||
Ok(tree)
|
||||
}
|
||||
|
||||
/// Generate the function responsible for executing the lexer in its current state.
|
||||
pub fn run_current_state_function() -> ImplItem {
|
||||
let tree: ImplItem = parse_quote! {
|
||||
fn run_current_state<R:ReaderOps>(&mut self, reader:&mut R) -> StageStatus {
|
||||
self.status = StageStatus::Initial;
|
||||
let mut finished = false;
|
||||
|
||||
// Runs until reaching a state that no longer says to continue.
|
||||
while let Some(next_state) = self.status.continue_as() {
|
||||
// debug!(self.logger,"Current character is {reader.character().char:?}.");
|
||||
// debug!(self.logger,"Continuing in {next_state:?}.");
|
||||
self.status = self.step(next_state,reader);
|
||||
|
||||
if finished && reader.finished(self.bookmarks()) {
|
||||
// info!(self.logger,"Input finished.");
|
||||
self.status = StageStatus::ExitFinished
|
||||
}
|
||||
finished = reader.character().is_eof();
|
||||
|
||||
if self.status.should_continue() {
|
||||
match reader.character().char {
|
||||
Ok(char) => {
|
||||
reader.append_result(char);
|
||||
// info!(self.logger,"Result is {reader.result():?}.");
|
||||
},
|
||||
Err(enso_flexer::prelude::reader::Error::EOF) => {
|
||||
// info!(self.logger,"Reached EOF.");
|
||||
},
|
||||
Err(enso_flexer::prelude::reader::Error::EndOfGroup) => {
|
||||
let current_state = self.current_state();
|
||||
let group_name = self.groups().group(current_state).name.as_str();
|
||||
panic!("Missing rules for state {}.", group_name)
|
||||
}
|
||||
Err(_) => {
|
||||
// error!(self.logger,"Unexpected error!");
|
||||
panic!("Unexpected error!")
|
||||
}
|
||||
}
|
||||
reader.advance_char(&mut self.bookmarks);
|
||||
}
|
||||
}
|
||||
self.status
|
||||
}
|
||||
};
|
||||
tree
|
||||
}
|
||||
|
||||
/// Generate the `step` function for the lexer.
|
||||
///
|
||||
/// This function is responsible for dispatching based on the current state, consuming a character,
|
||||
/// and returning the state to transition to.
|
||||
pub fn step(groups: &group::Registry) -> ImplItem {
|
||||
let arms = groups.all().iter().map(|g| step_match_arm(g.id.into())).collect_vec();
|
||||
parse_quote! {
|
||||
fn step<R:ReaderOps>(&mut self, next_state:SubStateId, reader:&mut R) -> StageStatus {
|
||||
let current_state:usize = self.current_state().into();
|
||||
match current_state {
|
||||
#(#arms)*
|
||||
_ => unreachable_panic!("Unreachable state reached in lexer."),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a match arm for the step function.
|
||||
///
|
||||
/// There is one match arm per lexer state.
|
||||
pub fn step_match_arm(number: usize) -> Arm {
|
||||
let literal = Literal::usize_unsuffixed(number);
|
||||
let function_name_str = format!("dispatch_in_state_{}", number);
|
||||
let func_name: Ident = parse_str(function_name_str.as_str()).unwrap();
|
||||
let arm: Arm = parse_quote! {
|
||||
#literal => self.#func_name(next_state,reader),
|
||||
};
|
||||
arm
|
||||
}
|
||||
|
||||
|
||||
// === Generation for a Specific Lexer State ===
|
||||
|
||||
/// Generate the functions that implement the lexer automaton for a given lexer state.
|
||||
pub fn automaton_for_group(
|
||||
group: &Group,
|
||||
registry: &group::Registry,
|
||||
) -> Result<Vec<ImplItem>, GenError> {
|
||||
let mut nfa = registry.to_nfa_from(group.id);
|
||||
let mut rules = Vec::with_capacity(nfa.states().len());
|
||||
for state in nfa.public_states().iter() {
|
||||
if nfa.name(*state).is_some() {
|
||||
rules.push(rule_for_state(*state, &nfa)?);
|
||||
}
|
||||
}
|
||||
let mut dfa = Dfa::from(nfa.automaton());
|
||||
let dispatch_for_dfa = dispatch_in_state(&dfa, group.id.into())?;
|
||||
let mut dfa_transitions = transitions_for_dfa(&mut dfa, &mut nfa, group.id.into())?;
|
||||
dfa_transitions.push(dispatch_for_dfa);
|
||||
dfa_transitions.extend(rules);
|
||||
Ok(dfa_transitions)
|
||||
}
|
||||
|
||||
/// Generate a set of transition functions for the provided `dfa`, with identifier `id`.
|
||||
pub fn transitions_for_dfa(
|
||||
dfa: &mut Dfa,
|
||||
data: &mut AutomatonData,
|
||||
id: usize,
|
||||
) -> Result<Vec<ImplItem>, GenError> {
|
||||
let mut state_has_overlapping_rules: HashMap<usize, bool> = HashMap::new();
|
||||
state_has_overlapping_rules.insert(0, false);
|
||||
let state_names: Vec<_> =
|
||||
dfa.links.row_indices().map(|ix| (ix, name_for_step(id, ix))).collect();
|
||||
let mut transitions = Vec::with_capacity(state_names.len());
|
||||
for (ix, name) in state_names.into_iter() {
|
||||
transitions.push(transition_for_dfa(dfa, name, data, ix, &mut state_has_overlapping_rules)?)
|
||||
}
|
||||
Ok(transitions)
|
||||
}
|
||||
|
||||
/// Generate a specific transition function for the state `state_ix` in the provided `dfa`.
|
||||
#[allow(clippy::implicit_hasher)]
|
||||
pub fn transition_for_dfa<S: BuildHasher>(
|
||||
dfa: &mut Dfa,
|
||||
transition_name: Ident,
|
||||
data: &mut AutomatonData,
|
||||
state_ix: usize,
|
||||
has_overlaps: &mut HashMap<usize, bool, S>,
|
||||
) -> Result<ImplItem, GenError> {
|
||||
let match_expr: Expr = match_for_transition(dfa, state_ix, data, has_overlaps)?;
|
||||
let function: ImplItem = parse_quote! {
|
||||
fn #transition_name<R:ReaderOps>(&mut self, reader:&mut R) -> StageStatus {
|
||||
#match_expr
|
||||
}
|
||||
};
|
||||
Ok(function)
|
||||
}
|
||||
|
||||
/// Generate the pattern match for a given transition function.
|
||||
pub fn match_for_transition<S: BuildHasher>(
|
||||
dfa: &mut Dfa,
|
||||
state_ix: usize,
|
||||
data: &mut AutomatonData,
|
||||
has_overlaps: &mut HashMap<usize, bool, S>,
|
||||
) -> Result<Expr, GenError> {
|
||||
let overlaps = *has_overlaps.get(&state_ix).unwrap_or(&false);
|
||||
let mut trigger_state = dfa.links[(state_ix, 0)];
|
||||
let mut range_start = enso_automata::symbol::SymbolIndex::min_value();
|
||||
let divisions = dfa.alphabet.division_map.clone();
|
||||
let mut branches = Vec::with_capacity(divisions.len());
|
||||
for (sym, ix) in divisions.into_iter() {
|
||||
let new_trigger_state = dfa.links[(state_ix, ix)];
|
||||
if new_trigger_state != trigger_state {
|
||||
let range_end = if sym.index != 0 { sym.index - 1 } else { sym.index };
|
||||
let current_trigger_state = trigger_state;
|
||||
let current_range_start = range_start;
|
||||
trigger_state = new_trigger_state;
|
||||
range_start = sym.index;
|
||||
let body =
|
||||
branch_body(dfa, current_trigger_state, state_ix, data, has_overlaps, overlaps)?;
|
||||
branches.push(Branch::new(Some(current_range_start..=range_end), body));
|
||||
}
|
||||
}
|
||||
let catch_all_branch_body =
|
||||
branch_body(dfa, trigger_state, state_ix, data, has_overlaps, overlaps)?;
|
||||
let catch_all_branch = Branch::new(None, catch_all_branch_body);
|
||||
branches.push(catch_all_branch);
|
||||
let arms: Vec<Arm> = branches.into_iter().map(Into::into).collect();
|
||||
let mut match_expr: ExprMatch = parse_quote! {
|
||||
match u64::from(reader.character()) {
|
||||
#(#arms)*
|
||||
}
|
||||
};
|
||||
match_expr.arms = arms;
|
||||
Ok(Expr::Match(match_expr))
|
||||
}
|
||||
|
||||
/// Generate the branch body for a transition in the DFA.
|
||||
pub fn branch_body<S: BuildHasher>(
|
||||
dfa: &mut Dfa,
|
||||
target_state: State<Dfa>,
|
||||
state_ix: usize,
|
||||
data: &mut AutomatonData,
|
||||
has_overlaps: &mut HashMap<usize, bool, S>,
|
||||
rules_overlap: bool,
|
||||
) -> Result<Block, GenError> {
|
||||
let sources = dfa.sources.get(state_ix).expect("Internal error.");
|
||||
let rule_name_for_state = data.name_for_dfa_state(sources);
|
||||
if target_state == State::<Dfa>::INVALID {
|
||||
match rule_name_for_state {
|
||||
None => Ok(parse_quote! {{
|
||||
StageStatus::ExitFail
|
||||
}}),
|
||||
Some(rule) => {
|
||||
let rule: Expr = match parse_str(rule) {
|
||||
Ok(rule) => rule,
|
||||
Err(_) => return Err(GenError::BadExpression(rule.to_string())),
|
||||
};
|
||||
if rules_overlap {
|
||||
Ok(parse_quote! {{
|
||||
let rule_bookmark = self.bookmarks.rule_bookmark;
|
||||
let matched_bookmark = self.bookmarks.matched_bookmark;
|
||||
self.bookmarks.rewind(rule_bookmark,reader);
|
||||
self.current_match = reader.pop_result();
|
||||
self.#rule(reader);
|
||||
self.bookmarks.bookmark(matched_bookmark,reader);
|
||||
StageStatus::ExitSuccess
|
||||
}})
|
||||
} else {
|
||||
Ok(parse_quote! {{
|
||||
let matched_bookmark = self.bookmarks.matched_bookmark;
|
||||
self.current_match = reader.pop_result();
|
||||
self.#rule(reader);
|
||||
self.bookmarks.bookmark(matched_bookmark,reader);
|
||||
StageStatus::ExitSuccess
|
||||
}})
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let target_state_has_no_rule = match rule_name_for_state {
|
||||
Some(_) =>
|
||||
if !dfa_has_rule_name_for(data, dfa, target_state) {
|
||||
dfa.sources[target_state.id()] = (*sources).clone();
|
||||
has_overlaps.insert(target_state.id(), true);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
},
|
||||
None => false,
|
||||
};
|
||||
|
||||
let state_id = Literal::usize_unsuffixed(target_state.id());
|
||||
let ret: Expr = parse_quote! {
|
||||
StageStatus::ContinueWith(#state_id.into())
|
||||
};
|
||||
|
||||
if target_state_has_no_rule && !rules_overlap {
|
||||
Ok(parse_quote! {{
|
||||
let rule_bookmark = self.bookmarks.rule_bookmark;
|
||||
self.bookmarks.bookmark(rule_bookmark,reader);
|
||||
#ret
|
||||
}})
|
||||
} else {
|
||||
Ok(parse_quote! {{
|
||||
#ret
|
||||
}})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate the dispatch function for a given lexer state.
|
||||
///
|
||||
/// This dispatch function is responsible for dispatching based on the sub-state of any given lexer
|
||||
/// state, and is the main part of implementing the actual lexer transitions.
|
||||
pub fn dispatch_in_state(dfa: &Dfa, id: usize) -> Result<ImplItem, GenError> {
|
||||
let dispatch_name: Ident = str_to_ident(format!("dispatch_in_state_{}", id))?;
|
||||
let state_names = dfa.links.row_indices().map(|ix| (ix, name_for_step(id, ix))).collect_vec();
|
||||
let mut branches = Vec::with_capacity(state_names.len());
|
||||
for (ix, name) in state_names.into_iter() {
|
||||
let literal = Literal::usize_unsuffixed(ix);
|
||||
let arm: Arm = parse_quote! {
|
||||
#literal => self.#name(reader),
|
||||
};
|
||||
branches.push(arm);
|
||||
}
|
||||
|
||||
let pattern_match: ExprMatch = parse_quote! {
|
||||
match new_state_index.into() {
|
||||
#(#branches)*
|
||||
_ => unreachable_panic!("Unreachable state reached in lexer.")
|
||||
}
|
||||
};
|
||||
let func: ImplItem = parse_quote! {
|
||||
fn #dispatch_name<R:ReaderOps>
|
||||
( &mut self
|
||||
, new_state_index:SubStateId
|
||||
, reader:&mut R
|
||||
) -> StageStatus {
|
||||
#pattern_match
|
||||
}
|
||||
};
|
||||
|
||||
Ok(func)
|
||||
}
|
||||
|
||||
/// Generate a name for a given step function.
|
||||
pub fn name_for_step(in_state: usize, to_state: usize) -> Ident {
|
||||
let name_str = format!("state_{}_to_{}", in_state, to_state);
|
||||
parse_str(name_str.as_str()).expect("Impossible to not be a valid identifier.")
|
||||
}
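// For example, `name_for_step(1, 2)` yields the identifier `state_1_to_2`.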
|
||||
|
||||
/// Generate an executable rule function for a given lexer state.
|
||||
pub fn rule_for_state(state: nfa::State, automaton: &AutomatonData) -> Result<ImplItem, GenError> {
|
||||
let state_name = automaton.name(state);
|
||||
match state_name {
|
||||
None => unreachable_panic!("Rule for state requested, but state has none."),
|
||||
Some(name) => {
|
||||
let rule_name = str_to_ident(name)?;
|
||||
let callback = automaton.code(state).expect("If it is named it has a callback.");
|
||||
let code: Expr = match parse_str(callback) {
|
||||
Ok(expr) => expr,
|
||||
Err(_) => return Err(GenError::BadExpression(callback.into())),
|
||||
};
|
||||
if !has_reader_arg(&code) {
|
||||
return Err(GenError::BadCallbackArgument);
|
||||
}
|
||||
let tree: ImplItem = parse_quote! {
|
||||
fn #rule_name<R:ReaderOps>(&mut self, reader:&mut R) {
|
||||
#code
|
||||
}
|
||||
};
|
||||
Ok(tree)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the given `expr` is a call with a single argument "reader" being passed.
|
||||
#[allow(clippy::cmp_owned)]
|
||||
pub fn has_reader_arg(expr: &Expr) -> bool {
|
||||
match expr {
|
||||
Expr::MethodCall(expr) => match expr.args.first() {
|
||||
Some(Expr::Path(path)) => match path.path.segments.first() {
|
||||
Some(segment) => segment.ident.to_string() == "reader",
|
||||
_ => false,
|
||||
},
|
||||
_ => false,
|
||||
},
|
||||
Expr::Call(expr) => match expr.args.last() {
|
||||
Some(Expr::Path(path)) => match path.path.segments.first() {
|
||||
Some(segment) => segment.ident.to_string() == "reader",
|
||||
_ => false,
|
||||
},
|
||||
_ => false,
|
||||
},
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === GenError ===
|
||||
// ================
|
||||
|
||||
/// Errors that arise during code generation.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum GenError {
|
||||
/// The callback function does not take a single argument `reader`.
|
||||
BadCallbackArgument,
|
||||
/// The provided string is not a valid rust identifier.
|
||||
BadIdentifier(String),
|
||||
/// The provided expression isn't a valid rust expression.
|
||||
BadExpression(String),
|
||||
/// The provided string is not a valid rust literal.
|
||||
BadLiteral(String),
|
||||
/// The provided string is not a valid rust path.
|
||||
BadPath(String),
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Display for GenError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
GenError::BadCallbackArgument => write!(
|
||||
f,
|
||||
"Bad argument to a callback function. It must take a single argument `reader`."
|
||||
),
|
||||
GenError::BadIdentifier(str) => write!(f, "`{}` is not a valid rust identifier.", str),
|
||||
GenError::BadExpression(str) => write!(f, "`{}` is not a valid rust expression.", str),
|
||||
GenError::BadLiteral(str) => write!(f, "`{}` is not a valid rust literal.", str),
|
||||
GenError::BadPath(str) => write!(f, "`{}` is not a valid rust path.", str),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Branch ===
|
||||
// ==============
|
||||
|
||||
/// A representation of a dispatch branch for helping to generate pattern arms.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
struct Branch {
|
||||
pub range: Option<RangeInclusive<enso_automata::symbol::SymbolIndex>>,
|
||||
pub body: Block,
|
||||
}
|
||||
|
||||
impl Branch {
|
||||
/// Create a new branch, from the provided `range` and with `body` as the code it executes.
|
||||
pub fn new(
|
||||
range: Option<RangeInclusive<enso_automata::symbol::SymbolIndex>>,
|
||||
body: Block,
|
||||
) -> Branch {
|
||||
Branch { range, body }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<Branch> for Arm {
|
||||
fn from(value: Branch) -> Self {
|
||||
let body = value.body;
|
||||
match value.range {
|
||||
Some(range) => {
|
||||
let range_start = Literal::u64_unsuffixed(*range.start());
|
||||
let range_end = Literal::u64_unsuffixed(*range.end());
|
||||
if range.start() == range.end() {
|
||||
parse_quote! {
|
||||
#range_start => #body,
|
||||
}
|
||||
} else {
|
||||
parse_quote! {
|
||||
#range_start..=#range_end => #body,
|
||||
}
|
||||
}
|
||||
}
|
||||
None => parse_quote! {
|
||||
_ => #body,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === Utilities ===
|
||||
// =================
|
||||
|
||||
/// Check if the DFA has a rule name for the provided target `state`.
|
||||
pub fn dfa_has_rule_name_for(nfa: &AutomatonData, dfa: &Dfa, state: dfa::State) -> bool {
|
||||
nfa.name_for_dfa_state(&dfa.sources[state.id()]).is_some()
|
||||
}
|
||||
|
||||
/// Convert a string to an identifier.
|
||||
pub fn str_to_ident(str: impl Str) -> Result<Ident, GenError> {
|
||||
parse_str(str.as_ref()).map_err(|_| GenError::BadIdentifier(str.into()))
|
||||
}
|
||||
|
||||
/// Convert a string to a path.
|
||||
pub fn str_to_path(str: impl Str) -> Result<Path, GenError> {
|
||||
parse_str(str.as_ref()).map_err(|_| GenError::BadPath(str.into()))
|
||||
}
|
||||
|
||||
/// Convert the syntax tree into a string.
|
||||
pub fn show_code(tokens: &impl ToTokens) -> String {
|
||||
repr(tokens)
|
||||
}
|
@@ -1,458 +0,0 @@
|
||||
//! This module provides an API for grouping multiple flexer rules.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::automata::nfa;
|
||||
use crate::automata::nfa::Nfa;
|
||||
use crate::automata::pattern::Pattern;
|
||||
use crate::automata::state;
|
||||
use crate::group::rule::Rule;
|
||||
use crate::prelude::fmt::Formatter;
|
||||
use crate::prelude::HashMap;
|
||||
|
||||
use itertools::Itertools;
|
||||
use std::fmt::Display;
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod rule;
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Registry ===
|
||||
// ================
|
||||
|
||||
/// The group Registry is a container for [`Group`]s in the flexer implementation.
|
||||
///
|
||||
/// It allows groups to contain associations between themselves, and also implements useful
|
||||
/// conversions for groups.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Registry {
|
||||
/// The groups defined for the lexer.
|
||||
groups: Vec<Group>,
|
||||
}
|
||||
|
||||
impl Registry {
|
||||
/// Defines a new group of rules for the lexer with the specified `name` and `parent`.
|
||||
///
|
||||
/// It returns the identifier of the newly-created group.
|
||||
pub fn define_group(
|
||||
&mut self,
|
||||
name: impl Into<String>,
|
||||
parent_index: Option<Identifier>,
|
||||
) -> Identifier {
|
||||
let id = self.next_id();
|
||||
let group = Group::new(id, name.into(), parent_index);
|
||||
self.groups.push(group);
|
||||
id
|
||||
}
|
||||
|
||||
/// Adds an existing `group` to the registry, updating and returning its identifier.
|
||||
pub fn add_group(&mut self, mut group: Group) -> Identifier {
|
||||
let new_id = self.next_id();
|
||||
group.id = new_id;
|
||||
self.groups.push(group);
|
||||
new_id
|
||||
}
|
||||
|
||||
/// Creates a rule that matches `pattern` for the group identified by `group_id`.
|
||||
///
|
||||
/// Panics if `group_id` refers to a nonexistent group.
|
||||
pub fn create_rule(&mut self, group: Identifier, pattern: &Pattern, callback: impl AsRef<str>) {
|
||||
let group = self.group_mut(group);
|
||||
group.create_rule(pattern, callback.as_ref());
|
||||
}
|
||||
|
||||
/// Associates the provided `rule` with the group identified by `group_id`.
|
||||
///
|
||||
/// Panics if `group_id` refers to a nonexistent group.
|
||||
pub fn add_rule(&mut self, group: Identifier, rule: Rule) {
|
||||
let group = self.group_mut(group);
|
||||
group.add_rule(rule);
|
||||
}
|
||||
|
||||
/// Collates the entire set of rules that are matchable when the lexer has the group identified
|
||||
/// by `group_id` as active.
|
||||
///
|
||||
/// This set of rules includes the rules inherited from any parent groups.
|
||||
pub fn rules_for(&self, group: Identifier) -> Vec<&Rule> {
|
||||
let group_handle = self.group(group);
|
||||
let mut parent = group_handle.parent_index.map(|p| self.group(p));
|
||||
let mut rules = (&group_handle.rules).iter().collect_vec();
|
||||
while let Some(parent_group) = parent {
|
||||
if parent_group.id == group_handle.id {
|
||||
panic!("There should not be cycles in parent links for lexer groups.")
|
||||
}
|
||||
rules.extend((&parent_group.rules).iter());
|
||||
parent = parent_group.parent_index.map(|p| self.group(p));
|
||||
}
|
||||
rules
|
||||
}
|
||||
|
||||
/// Obtains a reference to the group for the given `group_id`.
|
||||
///
|
||||
/// As group identifiers can only be created by use of this `Registry`, this will always
|
||||
/// succeed.
|
||||
pub fn group(&self, group: Identifier) -> &Group {
|
||||
self.groups.get(group.0).expect("The group must exist.")
|
||||
}
|
||||
|
||||
/// Obtains a mutable reference to the group for the given `group_id`.
|
||||
///
|
||||
/// As group identifiers can only be created by use of this `Registry`, this will always
|
||||
/// succeed.
|
||||
pub fn group_mut(&mut self, group: Identifier) -> &mut Group {
|
||||
self.groups.get_mut(group.0).expect("The group should exist.")
|
||||
}
|
||||
|
||||
/// Converts the group identified by `group_id` into an NFA.
|
||||
///
|
||||
/// Panics if `group_id` refers to a nonexistent group.
|
||||
pub fn to_nfa_from(&self, group_id: Identifier) -> AutomatonData {
|
||||
let group = self.group(group_id);
|
||||
let mut nfa = AutomatonData::default();
|
||||
let start = nfa.automaton.start;
|
||||
nfa.add_public_state(start);
|
||||
let build = |rule: &Rule| nfa.new_pattern(start, &rule.pattern);
|
||||
let rules = self.rules_for(group.id);
|
||||
let callbacks = rules.iter().map(|r| r.callback.clone()).collect_vec();
|
||||
let states = rules.into_iter().map(build).collect_vec();
|
||||
let end = nfa.new_state_exported();
|
||||
for (ix, state) in states.into_iter().enumerate() {
|
||||
nfa.add_public_state(state);
|
||||
nfa.set_name(state, group.callback_name(ix));
|
||||
nfa.set_code(state, callbacks.get(ix).unwrap().clone());
|
||||
nfa.connect(state, end);
|
||||
}
|
||||
nfa.add_public_state(end);
|
||||
nfa
|
||||
}
|
||||
|
||||
/// Generates the next group identifier for this registry.
|
||||
fn next_id(&self) -> Identifier {
|
||||
let val = self.groups.len();
|
||||
Identifier(val)
|
||||
}
|
||||
|
||||
/// Get an immutable reference to the groups contained within the registry.
|
||||
pub fn all(&self) -> &Vec<Group> {
|
||||
&self.groups
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ====================
|
||||
// === AutomataData ===
|
||||
// ====================
|
||||
|
||||
/// Storage for the generated automaton and auxiliary data required for code generation.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct AutomatonData {
|
||||
/// The non-deterministic finite automaton implementing the group of rules it was generated
|
||||
/// from.
|
||||
automaton: Nfa,
|
||||
/// The states defined in the automaton.
|
||||
states: Vec<nfa::State>,
|
||||
/// The names of callbacks, where provided.
|
||||
transition_names: HashMap<nfa::State, String>,
|
||||
/// The code to execute on a callback, where available.
|
||||
callback_code: HashMap<nfa::State, String>,
|
||||
}
|
||||
|
||||
impl AutomatonData {
|
||||
/// Set the name for the provided `state_id`.
|
||||
pub fn set_name(&mut self, state_id: nfa::State, name: impl Str) {
|
||||
self.transition_names.insert(state_id, name.into());
|
||||
}
|
||||
|
||||
/// Set the callback code for the provided `state_id`.
|
||||
pub fn set_code(&mut self, state_id: nfa::State, code: impl Str) {
|
||||
self.callback_code.insert(state_id, code.into());
|
||||
}
|
||||
|
||||
/// Add the provided `state` to the state registry.
|
||||
pub fn add_public_state(&mut self, state: nfa::State) {
|
||||
self.states.push(state);
|
||||
}
|
||||
|
||||
/// Get the name for the provided `state_id`, if present.
|
||||
pub fn name(&self, state_id: nfa::State) -> Option<&str> {
|
||||
self.transition_names.get(&state_id).map(|s| s.as_str())
|
||||
}
|
||||
|
||||
/// Get the callback code for the provided `state_id`, if present.
|
||||
pub fn code(&self, state_id: nfa::State) -> Option<&str> {
|
||||
self.callback_code.get(&state_id).map(|s| s.as_str())
|
||||
}
|
||||
|
||||
/// Get a reference to the public states for this automaton.
|
||||
///
|
||||
/// A public state is one that was explicitly defined by the user.
|
||||
pub fn public_states(&self) -> &Vec<nfa::State> {
|
||||
&self.states
|
||||
}
|
||||
|
||||
/// Get a reference to the states for this automaton.
|
||||
pub fn states(&self) -> &Vec<state::Data> {
|
||||
self.automaton.states()
|
||||
}
|
||||
|
||||
/// Get a reference to the state names for this automaton.
|
||||
pub fn names(&self) -> &HashMap<nfa::State, String> {
|
||||
&self.transition_names
|
||||
}
|
||||
|
||||
/// Get a reference to the callbacks for this automaton.
|
||||
pub fn callbacks(&self) -> &HashMap<nfa::State, String> {
|
||||
&self.callback_code
|
||||
}
|
||||
|
||||
/// Get a reference to the automaton itself.
|
||||
pub fn automaton(&self) -> &Nfa {
|
||||
&self.automaton
|
||||
}
|
||||
|
||||
/// Get the rule name for the provided state.
|
||||
pub fn name_for_dfa_state(&self, sources: &[nfa::State]) -> Option<&str> {
|
||||
let mut result = None;
|
||||
for source in sources.iter() {
|
||||
let name = self.name(*source);
|
||||
if name.is_some() {
|
||||
result = name;
|
||||
break;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors that can occur when querying callbacks for a DFA state.
|
||||
#[derive(Copy, Clone, Debug, Display, Eq, PartialEq)]
|
||||
pub enum CallbackError {
|
||||
/// There are no available callbacks for this state.
|
||||
NoCallback,
|
||||
/// There is more than one callback available for this state.
|
||||
DuplicateCallbacks,
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Deref for AutomatonData {
|
||||
type Target = Nfa;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.automaton
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for AutomatonData {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.automaton
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Identifier ===
|
||||
// ==================
|
||||
|
||||
/// An identifier for a group.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
|
||||
pub struct Identifier(usize);
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<usize> for Identifier {
|
||||
fn from(id: usize) -> Self {
|
||||
Identifier(id)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&usize> for Identifier {
|
||||
fn from(id: &usize) -> Self {
|
||||
Identifier(*id)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Identifier> for usize {
|
||||
fn from(value: Identifier) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// == Group ==
|
||||
// ===========
|
||||
|
||||
/// A group is a structure for associating multiple rules with each other, and is the basic building
|
||||
/// block of the flexer.
|
||||
///
|
||||
/// A group consists of the following:
|
||||
///
|
||||
/// - A set of [`Rule`s](Rule), each containing a regex pattern and associated callback.
|
||||
/// - Inherited rules from a parent group, if such a group exists.
|
||||
///
|
||||
/// Internally, the flexer maintains a stack of groups, where only one group can be active at any
|
||||
/// given time. Rules are matched _in order_, and hence overlaps are handled by the order in which
|
||||
/// the rules are matched, with the first callback being triggered.
|
||||
///
|
||||
/// Whenever a [`rule.pattern`](Rule::pattern) from the active group is matched against part of the
|
||||
/// input, the associated [`rule.callback`](Rule::callback) is executed. This callback may exit the
|
||||
/// current group or even enter a new one. As a result, groups allow us to elegantly model a
|
||||
/// situation where certain parts of a program (e.g. within a string literal) have very different
|
||||
/// lexing rules than other portions of a program (e.g. the body of a function).
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Group {
|
||||
/// A unique identifier for the group.
|
||||
pub id: Identifier,
|
||||
/// A name for the group (useful in debugging).
|
||||
pub name: String,
|
||||
/// The parent group from which rules are inherited.
|
||||
///
|
||||
/// It is ensured that the group is held mutably.
|
||||
pub parent_index: Option<Identifier>,
|
||||
/// A set of flexer rules.
|
||||
pub rules: Vec<Rule>,
|
||||
/// The names for the user-defined states.
|
||||
pub state_names: HashMap<usize, String>,
|
||||
/// The callback functions for the user-defined states.
|
||||
pub state_callbacks: HashMap<usize, String>,
|
||||
}
|
||||
|
||||
impl Group {
|
||||
/// Creates a new group.
|
||||
pub fn new(id: Identifier, name: impl Into<String>, parent_index: Option<Identifier>) -> Self {
|
||||
let rules = default();
|
||||
let state_names = default();
|
||||
let state_callbacks = default();
|
||||
Group { id, name: name.into(), parent_index, rules, state_names, state_callbacks }
|
||||
}
|
||||
|
||||
/// Adds a new rule to the current group.
|
||||
pub fn add_rule(&mut self, rule: Rule) {
|
||||
self.rules.push(rule)
|
||||
}
|
||||
|
||||
/// Creates a new rule.
|
||||
pub fn create_rule(&mut self, pattern: &Pattern, code: &str) {
|
||||
let pattern_clone = pattern.clone();
|
||||
let rule = Rule::new(pattern_clone, code);
|
||||
self.rules.push(rule)
|
||||
}
|
||||
|
||||
/// The canonical name for a given rule.
|
||||
pub fn callback_name(&self, rule_ix: usize) -> String {
|
||||
format!("group_{}_rule_{}", self.id.0, rule_ix)
|
||||
}
|
||||
}
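// A hedged sketch of combining groups (identifiers and callbacks here are illustrative,
// not part of this crate): a root group lexes ordinary input, while a child group
// inherits its rules and adds rules that only apply inside, say, a text literal:
//
//     let mut registry = Registry::default();
//     let root = registry.define_group("ROOT", None);
//     let text = registry.define_group("IN_TEXT", Some(root));
//     registry.create_rule(text, &Pattern::char('"'), "self.on_text_end(reader)");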
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<Group> for Registry {
|
||||
fn from(value: Group) -> Self {
|
||||
let mut registry = Registry::default();
|
||||
registry.add_group(value);
|
||||
registry
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Group {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Group {}", self.name)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn group_create_rule() {
|
||||
let pattern = Pattern::all_of("abcde");
|
||||
let mut group = Group::new(0.into(), "Test Name", None);
|
||||
group.create_rule(&pattern, "code");
|
||||
let rule = Rule::new(pattern, "code");
|
||||
assert!(group.rules.contains(&rule));
|
||||
assert_eq!(group.rules[0].callback, "code".to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn group_callback_name() {
|
||||
let pattern_1 = Pattern::all_of("abcde");
|
||||
let pattern_2 = Pattern::all_of("abcde");
|
||||
let mut group = Group::new(0.into(), "Test Name", None);
|
||||
group.create_rule(&pattern_1, "code");
|
||||
group.create_rule(&pattern_2, "code");
|
||||
assert_eq!(group.callback_name(0), "group_0_rule_0");
|
||||
assert_eq!(group.callback_name(1), "group_0_rule_1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn group_registry_define_group() {
|
||||
let mut registry = Registry::default();
|
||||
registry.define_group("TEST_GROUP", None);
|
||||
assert!(registry.all().iter().any(|g| g.name == *"TEST_GROUP"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn group_registry_create_rule() {
|
||||
let pattern = Pattern::none_of("abcde");
|
||||
let mut registry = Registry::default();
|
||||
let group_1_id = registry.define_group("GROUP_1", None);
|
||||
let group_2_id = registry.define_group("GROUP_2", None);
|
||||
|
||||
let group_1 = registry.group_mut(group_1_id);
|
||||
group_1.create_rule(&pattern, "rule_1");
|
||||
|
||||
let group_2 = registry.group_mut(group_2_id);
|
||||
group_2.create_rule(&pattern, "rule_2");
|
||||
|
||||
let rules_1 = registry.rules_for(group_1_id);
|
||||
let rules_2 = registry.rules_for(group_2_id);
|
||||
assert!(rules_1.iter().any(|r| **r == Rule::new(pattern.clone(), "rule_1")));
|
||||
assert!(rules_2.iter().any(|r| **r == Rule::new(pattern.clone(), "rule_2")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn group_registry_group_parents() {
|
||||
let pattern_1 = Pattern::char('a');
|
||||
let pattern_2 = Pattern::char('b');
|
||||
let pattern_3 = Pattern::char('c');
|
||||
|
||||
let mut registry = Registry::default();
|
||||
let group_1_id = registry.define_group("GROUP_1", None);
|
||||
let group_2_id = registry.define_group("GROUP_2", Some(group_1_id));
|
||||
let group_3_id = registry.define_group("GROUP_3", Some(group_2_id));
|
||||
|
||||
let group_1 = registry.group_mut(group_1_id);
|
||||
group_1.create_rule(&pattern_1, "rule_1");
|
||||
|
||||
let group_2 = registry.group_mut(group_2_id);
|
||||
group_2.create_rule(&pattern_2, "rule_2");
|
||||
|
||||
let group_3 = registry.group_mut(group_3_id);
|
||||
group_3.create_rule(&pattern_3, "rule_3");
|
||||
|
||||
let rules = registry.rules_for(group_3_id);
|
||||
assert_eq!(rules.len(), 3);
|
||||
assert!(rules.iter().any(|r| **r == Rule::new(pattern_1.clone(), "rule_1")));
|
||||
assert!(rules.iter().any(|r| **r == Rule::new(pattern_2.clone(), "rule_2")));
|
||||
assert!(rules.iter().any(|r| **r == Rule::new(pattern_3.clone(), "rule_3")));
|
||||
}
|
||||
}
|
@@ -1,34 +0,0 @@
|
||||
//! An API for declaring rust-code callbacks to be executed when a given pattern is matched.
|
||||
//!
|
||||
//! A flexer rule is a [`crate::automata::pattern`] associated with rust code to be executed as a
|
||||
//! callback.
|
||||
|
||||
use crate::automata::pattern::Pattern;
|
||||
|
||||
|
||||
|
||||
// ==========
|
||||
// == Rule ==
|
||||
// ==========
|
||||
|
||||
/// A flexer rule.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Rule {
|
||||
/// The pattern that triggers the callback.
|
||||
pub pattern: Pattern,
|
||||
|
||||
/// The code to execute when [`Rule::pattern`] matches, containing rust code as a
|
||||
/// [`std::string::String`].
|
||||
///
|
||||
/// This code will be called directly from a method defined on your Lexer (the one that
|
||||
/// contains a [`crate::Flexer`] instance). To this end, the code you provide as a string
|
||||
/// must be valid in that context.
|
||||
pub callback: String,
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
/// Creates a new rule.
|
||||
pub fn new(pattern: Pattern, callback: impl Into<String>) -> Self {
|
||||
Rule { pattern, callback: callback.into() }
|
||||
}
|
||||
}
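// Illustrative only: the callback is plain source text that is later pasted into a method
// generated on the lexer, so it must be valid in that context:
//
//     let rule = Rule::new(Pattern::all_of("foo"), "self.on_foo(reader)");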
|
File diff suppressed because it is too large
@@ -1,452 +0,0 @@
|
||||
//! This file contains tests for the user-facing error-handling logic in the flexer code generator.
|
||||
//!
|
||||
//! This file includes quite a bit of duplicated code, but this is known and intentional as it
|
||||
//! allows for increased clarity in the testing.
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(missing_docs)]
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
#![allow(clippy::blacklisted_name)] // `foo` is fine here.
|
||||
#![allow(clippy::new_without_default)] // No need for boilerplate in throwaway test code.
|
||||
|
||||
use enso_flexer::*;
|
||||
|
||||
use crate::prelude::logger::AnyLogger;
|
||||
use crate::prelude::logger::Disabled;
|
||||
use crate::prelude::reader::BookmarkManager;
|
||||
use crate::prelude::ReaderOps;
|
||||
use enso_flexer::automata::pattern::Pattern;
|
||||
use enso_flexer::generate;
|
||||
use enso_flexer::group;
|
||||
use enso_flexer::group::Identifier;
|
||||
use enso_flexer::group::Registry;
|
||||
use enso_flexer::prelude::*;
|
||||
use enso_flexer::Flexer;
|
||||
use enso_flexer::State;
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Type Aliases ===
|
||||
// ====================
|
||||
|
||||
type Logger = Disabled;
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Shared Setup ===
|
||||
// ====================
|
||||
|
||||
/// A token type for these lexers.
|
||||
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||
pub enum Token {
|
||||
Foo,
|
||||
Bar,
|
||||
}
|
||||
|
||||
/// An output type for these lexers.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone, Debug, Default, PartialEq)]
|
||||
pub struct Output {
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
/// A testing lexer state.
|
||||
pub struct LexerState {
|
||||
lexer_states: group::Registry,
|
||||
initial_state: group::Identifier,
|
||||
}
|
||||
impl enso_flexer::State for LexerState {
|
||||
fn new(_logger: &impl AnyLogger) -> Self {
|
||||
let mut lexer_states = group::Registry::default();
|
||||
let initial_state = lexer_states.define_group("ROOT", None);
|
||||
LexerState { lexer_states, initial_state }
|
||||
}
|
||||
|
||||
fn initial_state(&self) -> Identifier {
|
||||
self.initial_state
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
&self.lexer_states
|
||||
}
|
||||
|
||||
fn groups_mut(&mut self) -> &mut Registry {
|
||||
&mut self.lexer_states
|
||||
}
|
||||
|
||||
fn bookmarks(&self) -> &BookmarkManager {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn specialize(&self) -> Result<String, GenError> {
|
||||
// Note [Naming "Lexer"]
|
||||
generate::specialize(self, "Lexer", "Output")
|
||||
}
|
||||
}
|
||||
|
||||
/* Note [Naming "Lexer"]
|
||||
* ~~~~~~~~~~~~~~~~~~~~~
|
||||
* In general, the name passed to `specialize` should match that of your lexer definition.
|
||||
* However here, as we never compile the code, we set it to a generic constant that is a valid
|
||||
* rust identifier so as to reduce testing boilerplate.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Definition 1 ===
|
||||
// ====================
|
||||
|
||||
pub struct Lexer1 {
|
||||
lexer: Flexer<LexerState, Output, Logger>,
|
||||
}
|
||||
|
||||
impl Deref for Lexer1 {
|
||||
type Target = Flexer<LexerState, Output, Logger>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Lexer1 {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl Lexer1 {
|
||||
pub fn new() -> Lexer1 {
|
||||
let logger = Logger::new("Lexer1");
|
||||
let lexer = Flexer::new(logger);
|
||||
Lexer1 { lexer }
|
||||
}
|
||||
|
||||
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl enso_flexer::Definition for Lexer1 {
|
||||
fn define() -> Self {
|
||||
let mut lexer = Self::new();
|
||||
|
||||
let foo = Pattern::all_of("foo");
|
||||
|
||||
let root_group_id = lexer.initial_state();
|
||||
let root_group = lexer.groups_mut().group_mut(root_group_id);
|
||||
root_group.create_rule(&foo, "ETERNAL SCREAMING");
|
||||
|
||||
lexer
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
self.lexer.groups()
|
||||
}
|
||||
|
||||
fn set_up(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn tear_down(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bad_rule_expression() {
|
||||
let lexer = Lexer1::define();
|
||||
let result = lexer.specialize();
|
||||
assert!(result.is_err());
|
||||
let message = result.unwrap_err().to_string();
|
||||
assert_eq!(message, "`ETERNAL SCREAMING` is not a valid rust expression.");
|
||||
}
|
||||
|
||||
|
||||
// ====================
|
||||
// === Definition 2 ===
|
||||
// ====================
|
||||
|
||||
pub struct Lexer2 {
|
||||
lexer: Flexer<LexerState, Output, Logger>,
|
||||
}
|
||||
|
||||
impl Deref for Lexer2 {
|
||||
type Target = Flexer<LexerState, Output, Logger>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Lexer2 {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl Lexer2 {
|
||||
pub fn new() -> Lexer2 {
|
||||
let logger = Logger::new("Lexer2");
|
||||
let lexer = Flexer::new(logger);
|
||||
Lexer2 { lexer }
|
||||
}
|
||||
|
||||
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl enso_flexer::Definition for Lexer2 {
|
||||
fn define() -> Self {
|
||||
let mut lexer = Self::new();
|
||||
|
||||
let foo = Pattern::all_of("foo");
|
||||
|
||||
let root_group_id = lexer.initial_state();
|
||||
let root_group = lexer.groups_mut().group_mut(root_group_id);
|
||||
root_group.create_rule(&foo, "self.test_function_no_reader()");
|
||||
|
||||
lexer
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
self.lexer.groups()
|
||||
}
|
||||
|
||||
fn set_up(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn tear_down(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_no_reader_arg() {
|
||||
let lexer = Lexer2::define();
|
||||
let result = lexer.specialize();
|
||||
let expected_message =
|
||||
"Bad argument to a callback function. It must take a single argument `reader`.";
|
||||
assert!(result.is_err());
|
||||
let message = result.unwrap_err().to_string();
|
||||
assert_eq!(message, expected_message);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Definition 3 ===
|
||||
// ====================
|
||||
|
||||
pub struct Lexer3 {
|
||||
lexer: Flexer<LexerState1, Output, Logger>,
|
||||
}
|
||||
|
||||
impl Deref for Lexer3 {
|
||||
type Target = Flexer<LexerState1, Output, Logger>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Lexer3 {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl Lexer3 {
|
||||
pub fn new() -> Lexer3 {
|
||||
let logger = Logger::new("Lexer3");
|
||||
let lexer = Flexer::new(logger);
|
||||
Lexer3 { lexer }
|
||||
}
|
||||
|
||||
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl enso_flexer::Definition for Lexer3 {
|
||||
fn define() -> Self {
|
||||
let mut lexer = Self::new();
|
||||
|
||||
let foo = Pattern::all_of("foo");
|
||||
|
||||
let root_group_id = lexer.initial_state();
|
||||
let root_group = lexer.groups_mut().group_mut(root_group_id);
|
||||
root_group.create_rule(&foo, "self.test_function_reader(reader)");
|
||||
|
||||
lexer
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
self.lexer.groups()
|
||||
}
|
||||
|
||||
fn set_up(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn tear_down(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LexerState1 {
|
||||
lexer_states: group::Registry,
|
||||
initial_state: group::Identifier,
|
||||
}
|
||||
impl enso_flexer::State for LexerState1 {
|
||||
fn new(_logger: &impl AnyLogger) -> Self {
|
||||
let mut lexer_states = group::Registry::default();
|
||||
let initial_state = lexer_states.define_group("ROOT", None);
|
||||
LexerState1 { lexer_states, initial_state }
|
||||
}
|
||||
|
||||
fn initial_state(&self) -> Identifier {
|
||||
self.initial_state
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
&self.lexer_states
|
||||
}
|
||||
|
||||
fn groups_mut(&mut self) -> &mut Registry {
|
||||
&mut self.lexer_states
|
||||
}
|
||||
|
||||
fn bookmarks(&self) -> &BookmarkManager {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn specialize(&self) -> Result<String, GenError> {
|
||||
generate::specialize(self, "Bad Lexer Name", "Output")
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_bad_state_name() {
|
||||
let lexer = Lexer3::define();
|
||||
let result = lexer.specialize();
|
||||
assert!(result.is_err());
|
||||
let message = result.unwrap_err().to_string();
|
||||
assert_eq!(message, "`Bad Lexer Name` is not a valid rust identifier.");
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Definition 4 ===
|
||||
// ====================
|
||||
|
||||
pub struct Lexer4 {
|
||||
lexer: Flexer<LexerState2, Output, Logger>,
|
||||
}
|
||||
|
||||
impl Deref for Lexer4 {
|
||||
type Target = Flexer<LexerState2, Output, Logger>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Lexer4 {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.lexer
|
||||
}
|
||||
}
|
||||
|
||||
impl Lexer4 {
|
||||
pub fn new() -> Lexer4 {
|
||||
let logger = Logger::new("Lexer4");
|
||||
let lexer = Flexer::new(logger);
|
||||
Lexer4 { lexer }
|
||||
}
|
||||
|
||||
pub fn my_test_fun<R: ReaderOps>(&mut self, _reader: &mut R) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl enso_flexer::Definition for Lexer4 {
|
||||
fn define() -> Self {
|
||||
let mut lexer = Self::new();
|
||||
|
||||
let foo = Pattern::all_of("foo");
|
||||
|
||||
let root_group_id = lexer.initial_state();
|
||||
let root_group = lexer.groups_mut().group_mut(root_group_id);
|
||||
root_group.create_rule(&foo, "self.test_function_reader(reader)");
|
||||
|
||||
lexer
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
self.lexer.groups()
|
||||
}
|
||||
|
||||
fn set_up(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn tear_down(&mut self) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LexerState2 {
|
||||
lexer_states: group::Registry,
|
||||
initial_state: group::Identifier,
|
||||
}
|
||||
impl enso_flexer::State for LexerState2 {
|
||||
fn new(_logger: &impl AnyLogger) -> Self {
|
||||
let mut lexer_states = group::Registry::default();
|
||||
let initial_state = lexer_states.define_group("ROOT", None);
|
||||
LexerState2 { lexer_states, initial_state }
|
||||
}
|
||||
|
||||
fn initial_state(&self) -> Identifier {
|
||||
self.initial_state
|
||||
}
|
||||
|
||||
fn groups(&self) -> &Registry {
|
||||
&self.lexer_states
|
||||
}
|
||||
|
||||
fn groups_mut(&mut self) -> &mut Registry {
|
||||
&mut self.lexer_states
|
||||
}
|
||||
|
||||
fn bookmarks(&self) -> &BookmarkManager {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn bookmarks_mut(&mut self) -> &mut BookmarkManager {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn specialize(&self) -> Result<String, GenError> {
|
||||
generate::specialize(self, "Lexer4", "Bad output name")
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_bad_output_name() {
|
||||
let lexer = Lexer4::define();
|
||||
let result = lexer.specialize();
|
||||
assert!(result.is_err());
|
||||
let message = result.unwrap_err().to_string();
|
||||
assert_eq!(message, "`Bad output name` is not a valid rust path.");
|
||||
}
|
@@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "enso-lazy-reader"
|
||||
version = "0.2.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
description = "An efficient buffered reader."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/rust-lib/src/lazy-reader"
|
||||
repository = "https://github.com/enso-org/rust-lib"
|
||||
license-file = "../../LICENSE"
|
||||
|
||||
keywords = ["read", "UTF"]
|
||||
|
||||
publish = true
|
||||
|
||||
[lib]
|
||||
name = "lazy_reader"
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
itertools = "0.8"
|
||||
enso-prelude = { version = "^0.2.1", path = "../../prelude" }
|
@@ -1,3 +0,0 @@
|
||||
# Enso Lazy Reader
|
||||
|
||||
An efficient lazy reader.
|
@@ -1,197 +0,0 @@
|
||||
//! This module exports various UTF decoders for decoding UTF32 characters.
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(unsafe_code)]
|
||||
|
||||
use std::fmt::Debug;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Decoder ===
|
||||
// ===============
|
||||
|
||||
/// The error for an invalid character.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct InvalidChar();
|
||||
|
||||
/// Trait for decoding UTF32 characters.
|
||||
pub trait Decoder {
|
||||
/// The input of the decoder.
|
||||
type Word: Default + Copy + Debug;
|
||||
/// The maximum number of words needed to decode one symbol.
|
||||
const MAX_CODEPOINT_LEN: usize;
|
||||
|
||||
/// Decodes the first symbol from the slice and returns it with its length (in words).
|
||||
///
|
||||
/// This function can panic if `words.len() < MAX_CODEPOINT_LEN`.
|
||||
fn decode(words: &[Self::Word]) -> Char<InvalidChar>;
|
||||
}
|
||||
|
||||
|
||||
// === Char ===
|
||||
|
||||
/// The result of `decoder.decode`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct Char<Error> {
|
||||
/// The decoded character.
|
||||
pub char: Result<char, Error>,
|
||||
/// The number of words read.
|
||||
pub size: usize,
|
||||
}
|
||||
|
||||
impl Char<crate::Error> {
|
||||
/// Check if the character represents the end of file.
|
||||
pub fn is_eof(&self) -> bool {
|
||||
match self.char {
|
||||
Ok(_) => false,
|
||||
Err(crate::Error::EOF) => true,
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =====================
|
||||
// === UTF-8 Decoder ===
|
||||
// =====================
|
||||
|
||||
/// Decoder for UTF-8.
|
||||
///
|
||||
/// For more info on UTF-8 and the algorithm used see [UTF-8](https://en.wikipedia.org/wiki/UTF-8).
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct DecoderUTF8();
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Decoder for DecoderUTF8 {
|
||||
type Word = u8;
|
||||
|
||||
const MAX_CODEPOINT_LEN: usize = 4;
|
||||
|
||||
fn decode(words: &[u8]) -> Char<InvalidChar> {
|
||||
let size = match !words[0] >> 4 {
|
||||
0 => 4,
|
||||
1 => 3,
|
||||
2 | 3 => 2,
|
||||
_ => 1,
|
||||
};
|
||||
|
||||
let mut char = (words[0] << size >> size) as u32;
|
||||
for word in &words[1..size] {
|
||||
char = char << 6 | (word & 0b_0011_1111) as u32;
|
||||
}
|
||||
|
||||
Char { char: std::char::from_u32(char).ok_or_else(InvalidChar), size }
|
||||
}
|
||||
}
|
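// A quick, illustrative check of the lead-byte trick used above (not part of the
// original file): for a three-byte sequence such as '€' (U+20AC, bytes 0xE2 0x82 0xAC),
// `!0xE2 >> 4` is `0x1D >> 4 == 1`, which the match above maps to a size of 3.
fn utf8_lead_byte_example() {
    let decoded = DecoderUTF8::decode("€".as_bytes());
    assert_eq!(decoded.char.unwrap(), '€');
    assert_eq!(decoded.size, 3);
}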
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === UTF-16 Decoder ===
|
||||
// ======================
|
||||
|
||||
/// Decoder for UTF-16.
|
||||
///
|
||||
/// For more info on UTF-16 and the algorithm used see [UTF-16](https://en.wikipedia.org/wiki/UTF-16).
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct DecoderUTF16();
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Decoder for DecoderUTF16 {
|
||||
type Word = u16;
|
||||
|
||||
const MAX_CODEPOINT_LEN: usize = 2;
|
||||
|
||||
fn decode(words: &[u16]) -> Char<InvalidChar> {
|
||||
if words[0] < 0xD800 || 0xDFFF < words[0] {
|
||||
let char = Ok(unsafe { std::char::from_u32_unchecked(words[0] as u32) });
|
||||
return Char { char, size: 1 };
|
||||
}
|
||||
let char = (((words[0] - 0xD800) as u32) << 10 | (words[1] - 0xDC00) as u32) + 0x1_0000;
|
||||
|
||||
Char { char: std::char::from_u32(char).ok_or_else(InvalidChar), size: 2 }
|
||||
}
|
||||
}
|
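// An illustrative check of the surrogate-pair arithmetic above (not part of the
// original file): '𤭢' (U+24B62) encodes as the pair 0xD852 0xDF62, and
// ((0xD852 - 0xD800) << 10 | (0xDF62 - 0xDC00)) + 0x1_0000 == 0x24B62.
fn utf16_surrogate_pair_example() {
    let words: Vec<u16> = "𤭢".encode_utf16().collect();
    let decoded = DecoderUTF16::decode(&words);
    assert_eq!(decoded.char.unwrap(), '𤭢');
    assert_eq!(decoded.size, 2);
}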
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === UTF-32 Decoder ===
|
||||
// ======================
|
||||
|
||||
/// Trivial decoder for UTF-32 (`char`).
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct DecoderUTF32();
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Decoder for DecoderUTF32 {
|
||||
type Word = char;
|
||||
|
||||
const MAX_CODEPOINT_LEN: usize = 1;
|
||||
|
||||
fn decode(words: &[char]) -> Char<InvalidChar> {
|
||||
Char { char: Ok(words[0]), size: 1 }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use itertools::Itertools;
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_utf8() {
|
||||
let string = "a.b^c! #𤭢界んにち𤭢#𤭢";
|
||||
let mut buf = string.as_bytes();
|
||||
let mut str = String::from("");
|
||||
while !buf.is_empty() {
|
||||
let char = DecoderUTF8::decode(buf);
|
||||
str.push(char.char.unwrap());
|
||||
buf = &buf[char.size..];
|
||||
}
|
||||
assert_eq!(str, string);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_utf16() {
|
||||
let string = "a.b^c! #𤭢界んにち𤭢#𤭢";
|
||||
let buffer = string.encode_utf16().collect_vec();
|
||||
let mut buf = &buffer[..];
|
||||
let mut str = String::from("");
|
||||
while !buf.is_empty() {
|
||||
let char = DecoderUTF16::decode(buf);
|
||||
str.push(char.char.unwrap());
|
||||
buf = &buf[char.size..];
|
||||
}
|
||||
assert_eq!(str, string);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_utf32() {
|
||||
let string = "a.b^c! #𤭢界んにち𤭢#𤭢".chars().collect_vec();
|
||||
let mut buf = &string[..];
|
||||
let mut str = vec![];
|
||||
while !buf.is_empty() {
|
||||
let char = DecoderUTF32::decode(buf);
|
||||
str.push(char.char.unwrap());
|
||||
buf = &buf[char.size..];
|
||||
}
|
||||
assert_eq!(str, string);
|
||||
}
|
||||
}
|
@ -1,600 +0,0 @@
|
||||
//! This module exports a reader that is able to process large textual inputs in constant memory.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
use enso_prelude::*;
|
||||
|
||||
use crate::decoder::Char;
|
||||
use crate::decoder::InvalidChar;
|
||||
|
||||
use decoder::Decoder;
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod decoder;
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Read ===
|
||||
// ============
|
||||
|
||||
/// Trait for reading input data into a buffer.
|
||||
///
|
||||
/// Compared to `std::io::Read` this reader supports multiple input encodings.
|
||||
pub trait Read {
|
||||
/// The type of the data in the buffer.
|
||||
type Item;
|
||||
|
||||
/// Fills the buffer and returns the number of elements read.
|
||||
///
|
||||
/// In case it isn't possible to fill the whole buffer (i.e. if an error like EOF is
|
||||
/// encountered), the buffer will be filled with all the data read before encountering such an
|
||||
/// error.
|
||||
fn read(&mut self, buffer: &mut [Self::Item]) -> usize;
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl<R: std::io::Read> Read for R {
|
||||
type Item = u8;
|
||||
|
||||
fn read(&mut self, mut buffer: &mut [u8]) -> usize {
|
||||
let length = buffer.len();
|
||||
while !buffer.is_empty() {
|
||||
match self.read(buffer) {
|
||||
Err(_) => break,
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
buffer = &mut buffer[n..];
|
||||
}
|
||||
}
|
||||
}
|
||||
length - buffer.len()
|
||||
}
|
||||
}
|
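// A minimal sketch of the blanket impl above (not part of the original file): a byte
// slice already implements `std::io::Read`, so it picks up this `Read` impl too. Only
// five bytes are available here, so the eight-element buffer is filled partially.
fn read_partial_fill_example() {
    let mut input: &[u8] = b"hello";
    let mut buffer = [0u8; 8];
    assert_eq!(Read::read(&mut input, &mut buffer), 5);
    assert_eq!(&buffer[..5], &b"hello"[..]);
}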
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Error ===
|
||||
// =============
|
||||
|
||||
/// Set of errors returned by lazy reader.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Error {
|
||||
/// End Of Input.
|
||||
EOF,
|
||||
/// Couldn't decode character.
|
||||
InvalidChar,
|
||||
/// The lexer has found no matching rule in the current state.
|
||||
EndOfGroup,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// The `u32` value that corresponds to EOF.
|
||||
pub const END_OF_FILE: u32 = u32::max_value();
|
||||
/// The `u32` value that corresponds to an invalid unicode character.
|
||||
pub const INVALID_CHAR: u32 = 0xFFFF;
|
||||
/// The `u32` value corresponding to the end of group.
|
||||
pub const END_OF_GROUP: u32 = u32::max_value() - 1;
|
||||
/// The `u64` value that corresponds to EOF.
|
||||
pub const END_OF_FILE_64: u64 = u64::max_value();
|
||||
/// The `u64` value that corresponds to an invalid unicode character.
|
||||
pub const INVALID_CHAR_64: u64 = 0xFFFF;
|
||||
/// The `u64` value corresponding to the end of group.
|
||||
pub const END_OF_GROUP_64: u64 = u64::max_value() - 1;
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<decoder::Char<decoder::InvalidChar>> for decoder::Char<Error> {
|
||||
fn from(char: Char<InvalidChar>) -> Self {
|
||||
let size = char.size;
|
||||
let char = match char.char {
|
||||
Ok(char) => Ok(char),
|
||||
Err(_) => Err(Error::InvalidChar),
|
||||
};
|
||||
decoder::Char { char, size }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<decoder::Char<Error>> for u32 {
|
||||
fn from(char: decoder::Char<Error>) -> Self {
|
||||
match char.char {
|
||||
Ok(char) => char as u32,
|
||||
Err(Error::EOF) => Error::END_OF_FILE,
|
||||
Err(Error::InvalidChar) => Error::INVALID_CHAR,
|
||||
Err(Error::EndOfGroup) => Error::END_OF_GROUP,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<decoder::Char<Error>> for u64 {
|
||||
fn from(char: decoder::Char<Error>) -> Self {
|
||||
match char.char {
|
||||
Ok(char) => char as u64,
|
||||
Err(Error::EOF) => Error::END_OF_FILE_64,
|
||||
Err(Error::InvalidChar) => Error::INVALID_CHAR_64,
|
||||
Err(Error::EndOfGroup) => Error::END_OF_GROUP_64,
|
||||
}
|
||||
}
|
||||
}
|
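// Illustrative conversions (not part of the original file): decoded characters map to
// their code point, while the error variants map to the sentinel values defined above.
fn sentinel_conversion_example() {
    let eof: decoder::Char<Error> = decoder::Char { char: Err(Error::EOF), size: 0 };
    assert_eq!(u32::from(eof), Error::END_OF_FILE);
    let ok: decoder::Char<Error> = decoder::Char { char: Ok('x'), size: 1 };
    assert_eq!(u32::from(ok), 'x' as u32);
}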
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === BookmarkId ===
|
||||
// ==================
|
||||
|
||||
/// Strongly typed identifier of a `Bookmark`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct BookmarkId {
|
||||
#[allow(missing_docs)]
|
||||
id: usize,
|
||||
}
|
||||
|
||||
impl BookmarkId {
|
||||
/// Creates a new bookmark handle using the specified identifier.
|
||||
pub fn new(id: usize) -> BookmarkId {
|
||||
BookmarkId { id }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === ReaderOps ===
|
||||
// =================
|
||||
|
||||
/// The behaviour needed by the reader interface.
|
||||
pub trait ReaderOps {
|
||||
/// Read the next character from input.
|
||||
fn next_char(&mut self, bookmarks: &mut BookmarkManager) -> Result<char, Error>;
|
||||
/// Advance along the input without returning the character.
|
||||
fn advance_char(&mut self, bookmarks: &mut BookmarkManager);
|
||||
/// Get the current character from the reader.
|
||||
fn character(&self) -> decoder::Char<Error>;
|
||||
/// Check if the reader has finished reading.
|
||||
///
|
||||
/// A reader is finished when it has no further input left to read, and when it does not need to
|
||||
/// rewind to any point.
|
||||
fn finished(&self, bookmarks: &BookmarkManager) -> bool;
|
||||
/// Check if the reader is empty.
|
||||
fn empty(&self) -> bool;
|
||||
/// Fill the buffer with words from the input.
|
||||
fn fill(&mut self, bookmarks: &mut BookmarkManager);
|
||||
/// Get the maximum possible rewind for the reader.
|
||||
fn max_possible_rewind_len(&self, bookmarks: &BookmarkManager) -> usize;
|
||||
/// Append the provided character to the reader's result.
|
||||
fn append_result(&mut self, char: char);
|
||||
/// Return `self.result` and set the internal result to empty.
|
||||
fn pop_result(&mut self) -> String;
|
||||
/// Get the reader's current offset in the buffer.
|
||||
fn offset(&self) -> usize;
|
||||
/// Get an immutable reference to the reader's result.
|
||||
fn result(&self) -> &String;
|
||||
/// Get a mutable reference to the reader's result.
|
||||
fn result_mut(&mut self) -> &mut String;
|
||||
/// Get the current length of the reader's buffer.
|
||||
fn buffer_len(&self) -> usize;
|
||||
/// Set the buffer offset to the specified value.
|
||||
fn set_offset(&mut self, off: usize);
|
||||
/// Truncate the current match to the provided length.
|
||||
fn truncate_match(&mut self, len: usize);
|
||||
}
|
||||
|
||||
/// The default size of the buffer.
|
||||
pub const BUFFER_SIZE: usize = 32768;
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Reader ===
|
||||
// ==============
|
||||
|
||||
/// A buffered reader able to efficiently read big inputs in constant memory.
|
||||
///
|
||||
/// It supports various encodings via `Decoder` and also bookmarks which allow it to return
|
||||
/// to a character at a specific offset.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Reader<D: Decoder, Read> {
|
||||
/// The reader that holds the input.
|
||||
pub reader: Read,
|
||||
/// The buffer that stores the input data.
|
||||
pub buffer: Vec<D::Word>,
|
||||
/// The string representation of data that has been read.
|
||||
pub result: String,
|
||||
/// The buffer offset of the current element read.
|
||||
pub offset: usize,
|
||||
/// The number of elements stored in buffer.
|
||||
pub length: usize,
|
||||
/// The last character read.
|
||||
pub character: decoder::Char<Error>,
|
||||
}
|
||||
|
||||
impl<D: Decoder, R: Read<Item = D::Word>> Reader<D, R> {
|
||||
/// Creates a new instance of the reader.
|
||||
pub fn new(reader: R, _decoder: D) -> Self {
|
||||
let mut reader = Reader::<D, R> {
|
||||
reader,
|
||||
buffer: vec![D::Word::default(); BUFFER_SIZE],
|
||||
result: String::from(""),
|
||||
offset: 0,
|
||||
length: 0,
|
||||
character: decoder::Char { char: Err(Error::EOF), size: 0 },
|
||||
};
|
||||
reader.length = reader.reader.read(&mut reader.buffer[..]);
|
||||
reader
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl<D: Decoder, R: Read<Item = D::Word>> ReaderOps for Reader<D, R> {
|
||||
fn next_char(&mut self, bookmarks: &mut BookmarkManager) -> Result<char, Error> {
|
||||
if self.empty() {
|
||||
self.character.char = Err(Error::EOF);
|
||||
return Err(Error::EOF);
|
||||
}
|
||||
|
||||
if self.offset >= self.buffer.len() - D::MAX_CODEPOINT_LEN {
|
||||
self.fill(bookmarks);
|
||||
}
|
||||
|
||||
self.character = D::decode(&self.buffer[self.offset..]).into();
|
||||
self.offset += self.character.size;
|
||||
|
||||
self.character.char
|
||||
}
|
||||
|
||||
fn advance_char(&mut self, bookmarks: &mut BookmarkManager) {
|
||||
let _ = self.next_char(bookmarks);
|
||||
}
|
||||
|
||||
fn character(&self) -> Char<Error> {
|
||||
self.character
|
||||
}
|
||||
|
||||
fn finished(&self, _bookmarks: &BookmarkManager) -> bool {
|
||||
let rewinded = self.max_possible_rewind_len(_bookmarks) != 0;
|
||||
self.empty() && rewinded
|
||||
}
|
||||
|
||||
fn empty(&self) -> bool {
|
||||
self.length < self.buffer.len() && self.length <= self.offset
|
||||
}
|
||||
|
||||
fn fill(&mut self, bookmarks: &mut BookmarkManager) {
|
||||
let len = self.buffer.len();
|
||||
let words = len - self.offset;
|
||||
self.offset = self.max_possible_rewind_len(bookmarks);
|
||||
if self.offset == len {
|
||||
panic!("Rewind won't be possible. Buffer is too small.")
|
||||
}
|
||||
|
||||
bookmarks.decrease_bookmark_offsets(len - self.offset);
|
||||
for i in 1..=self.offset {
|
||||
self.buffer[self.offset - i] = self.buffer[len - i];
|
||||
}
|
||||
self.length = self.offset + self.reader.read(&mut self.buffer[self.offset..]);
|
||||
self.offset -= words;
|
||||
}
|
||||
|
||||
fn max_possible_rewind_len(&self, bookmarks: &BookmarkManager) -> usize {
|
||||
if let Some(offset) = bookmarks.min_offset() {
|
||||
return self.buffer_len() - offset;
|
||||
}
|
||||
D::MAX_CODEPOINT_LEN
|
||||
}
|
||||
|
||||
fn append_result(&mut self, char: char) {
|
||||
self.result.push(char);
|
||||
}
|
||||
|
||||
fn pop_result(&mut self) -> String {
|
||||
let str = self.result.clone();
|
||||
self.result.truncate(0);
|
||||
str
|
||||
}
|
||||
|
||||
fn offset(&self) -> usize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
fn result(&self) -> &String {
|
||||
&self.result
|
||||
}
|
||||
|
||||
fn result_mut(&mut self) -> &mut String {
|
||||
&mut self.result
|
||||
}
|
||||
|
||||
fn buffer_len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn set_offset(&mut self, off: usize) {
|
||||
self.offset = off;
|
||||
}
|
||||
|
||||
fn truncate_match(&mut self, len: usize) {
|
||||
self.result.truncate(len);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Bookmark ===
|
||||
// ================
|
||||
|
||||
/// Bookmarks a specific character in buffer, so that `LazyReader` can return to it when needed.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq)]
|
||||
pub struct Bookmark {
|
||||
/// The position of the bookmarked character in the `reader.buffer`.
|
||||
offset: usize,
|
||||
/// The length of `reader.result` up to the bookmarked character.
|
||||
length: usize,
|
||||
/// Whether or not the bookmark has been set by the user.
|
||||
set: bool,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === BookmarkManager ===
|
||||
// =======================
|
||||
|
||||
/// Contains and manages bookmarks for a running lexer.
|
||||
///
|
||||
/// Some of its operations operate on a specific [`Reader`]. It is undefined behaviour to not pass
|
||||
/// the same reader to all calls for a given bookmark manager.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct BookmarkManager {
|
||||
bookmarks: Vec<Bookmark>,
|
||||
/// The bookmark used by the flexer to mark the end of the last matched segment of the input.
|
||||
pub matched_bookmark: BookmarkId,
|
||||
/// A bookmark used by the flexer to deal with overlapping rules that may fail later.
|
||||
pub rule_bookmark: BookmarkId,
|
||||
}
|
||||
|
||||
#[allow(missing_docs)]
|
||||
impl BookmarkManager {
|
||||
/// Create a new bookmark manager, with no associated bookmarks.
|
||||
pub fn new() -> BookmarkManager {
|
||||
let mut bookmarks = Vec::new();
|
||||
let matched_bookmark = BookmarkManager::make_bookmark(&mut bookmarks);
|
||||
let rule_bookmark = BookmarkManager::make_bookmark(&mut bookmarks);
|
||||
BookmarkManager { bookmarks, matched_bookmark, rule_bookmark }
|
||||
}
|
||||
|
||||
/// Create a new bookmark in the manager, returning a handle to it.
|
||||
fn make_bookmark(bookmarks: &mut Vec<Bookmark>) -> BookmarkId {
|
||||
bookmarks.push(Bookmark::default());
|
||||
BookmarkId::new(bookmarks.len() - 1)
|
||||
}
|
||||
|
||||
/// Add a bookmark to the manager, returning a handle to that bookmark.
|
||||
pub fn add_bookmark(&mut self) -> BookmarkId {
|
||||
BookmarkManager::make_bookmark(&mut self.bookmarks)
|
||||
}
|
||||
|
||||
/// Bookmarks the current position in `reader` using `bookmark`.
|
||||
pub fn bookmark<R: ReaderOps>(&mut self, bookmark: BookmarkId, reader: &mut R) {
|
||||
self.bookmarks[bookmark.id].offset = reader.offset() - reader.character().size;
|
||||
self.bookmarks[bookmark.id].length = reader.result().len();
|
||||
self.bookmarks[bookmark.id].set = true
|
||||
}
|
||||
|
||||
/// Unsets a bookmark.
|
||||
pub fn unset<R: ReaderOps>(&mut self, bookmark: BookmarkId) {
|
||||
self.bookmarks[bookmark.id].offset = 0;
|
||||
self.bookmarks[bookmark.id].length = 0;
|
||||
self.bookmarks[bookmark.id].set = false
|
||||
}
|
||||
|
||||
/// Decrease the offset for all bookmarks by the specified `amount` in preparation for
|
||||
/// truncating the reader's buffer.
|
||||
pub fn decrease_bookmark_offsets(&mut self, amount: usize) {
|
||||
for bookmark in self.bookmarks.iter_mut() {
|
||||
if bookmark.set {
|
||||
bookmark.offset -= amount
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewind the reader to the position marked by `bookmark`.
|
||||
pub fn rewind<R: ReaderOps>(&mut self, bookmark: BookmarkId, reader: &mut R) {
|
||||
let bookmark = self.bookmarks.get(bookmark.id).expect("Bookmark must exist.");
|
||||
reader.set_offset(bookmark.offset);
|
||||
reader.truncate_match(bookmark.length);
|
||||
reader.advance_char(self);
|
||||
}
|
||||
|
||||
/// Obtains the minimum offset from the start of the buffer for any bookmark.
|
||||
pub fn min_offset(&self) -> Option<usize> {
|
||||
self.bookmarks.iter().filter_map(|b| b.set.and_option(Some(b.offset))).min()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl Default for BookmarkManager {
|
||||
fn default() -> Self {
|
||||
BookmarkManager::new()
|
||||
}
|
||||
}
|
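// An illustrative end-to-end sketch (not part of the original file): read characters,
// bookmark one of them, read further, and rewind back to the bookmarked character.
fn reader_bookmark_example() {
    let mut bookmarks = BookmarkManager::new();
    let mut reader = Reader::new("abc".as_bytes(), decoder::DecoderUTF8());
    assert_eq!(reader.next_char(&mut bookmarks), Ok('a'));
    assert_eq!(reader.next_char(&mut bookmarks), Ok('b'));
    bookmarks.bookmark(bookmarks.matched_bookmark, &mut reader);
    assert_eq!(reader.next_char(&mut bookmarks), Ok('c'));
    bookmarks.rewind(bookmarks.matched_bookmark, &mut reader);
    assert_eq!(reader.character().char, Ok('b'));
}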
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
extern crate test;
|
||||
|
||||
use super::*;
|
||||
use decoder::*;
|
||||
|
||||
use test::Bencher;
|
||||
|
||||
// ================
|
||||
// === Repeater ===
|
||||
// ================
|
||||
|
||||
/// Struct that holds the state of a reader that repeats an input n times.
|
||||
#[derive(Debug, Clone)]
|
||||
struct Repeat<T> {
|
||||
/// The input to be repeated.
|
||||
buffer: Vec<T>,
|
||||
/// The offset of the element currently being read from the buffer.
|
||||
offset: usize,
|
||||
/// How many more times the input should be repeated.
|
||||
repeat: usize,
|
||||
}
|
||||
|
||||
/// Creates a reader that repeats an input n times.
|
||||
fn repeat<T: Copy>(input: Vec<T>, repeat: usize) -> impl Read<Item = T> {
|
||||
Repeat { buffer: input, repeat, offset: 0 }
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl<T: Copy> Read for Repeat<T> {
|
||||
type Item = T;
|
||||
|
||||
fn read(&mut self, mut buffer: &mut [Self::Item]) -> usize {
|
||||
if self.repeat == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let len = self.buffer.len();
|
||||
let read = buffer.len();
|
||||
|
||||
if read < len - self.offset {
|
||||
buffer.copy_from_slice(&self.buffer[self.offset..self.offset + read]);
|
||||
self.offset += read;
|
||||
return read;
|
||||
}
|
||||
|
||||
buffer[..len - self.offset].copy_from_slice(&self.buffer[self.offset..]);
|
||||
buffer = &mut buffer[len - self.offset..];
|
||||
|
||||
let repeat = std::cmp::min(buffer.len() / len, self.repeat - 1);
|
||||
self.repeat = self.repeat - repeat - 1;
|
||||
for _ in 0..repeat {
|
||||
buffer[..len].copy_from_slice(&self.buffer[..]);
|
||||
buffer = &mut buffer[len..];
|
||||
}
|
||||
|
||||
if self.repeat == 0 {
|
||||
return len - self.offset + repeat * len;
|
||||
}
|
||||
buffer.copy_from_slice(&self.buffer[..buffer.len()]);
|
||||
self.offset = buffer.len();
|
||||
read
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Utils ===
|
||||
// =============
|
||||
|
||||
/// Constructs an _empty_ bookmark manager for testing purposes.
|
||||
pub fn bookmark_manager() -> BookmarkManager {
|
||||
BookmarkManager::new()
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[test]
|
||||
fn test_repeater_with_small_buffer() {
|
||||
let mut repeater = repeat(vec![1, 2, 3], 1);
|
||||
let mut buffer = [0; 2];
|
||||
assert_eq!(repeater.read(&mut buffer), 2);
|
||||
assert_eq!(&buffer, &[1, 2]);
|
||||
assert_eq!(repeater.read(&mut buffer), 1);
|
||||
assert_eq!(&buffer, &[3, 2])
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_repeater_with_big_buffer() {
|
||||
let mut repeater = repeat(vec![1, 2], 3);
|
||||
let mut buffer = [0; 5];
|
||||
assert_eq!(repeater.read(&mut buffer), 5);
|
||||
assert_eq!(&buffer, &[1, 2, 1, 2, 1]);
|
||||
assert_eq!(repeater.read(&mut buffer), 1);
|
||||
assert_eq!(&buffer, &[2, 2, 1, 2, 1])
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_reader_small_input() {
|
||||
let mut mgr = bookmark_manager();
|
||||
let str = "a.b^c! #𤭢界んにち𤭢#𤭢";
|
||||
let mut reader = Reader::new(str.as_bytes(), DecoderUTF8());
|
||||
let mut result = String::from("");
|
||||
while let Ok(char) = reader.next_char(&mut mgr) {
|
||||
result.push(char);
|
||||
}
|
||||
assert_eq!(&result, str);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_reader_big_input() {
|
||||
let mut mgr = bookmark_manager();
|
||||
let str = "a.b^c! #𤭢界んにち𤭢#𤭢".repeat(10_000);
|
||||
let mut reader = Reader::new(str.as_bytes(), DecoderUTF8());
|
||||
let mut result = String::from("");
|
||||
while let Ok(char) = reader.next_char(&mut mgr) {
|
||||
mgr.bookmark(mgr.matched_bookmark, &mut reader);
|
||||
result.push(char);
|
||||
}
|
||||
assert_eq!(&result, &str);
|
||||
assert_eq!(reader.buffer.len(), BUFFER_SIZE);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_reader(bencher: &mut Bencher) {
|
||||
let run = || {
|
||||
let mut mgr = bookmark_manager();
|
||||
let str = repeat("Hello, World!".as_bytes().to_vec(), 10_000_000);
|
||||
let mut reader = Reader::new(str, DecoderUTF8());
|
||||
let mut count = 0;
|
||||
while reader.next_char(&mut mgr) != Err(Error::EOF) {
|
||||
count += 1;
|
||||
}
|
||||
count
|
||||
};
|
||||
bencher.iter(run);
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
[package]
|
||||
name = "lexer-definition"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
enso-flexer = { version = "0.2.0", path = "../../flexer" }
|
||||
enso-prelude = { version = "0.2.0", path = "../../../prelude" }
|
||||
|
||||
uuid = { version = "0.8.1" , features = ["serde","v4","wasm-bindgen"] }
|
@ -1,371 +0,0 @@
|
||||
//! This module describes valid escape sequences inside Enso text literals.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::lexeme;
|
||||
use crate::library::token;
|
||||
use crate::token::EscapeStyle;
|
||||
use crate::token::Shape;
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === EscapeCharacter ===
|
||||
// =======================
|
||||
|
||||
/// A representation of an escape character.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
pub struct EscapeCharacter {
|
||||
/// The lexing representation of the escape.
|
||||
///
|
||||
/// This is the literal string that must occur in the Enso source code to be interpreted as
|
||||
/// this escape code.
|
||||
pub pattern: String,
|
||||
/// The literal representation of the escape.
|
||||
///
|
||||
/// This is the character-level encoding of this escape character in Rust, as the Rust escape
|
||||
/// representation and the Enso escape representation may differ, or Rust may not support the
|
||||
/// same literal escape code as Enso.
|
||||
pub repr: String,
|
||||
}
|
||||
impl EscapeCharacter {
|
||||
fn new(pattern: impl Str, repr: impl Str) -> EscapeCharacter {
|
||||
let pattern = pattern.into();
|
||||
let repr = repr.into();
|
||||
Self { pattern, repr }
|
||||
}
|
||||
|
||||
/// The set of character escape codes that Enso supports.
|
||||
pub fn codes() -> Vec<EscapeCharacter> {
|
||||
vec![
|
||||
// === Null ===
|
||||
Self::new(r"\0", "\0"),
|
||||
// === Newlines ===
|
||||
Self::new(r"\n", "\n"),
|
||||
Self::new(r"\r", "\r"),
|
||||
Self::new(r"\f", "\x0C"),
|
||||
// === Tabs ===
|
||||
Self::new(r"\t", "\t"),
|
||||
Self::new(r"\v", "\x0B"),
|
||||
// === Backspace ===
|
||||
Self::new(r"\b", "\x08"),
|
||||
// === Misc ===
|
||||
Self::new(r"\a", "\x07"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === Utilities ===
|
||||
// =================
|
||||
|
||||
/// Check if `c` is a hexadecimal digit.
|
||||
fn is_hex_digit(c: char) -> bool {
|
||||
let small_letters = 'a'..='f';
|
||||
let large_letters = 'A'..='F';
|
||||
let digits = '0'..='9';
|
||||
small_letters.contains(&c) || large_letters.contains(&c) || digits.contains(&c)
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === EscapeSequence ===
|
||||
// ======================
|
||||
|
||||
/// A trait representing various kinds of escape sequence.
|
||||
///
|
||||
/// An escape sequence built using this trait will have its digits calculated by stripping the
|
||||
/// [`Self::prefix_length()`] and [`Self::suffix_length()`] from the input string, and then
|
||||
/// validated using [`Self::digits_min_length()`], [`Self::digits_max_length()`], and
|
||||
/// [`Self::validator()`]. All digits must be valid hexadecimal digits as defined by
|
||||
/// [`is_hex_digit`] above.
|
||||
///
|
||||
/// In addition, the implementation must define [`Self::style_on_success()`] and
|
||||
/// [`Self::style_on_failure()`] to determine the type of escape output on success and failure.
|
||||
pub trait EscapeSequence {
|
||||
/// Create a token of the relevant escape sequence type.
|
||||
///
|
||||
/// This function should be passed the _full_ match for the escape sequence as `repr`, including
|
||||
/// the delimiters. For example, if we have the escape sequence `\uAFAF`, we want to pass the
|
||||
/// whole string `"\uAFAF"`, not just `"AFAF"`, to this function.
|
||||
fn build(repr: impl Str) -> Shape {
|
||||
if let Some(digits) = Self::validate(repr.as_ref()) {
|
||||
Shape::text_segment_escape(Self::style_on_success(), digits)
|
||||
} else {
|
||||
Shape::text_segment_escape(Self::style_on_failure(), repr)
|
||||
}
|
||||
}
|
||||
|
||||
/// Obtain the digits portion of the escape sequence.
|
||||
fn get_digits(repr: &str) -> &str {
|
||||
let start = Self::prefix_length();
|
||||
let end = repr.len().saturating_sub(Self::suffix_length());
|
||||
&repr[start..end]
|
||||
}
|
||||
|
||||
/// Validate the provided unicode string for this type of escape sequence.
|
||||
fn validate(repr: &str) -> Option<String> {
|
||||
let digits = Self::get_digits(repr);
|
||||
let ge_min = digits.len() >= Self::digits_min_length();
|
||||
let le_max = digits.len() <= Self::digits_max_length();
|
||||
let valid_length = ge_min && le_max;
|
||||
let valid_escape = Self::validator(digits);
|
||||
let valid_digits = digits.chars().all(is_hex_digit);
|
||||
let is_valid = valid_length && valid_escape && valid_digits;
|
||||
is_valid.as_some(digits.into())
|
||||
}
|
||||
|
||||
/// Return the length of the escape prefix.
|
||||
///
|
||||
/// The prefix is the characters that need to be stripped from the front of the escape sequence
|
||||
/// to get, in conjunction with [`EscapeSequence::suffix_length()`], the escape value itself.
|
||||
fn prefix_length() -> usize;
|
||||
|
||||
/// Return the length of the escape suffix.
|
||||
///
|
||||
/// The suffix is the characters that need to be stripped from the end of the escape sequence to
|
||||
/// get, in conjunction with [`EscapeSequence::prefix_length()`], the escape value itself.
|
||||
///
|
||||
/// This defaults to `0`.
|
||||
fn suffix_length() -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
/// Return the minimum number of digits accepted by the escape sequence type.
|
||||
fn digits_min_length() -> usize;
|
||||
|
||||
/// Return the maximum number of digits accepted by the escape sequence type.
|
||||
///
|
||||
/// This defaults to `digits_min_length()`.
|
||||
fn digits_max_length() -> usize {
|
||||
Self::digits_min_length()
|
||||
}
|
||||
|
||||
/// A validator for any additional properties of the escape sequence.
|
||||
///
|
||||
/// It will be passed the _digits_ of the escape sequence, as defined by
|
||||
/// [`EscapeSequence::get_digits()`], and has a default implementation that always succeeds.
|
||||
/// Please implement this validator yourself if you would like to assert _additional_ properties
|
||||
/// on your escape sequence.
|
||||
fn validator(_digits: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// The style of escape after successful validation.
|
||||
fn style_on_success() -> token::EscapeStyle;
|
||||
|
||||
/// The style of escape after unsuccessful validation.
|
||||
fn style_on_failure() -> token::EscapeStyle;
|
||||
}
|
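// A worked sketch of the template method above (not part of the original file), using
// the `Byte` implementor defined just below; the escape strings are illustrative.
fn escape_sequence_build_example() {
    // "\x0F" strips the 2-character prefix, leaving the valid digits "0F".
    let ok = Byte::build(r"\x0F");
    assert_eq!(ok, Shape::text_segment_escape(token::EscapeStyle::Byte, "0F"));
    // "ZZ" is not hexadecimal, so validation fails and the full repr is kept.
    let bad = Byte::build(r"\xZZ");
    assert_eq!(bad, Shape::text_segment_escape(token::EscapeStyle::Invalid, r"\xZZ"));
}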
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === ByteEscape ===
|
||||
// ==================
|
||||
|
||||
/// A validator for ASCII escapes.
|
||||
///
|
||||
/// An ASCII escape begins with the sequence `\x` and is followed by two hexadecimal digits (e.g.
|
||||
/// `\x0F`).
|
||||
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
|
||||
pub struct Byte;
|
||||
impl EscapeSequence for Byte {
|
||||
fn prefix_length() -> usize {
|
||||
lexeme::len(lexeme::literal::BYTE_ESCAPE_START)
|
||||
}
|
||||
fn digits_min_length() -> usize {
|
||||
2
|
||||
}
|
||||
fn style_on_success() -> EscapeStyle {
|
||||
token::EscapeStyle::Byte
|
||||
}
|
||||
fn style_on_failure() -> EscapeStyle {
|
||||
token::EscapeStyle::Invalid
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === U16 ===
|
||||
// ===========
|
||||
|
||||
/// A validator for U16 unicode escapes.
|
||||
///
|
||||
/// A U16 unicode escape begins with the sequence `\u` and is followed by four hexadecimal digits,
|
||||
/// e.g. `\u0F0F`.
|
||||
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
|
||||
pub struct U16;
|
||||
impl EscapeSequence for U16 {
|
||||
fn prefix_length() -> usize {
|
||||
lexeme::len(lexeme::literal::U16_ESCAPE_START)
|
||||
}
|
||||
fn digits_min_length() -> usize {
|
||||
4
|
||||
}
|
||||
fn style_on_success() -> EscapeStyle {
|
||||
token::EscapeStyle::U16
|
||||
}
|
||||
fn style_on_failure() -> EscapeStyle {
|
||||
token::EscapeStyle::InvalidUnicode
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === U21 ===
|
||||
// ===========
|
||||
|
||||
/// A validator for U21 unicode escapes.
|
||||
///
|
||||
/// A U21 unicode escape begins with the sequence `\u`, followed by a sequence of 1-6 hexadecimal
|
||||
/// digits enclosed in braces (`{}`). Both `\u{F}` and `\u{AFAFAF}` are valid U21 escapes.
|
||||
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
|
||||
pub struct U21;
|
||||
impl EscapeSequence for U21 {
|
||||
fn prefix_length() -> usize {
|
||||
lexeme::len(lexeme::literal::U21_ESCAPE_START)
|
||||
}
|
||||
fn suffix_length() -> usize {
|
||||
lexeme::len(lexeme::literal::U21_ESCAPE_END)
|
||||
}
|
||||
fn digits_min_length() -> usize {
|
||||
1
|
||||
}
|
||||
fn digits_max_length() -> usize {
|
||||
6
|
||||
}
|
||||
fn style_on_success() -> EscapeStyle {
|
||||
token::EscapeStyle::U21
|
||||
}
|
||||
fn style_on_failure() -> EscapeStyle {
|
||||
token::EscapeStyle::InvalidUnicode
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === U32 ===
|
||||
// ===========
|
||||
|
||||
/// A validator for U32 unicode escapes.
|
||||
///
|
||||
/// A U32 unicode escape begins with the sequence `\U`, followed by 8 hexadecimal digits. Due to the
|
||||
/// restrictions of unicode, the first two digits _must_ be zero (e.g. `\U00AFAFAF`).
|
||||
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)]
|
||||
pub struct U32;
|
||||
impl EscapeSequence for U32 {
|
||||
fn prefix_length() -> usize {
|
||||
lexeme::len(lexeme::literal::U32_ESCAPE_START)
|
||||
}
|
||||
fn digits_min_length() -> usize {
|
||||
8
|
||||
}
|
||||
fn validator(digits: &str) -> bool {
|
||||
digits.starts_with("00")
|
||||
}
|
||||
fn style_on_success() -> EscapeStyle {
|
||||
token::EscapeStyle::U32
|
||||
}
|
||||
fn style_on_failure() -> EscapeStyle {
|
||||
token::EscapeStyle::InvalidUnicode
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
|
||||
// === Utilities ===
|
||||
|
||||
/// Tests a valid input to ensure that it succeeds.
|
||||
fn test_valid<Esc: EscapeSequence>(escape: &str, out: &str, out_style: token::EscapeStyle) {
|
||||
let shape = Shape::text_segment_escape(out_style, out);
|
||||
assert_eq!(Esc::build(escape), shape);
|
||||
}
|
||||
|
||||
/// Tests invalid inputs to ensure they fail for the provided escape type `Esc`.
|
||||
fn test_invalid<Esc: EscapeSequence>(invalid_cases: Vec<&str>, fail_with: token::EscapeStyle) {
|
||||
for escape in invalid_cases {
|
||||
let shape = Shape::text_segment_escape(fail_with, escape);
|
||||
assert_eq!(Esc::build(escape), shape)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Is Hex Digit ===
|
||||
|
||||
#[test]
|
||||
fn test_is_hex_digit() {
|
||||
for val in u8::min_value()..=u8::max_value() {
|
||||
let char = char::from(val);
|
||||
let is_in_small = ('a'..='f').contains(&char);
|
||||
let is_in_large = ('A'..='F').contains(&char);
|
||||
let is_in_dec_digit = ('0'..='9').contains(&char);
|
||||
let expected_result = is_in_small || is_in_large || is_in_dec_digit;
|
||||
assert_eq!(is_hex_digit(char), expected_result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Build ===
|
||||
|
||||
#[test]
|
||||
fn test_byte_build_valid() {
|
||||
test_valid::<Byte>(r"\x05", "05", token::EscapeStyle::Byte);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_byte_build_invalid() {
|
||||
test_invalid::<Byte>(vec![r"\x5", r"\x", r"\x033", r"\xz2"], token::EscapeStyle::Invalid);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u16_build_valid() {
|
||||
test_valid::<U16>(r"\u4fe3", "4fe3", token::EscapeStyle::U16);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u16_build_invalid() {
|
||||
test_invalid::<U16>(
|
||||
vec![r"\u123", r"\u", r"\u123aff", r"\uazaz"],
|
||||
token::EscapeStyle::InvalidUnicode,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u21_build_valid() {
|
||||
test_valid::<U21>(r"\u{fa4e}", "fa4e", token::EscapeStyle::U21);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u21_build_invalid() {
|
||||
test_invalid::<U21>(vec![r"\u{1234567}", r"\u{}"], token::EscapeStyle::InvalidUnicode);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u32_build_valid() {
|
||||
test_valid::<U32>(r"\U0014A890", "0014A890", token::EscapeStyle::U32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u32_build_invalid() {
|
||||
test_invalid::<U32>(
|
||||
vec![r"\U12121212", r"\U", r"\U001234", r"\U001234567"],
|
||||
token::EscapeStyle::InvalidUnicode,
|
||||
);
|
||||
}
|
||||
}
|
@ -1,303 +0,0 @@
|
||||
//! This module defines the base lexemes for the Enso language.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use enso_flexer::automata::pattern::Pattern;
|
||||
|
||||
|
||||
|
||||
// =================================
|
||||
// === Basic Pattern Definitions ===
|
||||
// =================================
|
||||
|
||||
/// Basic lexemes as patterns.
|
||||
///
|
||||
/// These must _only_ be used as part of the lexer definition, not used at runtime as they are not
|
||||
/// performant at all.
|
||||
pub mod definition_pattern {
|
||||
use super::*;
|
||||
|
||||
/// Match lower-case ASCII letters.
|
||||
pub fn lower_ascii_letter() -> Pattern {
|
||||
Pattern::range('a'..='z')
|
||||
}
|
||||
|
||||
/// Match upper-case ASCII letters.
|
||||
pub fn upper_ascii_letter() -> Pattern {
|
||||
Pattern::range('A'..='Z')
|
||||
}
|
||||
|
||||
/// Match ASCII digits.
|
||||
pub fn ascii_digit() -> Pattern {
|
||||
Pattern::range('0'..='9')
|
||||
}
|
||||
|
||||
/// Match ASCII letters.
|
||||
pub fn ascii_letter() -> Pattern {
|
||||
lower_ascii_letter() | upper_ascii_letter()
|
||||
}
|
||||
|
||||
/// Match ASCII alphanumeric characters.
|
||||
pub fn ascii_alpha_num() -> Pattern {
|
||||
ascii_digit() | ascii_letter()
|
||||
}
|
||||
|
||||
/// Match at least one ASCII space character.
|
||||
pub fn spaces() -> Pattern {
|
||||
into_pattern(literal::SPACE).many1()
|
||||
}
|
||||
|
||||
/// Match the end-of-file character.
|
||||
pub fn eof() -> Pattern {
|
||||
Pattern::eof()
|
||||
}
|
||||
|
||||
/// Match a newline.
|
||||
///
|
||||
/// This matches both Unix (LF) and Windows (CRLF) styles of newlines. This is particularly
|
||||
/// important so as not to result in incorrect spans on Windows clients.
|
||||
pub fn newline() -> Pattern {
|
||||
let lf = into_pattern(literal::LF);
|
||||
let crlf = into_pattern(literal::CRLF);
|
||||
lf | crlf
|
||||
}
|
||||
|
||||
/// The characters that break tokens in Enso.
|
||||
pub fn whitespace_break_chars() -> String {
|
||||
[literal::TAB, literal::LF, literal::CR].concat()
|
||||
}
|
||||
|
||||
/// The characters that break token lexing in Enso.
|
||||
pub fn break_chars() -> String {
|
||||
[
|
||||
literal::INTERPOLATE_QUOTE,
|
||||
literal::COMMENT,
|
||||
literal::ANNOTATION_SYMBOL,
|
||||
literal::SPACE,
|
||||
literal::COMMA,
|
||||
literal::DOT,
|
||||
literal::OPERATOR_CHARS,
|
||||
literal::GROUP_CHARS,
|
||||
&whitespace_break_chars(),
|
||||
]
|
||||
.concat()
|
||||
}
|
||||
|
||||
/// Adds the basic characters not allowed in a raw segment in a format text literal.
|
||||
fn add_base_format_disallows(chars: &mut String) {
|
||||
chars.push_str(literal::INTERPOLATE_QUOTE);
|
||||
chars.push_str(literal::SLASH);
|
||||
chars.push_str(literal::LF);
|
||||
chars.push_str(literal::CR);
|
||||
}
|
||||
|
||||
/// Characters allowable inside a raw segment in a format line.
|
||||
pub fn format_line_raw_char() -> Pattern {
|
||||
let mut chars = String::new();
|
||||
chars.push_str(literal::FORMAT_QUOTE);
|
||||
add_base_format_disallows(&mut chars);
|
||||
Pattern::none_of(&chars)
|
||||
}
|
||||
|
||||
/// Characters allowable inside a raw segment in a format block.
|
||||
pub fn format_block_raw_char() -> Pattern {
|
||||
let mut chars = String::new();
|
||||
add_base_format_disallows(&mut chars);
|
||||
Pattern::none_of(&chars)
|
||||
}
|
||||
|
||||
/// Adds the basic characters not allowed in a raw segment in a raw text literal.
|
||||
fn add_base_raw_disallows(chars: &mut String) {
|
||||
chars.push_str(literal::SLASH);
|
||||
chars.push_str(literal::LF);
|
||||
chars.push_str(literal::CR);
|
||||
}
|
||||
|
||||
/// Characters allowable inside a raw segment in a raw line.
|
||||
pub fn raw_line_raw_char() -> Pattern {
|
||||
let mut chars = String::new();
|
||||
chars.push_str(literal::RAW_QUOTE);
|
||||
add_base_raw_disallows(&mut chars);
|
||||
Pattern::none_of(&chars)
|
||||
}
|
||||
|
||||
/// Characters allowable inside a raw segment in a raw block.
|
||||
pub fn raw_block_raw_char() -> Pattern {
|
||||
let mut chars = String::new();
|
||||
add_base_raw_disallows(&mut chars);
|
||||
Pattern::none_of(&chars)
|
||||
}
|
||||
|
||||
/// The characters allowed as digits in a unicode escape.
|
||||
pub fn unicode_escape_digit() -> Pattern {
|
||||
let chars = &[
|
||||
literal::FORMAT_QUOTE,
|
||||
literal::RAW_QUOTE,
|
||||
literal::INTERPOLATE_QUOTE,
|
||||
literal::SLASH,
|
||||
literal::LF,
|
||||
literal::CR,
|
||||
"{}",
|
||||
]
|
||||
.concat();
|
||||
Pattern::none_of(chars)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===============================
|
||||
// === Enso Lexeme Definitions ===
|
||||
// ===============================
|
||||
|
||||
/// The literal lexemes that make up the Enso language.
|
||||
pub mod literal {
|
||||
|
||||
/// The type of a literal lexeme.
|
||||
pub type Literal = &'static str;
|
||||
|
||||
// === The Lexemes ===
|
||||
|
||||
/// The space character.
|
||||
pub const SPACE: Literal = " ";
|
||||
|
||||
/// The line-feed character.
|
||||
pub const LF: Literal = "\n";
|
||||
|
||||
/// The carriage-return character.
|
||||
pub const CR: Literal = "\r";
|
||||
|
||||
/// The crlf windows-style line ending.
|
||||
pub const CRLF: Literal = "\r\n";
|
||||
|
||||
/// The tab character.
|
||||
pub const TAB: Literal = "\t";
|
||||
|
||||
/// The comment character.
|
||||
pub const COMMENT: Literal = "#";
|
||||
|
||||
/// The doc comment character.
|
||||
pub const DOC_COMMENT: Literal = "##";
|
||||
|
||||
/// The symbol for beginning an annotation.
|
||||
pub const ANNOTATION_SYMBOL: Literal = "@";
|
||||
|
||||
/// The dot symbol.
|
||||
pub const DOT: Literal = ".";
|
||||
|
||||
/// Two dots.
|
||||
pub const TWO_DOTS: Literal = "..";
|
||||
|
||||
/// Three dots.
|
||||
pub const THREE_DOTS: Literal = "...";
|
||||
|
||||
/// The comma.
|
||||
pub const COMMA: Literal = ",";
|
||||
|
||||
/// The `in` operator.
|
||||
pub const OPERATOR_IN: Literal = "in";
|
||||
|
||||
/// The tick allowable at the end of an identifier.
|
||||
pub const IDENTIFIER_TICK: Literal = "'";
|
||||
|
||||
/// The quote used to delimit interpolations in format text literals.
|
||||
pub const INTERPOLATE_QUOTE: Literal = "`";
|
||||
|
||||
/// The quote used to delimit format text literals.
|
||||
pub const FORMAT_QUOTE: Literal = "'";
|
||||
|
||||
/// The quote used to delimit format block literals.
|
||||
pub const FORMAT_BLOCK_QUOTE: Literal = "'''";
|
||||
|
||||
/// The quote used to delimit raw text literals.
|
||||
pub const RAW_QUOTE: Literal = "\"";
|
||||
|
||||
/// The quote used to delimit raw block literals.
|
||||
pub const RAW_BLOCK_QUOTE: Literal = "\"\"\"";
|
||||
|
||||
/// The equals operator.
|
||||
pub const EQUALS: Literal = "=";
|
||||
|
||||
/// The equality comparison operator.
|
||||
pub const EQUALS_COMP: Literal = "==";
|
||||
|
||||
/// Greater-than or equal.
|
||||
pub const GE_OPERATOR: Literal = ">=";
|
||||
|
||||
/// Less-than or equal.
|
||||
pub const LE_OPERATOR: Literal = "<=";
|
||||
|
||||
/// Inequality comparison operator.
|
||||
pub const NOT_EQUAL: Literal = "!=";
|
||||
|
||||
/// The hash eq operator.
|
||||
pub const HASH_EQ: Literal = "#=";
|
||||
|
||||
/// The wide arrow operator.
|
||||
pub const WIDE_ARROW: Literal = "=>";
|
||||
|
||||
/// The blank identifier.
|
||||
pub const BLANK_IDENT: Literal = "_";
|
||||
|
||||
/// The identifier segment separator.
|
||||
pub const IDENT_SEGMENT_SEPARATOR: Literal = "_";
|
||||
|
||||
/// The separator between a number literal's explicit base and the number itself.
|
||||
pub const NUMBER_BASE_SEPARATOR: Literal = "_";
|
||||
|
||||
/// The separator between the integer and fractional parts of the number literal.
|
||||
pub const DECIMAL_SEPARATOR: Literal = ".";
|
||||
|
||||
/// The backslash character.
|
||||
pub const SLASH: Literal = r"\";
|
||||
|
||||
/// An escaped [`SLASH`].
|
||||
pub const ESCAPED_SLASH: Literal = r"\\";
|
||||
|
||||
/// The beginning of a byte escape.
|
||||
pub const BYTE_ESCAPE_START: Literal = r"\x";
|
||||
|
||||
/// The beginning of a u16 escape.
|
||||
pub const U16_ESCAPE_START: Literal = r"\u";
|
||||
|
||||
/// The beginning of a u21 escape.
|
||||
pub const U21_ESCAPE_START: Literal = r"\u{";
|
||||
|
||||
/// The end of a u21 escape.
|
||||
pub const U21_ESCAPE_END: Literal = "}";
|
||||
|
||||
/// The beginning of a u32 escape.
|
||||
pub const U32_ESCAPE_START: Literal = r"\U";
|
||||
|
||||
/// The allowable group characters in Enso.
|
||||
pub const GROUP_CHARS: Literal = "()[]{}";
|
||||
|
||||
/// The allowable operator characters in Enso.
|
||||
pub const OPERATOR_CHARS: Literal = ";!$%&*+-/<>?^~|:\\";
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =========================
|
||||
// === Utility Functions ===
|
||||
// =========================
|
||||
|
||||
/// Get the first character of the lexeme, if it exists.
|
||||
pub fn char(literal: &'static str) -> Option<char> {
|
||||
literal.chars().next()
|
||||
}
|
||||
|
||||
/// Get the first character of the lexeme, assuming that it exists.
|
||||
pub fn unsafe_char(literal: &'static str) -> char {
|
||||
char(literal).expect("The first character of the literal exists.")
|
||||
}
|
||||
|
||||
/// Convert the lexeme into a pattern.
|
||||
pub fn into_pattern(literal: &'static str) -> Pattern {
|
||||
literal.into()
|
||||
}
|
||||
|
||||
/// The length of the `literal` in characters (not bytes).
|
||||
pub fn len(literal: &'static str) -> usize {
|
||||
literal.chars().count()
|
||||
}
|
File diff suppressed because it is too large
@ -1,46 +0,0 @@
|
||||
//! This library defines the lexer for the syntax of the Enso language.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod escape;
|
||||
pub mod lexeme;
|
||||
pub mod lexer;
|
||||
pub mod rule;
|
||||
pub mod token;
|
||||
|
||||
|
||||
|
||||
/// A module that can be re-exported under the same name in the generation crate.
|
||||
///
|
||||
/// This is necessary to avoid issues with paths resolving incorrectly when the code is generated from the
|
||||
/// Enso lexer definition. In this project, imports should _not_ be made from the crate root
|
||||
/// _except_ through use of this `library` module.
|
||||
pub mod library {
|
||||
pub use crate::escape;
|
||||
pub use crate::lexeme;
|
||||
pub use crate::rules;
|
||||
pub use crate::token;
|
||||
}
|
||||
|
||||
/// A collection of functionality for working with the lexer definition.
|
||||
pub mod prelude {
|
||||
pub use enso_flexer::prelude::logger::*;
|
||||
pub use enso_flexer::prelude::*;
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
//! This file contains a macro to simplify writing the lexer rules.
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Rules Macro ===
|
||||
// ===================
|
||||
|
||||
/// Define a group of rules for the lexer.
|
||||
///
|
||||
/// All of the rules must be defined for the same `state_name`, which must be the in-scope name of
|
||||
/// the state for which the rules are being defined. Each `pattern` is a non-reference pattern that
|
||||
/// the rule is being defined to match, and `code` is the code that will be executed when the rule
|
||||
/// matches, omitting the (first) `reader` argument.
|
||||
///
|
||||
/// Branches are matched _in order_, from top-to-bottom, much like a standard `match` statement.
|
||||
///
|
||||
/// Please see `lexer.rs` for myriad examples of this macro's use.
|
||||
#[macro_export]
|
||||
macro_rules! rules {
|
||||
($state_name:ident with $($pattern:expr => $path_root:ident $(.$path:ident)* ($($arg:tt)*)),+ $(,)?) => {
|
||||
$($state_name.create_rule(&$pattern,stringify!{
|
||||
$path_root $(.$path)* (reader,$($arg)*)
|
||||
});)*
|
||||
};
|
||||
}
|
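// For orientation, a hypothetical invocation (the state, patterns, and handler names
// below are illustrative and not taken from lexer.rs):
//
//     rules!(root with
//         definition_pattern::spaces() => lexer.on_spaces(),
//         definition_pattern::eof()    => lexer.on_eof(),
//     );
//
// Each branch expands to a `root.create_rule(&pattern, ...)` call whose second argument
// is the stringified handler invocation, with `reader` prepended to its argument list.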
@ -1,778 +0,0 @@
|
||||
//! This file defines the various tokens required by the Enso lexer.
|
||||
//!
|
||||
//! This file makes heavy use of terminology from the Enso design documentation, particularly the
|
||||
//! [syntax](https://enso.org/docs/developer/docs/enso/syntax) documentation. For the sake of
|
||||
//! brevity, many terms will _not_ be defined here.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::lexeme;
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Token ===
|
||||
// =============
|
||||
|
||||
/// A lexer token.
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
pub struct Token {
|
||||
/// The shape of the token.
|
||||
pub shape: Shape,
|
||||
/// The length (in characters) of this token.
|
||||
pub length: usize,
|
||||
/// The number of trailing spaces after this token before the next.
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
/// Constructor.
|
||||
pub fn new(shape: Shape, length: usize, offset: usize) -> Token {
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Get the length that the token takes up in the program source.
|
||||
pub fn source_length(&self) -> usize {
|
||||
self.length + self.offset
|
||||
}
|
||||
}
|
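// An illustrative check (not part of the original file): `source_length` is the token
// body plus its trailing offset, so a three-character name followed by one space spans
// four characters of source.
fn token_source_length_example() {
    let token = Token::new(Shape::Variable("foo".into()), 3, 1);
    assert_eq!(token.source_length(), 4);
}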
||||
|
||||
/// Constructors for the various forms of token.
|
||||
impl Token {
|
||||
/// Construct a token representing a referent identifier.
|
||||
pub fn referent(name: impl Str, offset: usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Referent(str);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a variable identifier.
|
||||
pub fn variable(name: impl Str, offset: usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::Variable(str);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an external identifier.
|
||||
pub fn external(name: impl Str, offset: usize) -> Token {
|
||||
let str = name.into();
|
||||
let length = str.chars().count();
|
||||
let shape = Shape::External(str);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a blank identifier.
|
||||
pub fn blank(offset: usize) -> Token {
|
||||
let shape = Shape::Blank;
|
||||
let length = lexeme::len(lexeme::literal::BLANK_IDENT);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an operator.
|
||||
pub fn operator(name: impl Str, offset: usize) -> Token {
|
||||
let name = name.into();
|
||||
let length = name.chars().count();
|
||||
let shape = Shape::Operator(name);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a modifier operator.
|
||||
pub fn modifier(name: impl Str, offset: usize) -> Token {
|
||||
let name = name.into();
|
||||
let modifier_len = lexeme::len(lexeme::literal::EQUALS);
|
||||
let length = name.chars().count() + modifier_len;
|
||||
let shape = Shape::Modifier(name);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an annotation.
|
||||
pub fn annotation(name_str: impl Str, offset: usize) -> Token {
|
||||
let name = name_str.into();
|
||||
let annotation_len = lexeme::len(lexeme::literal::ANNOTATION_SYMBOL);
|
||||
let length = name.chars().count() + annotation_len;
|
||||
let shape = Shape::Annotation(name);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a number literal.
|
||||
pub fn number(base: impl Str, num: impl Into<String>, offset: usize) -> Token {
|
||||
let number = num.into();
|
||||
let base = base.into();
|
||||
let length = if base.is_empty() {
|
||||
number.chars().count()
|
||||
} else {
|
||||
let base_sep_len = lexeme::len(lexeme::literal::NUMBER_BASE_SEPARATOR);
|
||||
base.chars().count() + base_sep_len + number.chars().count()
|
||||
};
|
||||
let shape = Shape::Number { base, number };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a dangling number base.
|
||||
pub fn dangling_base(base: impl Str, offset: usize) -> Token {
|
||||
let base_str = base.into();
|
||||
let base_sep_len = lexeme::len(lexeme::literal::NUMBER_BASE_SEPARATOR);
|
||||
let length = base_str.chars().count() + base_sep_len;
|
||||
let shape = Shape::DanglingBase(base_str);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a line of text.
|
||||
pub fn text_line(style: TextStyle, segments: Vec<Token>, offset: usize) -> Token {
|
||||
let segments_len: usize = segments.iter().map(|s| s.source_length()).sum();
|
||||
let length = style.length() + segments_len;
|
||||
let shape = Shape::TextLine { style, segments };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an inline block text literal.
|
||||
pub fn text_inline_block(style: TextStyle, segments: Vec<Token>, offset: usize) -> Token {
|
||||
let segments_length: usize = segments.iter().map(|s| s.source_length()).sum();
|
||||
let length = style.length() + segments_length;
|
||||
let shape = Shape::TextInlineBlock { style, segments };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a block of text.
|
||||
pub fn text_block(
|
||||
start_line_ending: LineEnding,
|
||||
style: TextStyle,
|
||||
lines: Vec<Token>,
|
||||
indent: usize,
|
||||
offset: usize,
|
||||
) -> Token {
|
||||
let length = style.length()
|
||||
+ start_line_ending.size()
|
||||
+ lines.iter().fold(0, |l, r| {
|
||||
l + match r.shape {
|
||||
Shape::Line { .. } => indent + r.source_length(),
|
||||
Shape::BlankLine(_) => r.source_length(),
|
||||
_ => unreachable_panic!("Text blocks should only contain lines."),
|
||||
}
|
||||
});
|
||||
let shape = Shape::TextBlock { start_line_ending, style, lines };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an invalid quote.
|
||||
pub fn invalid_quote(bad_quotes: impl Str, offset: usize) -> Token {
|
||||
let bad_string = bad_quotes.into();
|
||||
let length = bad_string.chars().count();
|
||||
let shape = Shape::InvalidQuote(bad_string);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a raw text segment.
|
||||
pub fn text_segment_raw(str: impl Str, offset: usize) -> Token {
|
||||
let string = str.into();
|
||||
let length = string.chars().count();
|
||||
let shape = Shape::TextSegmentRaw(string);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an escape sequence.
|
||||
pub fn text_segment_escape(style: EscapeStyle, repr_str: impl Str, offset: usize) -> Token {
|
||||
let repr = repr_str.into();
|
||||
let length = style.size() + repr.chars().count();
|
||||
let shape = Shape::TextSegmentEscape { style, repr };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an escape sequence using a literal `shape`.
|
||||
pub fn text_segment_escape_from_shape(shape: Shape, offset: usize) -> Token {
|
||||
match &shape {
|
||||
Shape::TextSegmentEscape { style, repr } => {
|
||||
let length = style.size() + repr.chars().count();
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
_ => unreachable_panic!("Shape must be a TextSegmentEscape."),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a token representing an interpolated text segment.
|
||||
pub fn text_segment_interpolate(tokens: Vec<Token>, offset: usize) -> Token {
|
||||
let length_of_interpolation_ticks = 2;
|
||||
let length =
|
||||
length_of_interpolation_ticks + tokens.iter().fold(0, |l, r| l + r.source_length());
|
||||
let shape = Shape::TextSegmentInterpolate { tokens };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an unclosed interpolated text segment.
|
||||
pub fn text_segment_unclosed_interpolate(tokens: Vec<Token>, offset: usize) -> Token {
|
||||
let length_of_interpolation_tick = 1;
|
||||
let length =
|
||||
length_of_interpolation_tick + tokens.iter().fold(0, |l, r| l + r.source_length());
|
||||
let shape = Shape::TextSegmentUnclosedInterpolate { tokens };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a line of tokens.
|
||||
pub fn line(tokens: Vec<Token>, offset: usize, trailing_line_ending: LineEnding) -> Token {
|
||||
let line_ending_len = trailing_line_ending.size();
|
||||
let length = tokens.iter().fold(line_ending_len, |l, r| l + r.source_length());
|
||||
let shape = Shape::Line { tokens, trailing_line_ending };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a blank line.
|
||||
///
|
||||
/// The `offset` for blank lines is from the leftmost column, not from the parent block's
|
||||
/// indentation.
|
||||
pub fn blank_line(offset: usize, trailing_line_ending: LineEnding) -> Token {
|
||||
let length = trailing_line_ending.size();
|
||||
let shape = Shape::BlankLine(trailing_line_ending);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a block.
|
||||
pub fn block(block_type: BlockType, indent: usize, lines: Vec<Token>, offset: usize) -> Token {
|
||||
let length = lines
|
||||
.iter()
|
||||
.map(|line| match line.shape {
|
||||
Shape::Line { .. } => indent + line.source_length(),
|
||||
Shape::BlankLine(_) => line.source_length(),
|
||||
_ => unreachable_panic!("Tokens in a blocks should always be lines."),
|
||||
})
|
||||
.sum();
|
||||
let shape = Shape::Block { block_type, indent, lines };
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an invalid suffix.
|
||||
pub fn invalid_suffix(text: impl Str, offset: usize) -> Token {
|
||||
let text = text.into();
|
||||
let length = text.chars().count();
|
||||
let shape = Shape::InvalidSuffix(text);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing an unrecognised lexeme.
|
||||
pub fn unrecognized(text: impl Str, offset: usize) -> Token {
|
||||
let text = text.into();
|
||||
let length = text.chars().count();
|
||||
let shape = Shape::Unrecognized(text);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
/// Construct a token representing a disable comment.
|
||||
pub fn disable_comment(text: impl Str, offset: usize) -> Token {
|
||||
let text = text.into();
|
||||
let comment_len = lexeme::len(lexeme::literal::COMMENT);
|
||||
let length = text.chars().count() + comment_len;
|
||||
let shape = Shape::DisableComment(text);
|
||||
Token { shape, length, offset }
|
||||
}
|
||||
|
||||
    /// Construct a token representing a documentation comment.
    pub fn doc_comment(lines: Vec<Token>, indent: usize, offset: usize) -> Token {
        let length = lines
            .iter()
            .map(|line| match line.shape {
                Shape::Line { .. } => indent + line.source_length(),
                Shape::BlankLine(_) => line.source_length(),
                _ => unreachable_panic!("Tokens in a doc comment should always be lines."),
            })
            .sum();
        let shape = Shape::DocComment { lines, indent };
        Token { shape, length, offset }
    }
}
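
// --- Editor's illustrative sketch; not part of the original `token.rs` ---
// The constructors above follow one pattern: `length` is the length of the
// literal's delimiters plus the source length of its contents. A minimal check
// of that invariant, assuming `Token::text_line` mirrors the
// `text_inline_block` signature and that `Token::source_length` equals
// `length` when the offset is zero:
#[cfg(test)]
mod token_length_sketch {
    use super::*;

    #[test]
    fn text_line_length_is_delimiters_plus_segments() {
        let segment = Token::text_segment_raw("ab", 0);
        let line = Token::text_line(TextStyle::FormatLine, vec![segment], 0);
        // Two characters of segment content plus the opening and closing quote.
        assert_eq!(line.source_length(), TextStyle::FormatLine.length() + 2);
    }
}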


// =================
// === BlockType ===
// =================

/// The type for an Enso Block token.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum BlockType {
    /// A block made up of arguments to a function.
    Continuous,
    /// A block made up of separate lines.
    Discontinuous,
}



// ==================
// === LineEnding ===
// ==================

/// The type of newline associated with the line.
#[derive(Copy, Clone, Debug, Display, Eq, Hash, PartialEq)]
pub enum LineEnding {
    /// There is no newline.
    None,
    /// The unix-style line-feed (`'\n'`).
    LF,
    /// The windows-style carriage-return, line-feed (`"\r\n"`).
    CRLF,
}

impl LineEnding {
    const NO_LENGTH: usize = 0;

    /// Get the number of rust `char`s that the newline type takes up.
    pub fn size(self) -> usize {
        match self {
            Self::None => Self::NO_LENGTH,
            Self::LF => lexeme::len(lexeme::literal::LF),
            Self::CRLF => lexeme::len(lexeme::literal::CRLF),
        }
    }
}


// === Trait Impls ===

impl Default for LineEnding {
    fn default() -> Self {
        LineEnding::None
    }
}


// =================
// === TextStyle ===
// =================

/// The style of the text literal.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum TextStyle {
    // === Line ===
    /// An interpolated text line literal.
    FormatLine,
    /// A raw text line literal.
    RawLine,
    /// An unclosed text line literal.
    UnclosedLine,

    // === Inline Block ===
    /// A format inline block text literal.
    FormatInlineBlock,
    /// A raw inline block text literal.
    RawInlineBlock,

    // === Block ===
    /// An interpolated text block literal.
    FormatBlock,
    /// A raw text block literal.
    RawBlock,
}

impl TextStyle {
    /// Calculate the length of the delimiters for a particular style of text literal.
    pub fn length(self) -> usize {
        match self {
            TextStyle::FormatLine => lexeme::len(lexeme::literal::FORMAT_QUOTE) * 2,
            TextStyle::RawLine => lexeme::len(lexeme::literal::RAW_QUOTE) * 2,
            TextStyle::FormatInlineBlock => lexeme::len(lexeme::literal::FORMAT_BLOCK_QUOTE),
            TextStyle::RawInlineBlock => lexeme::len(lexeme::literal::RAW_BLOCK_QUOTE),
            TextStyle::UnclosedLine => lexeme::len(lexeme::literal::FORMAT_QUOTE),
            TextStyle::FormatBlock => lexeme::len(lexeme::literal::FORMAT_BLOCK_QUOTE),
            TextStyle::RawBlock => lexeme::len(lexeme::literal::RAW_BLOCK_QUOTE),
        }
    }

    /// Check if the text literal is a line literal.
    pub fn is_line_literal(self) -> bool {
        matches!(self, TextStyle::RawLine | TextStyle::FormatLine | TextStyle::UnclosedLine)
    }

    /// Check if the text literal is an inline block literal.
    pub fn is_inline_block_literal(self) -> bool {
        matches!(self, TextStyle::FormatInlineBlock | TextStyle::RawInlineBlock)
    }

    /// Check if the text literal is a block literal.
    pub fn is_block_literal(self) -> bool {
        matches!(self, TextStyle::FormatBlock | TextStyle::RawBlock)
    }
}
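
// --- Editor's illustrative sketch; not part of the original `token.rs` ---
// A property that follows directly from `length` above: a closed format line
// counts its quote delimiter twice, while the unclosed variant counts it once.
#[cfg(test)]
mod text_style_sketch {
    use super::*;

    #[test]
    fn unclosed_line_has_half_the_delimiters_of_a_closed_line() {
        assert_eq!(TextStyle::FormatLine.length(), 2 * TextStyle::UnclosedLine.length());
    }
}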


// ===================
// === EscapeStyle ===
// ===================

/// A description of the style of escape sequence seen.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum EscapeStyle {
    /// A \xNN-style byte escape.
    Byte,
    /// Unicode 16-bit escape sequence.
    U16,
    /// Unicode 21-bit escape sequence.
    U21,
    /// Unicode 32-bit escape sequence.
    U32,
    /// A literal escape character.
    Literal,
    /// An invalid unicode escape.
    InvalidUnicode,
    /// An invalid escape.
    Invalid,
    /// An escape slash without any following escape.
    Unfinished,
}
impl EscapeStyle {
    const NO_ADDITIONAL_LENGTH: usize = 0;

    /// Get the length taken up in source by the delimiters to an escape type.
    pub fn size(self) -> usize {
        match self {
            EscapeStyle::Byte => lexeme::len(lexeme::literal::BYTE_ESCAPE_START),
            EscapeStyle::Literal => lexeme::len(lexeme::literal::SLASH),
            EscapeStyle::U16 => lexeme::len(lexeme::literal::U16_ESCAPE_START),
            EscapeStyle::U32 => lexeme::len(lexeme::literal::U32_ESCAPE_START),
            EscapeStyle::U21 => {
                let start_len = lexeme::len(lexeme::literal::U21_ESCAPE_START);
                let end_len = lexeme::len(lexeme::literal::U21_ESCAPE_END);
                start_len + end_len
            }
            _ => Self::NO_ADDITIONAL_LENGTH,
        }
    }
}


// =============
|
||||
// === Shape ===
|
||||
// =============
|
||||
|
||||
/// The shapes of tokens needed by the Enso lexer.
|
||||
///
|
||||
/// This is a very small set of shapes, because the [`Token`] type only deals with the tokens that
|
||||
/// the lexer works with, not the full complexity of Enso's syntax.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
pub enum Shape {
|
||||
// === Identifiers ===
|
||||
/// An identifier in referent form.
|
||||
Referent(String),
|
||||
/// An identifier in variable form.
|
||||
Variable(String),
|
||||
/// An identifier not conforming to the Enso identifier rules (e.g. a Java identifier).
|
||||
External(String),
|
||||
/// A blank identifier (`_`).
|
||||
Blank,
|
||||
/// An operator identifier.
|
||||
Operator(String),
|
||||
/// A modifier identifier.
|
||||
Modifier(String),
|
||||
/// An annotation.
|
||||
Annotation(String),
|
||||
|
||||
// === Literals ===
|
||||
/// A literal number.
|
||||
Number {
|
||||
/// The (optional) base for the number to be interpreted in.
|
||||
base: String,
|
||||
/// The number itself, possibly with a decimal point.
|
||||
number: String,
|
||||
},
|
||||
/// A dangling base from a number literal.
|
||||
DanglingBase(String),
|
||||
/// A text line literal.
|
||||
TextLine {
|
||||
/// The type of literal being encoded.
|
||||
style: TextStyle,
|
||||
/// The segments that make up the line of text.
|
||||
segments: Vec<Token>,
|
||||
},
|
||||
/// An inline block text literal.
|
||||
TextInlineBlock {
|
||||
/// The type of literal being encoded.
|
||||
style: TextStyle,
|
||||
/// The segments that make up the line of text.
|
||||
segments: Vec<Token>,
|
||||
},
|
||||
/// A text block literal.
|
||||
TextBlock {
|
||||
/// The line ending that occurs directly after the opening quote marks.
|
||||
start_line_ending: LineEnding,
|
||||
/// The type of literal being encoded.
|
||||
style: TextStyle,
|
||||
/// The lines in the text block literal.
|
||||
lines: Vec<Token>,
|
||||
},
|
||||
/// An invalid quote for a text literal.
|
||||
InvalidQuote(String),
|
||||
/// A segment of a line of text containing only literal text.
|
||||
TextSegmentRaw(String),
|
||||
/// A segment of a line of text that represents an escape sequence.
|
||||
TextSegmentEscape {
|
||||
/// The type of escape being represented.
|
||||
style: EscapeStyle,
|
||||
/// The literal escape sequence.
|
||||
repr: String,
|
||||
},
|
||||
/// A segment of a line of text that contains an interpolated expression.
|
||||
TextSegmentInterpolate {
|
||||
/// The tokens making up the interpolated expression.
|
||||
tokens: Vec<Token>,
|
||||
},
|
||||
/// An interpolated expression that hasn't been closed.
|
||||
TextSegmentUnclosedInterpolate {
|
||||
/// The tokens making up the interpolated expression.
|
||||
tokens: Vec<Token>,
|
||||
},
|
||||
/// An invalid text segment (e.g. unclosed interpolate segment).
|
||||
TextSegmentInvalid(String),
|
||||
|
||||
// === Lines ===
|
||||
/// A line containing tokens.
|
||||
///
|
||||
/// The offset for a line is always zero, as it is contained in a block with a defined
|
||||
/// indentation.
|
||||
Line {
|
||||
/// The tokens on the line.
|
||||
tokens: Vec<Token>,
|
||||
/// The line ending that _ends_ the line.
|
||||
///
|
||||
/// Please note that the concept of 'ending' the line is a bit strange, as blocks are
|
||||
/// treated as tokens in their own right, and hence are included in lines.
|
||||
trailing_line_ending: LineEnding,
|
||||
},
|
||||
/// A blank line.
|
||||
///
|
||||
/// The offset for a blank line is from the leftmost column, as it may be negative from the
|
||||
/// block's indentation level.
|
||||
BlankLine(LineEnding),
|
||||
|
||||
// === Block ===
|
||||
/// A block of tokens.
|
||||
Block {
|
||||
/// The type of the block.
|
||||
block_type: BlockType,
|
||||
/// The leading indentation of the block.
|
||||
indent: usize,
|
||||
/// The lines in the block.
|
||||
lines: Vec<Token>,
|
||||
},
|
||||
|
||||
// === Errors ===
|
||||
/// An invalid suffix.
|
||||
InvalidSuffix(String),
|
||||
/// An unrecognized token.
|
||||
Unrecognized(String),
|
||||
|
||||
// === Comments ===
|
||||
/// A disable comment (`# ...`).
|
||||
DisableComment(String),
|
||||
/// An Enso documentation comment (`## ...`).
|
||||
DocComment {
|
||||
/// The lines in the doc comment body. Each line must contain raw text segments only.
|
||||
lines: Vec<Token>,
|
||||
/// The indentation of the doc comment's body from the baseline.
|
||||
indent: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl Shape {
|
||||
/// Construct an identifier in referent form.
|
||||
pub fn referent(name: impl Into<String>) -> Shape {
|
||||
Shape::Referent(name.into())
|
||||
}
|
||||
|
||||
/// Construct an identifier in variable form.
|
||||
pub fn variable(name: impl Into<String>) -> Shape {
|
||||
Shape::Variable(name.into())
|
||||
}
|
||||
|
||||
/// Construct an identifier in external form.
|
||||
pub fn external(name: impl Into<String>) -> Shape {
|
||||
Shape::External(name.into())
|
||||
}
|
||||
|
||||
/// Construct a blank identifier.
|
||||
///
|
||||
/// This is provided as a function for completeness.
|
||||
pub fn blank() -> Shape {
|
||||
Shape::Blank
|
||||
}
|
||||
|
||||
/// Construct an operator identifier.
|
||||
pub fn operator(opr: impl Into<String>) -> Shape {
|
||||
Shape::Operator(opr.into())
|
||||
}
|
||||
|
||||
/// Construct a modifier identifier.
|
||||
pub fn modifier(opr: impl Into<String>) -> Shape {
|
||||
Shape::Modifier(opr.into())
|
||||
}
|
||||
|
||||
/// Construct an annotation identifier.
|
||||
pub fn annotation(name: impl Into<String>) -> Shape {
|
||||
Shape::Annotation(name.into())
|
||||
}
|
||||
|
||||
/// Construct a number literal.
|
||||
pub fn number(base: impl Into<String>, num: impl Into<String>) -> Shape {
|
||||
let base = base.into();
|
||||
let number = num.into();
|
||||
Shape::Number { base, number }
|
||||
}
|
||||
|
||||
/// Construct a dangling base literal.
|
||||
pub fn dangling_base(base: impl Into<String>) -> Shape {
|
||||
Shape::DanglingBase(base.into())
|
||||
}
|
||||
|
||||
/// Construct a text line literal.
|
||||
pub fn text_line(style: TextStyle, segments: Vec<Token>) -> Shape {
|
||||
Shape::TextLine { style, segments }
|
||||
}
|
||||
|
||||
/// Construct an inline block text literal.
|
||||
pub fn text_inline_block(style: TextStyle, segments: Vec<Token>) -> Shape {
|
||||
Shape::TextInlineBlock { style, segments }
|
||||
}
|
||||
|
||||
/// Construct a text block literal.
|
||||
pub fn text_block(start_line_ending: LineEnding, style: TextStyle, lines: Vec<Token>) -> Shape {
|
||||
Shape::TextBlock { start_line_ending, style, lines }
|
||||
}
|
||||
|
||||
/// Construct an invalid quote literal.
|
||||
pub fn invalid_quote(bad_quotes: impl Str) -> Shape {
|
||||
Shape::InvalidQuote(bad_quotes.into())
|
||||
}
|
||||
|
||||
/// Construct a raw text segment.
|
||||
pub fn text_segment_raw(text: impl Str) -> Shape {
|
||||
Shape::TextSegmentRaw(text.into())
|
||||
}
|
||||
|
||||
/// Construct a text segment containing an escape sequence.
|
||||
pub fn text_segment_escape(style: EscapeStyle, repr_str: impl Str) -> Shape {
|
||||
let repr = repr_str.into();
|
||||
Shape::TextSegmentEscape { style, repr }
|
||||
}
|
||||
|
||||
/// Construct a text segment containing an interpolated expression.
|
||||
pub fn text_segment_interpolate(tokens: Vec<Token>) -> Shape {
|
||||
Shape::TextSegmentInterpolate { tokens }
|
||||
}
|
||||
|
||||
/// Construct a text segment containing an unclosed interpolated expression.
|
||||
pub fn text_segment_unclosed_interpolate(tokens: Vec<Token>) -> Shape {
|
||||
Shape::TextSegmentUnclosedInterpolate { tokens }
|
||||
}
|
||||
|
||||
/// Construct an invalid text segment.
|
||||
pub fn text_segment_invalid(str: impl Str) -> Shape {
|
||||
Shape::TextSegmentInvalid(str.into())
|
||||
}
|
||||
|
||||
/// Construct a line that contains tokens.
|
||||
pub fn line(tokens: Vec<Token>, trailing_line_ending: LineEnding) -> Shape {
|
||||
Shape::Line { tokens, trailing_line_ending }
|
||||
}
|
||||
|
||||
/// Construct a line that is blank.
|
||||
pub fn blank_line(trailing_line_ending: LineEnding) -> Shape {
|
||||
Shape::BlankLine(trailing_line_ending)
|
||||
}
|
||||
|
||||
/// Construct a block containing lines.
|
||||
pub fn block(block_type: BlockType, indent: usize, lines: Vec<Token>) -> Shape {
|
||||
Shape::Block { block_type, indent, lines }
|
||||
}
|
||||
|
||||
/// Construct an invalid suffix.
|
||||
pub fn invalid_suffix(text: impl Into<String>) -> Shape {
|
||||
Shape::InvalidSuffix(text.into())
|
||||
}
|
||||
|
||||
/// Construct an unrecognised token.
|
||||
pub fn unrecognized(text: impl Into<String>) -> Shape {
|
||||
Shape::Unrecognized(text.into())
|
||||
}
|
||||
|
||||
/// Construct a disable comment shape.
|
||||
pub fn disable_comment(text: impl Str) -> Shape {
|
||||
Shape::DisableComment(text.into())
|
||||
}
|
||||
|
||||
/// Construct a doc comment shape.
|
||||
pub fn doc_comment(lines: Vec<Token>, indent: usize) -> Shape {
|
||||
Shape::DocComment { lines, indent }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==============
// === Stream ===
// ==============

/// A representation of the Enso token stream.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Stream {
    /// The tokens in the token stream.
    tokens: Vec<Token>,
}

impl Stream {
    /// Append the provided `token` to the token stream.
    pub fn append(&mut self, token: Token) {
        self.tokens.push(token)
    }

    /// Get a reference to the tokens in the stream.
    pub fn tokens(&self) -> &Vec<Token> {
        &self.tokens
    }

    /// Get the length of the elements in the token stream.
    pub fn tokens_len(&self) -> usize {
        self.tokens.iter().map(|token| token.length + token.offset).sum()
    }
}
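
// --- Editor's illustrative sketch; not part of the original `token.rs` ---
// `tokens_len` sums each token's length together with its leading offset, so a
// stream of two blank identifiers (`_`) separated by a single space spans three
// characters of source. This assumes the blank token reports a length of one.
#[cfg(test)]
mod stream_sketch {
    use super::*;

    #[test]
    fn tokens_len_includes_offsets() {
        let stream = Stream::from(vec![Token::blank(0), Token::blank(1)]);
        assert_eq!(stream.tokens_len(), 3);
    }
}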

/// Get a consuming iterator over the token stream.
impl std::iter::IntoIterator for Stream {
    type Item = Token;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

impl Deref for Stream {
    type Target = Vec<Token>;

    fn deref(&self) -> &Self::Target {
        &self.tokens
    }
}

impl DerefMut for Stream {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.tokens
    }
}


// === Trait Impls ===

impl From<Vec<Token>> for Stream {
    fn from(tokens: Vec<Token>) -> Self {
        Stream { tokens }
    }
}

impl From<Stream> for Vec<Token> {
    fn from(stream: Stream) -> Self {
        stream.tokens
    }
}
@ -1,28 +0,0 @@
|
||||
[package]
|
||||
name = "lexer"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
enso-flexer = { version = "0.2.0", path = "../../flexer" }
|
||||
enso-prelude = { version = "0.2.0", path = "../../../prelude" }
|
||||
lexer-definition = { path = "../definition", version = "0.1.0" }
|
||||
|
||||
[build-dependencies]
|
||||
enso-flexer = { version = "0.2.0", path = "../../flexer" }
|
||||
lexer-definition = { path = "../definition", version = "0.1.0" }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
|
||||
[[bench]]
|
||||
name = "lexer_time_bench"
|
||||
harness = false
|
@ -1,328 +0,0 @@
|
||||
//! This file contains the sources that are replicated many times over for the purposes of
|
||||
//! benchmarking the Enso lexer.
|
||||
|
||||
use criterion::black_box;
|
||||
use criterion::Criterion;
|
||||
use criterion::Throughput;
|
||||
use std::time::Duration;
|
||||
|
||||
|
||||
|
||||
// ===============================
|
||||
// === Benchmark Configuration ===
|
||||
// ===============================
|
||||
|
||||
/// Configures the benchmarking process.
|
||||
pub fn bench_config() -> Criterion {
|
||||
Criterion::default()
|
||||
.measurement_time(Duration::from_secs(60))
|
||||
.warm_up_time(Duration::from_secs(3))
|
||||
.sample_size(25)
|
||||
.retain_baseline("EnsoLexer".to_string())
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === Benchmark Setup ===
|
||||
// =======================
|
||||
|
||||
/// The sizes of text to run the benchmarks over.
|
||||
pub const SIZES: [(usize, &str); 4] =
|
||||
[(1024, "1KB"), (1024 * 100, "100KB"), (1024 * 1024, "1MB"), (1024 * 1024 * 10, "10MB")];
|
||||
|
||||
|
||||
|
||||
// ==============================
// === Benchmarking Utilities ===
// ==============================

/// Execute the provided benchmark for each of the [`SIZES`] above.
pub fn run_bench_sizes(name: &str, input: &str, add_newline: bool, c: &mut Criterion) {
    let mut group = c.benchmark_group(name);
    SIZES.iter().for_each(|(size, size_name)| {
        group.throughput(Throughput::Bytes(*size as u64));
        let input = replicate_to_size(input, *size, add_newline);
        group.bench_function(*size_name, |b| {
            b.iter(|| {
                lexer::run(black_box(input.as_str()));
            })
        });
    })
}

/// This function replicates `input` until it reaches `size` (in bytes).
///
/// If this cannot be done exactly, it will err on the side of over-replication,
/// meaning that the output will be _larger_ than `size` bytes. If the size of
/// the input already exceeds `size`, it is returned unchanged.
pub fn replicate_to_size(input: &str, size: usize, add_newline: bool) -> String {
    let input_size = input.len();
    let times = 1 + (size / input_size);
    let mut input_newline = input.to_string();
    let to_add = if add_newline { '\n' } else { ' ' };
    input_newline.push(to_add);
    input_newline.repeat(times)
}
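
// --- Editor's illustrative sketch; not part of the original benchmark sources ---
// With `add_newline = false` a space is used as the separator, so replicating
// "abc" to 5 bytes yields two separator-terminated copies and deliberately
// overshoots the requested size.
#[cfg(test)]
mod replicate_sketch {
    use super::*;

    #[test]
    fn replication_overshoots_the_requested_size() {
        assert_eq!(replicate_to_size("abc", 5, false), "abc abc ");
    }
}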

/// Replace any windows-style line-endings in `input` with unix-style line-endings.
fn preprocess(input: &str) -> String {
    input.replace("\r\n", "\n")
}


// ==============
|
||||
// === Macros ===
|
||||
// ==============
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! bench {
|
||||
(bench_name = $bench_name:literal; fun_name = $fun_name:ident; bench_input = $bench_input:expr;) => {
|
||||
pub fn $fun_name(c: &mut Criterion) {
|
||||
src::run_bench_sizes($bench_name, $bench_input.as_str(), true, c)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================================
|
||||
// === Literal Benchmark Sources ===
|
||||
// =================================
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub mod literal {
|
||||
use super::*;
|
||||
|
||||
pub mod number {
|
||||
use super::*;
|
||||
|
||||
pub fn integer() -> String {
|
||||
preprocess("12345")
|
||||
}
|
||||
|
||||
pub fn integer_explicit_base() -> String {
|
||||
preprocess("16_a4fd31")
|
||||
}
|
||||
|
||||
pub fn decimal() -> String {
|
||||
preprocess("1.3141")
|
||||
}
|
||||
|
||||
pub fn decimal_explicit_base() -> String {
|
||||
preprocess("10_1.000999")
|
||||
}
|
||||
|
||||
pub fn error_base() -> String {
|
||||
preprocess("10.2_2")
|
||||
}
|
||||
}
|
||||
|
||||
pub mod text {
|
||||
use super::*;
|
||||
|
||||
pub fn format_line() -> String {
|
||||
preprocess(r"'dearest creature in \n creation studying english pronunciation'")
|
||||
}
|
||||
|
||||
pub fn format_inline_block() -> String {
|
||||
preprocess(r"''' An inline block. It's a very good inline block carl \u{AB}")
|
||||
}
|
||||
|
||||
pub fn format_block() -> String {
|
||||
preprocess(
|
||||
r#"''' Here is my block of format text. I can `interpolate + things` like that.
|
||||
It goes on and on and on for `times` times because I feel like it.
|
||||
|
||||
Complex interpolated expression `x -> y ~> x | y` woo!
|
||||
"#,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn raw_line() -> String {
|
||||
preprocess(r#""dearest creature in '''' creation studying english pronunciation""#)
|
||||
}
|
||||
|
||||
pub fn raw_inline_block() -> String {
|
||||
preprocess(r#"""" An inline block. It's a very good inline block carl ""#)
|
||||
}
|
||||
|
||||
pub fn raw_block() -> String {
|
||||
preprocess(
|
||||
r#"""" Here is my block of raw text. `Interpolations` are nothing special here.
|
||||
It goes on and on and on for I can escape \" though.
|
||||
|
||||
It also supports blank lines!
|
||||
"#,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==============================
|
||||
// === Name Benchmark Sources ===
|
||||
// ==============================
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub mod name {
|
||||
use super::*;
|
||||
|
||||
pub fn line_of() -> String {
|
||||
preprocess("Referent_Ident var_ident JavaType _ @annotation ticked_ident' number_1")
|
||||
}
|
||||
|
||||
pub fn invalid_suffix() -> String {
|
||||
preprocess("some_var'iable some_varД")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================================
|
||||
// === Operator Benchmarks Sources ===
|
||||
// ===================================
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub mod operator {
|
||||
use super::*;
|
||||
|
||||
pub fn line_of() -> String {
|
||||
preprocess("+ - * -> ~> <~ <- ! & | /")
|
||||
}
|
||||
|
||||
pub fn dot_call() -> String {
|
||||
preprocess(".== . != .<*> .*> .|>")
|
||||
}
|
||||
|
||||
pub fn invalid_suffix() -> String {
|
||||
preprocess(".... +==")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ================================
|
||||
// === Block Benchmarks Sources ===
|
||||
// ================================
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub mod block {
|
||||
use super::*;
|
||||
|
||||
pub fn top_level() -> String {
|
||||
preprocess("foo\nbar\nbaz")
|
||||
}
|
||||
|
||||
pub fn nested() -> String {
|
||||
preprocess("foo\nbar\n baz\n quux")
|
||||
}
|
||||
|
||||
pub fn deeply_nested() -> String {
|
||||
preprocess(
|
||||
r#"foo
|
||||
bar
|
||||
baz
|
||||
quux
|
||||
bim
|
||||
bam
|
||||
oh
|
||||
no
|
||||
"#,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================================
|
||||
// === Comments Benchmarks Sources ===
|
||||
// ===================================
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub mod comment {
|
||||
use super::*;
|
||||
|
||||
pub fn line() -> String {
|
||||
preprocess("# foo bar baz I have a really long line comment here that goes on and on")
|
||||
}
|
||||
|
||||
pub fn in_line() -> String {
|
||||
preprocess("a + b # A useless comment: add a to b")
|
||||
}
|
||||
|
||||
pub fn doc() -> String {
|
||||
preprocess(
|
||||
r#"## I have a really big doc comment here
|
||||
That just keeps prattling on and on and on.
|
||||
|
||||
With blank lines
|
||||
|
||||
Forever
|
||||
|
||||
and
|
||||
ever
|
||||
|
||||
and
|
||||
|
||||
|
||||
|
||||
|
||||
ever
|
||||
documented
|
||||
"#,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========================
|
||||
// === Combined Benchmarks ===
|
||||
// ===========================
|
||||
|
||||
pub mod combined {
|
||||
use super::*;
|
||||
|
||||
pub fn simple() -> String {
|
||||
preprocess(
|
||||
r#"
|
||||
import Base.Meta
|
||||
|
||||
## Decompose the value using runtime reflection and print its decomposition.
|
||||
Main.print_decomp a b =
|
||||
y = a + b
|
||||
decomp = Meta.decompose y
|
||||
Io.println decomp
|
||||
"#,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn complex() -> String {
|
||||
preprocess(
|
||||
r#"
|
||||
import Base.Meta
|
||||
|
||||
## Frobnicate the doodads by constructing a new type operator through runtime reflection such that
|
||||
it can be passed to another language.
|
||||
|
||||
! WARNING
|
||||
Type-checking code like this is virtually impossible, and it is treated as `Dynamic` inside
|
||||
Enso code.
|
||||
Main.foo a b =
|
||||
y = x -> z ->
|
||||
ty = a.gen_type (~>) (<-) b
|
||||
ty (z x)
|
||||
decomp = Meta.decompose (y a b)
|
||||
Io.println decomp
|
||||
|
||||
## Execute the main function of this project.
|
||||
main =
|
||||
func = Meta.reify (here.foo "My_Name" "my_field")
|
||||
Io.println(func)
|
||||
"#,
|
||||
)
|
||||
}
|
||||
}
|
@ -1,300 +0,0 @@
|
||||
//! This file contains the time-based benchmarks for the Enso lexer.
|
||||
|
||||
|
||||
|
||||
mod lexer_bench_sources;
|
||||
|
||||
use criterion::black_box;
|
||||
use criterion::criterion_group;
|
||||
use criterion::criterion_main;
|
||||
use criterion::Criterion;
|
||||
use criterion::Throughput;
|
||||
use lexer_bench_sources as src;
|
||||
|
||||
|
||||
|
||||
// ==========================
|
||||
// === Literal Benchmarks ===
|
||||
// ==========================
|
||||
|
||||
bench! {
|
||||
bench_name = "Integer";
|
||||
fun_name = bench_literal_number_integer;
|
||||
bench_input = src::literal::number::integer();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Integer Explicit Base";
|
||||
fun_name = bench_literal_number_integer_explicit_base;
|
||||
bench_input = src::literal::number::integer_explicit_base();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Decimal";
|
||||
fun_name = bench_literal_number_decimal;
|
||||
bench_input = src::literal::number::decimal();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Decimal Explicit Base";
|
||||
fun_name = bench_literal_number_decimal_explicit_base;
|
||||
bench_input = src::literal::number::decimal_explicit_base();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Number Error Base";
|
||||
fun_name = bench_literal_number_error_base;
|
||||
bench_input = src::literal::number::error_base();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Text Format Line";
|
||||
fun_name = bench_literal_text_format_line;
|
||||
bench_input = src::literal::text::format_line();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Text Format Inline Block";
|
||||
fun_name = bench_literal_text_format_inline_block;
|
||||
bench_input = src::literal::text::format_inline_block();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Text Format Block";
|
||||
fun_name = bench_literal_text_format_block;
|
||||
bench_input = src::literal::text::format_block();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Text Raw Line";
|
||||
fun_name = bench_literal_text_raw_line;
|
||||
bench_input = src::literal::text::raw_line();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Text Raw Inline Block";
|
||||
fun_name = bench_literal_text_raw_inline_block;
|
||||
bench_input = src::literal::text::raw_inline_block();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Text Raw Block";
|
||||
fun_name = bench_literal_text_raw_block;
|
||||
bench_input = src::literal::text::raw_block();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = literal_benchmarks;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_literal_number_integer,
|
||||
bench_literal_number_integer_explicit_base,
|
||||
bench_literal_number_decimal,
|
||||
bench_literal_number_decimal_explicit_base,
|
||||
bench_literal_number_error_base,
|
||||
bench_literal_text_format_line,
|
||||
bench_literal_text_format_inline_block,
|
||||
bench_literal_text_format_block,
|
||||
bench_literal_text_raw_line,
|
||||
bench_literal_text_raw_inline_block,
|
||||
bench_literal_text_raw_block,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ========================
|
||||
// === Names Benchmarks ===
|
||||
// ========================
|
||||
|
||||
bench! {
|
||||
bench_name = "Line of Names";
|
||||
fun_name = bench_names_line_of;
|
||||
bench_input = src::name::line_of();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Names with invalid Suffixes";
|
||||
fun_name = bench_names_invalid_suffix;
|
||||
bench_input = src::name::invalid_suffix();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = name_benchmarks;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_names_line_of,
|
||||
bench_names_invalid_suffix,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========================
|
||||
// === Operator Benchmarks ===
|
||||
// ===========================
|
||||
|
||||
bench! {
|
||||
bench_name = "Line of Operators";
|
||||
fun_name = bench_operator_line_of;
|
||||
bench_input = src::operator::line_of();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Dot Call Operators";
|
||||
fun_name = bench_operator_dot_call;
|
||||
bench_input = src::operator::dot_call();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Operators with Invalid Suffixes";
|
||||
fun_name = bench_operator_invalid_suffix;
|
||||
bench_input = src::operator::invalid_suffix();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = operator_benchmarks;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_operator_line_of,
|
||||
bench_operator_dot_call,
|
||||
bench_operator_invalid_suffix
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ========================
|
||||
// === Block Benchmarks ===
|
||||
// ========================
|
||||
|
||||
bench! {
|
||||
bench_name = "Top Level Block";
|
||||
fun_name = bench_block_top_level;
|
||||
bench_input = src::block::top_level();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Nested Block";
|
||||
fun_name = bench_block_nested;
|
||||
bench_input = src::block::nested();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Deeply Nested Blocks";
|
||||
fun_name = bench_block_deeply_nested;
|
||||
bench_input = src::block::deeply_nested();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = block_benchmarks;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_block_top_level,
|
||||
bench_block_nested,
|
||||
bench_block_deeply_nested,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==========================
|
||||
// === Comment Benchmarks ===
|
||||
// ==========================
|
||||
|
||||
bench! {
|
||||
bench_name = "Line Comment";
|
||||
fun_name = bench_comment_line;
|
||||
bench_input = src::comment::line();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Comment in Line";
|
||||
fun_name = bench_comment_in_line;
|
||||
bench_input = src::comment::in_line();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Doc Comment";
|
||||
fun_name = bench_comment_doc;
|
||||
bench_input = src::comment::doc();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = comment_benchmarks;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_comment_line,
|
||||
bench_comment_in_line,
|
||||
bench_comment_doc,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========================
|
||||
// === Combined Benchmarks ===
|
||||
// ===========================
|
||||
|
||||
bench! {
|
||||
bench_name = "Simple Combined Example";
|
||||
fun_name = bench_combined_simple;
|
||||
bench_input = src::combined::simple();
|
||||
}
|
||||
|
||||
bench! {
|
||||
bench_name = "Complex Combined Example";
|
||||
fun_name = bench_combined_complex;
|
||||
bench_input = src::combined::complex();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = combined_benchmarks;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_combined_simple,
|
||||
bench_combined_complex,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Comparisons ===
|
||||
// ===================
|
||||
|
||||
fn bench_rust_reader(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("Rust Vector");
|
||||
src::SIZES.iter().for_each(|(size, name)| {
|
||||
group.throughput(Throughput::Bytes(*size as u64));
|
||||
let input = "abcdefghijklmnopqrstuvwxyz".repeat(1 + size / 26);
|
||||
group.bench_function(*name, |b| {
|
||||
b.iter(|| {
|
||||
let mut counter = 0usize;
|
||||
for c in black_box(input.as_str()).chars() {
|
||||
if c == 'f' {
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
counter
|
||||
})
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = rust_comparison;
|
||||
config = src::bench_config();
|
||||
targets =
|
||||
bench_rust_reader,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === The Harness ===
|
||||
// ===================
|
||||
|
||||
criterion_main!(
|
||||
literal_benchmarks,
|
||||
name_benchmarks,
|
||||
operator_benchmarks,
|
||||
block_benchmarks,
|
||||
comment_benchmarks,
|
||||
combined_benchmarks,
|
||||
rust_comparison,
|
||||
);
|
@ -1,34 +0,0 @@
|
||||
use std::io::prelude::*;

use enso_flexer::Definition;
use enso_flexer::State;
use lexer_definition::lexer::EnsoLexer;
use std::fs::File;



/// Generates the lexer engine and saves the result into the file `src/generated/engine.rs`.
///
/// The content of the generated file can be used with the `include!` macro.
fn generate_engine() -> std::io::Result<()> {
    let definition_path = "../definition/src/lexer.rs";
    let output_directory = "src/generated";
    let _ = std::fs::create_dir(output_directory);
    let output_path = "src/generated/engine.rs";
    let definition_error = format!("The lexer definition should exist at {}.", definition_path);
    let output_error = format!("Cannot open output file at {}.", output_path);
    let mut lexer_def = File::open(definition_path).expect(&definition_error);
    let mut contents = String::new();
    let mut file = File::create(output_path).expect(&output_error);
    let lexer = EnsoLexer::define();
    let engine = lexer.specialize().unwrap();
    lexer_def.read_to_string(&mut contents).expect("Unable to read lexer definition.");
    file.write_all(contents.as_bytes()).expect("Unable to write lexer definition.");
    file.write_all("\n".as_bytes())?;
    file.write_all(engine.as_bytes()).expect("Unable to write lexer specialization.");
    Ok(())
}

fn main() -> std::io::Result<()> {
    generate_engine()
}
@ -1,11 +0,0 @@
|
||||
//! This module re-exports the generated lexer sources.
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod engine;
|
||||
|
||||
|
||||
|
@ -1,22 +0,0 @@
|
||||
//! A driver for the Enso lexer.

use crate::prelude::*;

use crate::generated::engine::EnsoLexer;
use crate::library::token;
use crate::prelude::reader::decoder::DecoderUTF8;

use enso_flexer::LexingResult;



// ====================
// === Lexer Driver ===
// ====================

/// Execute the lexer on the provided `input`, assuming utf-8 encoding.
pub fn run(input: impl AsRef<str>) -> LexingResult<token::Stream> {
    let mut lexer = EnsoLexer::new();
    let reader = Reader::new(input.as_ref().as_bytes(), DecoderUTF8());
    lexer.run(reader)
}
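
// --- Editor's illustrative sketch; not part of the original driver ---
// Lexing is a single call; `LexingResult` is defined by `enso_flexer`, so only
// the binding is shown here.
#[allow(dead_code)]
fn run_example() {
    let _result: LexingResult<token::Stream> = run("foo = 10");
}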
@ -1,41 +0,0 @@
|
||||
//! This module exports the interface to the generated Enso lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod generated;
|
||||
pub mod lexer;
|
||||
|
||||
pub use crate::lexer::*;
|
||||
|
||||
|
||||
|
||||
/// Support libraries for the lexer definition.
|
||||
///
|
||||
/// This is an intentional re-export in this crate's namespace.
|
||||
pub mod library {
|
||||
pub use lexer_definition::library::*;
|
||||
}
|
||||
|
||||
|
||||
/// A library of commonly useful functionality.
|
||||
mod prelude {
|
||||
pub use lexer_definition::prelude::*;
|
||||
}
|
@ -1,277 +0,0 @@
|
||||
//! This file contains tests for lexing blocks in the Enso lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod test_utils;
|
||||
|
||||
use lexer_definition::library::*;
|
||||
use test_utils::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
use lexer_definition::token::BlockType;
|
||||
use lexer_definition::token::LineEnding;
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Blocks ===
|
||||
// ==============
|
||||
|
||||
#[test]
|
||||
fn function_call() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"f
|
||||
argument_1
|
||||
argument_2
|
||||
fn a1 a2 a3
|
||||
argument_4
|
||||
argument_5"#,
|
||||
);
|
||||
let block_fn_args = Token::block(
|
||||
BlockType::Continuous,
|
||||
4,
|
||||
vec![
|
||||
Token::line(vec![Token::variable("argument_1", 0)], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("argument_2", 0)], 0, LineEnding::LF),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("fn", 0),
|
||||
Token::variable("a1", 1),
|
||||
Token::variable("a2", 1),
|
||||
Token::variable("a3", 1),
|
||||
],
|
||||
0,
|
||||
LineEnding::LF,
|
||||
),
|
||||
Token::line(vec![Token::variable("argument_4", 0)], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("argument_5", 0)], 0, LineEnding::None),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let top_level_first_line =
|
||||
Token::line(vec![Token::variable("f", 0), block_fn_args], 0, LineEnding::LF);
|
||||
let top_level_block = token::Stream::from(vec![Token::block(
|
||||
BlockType::Continuous,
|
||||
0,
|
||||
vec![top_level_first_line],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, top_level_block);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn empty_lines() {
|
||||
let input = "f\r\n a\n\n b\n";
|
||||
let nested_block = Token::block(
|
||||
BlockType::Continuous,
|
||||
4,
|
||||
vec![
|
||||
Token::line(vec![Token::variable("a", 0)], 0, LineEnding::LF),
|
||||
Token::blank_line(0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("b", 0)], 0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let top_line = Token::line(vec![Token::variable("f", 0), nested_block], 0, LineEnding::CRLF);
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::block(BlockType::Continuous, 0, vec![top_line], 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn top_level() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"
|
||||
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
"#,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::blank_line(0, LineEnding::LF),
|
||||
Token::blank_line(0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("foo", 0)], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("bar", 0)], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("baz", 0)], 0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn with_operator() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"x ->
|
||||
foo x 1
|
||||
"#,
|
||||
);
|
||||
let nested_block = Token::block(
|
||||
BlockType::Discontinuous,
|
||||
4,
|
||||
vec![Token::line(
|
||||
vec![Token::variable("foo", 0), Token::variable("x", 1), Token::number("", "1", 1)],
|
||||
0,
|
||||
LineEnding::LF,
|
||||
)],
|
||||
0,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
BlockType::Continuous,
|
||||
0,
|
||||
vec![Token::line(
|
||||
vec![Token::variable("x", 0), Token::operator("->", 1), nested_block],
|
||||
0,
|
||||
LineEnding::LF,
|
||||
)],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn with_nesting() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"
|
||||
some_long_thing
|
||||
foo ->
|
||||
Bar
|
||||
baz
|
||||
|
||||
quux
|
||||
"#,
|
||||
);
|
||||
let function_block = Token::block(
|
||||
BlockType::Discontinuous,
|
||||
8,
|
||||
vec![
|
||||
Token::line(vec![Token::referent("Bar", 0)], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("baz", 0)], 0, LineEnding::LF),
|
||||
Token::blank_line(0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let foo_block = Token::block(
|
||||
BlockType::Continuous,
|
||||
4,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![Token::variable("foo", 0), Token::operator("->", 1), function_block],
|
||||
0,
|
||||
LineEnding::LF,
|
||||
),
|
||||
Token::line(vec![Token::variable("quux", 0)], 0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::blank_line(0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("some_long_thing", 0), foo_block], 0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_dedent() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"
|
||||
some_long_thing
|
||||
foo ->
|
||||
Bar
|
||||
baz
|
||||
quux
|
||||
"#,
|
||||
);
|
||||
let function_block = Token::block(
|
||||
BlockType::Discontinuous,
|
||||
8,
|
||||
vec![
|
||||
Token::line(vec![Token::referent("Bar", 0)], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("baz", 0)], 0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let foo_block = Token::block(
|
||||
BlockType::Continuous,
|
||||
4,
|
||||
vec![Token::line(
|
||||
vec![Token::variable("foo", 0), Token::operator("->", 1), function_block],
|
||||
0,
|
||||
LineEnding::LF,
|
||||
)],
|
||||
0,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::blank_line(0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("some_long_thing", 0), foo_block], 0, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("quux", 0)], 0, LineEnding::LF),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extra_indented_blank_lines() {
|
||||
let input = "a\n b\n \n \n c";
|
||||
let indented_block = Token::block(
|
||||
BlockType::Continuous,
|
||||
4,
|
||||
vec![
|
||||
Token::line(vec![Token::variable("b", 0)], 0, LineEnding::LF),
|
||||
Token::blank_line(8, LineEnding::LF),
|
||||
Token::blank_line(2, LineEnding::LF),
|
||||
Token::line(vec![Token::variable("c", 0)], 0, LineEnding::None),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let top_level_line =
|
||||
Token::line(vec![Token::variable("a", 0), indented_block], 0, LineEnding::LF);
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::block(BlockType::Continuous, 0, vec![top_level_line], 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn length_unix() {
|
||||
let input = "a\n b\n c";
|
||||
assert_block_has_length(input, 13);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn length_windows() {
|
||||
let input = "a\r\n b\r\n c";
|
||||
assert_block_has_length(input, 15);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn length_mixed() {
|
||||
let input = "a\r\n b\n c\n d";
|
||||
assert_block_has_length(input, 20);
|
||||
}
|
@ -1,630 +0,0 @@
|
||||
//! This file contains tests for lexing full-on Enso with the lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod test_utils;
|
||||
|
||||
use lexer_definition::library::*;
|
||||
use test_utils::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Combined ===
|
||||
// ================
|
||||
|
||||
#[test]
|
||||
fn method_definition() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"## Traverse the heterogeneous list, applying the provided polymorphic function
|
||||
wherever it matches.
|
||||
@Tail_Call
|
||||
map : forall ts ts' => (this : H_List ts) -> (exists a b . a ~> b) -> H_List ts'
|
||||
map this fn -> case this.types of
|
||||
Cons x xs ->
|
||||
x' = fn x
|
||||
x.Cons (map xs)
|
||||
x -> fn x
|
||||
"#,
|
||||
);
|
||||
let doc_comment = Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw(
|
||||
"Traverse the heterogeneous list, applying the provided polymorphic \
|
||||
function",
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("wherever it matches.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
4,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
);
|
||||
let annotation = Token::line(vec![Token::annotation("Tail_Call", 0)], 0, token::LineEnding::LF);
|
||||
let signature = Token::line(
|
||||
vec![
|
||||
Token::variable("map", 0),
|
||||
Token::operator(":", 1),
|
||||
Token::variable("forall", 1),
|
||||
Token::variable("ts", 1),
|
||||
Token::variable("ts'", 1),
|
||||
Token::operator("=>", 1),
|
||||
Token::operator("(", 1),
|
||||
Token::variable("this", 0),
|
||||
Token::operator(":", 1),
|
||||
Token::referent("H_List", 1),
|
||||
Token::variable("ts", 1),
|
||||
Token::operator(")", 0),
|
||||
Token::operator("->", 1),
|
||||
Token::operator("(", 1),
|
||||
Token::variable("exists", 0),
|
||||
Token::variable("a", 1),
|
||||
Token::variable("b", 1),
|
||||
Token::operator(".", 1),
|
||||
Token::variable("a", 1),
|
||||
Token::operator("~>", 1),
|
||||
Token::variable("b", 1),
|
||||
Token::operator(")", 0),
|
||||
Token::operator("->", 1),
|
||||
Token::referent("H_List", 1),
|
||||
Token::variable("ts'", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
);
|
||||
let cons_branch_body = Token::block(
|
||||
token::BlockType::Discontinuous,
|
||||
8,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("x'", 0),
|
||||
Token::operator("=", 1),
|
||||
Token::variable("fn", 1),
|
||||
Token::variable("x", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("x", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Cons", 0),
|
||||
Token::operator("(", 1),
|
||||
Token::variable("map", 0),
|
||||
Token::variable("xs", 1),
|
||||
Token::operator(")", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let case_body = Token::block(
|
||||
token::BlockType::Continuous,
|
||||
4,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![
|
||||
Token::referent("Cons", 0),
|
||||
Token::variable("x", 1),
|
||||
Token::variable("xs", 1),
|
||||
Token::operator("->", 1),
|
||||
cons_branch_body,
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("x", 0),
|
||||
Token::operator("->", 1),
|
||||
Token::variable("fn", 1),
|
||||
Token::variable("x", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let function = Token::line(
|
||||
vec![
|
||||
Token::variable("map", 0),
|
||||
Token::variable("this", 1),
|
||||
Token::variable("fn", 1),
|
||||
Token::operator("->", 1),
|
||||
Token::variable("case", 1),
|
||||
Token::variable("this", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("types", 0),
|
||||
Token::variable("of", 1),
|
||||
case_body,
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![doc_comment, annotation, signature, function],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn complex_type() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"
|
||||
type Maybe a
|
||||
type Just item:a
|
||||
Nothing
|
||||
|
||||
is_just = case this of
|
||||
Just _ -> True
|
||||
Nothing -> False
|
||||
"#,
|
||||
);
|
||||
let case_block = Token::block(
|
||||
token::BlockType::Continuous,
|
||||
8,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![
|
||||
Token::referent("Just", 0),
|
||||
Token::blank(1),
|
||||
Token::operator("->", 2),
|
||||
Token::referent("True", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::referent("Nothing", 0),
|
||||
Token::operator("->", 1),
|
||||
Token::referent("False", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let type_body = Token::block(
|
||||
token::BlockType::Continuous,
|
||||
4,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("type", 0),
|
||||
Token::referent("Just", 1),
|
||||
Token::variable("item", 1),
|
||||
Token::operator(":", 0),
|
||||
Token::variable("a", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(vec![Token::referent("Nothing", 0)], 0, token::LineEnding::LF),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("is_just", 0),
|
||||
Token::operator("=", 1),
|
||||
Token::variable("case", 1),
|
||||
Token::variable("this", 1),
|
||||
Token::variable("of", 1),
|
||||
case_block,
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
0,
|
||||
);
|
||||
let complex_type = Token::line(
|
||||
vec![
|
||||
Token::variable("type", 0),
|
||||
Token::referent("Maybe", 1),
|
||||
Token::variable("a", 1),
|
||||
type_body,
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![Token::blank_line(0, token::LineEnding::LF), complex_type],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn imports_exports() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"import Base.List
|
||||
import Base.Number.Extensions
|
||||
from Standard.Builtins import Unit, Number, Integer, Any, True, False
|
||||
|
||||
from Standard.Builtins export all
|
||||
|
||||
from Base.List export Nil, Cons
|
||||
from Base.Number.Extensions export all hiding Math
|
||||
|
||||
polyglot java import com.ibm.icu.text.BreakIterator
|
||||
polyglot java import org.enso.base.Text_Utils
|
||||
"#,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("import", 0),
|
||||
Token::referent("Base", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("List", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("import", 0),
|
||||
Token::referent("Base", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Number", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Extensions", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("from", 0),
|
||||
Token::referent("Standard", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Builtins", 0),
|
||||
Token::variable("import", 1),
|
||||
Token::referent("Unit", 1),
|
||||
Token::operator(",", 0),
|
||||
Token::referent("Number", 1),
|
||||
Token::operator(",", 0),
|
||||
Token::referent("Integer", 1),
|
||||
Token::operator(",", 0),
|
||||
Token::referent("Any", 1),
|
||||
Token::operator(",", 0),
|
||||
Token::referent("True", 1),
|
||||
Token::operator(",", 0),
|
||||
Token::referent("False", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("from", 0),
|
||||
Token::referent("Standard", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Builtins", 0),
|
||||
Token::variable("export", 1),
|
||||
Token::variable("all", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("from", 0),
|
||||
Token::referent("Base", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("List", 0),
|
||||
Token::variable("export", 1),
|
||||
Token::referent("Nil", 1),
|
||||
Token::operator(",", 0),
|
||||
Token::referent("Cons", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("from", 0),
|
||||
Token::referent("Base", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Number", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Extensions", 0),
|
||||
Token::variable("export", 1),
|
||||
Token::variable("all", 1),
|
||||
Token::variable("hiding", 1),
|
||||
Token::referent("Math", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("polyglot", 0),
|
||||
Token::variable("java", 1),
|
||||
Token::variable("import", 1),
|
||||
Token::variable("com", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("ibm", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("icu", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("text", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::external("BreakIterator", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("polyglot", 0),
|
||||
Token::variable("java", 1),
|
||||
Token::variable("import", 1),
|
||||
Token::variable("org", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("enso", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("base", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Text_Utils", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn some_stdlib() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"from Standard.Base import all
|
||||
|
||||
## The top-level entry point for a test suite.
|
||||
type Suite specs
|
||||
|
||||
## PRIVATE
|
||||
type Spec name behaviors
|
||||
|
||||
## PRIVATE
|
||||
type Behavior name result
|
||||
|
||||
## PRIVATE
|
||||
Behavior.is_fail = this.result.is_fail
|
||||
|
||||
## PRIVATE
|
||||
Spec.is_fail = this.behaviors.any is_fail
|
||||
|
||||
## PRIVATE
|
||||
Suite.is_fail = this.specs.any is_fail
|
||||
"#,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("from", 0),
|
||||
Token::referent("Standard", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::referent("Base", 0),
|
||||
Token::variable("import", 1),
|
||||
Token::variable("all", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw(
|
||||
"The top-level entry point for a test suite.",
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("type", 0),
|
||||
Token::referent("Suite", 1),
|
||||
Token::variable("specs", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("PRIVATE", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("type", 0),
|
||||
Token::referent("Spec", 1),
|
||||
Token::variable("name", 1),
|
||||
Token::variable("behaviors", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("PRIVATE", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::variable("type", 0),
|
||||
Token::referent("Behavior", 1),
|
||||
Token::variable("name", 1),
|
||||
Token::variable("result", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("PRIVATE", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::referent("Behavior", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("is_fail", 0),
|
||||
Token::operator("=", 1),
|
||||
Token::variable("this", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("result", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("is_fail", 0),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("PRIVATE", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::referent("Spec", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("is_fail", 0),
|
||||
Token::operator("=", 1),
|
||||
Token::variable("this", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("behaviors", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("any", 0),
|
||||
Token::variable("is_fail", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("PRIVATE", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::line(
|
||||
vec![
|
||||
Token::referent("Suite", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("is_fail", 0),
|
||||
Token::operator("=", 1),
|
||||
Token::variable("this", 1),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("specs", 0),
|
||||
Token::operator(".", 0),
|
||||
Token::variable("any", 0),
|
||||
Token::variable("is_fail", 1),
|
||||
],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
@ -1,297 +0,0 @@
|
||||
//! This file contains tests for lexing comments in the Enso lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod test_utils;
|
||||
|
||||
use lexer_definition::library::*;
|
||||
use test_utils::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Comments ===
|
||||
// ================
|
||||
|
||||
#[test]
|
||||
fn disable_eof() {
|
||||
let input = "# Here is a nice long comment string.";
|
||||
let expected = token::Stream::from(vec![Token::disable_comment(
|
||||
" Here is a nice long comment string.",
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disable_lf() {
|
||||
let input = "# Here is a nice long comment string.\n";
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![Token::line(
|
||||
vec![Token::disable_comment(" Here is a nice long comment string.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disable_crlf() {
|
||||
let input = "# Here is a nice long comment string.\r\n";
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![Token::line(
|
||||
vec![Token::disable_comment(" Here is a nice long comment string.", 0)],
|
||||
0,
|
||||
token::LineEnding::CRLF,
|
||||
)],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disable_in_line() {
|
||||
let input = "a + b <*> N # Compare the frobnicators.";
|
||||
let expected = token::Stream::from(vec![
|
||||
Token::variable("a", 0),
|
||||
Token::operator("+", 1),
|
||||
Token::variable("b", 1),
|
||||
Token::operator("<*>", 1),
|
||||
Token::referent("N", 1),
|
||||
Token::disable_comment(" Compare the frobnicators.", 1),
|
||||
]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disable_in_interpolate() {
|
||||
let input = "'String `1 + 1 # add` stuff.'";
|
||||
let expected = token::Stream::from(vec![Token::text_line(
|
||||
token::TextStyle::FormatLine,
|
||||
vec![
|
||||
Token::text_segment_raw("String ", 0),
|
||||
Token::text_segment_interpolate(
|
||||
vec![
|
||||
Token::number("", "1", 0),
|
||||
Token::operator("+", 1),
|
||||
Token::number("", "1", 1),
|
||||
Token::unrecognized("#", 1),
|
||||
Token::variable("add", 1),
|
||||
],
|
||||
0,
|
||||
),
|
||||
Token::text_segment_raw(" stuff.", 0),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doc_single_line_eof() {
|
||||
let input = "## Foo bar baz";
|
||||
let expected = token::Stream::from(vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("Foo bar baz", 0)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doc_single_line_lf() {
|
||||
let input = "## Foo bar baz\n";
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("Foo bar baz", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::None),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doc_single_line_crlf() {
|
||||
let input = "## Foo bar baz\r\n";
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::line(
|
||||
vec![Token::doc_comment(
|
||||
vec![Token::line(
|
||||
vec![Token::text_segment_raw("Foo bar baz", 0)],
|
||||
0,
|
||||
token::LineEnding::CRLF,
|
||||
)],
|
||||
3,
|
||||
0,
|
||||
)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::None),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doc_in_interpolate() {
|
||||
let input = "'String `1 + 1 ## add` stuff.'";
|
||||
let expected = token::Stream::from(vec![Token::text_line(
|
||||
token::TextStyle::FormatLine,
|
||||
vec![
|
||||
Token::text_segment_raw("String ", 0),
|
||||
Token::text_segment_interpolate(
|
||||
vec![
|
||||
Token::number("", "1", 0),
|
||||
Token::operator("+", 1),
|
||||
Token::number("", "1", 1),
|
||||
Token::unrecognized("##", 1),
|
||||
Token::variable("add", 1),
|
||||
],
|
||||
0,
|
||||
),
|
||||
Token::text_segment_raw(" stuff.", 0),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doc_multi_line() {
|
||||
let input = make_unix_line_endings(
|
||||
r#"## Here is a doc comment.
|
||||
It spans multiple lines.
|
||||
Some are indented much further.
|
||||
And this is okay.
|
||||
|
||||
It keeps going, even with blank lines.
|
||||
Until the indentation decreases back.
|
||||
|
||||
trailing_blanks_not_part_of_comment"#,
|
||||
);
|
||||
let doc_comment = Token::doc_comment(
|
||||
vec![
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("Here is a doc comment.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("It spans multiple lines.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw(" Some are indented much further.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw(" And this is okay.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("It keeps going, even with blank lines.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("Until the indentation decreases back.", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
],
|
||||
4,
|
||||
0,
|
||||
);
|
||||
let expected = token::Stream::from(vec![Token::block(
|
||||
token::BlockType::Continuous,
|
||||
0,
|
||||
vec![
|
||||
Token::line(vec![doc_comment], 0, token::LineEnding::None),
|
||||
Token::blank_line(0, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::variable("trailing_blanks_not_part_of_comment", 0)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
],
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doc_mixed_line_endings() {
|
||||
let input = "## Start a doc comment\n It has indent 3.\r\n \n An indented blank too.";
|
||||
let expected = token::Stream::from(vec![Token::doc_comment(
|
||||
vec![
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("Start a doc comment", 0)],
|
||||
0,
|
||||
token::LineEnding::LF,
|
||||
),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw("It has indent 3.", 0)],
|
||||
0,
|
||||
token::LineEnding::CRLF,
|
||||
),
|
||||
Token::blank_line(4, token::LineEnding::LF),
|
||||
Token::line(
|
||||
vec![Token::text_segment_raw(" An indented blank too.", 0)],
|
||||
0,
|
||||
token::LineEnding::None,
|
||||
),
|
||||
],
|
||||
3,
|
||||
0,
|
||||
)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
@ -1,179 +0,0 @@
|
||||
//! This file contains tests for lexing identifiers in the Enso lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod test_utils;
|
||||
|
||||
use lexer_definition::library::*;
|
||||
use test_utils::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Identifiers ===
|
||||
// ===================
|
||||
|
||||
#[test]
|
||||
fn variable_ident() {
|
||||
let input = "some_variable_name";
|
||||
let expected = token::Stream::from(vec![Token::variable("some_variable_name", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn referent_ident() {
|
||||
let input = "Some_Referent_Name";
|
||||
let expected = token::Stream::from(vec![Token::referent("Some_Referent_Name", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn external_ident() {
|
||||
let input = "__camelCaseIdentifier";
|
||||
let expected = token::Stream::from(vec![Token::external("__camelCaseIdentifier", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn blank_ident() {
|
||||
let input = "_";
|
||||
let expected = token::Stream::from(vec![Token::blank(0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn annotation() {
|
||||
let input = "@debug";
|
||||
let expected = token::Stream::from(vec![Token::annotation("debug", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ticked_variable_ident() {
|
||||
let input = "some_variable_name'";
|
||||
let expected = token::Stream::from(vec![Token::variable("some_variable_name'", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ticked_referent_ident() {
|
||||
let input = "Some_Referent_Name'";
|
||||
let expected = token::Stream::from(vec![Token::referent("Some_Referent_Name'", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ticked_annotation() {
|
||||
let input = "@debug'";
|
||||
let expected = token::Stream::from(vec![Token::annotation("debug'", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_ticked_variable_ident() {
|
||||
let input = "some_variable_name'''";
|
||||
let expected = token::Stream::from(vec![Token::variable("some_variable_name'''", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_ticked_referent_ident() {
|
||||
let input = "Some_Referent_Name'''";
|
||||
let expected = token::Stream::from(vec![Token::referent("Some_Referent_Name'''", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_ticked_annotation() {
|
||||
let input = "@debug''";
|
||||
let expected = token::Stream::from(vec![Token::annotation("debug''", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn variable_with_numbers() {
|
||||
let input = "some0_1";
|
||||
let expected = token::Stream::from(vec![Token::variable("some0_1", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn referent_with_numbers() {
|
||||
let input = "Some_1821";
|
||||
let expected = token::Stream::from(vec![Token::referent("Some_1821", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn annotation_with_numbers() {
|
||||
let input = "@debug_1";
|
||||
let expected = token::Stream::from(vec![Token::annotation("debug_1", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tick_not_at_end_variable() {
|
||||
let input = "some_var'iable";
|
||||
let expected = token::Stream::from(vec![
|
||||
Token::variable("some_var'", 0),
|
||||
Token::invalid_suffix("iable", 0),
|
||||
]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trailing_underscore() {
|
||||
let input = "some_var_";
|
||||
let expected = token::Stream::from(vec![Token::external("some_var_", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trailing_underscore_with_tick() {
|
||||
let input = "some_var_'";
|
||||
let expected = token::Stream::from(vec![Token::external("some_var_'", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_suffix() {
|
||||
let input = "some_varД";
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::variable("some_var", 0), Token::invalid_suffix("Д", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unrecognized_token() {
|
||||
let input = "some_var@";
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::variable("some_var", 0), Token::unrecognized("@", 0)]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chained_identifiers() {
|
||||
let input = "my_func A' someJavaValue some_python_value";
|
||||
let expected = token::Stream::from(vec![
|
||||
Token::variable("my_func", 0),
|
||||
Token::referent("A'", 1),
|
||||
Token::external("someJavaValue", 1),
|
||||
Token::variable("some_python_value", 1),
|
||||
]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
@ -1,86 +0,0 @@
|
||||
//! This file contains tests for lexing number literals in the Enso lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod test_utils;
|
||||
|
||||
use lexer_definition::library::*;
|
||||
use test_utils::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Numbers ===
|
||||
// ===============
|
||||
|
||||
#[test]
|
||||
fn integer() {
|
||||
let input = "13831";
|
||||
let expected = token::Stream::from(vec![Token::number("", "13831", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integer_with_explicit_base() {
|
||||
let input = "10_13831";
|
||||
let expected = token::Stream::from(vec![Token::number("10", "13831", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dangling_base() {
|
||||
let input = "10_";
|
||||
let expected = token::Stream::from(vec![Token::dangling_base("10", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hex_number() {
|
||||
let input = "16_ff";
|
||||
let expected = token::Stream::from(vec![Token::number("16", "ff", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decimal() {
|
||||
let input = "2.71828";
|
||||
let expected = token::Stream::from(vec![Token::number("", "2.71828", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decimal_with_explicit_base() {
|
||||
let input = "10_2.71828";
|
||||
let expected = token::Stream::from(vec![Token::number("10", "2.71828", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_base() {
|
||||
let input = "10.2_2";
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::number("", "10.2", 0), Token::invalid_suffix("_2", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_number() {
|
||||
let input = " 10.2";
|
||||
let expected = token::Stream::from(vec![Token::number("", "10.2", 4)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
@ -1,238 +0,0 @@
|
||||
//! This file contains tests for lexing operators in the Enso lexer.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(unsafe_code)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod test_utils;
|
||||
|
||||
use lexer_definition::library::*;
|
||||
use test_utils::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === Operators ===
|
||||
// =================
|
||||
|
||||
#[test]
|
||||
fn function_operator() {
|
||||
let input = "->";
|
||||
let expected = token::Stream::from(vec![Token::operator("->", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bind_operator() {
|
||||
let input = "<-";
|
||||
let expected = token::Stream::from(vec![Token::operator("<-", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn left_pipe_operator() {
|
||||
let input = "<|";
|
||||
let expected = token::Stream::from(vec![Token::operator("<|", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn right_pipe_operator() {
|
||||
let input = "|>";
|
||||
let expected = token::Stream::from(vec![Token::operator("|>", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn eq_operator() {
|
||||
let input = "=";
|
||||
let expected = token::Stream::from(vec![Token::operator("=", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn eq_compare_operator() {
|
||||
let input = "==";
|
||||
let expected = token::Stream::from(vec![Token::operator("==", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn geq_operator() {
|
||||
let input = ">=";
|
||||
let expected = token::Stream::from(vec![Token::operator(">=", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn neq_operator() {
|
||||
let input = "!=";
|
||||
let expected = token::Stream::from(vec![Token::operator("!=", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_operator() {
|
||||
let input = ".";
|
||||
let expected = token::Stream::from(vec![Token::operator(".", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn comma_operator() {
|
||||
let input = ",";
|
||||
let expected = token::Stream::from(vec![Token::operator(",", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn double_dot_operator() {
|
||||
let input = "..";
|
||||
let expected = token::Stream::from(vec![Token::operator("..", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn triple_dot_operator() {
|
||||
let input = "...";
|
||||
let expected = token::Stream::from(vec![Token::operator("...", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_operator() {
|
||||
let input = "!";
|
||||
let expected = token::Stream::from(vec![Token::operator("!", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn type_ascription_operator() {
|
||||
let input = ":";
|
||||
let expected = token::Stream::from(vec![Token::operator(":", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn in_operator() {
|
||||
let input = "in";
|
||||
let expected = token::Stream::from(vec![Token::operator("in", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn typeset_union_operator() {
|
||||
let input = "|";
|
||||
let expected = token::Stream::from(vec![Token::operator("|", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn typeset_intersection_operator() {
|
||||
let input = "&";
|
||||
let expected = token::Stream::from(vec![Token::operator("&", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn typeset_subtraction_operator() {
|
||||
let input = "\\";
|
||||
let expected = token::Stream::from(vec![Token::operator("\\", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_left_operator() {
|
||||
let input = "<!!-";
|
||||
let expected = token::Stream::from(vec![Token::operator("<!!-", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_right_operator() {
|
||||
let input = "-->>";
|
||||
let expected = token::Stream::from(vec![Token::operator("-->>", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn modifier_plus() {
|
||||
let input = "+=";
|
||||
let expected = token::Stream::from(vec![Token::modifier("+", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn modifier_minus() {
|
||||
let input = "-=";
|
||||
let expected = token::Stream::from(vec![Token::modifier("-", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_modifier() {
|
||||
let input = "<%=";
|
||||
let expected = token::Stream::from(vec![Token::modifier("<%", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_eq_suffix() {
|
||||
let input = "===";
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::operator("==", 0), Token::invalid_suffix("=", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_dots_suffix() {
|
||||
let input = "....";
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::operator("...", 0), Token::invalid_suffix(".", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_modifier_suffix() {
|
||||
let input = "+==";
|
||||
let expected =
|
||||
token::Stream::from(vec![Token::operator("+", 0), Token::invalid_suffix("==", 0)]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_call_operator() {
|
||||
let input = ".+ .<*>";
|
||||
let expected = token::Stream::from(vec![
|
||||
Token::operator(".", 0),
|
||||
Token::operator("+", 0),
|
||||
Token::operator(".", 1),
|
||||
Token::operator("<*>", 0),
|
||||
]);
|
||||
assert_lexes(input, expected)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_eq_operator() {
|
||||
let input = ".== . !=";
|
||||
let expected = token::Stream::from(vec![
|
||||
Token::operator(".", 0),
|
||||
Token::operator("==", 0),
|
||||
Token::operator(".", 1),
|
||||
Token::operator("!=", 2),
|
||||
]);
|
||||
assert_lexes(input, expected);
|
||||
}
|
File diff suppressed because it is too large
@ -1,62 +0,0 @@
|
||||
//! Utilities for testing the Enso lexer.
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(dead_code)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
use enso_flexer::*;
|
||||
use lexer_definition::library::*;
|
||||
|
||||
use lexer_definition::library::token::Token;
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === Utilities ===
|
||||
// =================
|
||||
|
||||
/// Assert that `result` is a success with tokens `expected`.
|
||||
pub fn assert_succeeds_as(result: &LexingResult<token::Stream>, expected: token::Stream) {
|
||||
match result.kind {
|
||||
ResultKind::Success => assert_eq!(result.tokens, expected),
|
||||
_ => panic!("Lexing failed."),
|
||||
}
|
||||
}
|
||||
|
||||
/// Assert that the provided input lexes as `expected`.
|
||||
pub fn assert_lexes(input: impl AsRef<str>, expected: token::Stream) {
|
||||
let input_len = input.as_ref().chars().count();
|
||||
let result = lex(input);
|
||||
assert_succeeds_as(&result, expected);
|
||||
let tokens_vec: Vec<_> = result.tokens.into();
|
||||
let total_length: usize = tokens_vec.iter().map(|token| token.offset + token.length).sum();
|
||||
assert_eq!(total_length, input_len);
|
||||
}
|
||||
|
||||
/// Lex the provided string.
|
||||
pub fn lex(input: impl AsRef<str>) -> LexingResult<token::Stream> {
|
||||
lexer::run(input)
|
||||
}
|
||||
|
||||
/// Asserts that the input is a block and has a length equal to `expected_length`.
|
||||
pub fn assert_block_has_length(input: impl AsRef<str>, expected_length: usize) {
|
||||
let result = lex(input);
|
||||
match result.kind {
|
||||
ResultKind::Success => {
|
||||
let tokens = result.tokens.tokens();
|
||||
match tokens.first().expect("Token should be present.") {
|
||||
Token { shape: token::Shape::Block { .. }, length, .. } =>
|
||||
assert_eq!(*length, expected_length),
|
||||
_ => panic!("Token not a block."),
|
||||
}
|
||||
}
|
||||
_ => panic!("Lexing failed"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes the test text have unix line endings to ensure consistency regardless of git checkout
|
||||
/// style.
|
||||
pub fn make_unix_line_endings(input: &str) -> String {
|
||||
let string = String::from(input);
|
||||
string.chars().filter(|c| *c != '\r').collect()
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "parser-jni"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
description = "A parser for the Enso language"
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/enso/lib/rust/parser"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../../LICENSE"
|
||||
|
||||
keywords = ["parser"]
|
||||
categories = ["parsing"]
|
||||
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
test = true
|
||||
bench = true
|
||||
|
||||
[dependencies]
|
||||
jni = { version = "0.19.0" }
|
||||
ast-new = { version = "0.1.0", path = "../ast" }
|
@ -1,125 +0,0 @@
|
||||
//! This module exports JNI interface for parser methods implemented in Rust.
|
||||
//!
|
||||
//! The basic steps to add a new method are the following:
|
||||
//! 1. Add the new method in Scala (in `org.enso.parser.Parser`).
|
||||
//! 2. (Optional) Run `scalac Parser.scala; javah Parser` to generate the C API in `Parser.h`.
|
||||
//!    Note that you can skip this step. It is merely guidance for you, as it generates
|
||||
//! the correct function names and type signatures of all `Parser` native methods.
|
||||
//! Generally, the method interface is going to have the following shape:
|
||||
//! ```c
|
||||
//! JNIEXPORT $returnType JNICALL Java_$package_$className_$methodName
|
||||
//! (JNIEnv* env, jobject this, $argType1 $arg1, $argType2 $arg2)
|
||||
//! ```
|
||||
//! For example if the definition is:
|
||||
//! ```scala
|
||||
//! package org.enso.parser
|
||||
//!
|
||||
//! class Parser {
|
||||
//! @native def newMethod(string: String, array: Array[Int])
|
||||
//! }
|
||||
//! ```
|
||||
//! Then the JNI API is going to be:
|
||||
//! ```c
|
||||
//! JNIEXPORT jobject JNICALL Java_org_enso_parser_Parser_newMethod
|
||||
//! (JNIEnv* env, jobject this, jstring string, jintArray array)
|
||||
//! ```
|
||||
//! The list of all available types can be found in
|
||||
//! [oracle documentation](https://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html).
|
||||
//! 3. Implement the new parser method in this file.
|
||||
//! For the above definition the implementation is going to be:
|
||||
//! ```rust
|
||||
//! use jni::JNIEnv;
|
||||
//! use jni::objects::*;
|
||||
//! use jni::sys::*;
|
||||
//!
|
||||
//! #[no_mangle]
|
||||
//! pub extern "system" fn Java_org_enso_parser_Parser_newMethod(
|
||||
//!     env    : JNIEnv,    // the JVM environment, used for calling methods and constructors
|
||||
//! this : JClass, // the instance of `Parser`
|
||||
//! string : JString,
|
||||
//! array : jintArray,
|
||||
//! ) -> jweak { unimplemented!() }
|
||||
//! ```
|
||||
//! 4. (Optional) Generate a shared library from the Rust definition by `cargo build`.
|
||||
//! It will be generated into `target/rust/debug/`.
|
||||
//! This step is done automatically by `sbt`.
|
||||
|
||||
use jni::objects::*;
|
||||
use jni::sys::*;
|
||||
|
||||
use jni::JNIEnv;
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === Parser JNI API ===
|
||||
// ======================
|
||||
|
||||
/// Parses the content of a single source file.
|
||||
#[allow(unsafe_code)]
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_org_enso_parser_Parser_parseStr(
|
||||
env: JNIEnv,
|
||||
_this: JClass,
|
||||
input: JString,
|
||||
) -> jweak {
|
||||
let txt = env
|
||||
.new_object(
|
||||
env.find_class("org/enso/ast/Ast$Txt$Text").unwrap(),
|
||||
"(Ljava/lang/String;)V",
|
||||
&[input.into()],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let non = env
|
||||
.get_static_field(env.find_class("scala/None$").unwrap(), "MODULE$", "Lscala/None$;")
|
||||
.unwrap()
|
||||
.l()
|
||||
.unwrap();
|
||||
|
||||
let ast = env
|
||||
.new_object(
|
||||
env.find_class("org/enso/ast/Ast$Ast").unwrap(),
|
||||
"(Lscala/Option;JJLjava/lang/Object;)V",
|
||||
&[non.into(), 0i64.into(), 0i64.into(), txt.into()],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
ast.into_inner()
|
||||
}
|
||||
|
||||
/// Parses a single source file.
|
||||
#[allow(unsafe_code)]
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_org_enso_parser_Parser_parseFile(
|
||||
env: JNIEnv,
|
||||
this: JClass,
|
||||
filename: JString,
|
||||
) -> jweak {
|
||||
Java_org_enso_parser_Parser_parseStr(env, this, filename)
|
||||
}
|
||||
|
||||
|
||||
// === Tokens ===
|
||||
|
||||
/// Parses the content of a single source file into a stream of tokens.
|
||||
#[allow(unsafe_code)]
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_org_enso_parser_Parser_lexStr(
|
||||
env: JNIEnv,
|
||||
this: JClass,
|
||||
input: JString,
|
||||
) -> jweak {
|
||||
Java_org_enso_parser_Parser_parseStr(env, this, input)
|
||||
}
|
||||
|
||||
/// Parses a single source file into a stream of tokens.
|
||||
#[allow(unsafe_code)]
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_org_enso_parser_Parser_lexFile(
|
||||
env: JNIEnv,
|
||||
this: JClass,
|
||||
filename: JString,
|
||||
) -> jweak {
|
||||
Java_org_enso_parser_Parser_parseStr(env, this, filename)
|
||||
}
|
@ -1,55 +0,0 @@
|
||||
//! This module exports the implementation of the parser for the Enso language.
|
||||
|
||||
// === Features ===
|
||||
#![feature(test)]
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
|
||||
|
||||
|
||||
mod jni;
|
||||
|
||||
pub use crate::jni::*;
|
||||
|
||||
use ast_new::AnyAst;
|
||||
use ast_new::Ast;
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === Parser Rust API ===
|
||||
// =======================
|
||||
|
||||
/// Parse the content of a single source file.
|
||||
pub fn parse_str(input: String) -> AnyAst {
|
||||
Ast::new(ast_new::txt::Text { text: input })
|
||||
}
|
||||
|
||||
/// Parse a single source file.
|
||||
pub fn parse_file(filename: String) -> AnyAst {
|
||||
parse_str(filename)
|
||||
}
|
||||
|
||||
|
||||
// === Tokens ===
|
||||
|
||||
/// Parse the content of a single source file.
|
||||
pub fn lexe_str(input: String) -> AnyAst {
|
||||
parse_str(input)
|
||||
}
|
||||
|
||||
/// Parse a single source file.
|
||||
pub fn lexe_file(filename: String) -> AnyAst {
|
||||
parse_str(filename)
|
||||
}
|
1090
lib/rust/parser/src/lexer.rs
Normal file
File diff suppressed because it is too large
@ -1,40 +0,0 @@
|
||||
//! This library contains the implementation of the Enso parser.
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod macros;
|
||||
pub mod operator;
|
||||
pub mod parser;
|
||||
|
||||
pub use crate::parser::*;
|
||||
|
||||
|
||||
|
||||
/// The prelude for the parser.
|
||||
pub mod prelude {
|
||||
pub use enso_logger::AnyLogger;
|
||||
pub use enso_prelude::*;
|
||||
|
||||
/// The Enso logging library.
|
||||
pub mod logger {
|
||||
pub use enso_logger::Logger;
|
||||
pub use enso_logger::*;
|
||||
}
|
||||
|
||||
/// The lexer types.
|
||||
pub mod lexer {
|
||||
pub use ::lexer::*;
|
||||
|
||||
/// The lexer tokens.
|
||||
pub mod token {
|
||||
pub use lexer::library::token::*;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,55 +1,110 @@
|
||||
//! The macro system for the Enso parser.
|
||||
//! Enso macro utilities. The parser contains a powerful macro resolution engine and a lot of the
|
||||
//! language constructs are defined as macros. This module contains macro definition structs and
|
||||
//! utilities for managing macros.
|
||||
//! Read the docs of the main module of this crate to learn more about the parsing process.
|
||||
|
||||
//
|
||||
|
||||
use crate::prelude::logger::*;
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::macros::definition::Definition;
|
||||
use crate::macros::registry::Registry;
|
||||
use crate::syntax;
|
||||
use crate::syntax::token::Token;
|
||||
|
||||
use enso_data_structures::im_list;
|
||||
use pattern::Pattern;
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod definition;
|
||||
pub mod literal;
|
||||
pub mod registry;
|
||||
pub mod pattern;
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Type Aliases ===
|
||||
// ====================
|
||||
// ==================
|
||||
// === Definition ===
|
||||
// ==================
|
||||
|
||||
type DebugLevel = crate::prelude::logger::entry::level::Debug;
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Resolver ===
|
||||
// ================
|
||||
|
||||
/// The Enso macro resolver.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Macro definition. It contains a list of macro segments and an optional macro prefix.
|
||||
///
|
||||
/// For example, the macro `if ... then ... else ...` contains three segments and no prefix. On the
|
||||
/// other hand, the macro `... -> ...` contains one segment (starting with the `->` token) and a
|
||||
/// prefix (it consumes tokens on the left of its first segment).
|
||||
///
|
||||
/// If you want to create macro definition in Rust, use the [`macro_definition`] macro instead,
|
||||
/// which allows for nice and concise definitions.
|
||||
#[derive(Derivative)]
|
||||
#[derivative(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Resolver<Logger> {
|
||||
registry: Registry,
|
||||
logger: Logger,
|
||||
pub struct Definition<'a> {
|
||||
/// The pattern in this field will be matched from right to left, unlike patterns in segments.
|
||||
pub rev_prefix_pattern: Option<Pattern>,
|
||||
pub segments: im_list::NonEmpty<SegmentDefinition<'a>>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub body: Rc<Body>,
|
||||
}
|
||||
|
||||
impl<Logger> Resolver<Logger>
|
||||
where Logger: AnyLogger<Owned = Logger> + LoggerOps<DebugLevel>
|
||||
{
|
||||
/// All the tokens matched as prefix of the resolved macro.
|
||||
pub type PrefixTokens<'s> = Option<Vec<syntax::Item<'s>>>;
|
||||
|
||||
/// All the sections of the resolved macro.
|
||||
pub type MatchedSections<'s> = NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>;
|
||||
|
||||
/// A function that transforms matched macro tokens into [`syntax::Tree`].
|
||||
pub type Body = dyn for<'s> Fn(PrefixTokens<'s>, MatchedSections<'s>) -> syntax::Tree<'s>;
|
||||
|
||||
|
||||
|
||||
// =========================
|
||||
// === SegmentDefinition ===
|
||||
// =========================
|
||||
|
||||
/// Definition of a macro segment. Contains a header, such as `if` or `->`, and a pattern that the following
|
||||
/// tokens have to match.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct SegmentDefinition<'a> {
|
||||
pub header: &'a str,
|
||||
pub pattern: Pattern,
|
||||
}
|
||||
|
||||
impl<'a> SegmentDefinition<'a> {
|
||||
/// Constructor.
|
||||
pub fn new(macros: Vec<Definition>, parent_logger: &Logger) -> Self {
|
||||
let logger = <Logger>::sub(parent_logger, "Resolver");
|
||||
let registry = Registry::from(macros);
|
||||
Self { registry, logger }
|
||||
}
|
||||
|
||||
/// Define the macro described by `definition` in the macro resolver `self`.
|
||||
pub fn define_macro(&mut self, definition: Definition) {
|
||||
debug!(self.logger, "Define Macro: {&definition:?}.");
|
||||
self.registry.insert(definition)
|
||||
pub fn new(header: &'a str, pattern: Pattern) -> Self {
|
||||
Self { header, pattern }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Rust Macros ===
|
||||
// ===================
|
||||
|
||||
/// Macro allowing for nice macro [`Definition`] generation. For example, the following code defines
|
||||
/// the `if ... then .. else ...` macro:
|
||||
///
|
||||
/// ```text
|
||||
/// macro_definition! {
|
||||
// ("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
|
||||
///     body_handler_fn
|
||||
/// }
|
||||
/// ```
|
||||
#[macro_export]
|
||||
macro_rules! macro_definition {
|
||||
( ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
|
||||
$crate::macro_definition!{[None] ($($section, $pattern),*) $body}
|
||||
};
|
||||
( ($prefix:expr, $($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
|
||||
$crate::macro_definition!{[Some($prefix)] ($($section, $pattern),*) $body}
|
||||
};
|
||||
( [$prefix:expr] ($($section:literal, $pattern:expr),* $(,)?) $body:expr ) => {
|
||||
macros::Definition {
|
||||
rev_prefix_pattern: $prefix,
|
||||
segments: im_list::NonEmpty::try_from(vec![
|
||||
$(macros::SegmentDefinition::new($section, $pattern)),*]).unwrap(),
|
||||
body: Rc::new($body),
|
||||
}
|
||||
};
|
||||
}
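
For illustration, here is a minimal sketch (not part of this commit) of how `macro_definition!` could be invoked from another module of this crate to register the `if ... then ... else ...` macro described in its documentation. The handler `build_if_then_else` is hypothetical, and the sketch assumes `macros`, `Pattern`, `im_list`, `Rc`, and `syntax` resolve as in the rest of the crate.

```rust
use crate::macros;
use crate::macros::pattern::Pattern;
use crate::prelude::*;
use crate::syntax;

use enso_data_structures::im_list;

/// Hypothetical body handler: turns the matched prefix and sections into a syntax tree.
fn build_if_then_else<'s>(
    _prefix: macros::PrefixTokens<'s>,
    _sections: macros::MatchedSections<'s>,
) -> syntax::Tree<'s> {
    todo!("Build the `if ... then ... else ...` node from the matched sections.")
}

/// The `if ... then ... else ...` macro: three segments, each consuming everything up to the
/// next segment header, and no prefix pattern.
fn if_then_else_macro() -> macros::Definition<'static> {
    crate::macro_definition! {
        ("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
        build_if_then_else
    }
}
```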
|
||||
|
@ -1,70 +0,0 @@
|
||||
//! Macro definitions in Enso.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::macros::literal::Literal;
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Definition ===
|
||||
// ==================
|
||||
|
||||
/// A macro definition.
|
||||
///
|
||||
/// A macro definition consists of a name, which identifies the macro to users, and a list of
|
||||
/// [sections](`Section`). The sections are the most important portion of the macro definition, as
|
||||
/// they define the literal portions of the token stream on which the macro will match.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Definition {
|
||||
pub name: String,
|
||||
pub sections: Vec<Section>,
|
||||
}
|
||||
|
||||
impl Definition {
|
||||
/// Constructor.
|
||||
pub fn new(name: impl Str, sections: Vec<Section>) -> Self {
|
||||
let name = name.into();
|
||||
Self { name, sections }
|
||||
}
|
||||
|
||||
/// Get the path for the definition.
|
||||
///
|
||||
/// The definition's path consists of the headers of each of the sections that make it up, and
|
||||
/// describes the literals that must be matched for the macro to match.
|
||||
pub fn path(&self) -> Vec<Literal> {
|
||||
self.sections.iter().map(|s| s.start_symbol.clone()).collect_vec()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Section ===
|
||||
// ===============
|
||||
|
||||
/// A section in a macro, representing both a literal section header to match against, and the
|
||||
/// tokens that the section contains.
|
||||
///
|
||||
/// The literal is the _most_ important portion of a section, as they are constants that allow the
|
||||
/// macro resolver to divide up the input token stream based on these constants.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Section {
|
||||
start_symbol: Literal, // TODO Pattern
|
||||
}
|
||||
|
||||
impl Section {
|
||||
/// Constructor.
|
||||
pub fn new(symbol: Literal) -> Self {
|
||||
Self { start_symbol: symbol }
|
||||
}
|
||||
|
||||
/// Get a reference to the literal that heads the section.
|
||||
pub fn start_symbol(&self) -> &Literal {
|
||||
&self.start_symbol
|
||||
}
|
||||
}
|
@ -1,95 +0,0 @@
|
||||
//! This file contains the literal matchers that are used to head up macro sections.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::prelude::lexer::token;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Literal ===
|
||||
// ===============
|
||||
|
||||
/// The kinds of literal that can be the head of a macro section.
|
||||
///
|
||||
/// For more detailed descriptions of the various literal types, please see the documentation of the
|
||||
/// tokens in the Lexer.
|
||||
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
pub enum Literal {
|
||||
Referent(String),
|
||||
Variable(String),
|
||||
External(String),
|
||||
Blank,
|
||||
Operator(String),
|
||||
Annotation(String),
|
||||
}
|
||||
|
||||
impl Literal {
|
||||
/// Construct a referent identifier literal.
|
||||
pub fn referent(lit: impl Str) -> Literal {
|
||||
Literal::Referent(lit.into())
|
||||
}
|
||||
|
||||
/// Construct a variable identifier literal.
|
||||
pub fn variable(lit: impl Str) -> Literal {
|
||||
Literal::Variable(lit.into())
|
||||
}
|
||||
|
||||
/// Construct an external identifier literal.
|
||||
pub fn external(lit: impl Str) -> Literal {
|
||||
Literal::External(lit.into())
|
||||
}
|
||||
|
||||
/// Construct a blank identifier literal.
|
||||
pub fn blank() -> Literal {
|
||||
Literal::Blank
|
||||
}
|
||||
|
||||
/// Construct an operator identifier literal.
|
||||
pub fn operator(lit: impl Str) -> Literal {
|
||||
Literal::Operator(lit.into())
|
||||
}
|
||||
|
||||
/// Construct an annotation identifier literal.
|
||||
pub fn annotation(lit: impl Str) -> Literal {
|
||||
Literal::Annotation(lit.into())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<&Literal> for Literal {
|
||||
fn from(lit: &Literal) -> Self {
|
||||
lit.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Literal> for token::Shape {
|
||||
fn from(lit: Literal) -> Self {
|
||||
match lit {
|
||||
Literal::Referent(str) => token::Shape::Referent(str),
|
||||
Literal::Variable(str) => token::Shape::Variable(str),
|
||||
Literal::External(str) => token::Shape::External(str),
|
||||
Literal::Blank => token::Shape::Blank,
|
||||
Literal::Operator(str) => token::Shape::Operator(str),
|
||||
Literal::Annotation(str) => token::Shape::Annotation(str),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<token::Shape> for Literal {
|
||||
type Error = token::Shape;
|
||||
|
||||
fn try_from(shape: token::Shape) -> Result<Self, Self::Error> {
|
||||
match shape {
|
||||
token::Shape::Referent(name) => Ok(Literal::Referent(name)),
|
||||
token::Shape::Variable(name) => Ok(Literal::Variable(name)),
|
||||
token::Shape::External(name) => Ok(Literal::External(name)),
|
||||
token::Shape::Blank => Ok(Literal::Blank),
|
||||
token::Shape::Operator(name) => Ok(Literal::Operator(name)),
|
||||
token::Shape::Annotation(name) => Ok(Literal::Annotation(name)),
|
||||
_ => Err(shape),
|
||||
}
|
||||
}
|
||||
}
|
126
lib/rust/parser/src/macros/pattern.rs
Normal file
@ -0,0 +1,126 @@
|
||||
//! This module defines the patterns ([`Pattern`]) used to validate the incoming token stream against the expected
|
||||
//! macro input.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Pattern ===
|
||||
// ===============
|
||||
|
||||
/// Pattern used to validate incoming token stream against expected macro input.
|
||||
///
|
||||
/// The idea is similar to patterns used in `macro_rules` definitions in Rust. There are a few
|
||||
/// differences though:
|
||||
/// 1. This pattern implementation exposes different matchers and operations.
|
||||
/// 2. This macro implementation never attaches types to tokens, which means that every defined
|
||||
/// pattern behaves like a TT-muncher in Rust.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Pattern {
|
||||
/// Consume all items, till the end of the token stream.
|
||||
Everything,
|
||||
/// Consume nothing.
|
||||
Nothing,
|
||||
/// Consume items matching the first pattern. If the match was unsuccessful, the second match
|
||||
/// will be tried.
|
||||
Or(Box<Pattern>, Box<Pattern>),
|
||||
/// Consume a single item if it matches the configuration.
|
||||
Item(Item),
|
||||
}
|
||||
|
||||
/// Item pattern configuration.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Item {
|
||||
/// Check whether the token has spaces on its right-hand side. The [`None`] value means that the
|
||||
/// condition would not be checked.
|
||||
pub has_rhs_spacing: Option<bool>,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =======================
|
||||
// === ResolutionError ===
|
||||
// =======================
|
||||
|
||||
/// Pattern resolution error.
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct ResolutionError<T> {
|
||||
/// All the incoming tokens. The resolver consumes a vector of tokens and returns it back in case
|
||||
/// an error happened.
|
||||
pub tokens: Vec<T>,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl<T> ResolutionError<T> {
|
||||
/// Constructor.
|
||||
pub fn new(tokens: Vec<T>, message: impl Into<String>) -> Self {
|
||||
let message = message.into();
|
||||
Self { tokens, message }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==================
|
||||
// === Resolution ===
|
||||
// ==================
|
||||
|
||||
/// Successful pattern match result.
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Match<T> {
|
||||
/// All the matched tokens.
|
||||
pub matched: Vec<T>,
|
||||
/// The rest of the token stream that was not needed for the successful pattern match.
|
||||
pub rest: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T> Match<T> {
|
||||
/// Constructor.
|
||||
pub fn new(matched: Vec<T>, rest: Vec<T>) -> Self {
|
||||
Self { matched, rest }
|
||||
}
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
/// Match the token stream with this pattern.
|
||||
pub fn resolve<'s, T: TryAsRef<syntax::Item<'s>>>(
|
||||
&self,
|
||||
mut input: Vec<T>,
|
||||
has_spacing_at_end: bool,
|
||||
right_to_left_mode: bool,
|
||||
) -> Result<Match<T>, ResolutionError<T>> {
|
||||
match self {
|
||||
Self::Everything => Ok(Match::new(input, default())),
|
||||
Self::Nothing => Ok(Match::new(default(), input)),
|
||||
Self::Or(fst, snd) => fst
|
||||
.resolve(input, has_spacing_at_end, right_to_left_mode)
|
||||
.or_else(|err| snd.resolve(err.tokens, has_spacing_at_end, right_to_left_mode)),
|
||||
Self::Item(item) => match input.first() {
|
||||
None => Err(ResolutionError::new(input, "Expected an item.")),
|
||||
Some(first) => match first.try_as_ref() {
|
||||
None => Err(ResolutionError::new(input, "Expected an item.")),
|
||||
Some(_) => match item.has_rhs_spacing {
|
||||
Some(spacing) =>
|
||||
if right_to_left_mode {
|
||||
if spacing == has_spacing_at_end {
|
||||
Ok(Match::new(vec![input.pop_front().unwrap()], input))
|
||||
} else {
|
||||
Err(ResolutionError::new(input, "Expected an item."))
|
||||
}
|
||||
} else {
|
||||
todo!()
|
||||
},
|
||||
None => Ok(Match::new(vec![input.pop_front().unwrap()], input)),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
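
For illustration, a small sketch (not part of this commit) of how a composed pattern could be resolved against a token stream. It assumes a caller with some item type `T` implementing `TryAsRef<syntax::Item>`, as `resolve` requires, with the crate prelude in scope.

```rust
use crate::macros::pattern::{Item, Match, Pattern};
use crate::prelude::*;
use crate::syntax;

/// Consume a single leading item if one is present; otherwise match nothing and return the
/// whole stream as `rest`. Because `Nothing` always succeeds, the `Or` fallback makes this
/// resolution infallible.
fn leading_item_or_nothing<'s, T: TryAsRef<syntax::Item<'s>>>(tokens: Vec<T>) -> Match<T> {
    let single_item = Pattern::Item(Item { has_rhs_spacing: None });
    let pattern = Pattern::Or(Box::new(single_item), Box::new(Pattern::Nothing));
    pattern
        .resolve(tokens, false, false)
        .unwrap_or_else(|err| Match::new(default(), err.tokens))
}
```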
|
@ -1,155 +0,0 @@
|
||||
//! The macro registry that can be queried during the process of macro resolution.
|
||||
|
||||
use crate::prelude::*;
|
||||
use enso_data_structures::hash_map_tree::*;
|
||||
|
||||
use crate::macros::definition::Definition;
|
||||
use crate::macros::literal::Literal;
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Registry ===
|
||||
// ================
|
||||
|
||||
/// The type of the tree that underlies the registry.
|
||||
pub type Tree = HashMapTree<Literal, Option<Definition>>;
|
||||
|
||||
/// The registry is responsible for the registration of macro definitions, and the querying of said
|
||||
/// definitions.
|
||||
#[derive(Clone, Debug, Default, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Registry {
|
||||
tree: Tree,
|
||||
}
|
||||
|
||||
impl Registry {
|
||||
/// Insert `definition` into the macro registry.
|
||||
pub fn insert(&mut self, definition: Definition) {
|
||||
self.tree.set(definition.path(), Some(definition));
|
||||
}
|
||||
|
||||
/// Get a reference to the root of the registry.
|
||||
pub fn root(&self) -> &Tree {
|
||||
&self.tree
|
||||
}
|
||||
|
||||
/// Query the registry for a tree.
|
||||
pub fn subtree<P>(&self, path: P) -> Option<&Tree>
|
||||
where
|
||||
P: IntoIterator,
|
||||
P::Item: Into<Literal>, {
|
||||
self.tree.get_node(path)
|
||||
}
|
||||
|
||||
/// Query the registry for a tree, assuming such a tree is present.
|
||||
///
|
||||
/// # Panics
|
||||
/// If no tree exists at `path`.
|
||||
pub fn unsafe_subtree<P>(&self, path: P) -> &Tree
|
||||
where
|
||||
P: IntoIterator,
|
||||
P::Item: Into<Literal>, {
|
||||
self.subtree(path).expect("A tree exists at the input path.")
|
||||
}
|
||||
|
||||
/// Query the registry for a definition.
|
||||
pub fn definition<P>(&self, path: P) -> Option<&Definition>
|
||||
where
|
||||
P: IntoIterator,
|
||||
P::Item: Into<Literal>, {
|
||||
match self.tree.get(path) {
|
||||
Some(Some(def)) => Some(def),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Query the registry for a definition, assuming such a definition is present.
|
||||
///
|
||||
/// # Panics
|
||||
/// If no definition exists at `path`.
|
||||
pub fn unsafe_definition<P>(&self, path: P) -> &Definition
|
||||
where
|
||||
P: IntoIterator,
|
||||
P::Item: Into<Literal>, {
|
||||
self.definition(path).expect("A definition exists at the input path.")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Trait Impls ===
|
||||
|
||||
impl From<Vec<Definition>> for Registry {
|
||||
fn from(defs: Vec<Definition>) -> Self {
|
||||
let mut registry: Registry = default();
|
||||
defs.into_iter().for_each(|def| registry.insert(def));
|
||||
registry
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::macros::definition::Section;
|
||||
|
||||
#[test]
|
||||
fn insert_query() {
|
||||
let mut registry = Registry::default();
|
||||
let definition = Definition::new("Test", vec![
|
||||
Section::new(Literal::variable("if")),
|
||||
Section::new(Literal::variable("then")),
|
||||
Section::new(Literal::variable("else")),
|
||||
]);
|
||||
let path_1 =
|
||||
&[Literal::variable("if"), Literal::variable("then"), Literal::variable("else")];
|
||||
let path_2 = &[Literal::variable("if"), Literal::variable("then")];
|
||||
registry.insert(definition.clone());
|
||||
let result_1 = registry.definition(path_1);
|
||||
let result_2 = registry.definition(path_2);
|
||||
assert!(result_1.is_some());
|
||||
assert_eq!(result_1.unwrap(), &definition);
|
||||
assert_eq!(result_2, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_defs() {
|
||||
let definitions = vec![
|
||||
Definition::new("if_then_else", vec![
|
||||
Section::new(Literal::variable("if")),
|
||||
Section::new(Literal::variable("then")),
|
||||
Section::new(Literal::variable("else")),
|
||||
]),
|
||||
Definition::new("if_then", vec![
|
||||
Section::new(Literal::variable("if")),
|
||||
Section::new(Literal::variable("then")),
|
||||
]),
|
||||
Definition::new("if_let", vec![
|
||||
Section::new(Literal::variable("if")),
|
||||
Section::new(Literal::variable("let")),
|
||||
]),
|
||||
];
|
||||
let registry = Registry::from(definitions.clone());
|
||||
let path_1 =
|
||||
&[Literal::variable("if"), Literal::variable("then"), Literal::variable("else")];
|
||||
let path_2 = &[Literal::variable("if"), Literal::variable("then")];
|
||||
let path_3 = &[Literal::variable("if"), Literal::variable("let")];
|
||||
let path_4 = &[Literal::variable("if")];
|
||||
let result_1 = registry.definition(path_1);
|
||||
let result_2 = registry.definition(path_2);
|
||||
let result_3 = registry.definition(path_3);
|
||||
let result_4 = registry.definition(path_4);
|
||||
assert!(result_1.is_some());
|
||||
assert!(result_2.is_some());
|
||||
assert!(result_3.is_some());
|
||||
assert!(result_4.is_none());
|
||||
assert_eq!(result_1, definitions.get(0));
|
||||
assert_eq!(result_2, definitions.get(1));
|
||||
assert_eq!(result_3, definitions.get(2));
|
||||
}
|
||||
}
|
811 lib/rust/parser/src/main.rs Normal file
@ -0,0 +1,811 @@
//! The Enso parser. Parsing is a multi-stage process:
//!
//! # Lexing.
//! First, the source code is fed to [`lexer::Lexer`], which consumes it and outputs a stream of
//! [`Token`]. Tokens are chunks of the input with a generic description attached, like "operator",
//! or "identifier".
//!
//! # Building the macro registry.
//! Macros in Enso are a very powerful mechanism and are used to transform a group of tokens into
//! almost any statement. First, macros need to be discovered and registered. Currently, there is no
//! real macro discovery process, as there is no support for user-defined macros. Instead, there is
//! a set of hardcoded macros defined in the compiler.
//!
//! Each macro defines one or more segments. Every segment starts with a predefined token and can
//! contain any number of other tokens. For example, the macro `if ... then ... else ...` contains
//! three segments. Macros can also accept prefix tokens, a set of tokens on the left of the first
//! segment. A good example is the lambda macro `... -> ...`.
//!
//! In this step, a [`MacroMatchTree`] is built. Basically, it is a map from the possible next
//! segment name to information about which other segments are required and what the macro
//! definition is in case these segments are found. For example, let's consider two macros:
//! `if ... then ...`, and `if ... then ... else ...`. In such a case, the macro registry will
//! contain only one entry, "if", and two sets of possible resolution paths: ["then"], and
//! ["then", "else"], each associated with the corresponding macro definition.
//!
//! # Splitting the token stream by the macro segments.
//! The input token stream is iterated and split based on the segments of the registered macros.
//! For example, for the input `if a b then c d else e f`, the token stream will be split into
//! three segments, `a b`, `c d`, and `e f`, which will be associated with the
//! `if ... then ... else ...` macro definition.
//!
//! The splitting process is hierarchical. It means that a new macro can start being resolved during
//! resolution of a parent macro. For example, `if if a then b then c else d` is a correct
//! expression. After finding the first `if` token, the token stream will be split. The next `if`
//! token starts a new token stream splitting. The first `then` token belongs to the nested macro,
//! however, as soon as the resolver sees the second `then` token, it will consider the nested macro
//! to be finished, and will come back to parent macro resolution.
//!
//! # Resolving right-hand-side patterns of macro segments.
//! In the next step, each macro is analyzed, starting from the most nested ones. For each macro,
//! the [`Pattern`] of the last segment is run to check which tokens belong to that macro, and
//! which tokens should be transferred to the parent macro definition. For example, consider the
//! following code `process (read file) content-> print content`. The `(...)` is a macro with two
//! sections `(` and `)`. Let's mark the token splitting with `[` and `]` characters. The previous
//! macro resolution steps would output the following split of the token stream:
//! `process [(read file][) content[-> print content]]`. In this step, the innermost macro will be
//! analyzed first. The pattern of the last segment of the inner macro (`->`) defines that it
//! consumes all tokens, so all the tokens `print content` are left as they are. Now, the resolution
//! moves to the parent macro. Its last segment starts with the `)` token, whose pattern defines
//! that it does not consume any tokens, so all of its current tokens (`content[-> print content]]`)
//! are popped to the parent definition, forming `process [(read file][)] content[-> print content]`.
//!
//! Please note that the root of the expression is considered a special macro as well. This is done
//! to unify the algorithm.
//!
//! # Resolving left-hand-side patterns of macro segments.
//! In this step, each macro is analyzed, starting from the most nested ones. For each macro,
//! the [`Pattern`] of the macro prefix is run to check which tokens belong to the prefix of
//! the macro (in case the macro defines a prefix). In the example above, the macro `->` defines
//! complex prefix rules: if the token on the left of the arrow used no space, then only a single
//! token will be consumed. As a result of this step, the following token split will occur:
//! `process [(read file][)] [content-> print content]`, which is exactly what we wanted.
//!
//! # Resolving patterns of macro segments.
//! In this step, all macro segment patterns are resolved and errors are reported in case it
//! was not possible. If tokens in a segment match the segment pattern, they are sent to the
//! operator precedence resolver for final transformation.
//!
//! # Operator precedence resolution.
//! Each token stream sent to the operator resolver is processed by a modified Shunting Yard
//! algorithm, which handles such situations as multiple operators placed next to each other,
//! multiple identifiers placed next to each other, and also takes spacing into consideration in
//! order to implement spacing-aware precedence rules. After all segments are resolved, the macro
//! is treated as a single token in one of the segments of the parent macro, and is processed by
//! the operator precedence resolver as well. In the end, a single [`syntax::Tree`] is produced,
//! containing the parsed expression.

#![recursion_limit = "256"]
// === Features ===
#![allow(incomplete_features)]
#![feature(allocator_api)]
#![feature(test)]
#![feature(specialization)]
#![feature(let_chains)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]

use crate::prelude::*;

use crate::source::VisibleOffset;

use enso_data_structures::im_list;
use enso_data_structures::im_list::List;
use lexer::Lexer;
use macros::pattern::Pattern;
use syntax::token;
use syntax::token::Token;



// ==============
// === Export ===
// ==============

pub mod lexer;
pub mod macros;
pub mod source;
pub mod syntax;



/// Popular utilities, imported by most modules of this crate.
pub mod prelude {
    pub use enso_prelude::*;
    pub use enso_types::traits::*;
    pub use enso_types::unit2::Bytes;
}



// =================================
// === SyntaxItemOrMacroResolver ===
// =================================

/// One of [`syntax::Item`] or [`MacroResolver`].
#[derive(Debug)]
#[allow(missing_docs)]
pub enum SyntaxItemOrMacroResolver<'s> {
    SyntaxItem(syntax::Item<'s>),
    MacroResolver(MacroResolver<'s>),
}

impl<'s> From<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
    fn from(t: syntax::Item<'s>) -> Self {
        Self::SyntaxItem(t)
    }
}

impl<'s> From<MacroResolver<'s>> for SyntaxItemOrMacroResolver<'s> {
    fn from(t: MacroResolver<'s>) -> Self {
        Self::MacroResolver(t)
    }
}

impl<'s> TryAsRef<syntax::Item<'s>> for SyntaxItemOrMacroResolver<'s> {
    fn try_as_ref(&self) -> Option<&syntax::Item<'s>> {
        match self {
            Self::SyntaxItem(t) => Some(t),
            _ => None,
        }
    }
}



// ======================
// === MacroMatchTree ===
// ======================

/// A tree-like structure encoding potential macro matches. The keys are representations of tokens
/// that can be matched. For example, the key could be "if" or "->". Each key is associated with one
/// or more [`PartiallyMatchedMacro`], which stores a list of required segments and a macro
/// definition in case all the segments were matched. For example, for the "if" key, there can be
/// two required segment lists, one for the "then" and "else" segments, and one for the "then"
/// segment only.
#[derive(Default, Debug, Deref, DerefMut)]
pub struct MacroMatchTree<'s> {
    map: HashMap<&'s str, NonEmptyVec<PartiallyMatchedMacro<'s>>>,
}

/// Partially matched macro info. See docs of [`MacroMatchTree`] to learn more.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct PartiallyMatchedMacro<'s> {
    pub required_segments: List<macros::SegmentDefinition<'s>>,
    pub definition: Rc<macros::Definition<'s>>,
}

impl<'a> MacroMatchTree<'a> {
    /// Register a new macro definition in this macro tree.
    pub fn register(&mut self, definition: macros::Definition<'a>) {
        let header = definition.segments.head.header;
        let entry = PartiallyMatchedMacro {
            required_segments: definition.segments.tail.clone(),
            definition: Rc::new(definition),
        };
        if let Some(node) = self.get_mut(header) {
            node.push(entry);
        } else {
            self.insert(header, NonEmptyVec::singleton(entry));
        }
    }
}
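// A minimal sketch (illustration, not from the original sources): a simplified model of the
// tree built by `register` above, written with plain `std` types. The key is the header of a
// macro's first segment; the value collects, for every macro starting with that key, the
// headers of the remaining required segments, mirroring `PartiallyMatchedMacro`. The helper
// names below are made up for this example.
fn register_simplified<'a>(
    map: &mut std::collections::HashMap<&'a str, Vec<Vec<&'a str>>>,
    segments: &[&'a str],
) {
    let (first, rest) = segments.split_first().expect("a macro has at least one segment");
    map.entry(*first).or_default().push(rest.to_vec());
}

#[test]
fn register_simplified_example() {
    // Registering `if ... then ...` and `if ... then ... else ...` yields a single "if" entry
    // with two possible continuation paths, as described in the module docs.
    let mut map = std::collections::HashMap::new();
    register_simplified(&mut map, &["if", "then"]);
    register_simplified(&mut map, &["if", "then", "else"]);
    assert_eq!(map["if"], vec![vec!["then"], vec!["then", "else"]]);
}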
|
||||
|
||||
|
||||
|
||||
// =====================
|
||||
// === MacroResolver ===
|
||||
// =====================
|
||||
|
||||
/// Enso macro resolver. See the docs of the main module to learn more about the macro resolution
|
||||
/// steps.
|
||||
#[derive(Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MacroResolver<'s> {
|
||||
pub current_segment: MatchedSegment<'s>,
|
||||
pub resolved_segments: Vec<MatchedSegment<'s>>,
|
||||
pub possible_next_segments: MacroMatchTree<'s>,
|
||||
pub matched_macro_def: Option<Rc<macros::Definition<'s>>>,
|
||||
}
|
||||
|
||||
impl<'a> MacroResolver<'a> {
|
||||
/// A new macro resolver with a special "root" segment definition. The "root" segment does not
/// exist in the source code; it is simply the whole expression being parsed. It is treated
/// as a macro in order to unify the algorithms.
|
||||
pub fn new_root() -> Self {
|
||||
let current_segment =
|
||||
MatchedSegment { header: Token("", "", token::Variant::newline()), body: default() };
|
||||
let resolved_segments = default();
|
||||
let possible_next_segments = default();
|
||||
let matched_macro_def = Some(Rc::new(macros::Definition {
|
||||
rev_prefix_pattern: None,
|
||||
segments: im_list::NonEmpty::singleton(macros::SegmentDefinition {
|
||||
header: "__ROOT__",
|
||||
pattern: Pattern::Everything,
|
||||
}),
|
||||
body: Rc::new(|_, v| {
|
||||
if v.len() != 1 {
|
||||
panic!()
|
||||
}
|
||||
let t = v.into_vec().pop().unwrap().1;
|
||||
resolve_operator_precedence(t)
|
||||
}),
|
||||
}));
|
||||
Self { current_segment, resolved_segments, possible_next_segments, matched_macro_def }
|
||||
}
|
||||
}
|
||||
|
||||
/// A matched macro segment. Partial macro resolution product.
|
||||
#[derive(Debug)]
|
||||
pub struct MatchedSegment<'s> {
|
||||
header: Token<'s>,
|
||||
body: Vec<SyntaxItemOrMacroResolver<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> MatchedSegment<'s> {
|
||||
/// Constructor.
|
||||
pub fn new(header: Token<'s>) -> Self {
|
||||
let body = default();
|
||||
Self { header, body }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Main macro resolver capable of resolving nested macro usages. See the docs of the main module to
|
||||
/// learn more about the macro resolution steps.
|
||||
#[derive(Debug)]
|
||||
pub struct Resolver<'s> {
|
||||
current_macro: MacroResolver<'s>,
|
||||
macro_stack: Vec<MacroResolver<'s>>,
|
||||
}
|
||||
|
||||
/// Result of the macro resolution step.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
enum ResolverStep {
|
||||
NormalToken,
|
||||
NewSegmentStarted,
|
||||
MacroStackPop,
|
||||
}
|
||||
|
||||
impl<'s> Resolver<'s> {
|
||||
fn new_root() -> Self {
|
||||
let current_macro = MacroResolver::new_root();
|
||||
let macro_stack = default();
|
||||
Self { current_macro, macro_stack }
|
||||
}
|
||||
|
||||
fn run(
|
||||
mut self,
|
||||
lexer: &Lexer<'s>,
|
||||
root_macro_map: &MacroMatchTree<'s>,
|
||||
tokens: Vec<syntax::Item<'s>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
let mut stream = tokens.into_iter();
|
||||
let mut opt_token: Option<syntax::Item<'s>>;
|
||||
macro_rules! next_token {
|
||||
() => {{
|
||||
opt_token = stream.next();
|
||||
if let Some(token) = opt_token.as_ref() {
|
||||
event!(TRACE, "New token {:#?}", token);
|
||||
}
|
||||
}};
|
||||
}
|
||||
macro_rules! trace_state {
|
||||
() => {
|
||||
event!(TRACE, "Current macro:\n{:#?}", self.current_macro);
|
||||
event!(TRACE, "Parent macros:\n{:#?}", self.macro_stack);
|
||||
};
|
||||
}
|
||||
next_token!();
|
||||
while let Some(token) = opt_token {
|
||||
let step_result = match &token {
|
||||
// FIXME: clone?
|
||||
syntax::Item::Token(token) => self.process_token(root_macro_map, token.clone()),
|
||||
_ => ResolverStep::NormalToken,
|
||||
};
|
||||
match step_result {
|
||||
ResolverStep::MacroStackPop => {
|
||||
trace_state!();
|
||||
opt_token = Some(token)
|
||||
}
|
||||
ResolverStep::NewSegmentStarted => {
|
||||
trace_state!();
|
||||
next_token!()
|
||||
}
|
||||
ResolverStep::NormalToken => {
|
||||
self.current_macro.current_segment.body.push(token.into());
|
||||
trace_state!();
|
||||
next_token!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(parent_macro) = self.macro_stack.pop() {
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
}
|
||||
|
||||
trace_state!();
|
||||
|
||||
Self::resolve(lexer, self.current_macro, None)
|
||||
}
|
||||
|
||||
fn replace_current_with_parent_macro(&mut self, mut parent_macro: MacroResolver<'s>) {
|
||||
mem::swap(&mut parent_macro, &mut self.current_macro);
|
||||
let mut child_macro = parent_macro;
|
||||
if let Some(def) = &child_macro.matched_macro_def {
|
||||
let pattern = &def.segments.last().pattern;
|
||||
let child_tokens = mem::take(&mut child_macro.current_segment.body);
|
||||
// FIXME: the first [`false`] below is invalid.
|
||||
let match_result = pattern.resolve(child_tokens, false, false).unwrap();
|
||||
let mut new_child_tokens = match_result.matched;
|
||||
let new_parent_tokens = match_result.rest;
|
||||
mem::swap(&mut child_macro.current_segment.body, &mut new_child_tokens);
|
||||
self.current_macro.current_segment.body.push(child_macro.into());
|
||||
self.current_macro.current_segment.body.extend(new_parent_tokens);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve(
|
||||
lexer: &Lexer<'s>,
|
||||
m: MacroResolver<'s>,
|
||||
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
let segments = NonEmptyVec::new_with_last(m.resolved_segments, m.current_segment);
|
||||
let sss: NonEmptyVec<(Token, Vec<syntax::Item<'s>>)> = segments.mapped(|segment| {
|
||||
let mut ss: Vec<syntax::Item<'s>> = vec![];
|
||||
for item in segment.body {
|
||||
let resolved_token = match item {
|
||||
SyntaxItemOrMacroResolver::MacroResolver(m2) => {
|
||||
if let Some(macro_def) = &m2.matched_macro_def
|
||||
&& let Some(pfx_pattern) = ¯o_def.rev_prefix_pattern {
|
||||
ss.reverse();
|
||||
let spacing = m2.current_segment.header.left_offset.visible > VisibleOffset(0);
|
||||
let mut match_result = pfx_pattern.resolve(ss,spacing,true).unwrap();
|
||||
match_result.matched.reverse();
|
||||
ss = match_result.rest;
|
||||
ss.reverse();
|
||||
Self::resolve(lexer, m2, Some(match_result.matched)).into()
|
||||
} else {
|
||||
Self::resolve(lexer, m2, None).into()
|
||||
}
|
||||
},
|
||||
SyntaxItemOrMacroResolver::SyntaxItem(t) => t,
|
||||
};
|
||||
ss.push(resolved_token);
|
||||
}
|
||||
(segment.header, ss)
|
||||
});
|
||||
|
||||
if let Some(macro_def) = m.matched_macro_def {
|
||||
(macro_def.body)(prefix_tokens, sss)
|
||||
} else {
|
||||
todo!("Handling non-fully-resolved macros")
|
||||
}
|
||||
}
|
||||
|
||||
fn pop_macro_stack_if_reserved(&mut self, repr: &str) -> Option<MacroResolver<'s>> {
|
||||
let reserved = self.macro_stack.iter().any(|p| p.possible_next_segments.contains_key(repr));
|
||||
if reserved {
|
||||
self.macro_stack.pop()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn process_token(
|
||||
&mut self,
|
||||
root_macro_map: &MacroMatchTree<'s>,
|
||||
token: Token<'s>,
|
||||
) -> ResolverStep {
|
||||
let repr = &**token.code;
|
||||
if let Some(subsegments) = self.current_macro.possible_next_segments.get(repr) {
|
||||
event!(TRACE, "Entering next segment of the current macro.");
|
||||
let mut new_match_tree =
|
||||
Self::enter(&mut self.current_macro.matched_macro_def, subsegments);
|
||||
let mut current_segment = MatchedSegment::new(token);
|
||||
mem::swap(&mut new_match_tree, &mut self.current_macro.possible_next_segments);
|
||||
mem::swap(&mut self.current_macro.current_segment, &mut current_segment);
|
||||
self.current_macro.resolved_segments.push(current_segment);
|
||||
ResolverStep::NewSegmentStarted
|
||||
} else if let Some(parent_macro) = self.pop_macro_stack_if_reserved(repr) {
|
||||
event!(TRACE, "Next token reserved by parent macro. Resolving current macro.");
|
||||
self.replace_current_with_parent_macro(parent_macro);
|
||||
ResolverStep::MacroStackPop
|
||||
} else if let Some(segments) = root_macro_map.get(repr) {
|
||||
event!(TRACE, "Starting a new nested macro resolution.");
|
||||
let mut matched_macro_def = default();
|
||||
let mut current_macro = MacroResolver {
|
||||
current_segment: MatchedSegment { header: token, body: default() },
|
||||
resolved_segments: default(),
|
||||
possible_next_segments: Self::enter(&mut matched_macro_def, segments),
|
||||
matched_macro_def,
|
||||
};
|
||||
mem::swap(&mut self.current_macro, &mut current_macro);
|
||||
self.macro_stack.push(current_macro);
|
||||
ResolverStep::NewSegmentStarted
|
||||
} else {
|
||||
event!(TRACE, "Consuming token as current segment body.");
|
||||
ResolverStep::NormalToken
|
||||
}
|
||||
}
|
||||
|
||||
fn enter(
|
||||
matched_macro_def: &mut Option<Rc<macros::Definition<'s>>>,
|
||||
path: &[PartiallyMatchedMacro<'s>],
|
||||
) -> MacroMatchTree<'s> {
|
||||
*matched_macro_def = None;
|
||||
let mut new_section_tree = MacroMatchTree::default();
|
||||
for v in path {
|
||||
if let Some(first) = v.required_segments.head() {
|
||||
let tail = v.required_segments.tail().cloned().unwrap_or_default();
|
||||
let definition = v.definition.clone_ref();
|
||||
let x = PartiallyMatchedMacro { required_segments: tail, definition };
|
||||
if let Some(node) = new_section_tree.get_mut(&first.header) {
|
||||
node.push(x);
|
||||
} else {
|
||||
new_section_tree.insert(first.header, NonEmptyVec::singleton(x));
|
||||
}
|
||||
} else {
|
||||
if matched_macro_def.is_some() {
|
||||
event!(ERROR, "Internal error. Duplicate macro definition.");
|
||||
}
|
||||
*matched_macro_def = Some(v.definition.clone_ref());
|
||||
}
|
||||
}
|
||||
new_section_tree
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// FIXME: hardcoded values + not finished implementation.
|
||||
fn precedence_of(operator: &str) -> usize {
|
||||
match operator {
|
||||
"+" => 3,
|
||||
"-" => 3,
|
||||
"*" => 7,
|
||||
_ => panic!("Operator not supported: {}", operator),
|
||||
}
|
||||
}
|
||||
//
|
||||
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
|
||||
struct WithPrecedence<T> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
elem: T,
|
||||
precedence: usize,
|
||||
}
|
||||
|
||||
impl<T> WithPrecedence<T> {
|
||||
pub fn new(precedence: usize, elem: T) -> Self {
|
||||
Self { elem, precedence }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn annotate_tokens_that_need_spacing(items: Vec<syntax::Item>) -> Vec<syntax::Item> {
|
||||
items
|
||||
.into_iter()
|
||||
.map(|item| match item {
|
||||
syntax::Item::Token(_) => item,
|
||||
syntax::Item::Tree(ast) =>
|
||||
match &*ast.variant {
|
||||
syntax::tree::Variant::MultiSegmentApp(data) => {
|
||||
if data.segments.first().header.variant.marker()
|
||||
!= token::variant::VariantMarker::Symbol
|
||||
{
|
||||
syntax::Item::Tree(ast.with_error(
|
||||
"This expression cannot be used in a non-spaced equation.",
|
||||
))
|
||||
} else {
|
||||
syntax::Item::Tree(ast)
|
||||
}
|
||||
}
|
||||
_ => syntax::Item::Tree(ast),
|
||||
},
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn resolve_operator_precedence<'s>(items: Vec<syntax::Item<'s>>) -> syntax::Tree<'s> {
|
||||
type Tokens<'s> = Vec<syntax::Item<'s>>;
|
||||
let mut flattened: Tokens<'s> = default();
|
||||
let mut no_space_group: Tokens<'s> = default();
|
||||
let processs_no_space_group = |flattened: &mut Tokens<'s>, no_space_group: &mut Tokens<'s>| {
|
||||
let tokens = mem::take(no_space_group);
|
||||
if tokens.len() == 1 {
|
||||
flattened.extend(tokens);
|
||||
} else {
|
||||
let tokens = annotate_tokens_that_need_spacing(tokens);
|
||||
let ast = resolve_operator_precedence_internal(tokens);
|
||||
flattened.push(ast.into());
|
||||
}
|
||||
};
|
||||
for item in items {
|
||||
if item.span().left_offset.visible.width_in_spaces == 0 || no_space_group.is_empty() {
|
||||
no_space_group.push(item)
|
||||
} else if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
no_space_group.push(item);
|
||||
} else {
|
||||
// FIXME: this is unreachable.
|
||||
flattened.push(item);
|
||||
}
|
||||
}
|
||||
if !no_space_group.is_empty() {
|
||||
processs_no_space_group(&mut flattened, &mut no_space_group);
|
||||
}
|
||||
resolve_operator_precedence_internal(flattened)
|
||||
}
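// A minimal sketch (illustration, not from the original sources): the spacing-aware grouping
// performed by `resolve_operator_precedence` above, modeled with plain types. Each token is a
// `(repr, has_left_space)` pair, mimicking the `left_offset.visible.width_in_spaces == 0`
// check; maximal runs of unspaced tokens form one group, so `a+b * c` is grouped as
// `[a+b] [*] [c]` and the unspaced `+` binds tighter than the spaced `*`. The helper names are
// made up for this example.
fn group_by_spacing<'a>(tokens: &[(&'a str, bool)]) -> Vec<Vec<&'a str>> {
    let mut groups: Vec<Vec<&'a str>> = Vec::new();
    for &(repr, has_left_space) in tokens {
        if has_left_space || groups.is_empty() {
            groups.push(vec![repr]);
        } else {
            groups.last_mut().unwrap().push(repr);
        }
    }
    groups
}

#[test]
fn group_by_spacing_example() {
    // `a+b * c`: only `*` and `c` are preceded by a space.
    let tokens = [("a", false), ("+", false), ("b", false), ("*", true), ("c", true)];
    assert_eq!(group_by_spacing(&tokens), vec![vec!["a", "+", "b"], vec!["*"], vec!["c"]]);
}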
|
||||
|
||||
fn resolve_operator_precedence_internal(items: Vec<syntax::Item<'_>>) -> syntax::Tree<'_> {
|
||||
// Reverse-polish notation encoding.
|
||||
let mut output: Vec<syntax::Item> = default();
|
||||
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
|
||||
let mut last_token_was_ast = false;
|
||||
let mut last_token_was_opr = false;
|
||||
for item in items {
|
||||
let i2 = item.clone(); // FIXME
|
||||
if let syntax::Item::Token(token) = i2 && let token::Variant::Operator(opr) = token.variant {
|
||||
// Item is an operator.
|
||||
let last_token_was_opr_copy = last_token_was_opr;
|
||||
last_token_was_ast = false;
|
||||
last_token_was_opr = true;
|
||||
|
||||
let prec = precedence_of(&token.code);
|
||||
let opr = Token(token.left_offset, token.code, opr);
|
||||
// let opr = item.span().with(opr);
|
||||
|
||||
if last_token_was_opr_copy && let Some(prev_opr) = operator_stack.last_mut() {
|
||||
// Error. Multiple operators next to each other.
|
||||
match &mut prev_opr.elem {
|
||||
Err(err) => err.operators.push(opr),
|
||||
Ok(prev) => {
|
||||
let operators = NonEmptyVec::new(prev.clone(),vec![opr]); // FIXME: clone?
|
||||
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while let Some(prev_opr) = operator_stack.last()
|
||||
&& prev_opr.precedence >= prec
|
||||
&& let Some(prev_opr) = operator_stack.pop()
|
||||
&& let Some(rhs) = output.pop()
|
||||
{
|
||||
// Prev operator in the [`operator_stack`] has a higher precedence.
|
||||
let lhs = output.pop().map(token_to_ast);
|
||||
let ast = syntax::Tree::opr_app(lhs, prev_opr.elem, Some(token_to_ast(rhs)));
|
||||
output.push(ast.into());
|
||||
}
|
||||
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
|
||||
}
|
||||
} else if last_token_was_ast && let Some(lhs) = output.pop() {
|
||||
// Multiple non-operators next to each other.
|
||||
let lhs = token_to_ast(lhs);
|
||||
let rhs = token_to_ast(item);
|
||||
let ast = syntax::Tree::app(lhs, rhs);
|
||||
output.push(ast.into());
|
||||
} else {
|
||||
// Non-operator that follows previously consumed operator.
|
||||
last_token_was_ast = true;
|
||||
last_token_was_opr = false;
|
||||
output.push(item);
|
||||
}
|
||||
}
|
||||
let mut opt_rhs = last_token_was_ast.and_option_from(|| output.pop().map(token_to_ast));
|
||||
while let Some(opr) = operator_stack.pop() {
|
||||
let opt_lhs = output.pop().map(token_to_ast);
|
||||
opt_rhs = Some(syntax::Tree::opr_app(opt_lhs, opr.elem, opt_rhs));
|
||||
}
|
||||
if !output.is_empty() {
|
||||
panic!(
|
||||
"Internal error. Not all tokens were consumed while constructing the
|
||||
expression."
|
||||
);
|
||||
}
|
||||
syntax::Tree::opr_section_boundary(opt_rhs.unwrap()) // fixme
|
||||
}
|
||||
|
||||
fn token_to_ast(elem: syntax::Item) -> syntax::Tree {
|
||||
match elem {
|
||||
syntax::Item::Token(token) => match token.variant {
|
||||
token::Variant::Ident(ident) => {
|
||||
let ii2 = token.with_variant(ident);
|
||||
syntax::tree::Tree::ident(ii2)
|
||||
}
|
||||
_ => panic!(),
|
||||
},
|
||||
syntax::Item::Tree(ast) => ast,
|
||||
}
|
||||
}
|
||||
|
||||
fn matched_segments_into_multi_segment_app<'s>(
|
||||
prefix_tokens: Option<Vec<syntax::Item<'s>>>,
|
||||
matched_segments: NonEmptyVec<(Token<'s>, Vec<syntax::Item<'s>>)>,
|
||||
) -> syntax::Tree<'s> {
|
||||
// FIXME: remove into_vec and use NonEmptyVec::mapped
|
||||
let segments = matched_segments
|
||||
.into_vec()
|
||||
.into_iter()
|
||||
.map(|segment| {
|
||||
let header = segment.0;
|
||||
let body =
|
||||
(!segment.1.is_empty()).as_some_from(|| resolve_operator_precedence(segment.1));
|
||||
syntax::tree::MultiSegmentAppSegment { header, body }
|
||||
})
|
||||
.collect_vec();
|
||||
if let Ok(segments) = NonEmptyVec::try_from(segments) {
|
||||
let prefix = prefix_tokens.map(resolve_operator_precedence);
|
||||
syntax::Tree::multi_segment_app(prefix, segments)
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =========================
|
||||
// === Macro Definitions ===
|
||||
// =========================
|
||||
|
||||
fn macro_if_then_else<'s>() -> macros::Definition<'s> {
|
||||
macro_definition! {
|
||||
("if", Pattern::Everything, "then", Pattern::Everything, "else", Pattern::Everything)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
fn macro_if_then<'s>() -> macros::Definition<'s> {
|
||||
macro_definition! {
|
||||
("if", Pattern::Everything, "then", Pattern::Everything)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
fn macro_group<'s>() -> macros::Definition<'s> {
|
||||
macro_definition! {
|
||||
("(", Pattern::Everything, ")", Pattern::Nothing)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
fn macro_lambda<'s>() -> macros::Definition<'s> {
|
||||
let prefix = Pattern::Or(
|
||||
Box::new(Pattern::Item(macros::pattern::Item { has_rhs_spacing: Some(false) })),
|
||||
Box::new(Pattern::Everything),
|
||||
);
|
||||
macro_definition! {
|
||||
(prefix, "->", Pattern::Everything)
|
||||
matched_segments_into_multi_segment_app
|
||||
}
|
||||
}
|
||||
|
||||
fn builtin_macros() -> MacroMatchTree<'static> {
|
||||
let mut macro_map = MacroMatchTree::default();
|
||||
macro_map.register(macro_if_then());
|
||||
macro_map.register(macro_if_then_else());
|
||||
macro_map.register(macro_group());
|
||||
macro_map.register(macro_lambda());
|
||||
macro_map
|
||||
}
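// A minimal sketch (illustration, not from the original sources): registering an additional
// macro follows the same shape as the builtin definitions above. The `while ... do ...` macro
// and the helper names below are made up for this example; `macro_definition!` and
// `MacroMatchTree::register` are the same APIs used by `builtin_macros`.
fn macro_while_do<'s>() -> macros::Definition<'s> {
    macro_definition! {
        ("while", Pattern::Everything, "do", Pattern::Everything)
        matched_segments_into_multi_segment_app
    }
}

fn extended_macros() -> MacroMatchTree<'static> {
    let mut macro_map = builtin_macros();
    macro_map.register(macro_while_do());
    macro_map
}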
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Main ===
|
||||
// ============
|
||||
|
||||
// fn main() {
|
||||
// lexer::lexer_main();
|
||||
// }
|
||||
|
||||
fn main() {
|
||||
init_tracing(TRACE);
|
||||
// let str = "if a then b else c";
|
||||
// let str = "if if * a + b * then y then b";
|
||||
// let str = "* a + b *";
|
||||
// let str = "* a + * b";
|
||||
// let str = "(a) (b) c";
|
||||
// let str = "if (a) then b";
|
||||
// let str = "foo a-> b";
|
||||
// let str = "a+b * c";
|
||||
// let str = "foo if a then b";
|
||||
// let str = "foo *(a)";
|
||||
let str = "foo if a then b else c";
|
||||
let mut lexer = Lexer::new(str);
|
||||
lexer.run();
|
||||
|
||||
let root_macro_map = builtin_macros();
|
||||
|
||||
event!(TRACE, "Registered macros:\n{:#?}", root_macro_map);
|
||||
|
||||
let resolver = Resolver::new_root();
|
||||
let ast = resolver.run(
|
||||
&lexer,
|
||||
&root_macro_map,
|
||||
lexer.output.iter().map(|t| t.clone().into()).collect_vec(),
|
||||
);
|
||||
println!("{:#?}", ast);
|
||||
println!("\n\n{}", ast.code());
|
||||
|
||||
println!("\n\n==================\n\n");
|
||||
|
||||
lexer::main();
|
||||
}
|
||||
//
|
||||
//
|
||||
//
|
||||
// // =============
|
||||
// // === Tests ===
|
||||
// // =============
|
||||
//
|
||||
// #[cfg(test)]
|
||||
// mod test {
|
||||
// use super::*;
|
||||
//
|
||||
// pub fn ident(repr: &str) -> syntax::Tree {
|
||||
// match token::Variant::to_ident_unchecked(repr) {
|
||||
// token::Variant::Ident(ident) => span::With::new_no_left_offset_no_start(
|
||||
// Bytes::from(repr.len()),
|
||||
// syntax::tree::Type::from(syntax::tree::Ident(ident)),
|
||||
// ),
|
||||
// _ => panic!(),
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// pub fn app_segment(
|
||||
// header: Token,
|
||||
// body: Option<syntax::Tree>,
|
||||
// ) -> syntax::tree::MultiSegmentAppSegment {
|
||||
// syntax::tree::MultiSegmentAppSegment { header, body }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
//
|
||||
//
|
||||
// #[cfg(test)]
|
||||
// mod tests {
|
||||
// use super::*;
|
||||
// use enso_parser_syntax_tree_builder::ast_builder;
|
||||
//
|
||||
// fn one_shot(input: &str) -> syntax::Tree {
|
||||
// let mut lexer = Lexer::new(input);
|
||||
// lexer.run();
|
||||
// let root_macro_map = builtin_macros();
|
||||
// let resolver = Resolver::new_root();
|
||||
// let ast = resolver.run(
|
||||
// &lexer,
|
||||
// &root_macro_map,
|
||||
// lexer.output.borrow_vec().iter().map(|t| (*t).into()).collect_vec(),
|
||||
// );
|
||||
// ast
|
||||
// }
|
||||
//
|
||||
// macro_rules! test_parse {
|
||||
// ($input:tt = {$($def:tt)*}) => {
|
||||
// assert_eq!(
|
||||
// one_shot($input).with_removed_span_info(),
|
||||
// ast_builder! { $($def)* }.with_removed_span_info()
|
||||
// )
|
||||
// };
|
||||
// }
|
||||
//
|
||||
// #[test]
|
||||
// fn test_expressions() {
|
||||
// test_parse!("if a then b" = { {if} a {then} b });
|
||||
// test_parse!("if a then b else c" = { {if} a {then} b {else} c });
|
||||
// test_parse!("if a b then c d else e f" = { {if} a b {then} c d {else} e f });
|
||||
// }
|
||||
// }
|
@ -1,9 +0,0 @@
|
||||
//! The logic for working with operators in the Enso parser.
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod associativity;
|
||||
pub mod precedence;
|
@ -1 +0,0 @@
|
||||
//! Associativity inference for Enso.
|
@ -1 +0,0 @@
|
||||
//! Operator precedence levels.
|
@ -1,18 +0,0 @@
|
||||
//! The driver for the Enso parser.
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Parser ===
|
||||
// ==============
|
||||
|
||||
/// The Enso parser itself.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
pub struct Parser;
|
||||
|
||||
impl Parser {
|
||||
/// Constructor.
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
}
|
25 lib/rust/parser/src/source.rs Normal file
@ -0,0 +1,25 @@
|
||||
//! Enso language source code related utilities, including a structure attaching source code to
//! other types and an abstraction for getting the representation of an entity, such as
//! [`Token`] (tokens remember only their location; to get their representation, the source code
//! needs to be sampled).
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod code;
|
||||
pub mod span;
|
||||
|
||||
pub use code::Code;
|
||||
pub use span::Offset;
|
||||
pub use span::Span;
|
||||
pub use span::VisibleOffset;
|
||||
|
||||
|
||||
|
||||
/// Popular traits.
|
||||
pub mod traits {
|
||||
pub use super::span::traits::*;
|
||||
}
|
||||
pub use traits::*;
|
66 lib/rust/parser/src/source/code.rs Normal file
@ -0,0 +1,66 @@
|
||||
//! Source code abstraction.
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Code ===
|
||||
// ============
|
||||
|
||||
/// A code representation. It can either be a borrowed source code or a modified owned one.
|
||||
#[derive(Clone, Default, Eq, PartialEq, From, Into, Shrinkwrap)]
|
||||
#[shrinkwrap(mutable)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Code<'s> {
|
||||
pub repr: Cow<'s, str>,
|
||||
}
|
||||
|
||||
impl<'s> Code<'s> {
|
||||
/// Length of the code in bytes.
|
||||
#[inline(always)]
|
||||
pub fn len(&self) -> Bytes {
|
||||
Bytes(self.repr.len())
|
||||
}
|
||||
}
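// A minimal sketch (illustration, not from the original sources): `Code` borrows the source
// when built from a `&str`, reports its length in bytes, and compares directly with `&str`.
#[test]
fn code_len_example() {
    let code = Code::from("foo");
    assert_eq!(code.len(), Bytes(3));
    assert_eq!(code, "foo");
}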
|
||||
|
||||
impl<'a> From<&'a str> for Code<'a> {
|
||||
#[inline(always)]
|
||||
fn from(str: &'a str) -> Self {
|
||||
let repr = str.into();
|
||||
Self { repr }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Display for Code<'s> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
Display::fmt(&self.repr, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Debug for Code<'s> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
Debug::fmt(&self.repr, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b> PartialEq<&'b str> for Code<'a> {
|
||||
#[inline(always)]
|
||||
fn eq(&self, other: &&'b str) -> bool {
|
||||
self.repr.eq(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for Code<'_> {
|
||||
#[inline(always)]
|
||||
fn as_ref(&self) -> &str {
|
||||
&self.repr
|
||||
}
|
||||
}
|
||||
|
||||
impl std::borrow::Borrow<str> for Code<'_> {
|
||||
#[inline(always)]
|
||||
fn borrow(&self) -> &str {
|
||||
&self.repr
|
||||
}
|
||||
}
|
486 lib/rust/parser/src/source/span.rs Normal file
@ -0,0 +1,486 @@
|
||||
//! Source code location. Every token and AST node uses [`Offset`] to remember its location in the
//! source code.
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::source::*;
|
||||
use crate::syntax::*;
|
||||
|
||||
use crate::lexer;
|
||||
|
||||
|
||||
|
||||
/// Common traits.
|
||||
pub mod traits {
|
||||
pub use super::FirstChildTrim;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =====================
|
||||
// === VisibleOffset ===
|
||||
// =====================
|
||||
|
||||
/// A strongly typed visible offset size. For example, a space character has a value of 1, while
/// the tab character has a value of 4. For other space-like character sizes, refer to the lexer
/// implementation.
|
||||
#[derive(
|
||||
Clone, Copy, Debug, Default, From, Into, Add, AddAssign, Sub, PartialEq, Eq, Hash, PartialOrd,
|
||||
Ord
|
||||
)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct VisibleOffset {
|
||||
pub width_in_spaces: usize,
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
#[allow(non_snake_case)]
|
||||
pub const fn VisibleOffset(width_in_spaces: usize) -> VisibleOffset {
|
||||
VisibleOffset { width_in_spaces }
|
||||
}
|
||||
|
||||
impl Display for VisibleOffset {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
Display::fmt(&self.width_in_spaces, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for VisibleOffset {
|
||||
fn from(code: &str) -> Self {
|
||||
code.chars()
|
||||
.map(|char| lexer::space_char_visible_size(char).unwrap_or(VisibleOffset(1)))
|
||||
.fold(default(), Add::add)
|
||||
}
|
||||
}
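// A minimal sketch (illustration, not from the original sources): visible offsets are computed
// from the raw offset code. Per the docs above, a space counts as 1 and a tab as 4; the exact
// widths come from `lexer::space_char_visible_size`.
#[test]
fn visible_offset_example() {
    assert_eq!(VisibleOffset::from("  "), VisibleOffset(2));
    assert_eq!(VisibleOffset::from(" \t"), VisibleOffset(5));
}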
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Offset ===
|
||||
// ==============
|
||||
|
||||
/// Offset information. In most cases it is used to express the left-hand-side whitespace offset
|
||||
/// for tokens and AST nodes.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Offset<'s> {
|
||||
pub visible: VisibleOffset,
|
||||
pub code: Code<'s>,
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
#[allow(non_snake_case)]
|
||||
pub fn Offset<'s>(visible: VisibleOffset, code: impl Into<Code<'s>>) -> Offset<'s> {
|
||||
let code = code.into();
|
||||
Offset { visible, code }
|
||||
}
|
||||
|
||||
impl<'s> Offset<'s> {
|
||||
/// Length of the offset.
|
||||
pub fn len(&self) -> Bytes {
|
||||
self.code.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> AsRef<Offset<'s>> for Offset<'s> {
|
||||
fn as_ref(&self) -> &Offset<'s> {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<&'s str> for Offset<'s> {
|
||||
#[inline(always)]
|
||||
fn from(code: &'s str) -> Self {
|
||||
Offset(code.into(), code)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Span ===
|
||||
// ============
|
||||
|
||||
/// A span of a given syntactic element (token or AST). It contains the left offset code and the
|
||||
/// information about the length of the element. It does not contain the code of the element. This
|
||||
/// is done in order to not duplicate the data. For example, some AST nodes contain a lot of tokens.
|
||||
/// They need to remember their span, but they do not need to remember their code, because it is
|
||||
/// already stored in the tokens.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Span<'s> {
|
||||
pub left_offset: Offset<'s>,
|
||||
/// The length of the code, excluding [`left_offset`].
|
||||
pub code_length: Bytes,
|
||||
}
|
||||
|
||||
impl<'s> Span<'s> {
|
||||
/// Extend the span with another one. The other span has to be the immediate neighbor of the
|
||||
/// current span.
|
||||
#[inline(always)]
|
||||
pub fn extend<'a, T>(&mut self, other: T)
|
||||
where
|
||||
T: Into<Ref<'s, 'a>>,
|
||||
's: 'a, {
|
||||
let other = other.into();
|
||||
self.code_length += other.left_offset.len() + other.code_length;
|
||||
}
|
||||
|
||||
/// Self consuming version of [`extend`].
|
||||
pub fn extended<'a, T>(mut self, other: T) -> Self
|
||||
where
|
||||
T: Into<Ref<'s, 'a>>,
|
||||
's: 'a, {
|
||||
self.extend(other);
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the [`Ref`] of the current span.
|
||||
pub fn as_ref(&self) -> Ref<'_, 's> {
|
||||
Ref { left_offset: &self.left_offset, code_length: self.code_length }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> AsRef<Span<'s>> for Span<'s> {
|
||||
fn as_ref(&self) -> &Span<'s> {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === Ref ===
|
||||
// ===========
|
||||
|
||||
/// A borrowed version of [`Span`]. Used mostly by AST visitors.
|
||||
///
|
||||
/// One may wonder why this struct is needed, because it looks like we could use [`&Span<'s>`]
/// instead. The problem is that some structs, such as [`Token`], do not contain [`Span<'s>`], but
/// they contain information that a [`Ref`] can be constructed from.
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Ref<'s, 'a> {
|
||||
pub left_offset: &'a Offset<'s>,
|
||||
/// The length of the code, excluding [`left_offset`].
|
||||
pub code_length: Bytes,
|
||||
}
|
||||
|
||||
impl<'s, 'a> From<&'a Span<'s>> for Ref<'s, 'a> {
|
||||
#[inline(always)]
|
||||
fn from(span: &'a Span<'s>) -> Self {
|
||||
let left_offset = &span.left_offset;
|
||||
let code_length = span.code_length;
|
||||
Self { left_offset, code_length }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === RefMut ===
|
||||
// ==============
|
||||
|
||||
/// A mutably borrowed version of [`Span`]. Used mostly by AST visitors.
|
||||
///
|
||||
/// Please note that the [`code_length`] field does not provide mutable access. Each AST node can
/// contain other AST nodes and tokens. The span of an AST node is computed based on the spans of
/// the tokens it contains. Thus, you should never modify the [`code_length`] property; modify the
/// AST structure instead and this field will be recomputed automatically.
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct RefMut<'s, 'a> {
|
||||
pub left_offset: &'a mut Offset<'s>,
|
||||
/// The length of the code, excluding [`left_offset`].
|
||||
pub code_length: Bytes,
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === FirstChildTrim ===
|
||||
// ======================
|
||||
|
||||
/// Trim the left offset and return a new [`Span`] containing the trimmed offset and the length of
|
||||
/// the code.
|
||||
///
|
||||
/// It is used to prepare this element for insertion into a parent AST node. Left offsets are kept
/// in a hierarchical way in the AST. For example, the expression ` a b` will be represented as two
/// tokens `a` and `b`, each having a left offset of 1. However, after constructing the [`App`] AST
/// node, the left offset of the `a` token will be removed and moved to the AST node instead. This
/// function is responsible for exactly this operation.
|
||||
#[allow(missing_docs)]
|
||||
pub trait FirstChildTrim<'s> {
|
||||
fn trim_as_first_child(&mut self) -> Span<'s>;
|
||||
}
|
||||
|
||||
impl<'s> FirstChildTrim<'s> for Span<'s> {
|
||||
#[inline(always)]
|
||||
fn trim_as_first_child(&mut self) -> Span<'s> {
|
||||
let left_offset = mem::take(&mut self.left_offset);
|
||||
let code_length = self.code_length;
|
||||
Span { left_offset, code_length }
|
||||
}
|
||||
}
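// A minimal sketch (illustration, not from the original sources): trimming moves the left
// offset out of the child and leaves the child with an empty offset, so a parent node can take
// ownership of the leading whitespace, as described in the `FirstChildTrim` docs above.
#[test]
fn trim_as_first_child_example() {
    let mut child = Span { left_offset: Offset::from(" "), code_length: Bytes(1) };
    let trimmed = child.trim_as_first_child();
    assert_eq!(trimmed.left_offset.visible, VisibleOffset(1));
    assert_eq!(trimmed.code_length, Bytes(1));
    assert_eq!(child.left_offset, Offset::default());
}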
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Builder ===
|
||||
// ===============
|
||||
|
||||
/// A span builder. You can provide it with any elements that contain spans, and it will compute
|
||||
/// the total span of the provided elements.
|
||||
#[macro_export]
|
||||
macro_rules! span_builder {
|
||||
($($arg:ident),* $(,)?) => {
|
||||
$crate::source::span::Builder::new() $(.add(&mut $arg))* .span
|
||||
};
|
||||
}
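// A minimal sketch (illustration, not from the original sources): using `span_builder!` to
// compute the total span of two adjacent children, as an AST constructor would. The first
// child is trimmed (its left offset moves to the parent) and each following child extends the
// resulting span.
#[test]
fn span_builder_example() {
    // `func` covers "foo" with no left offset; `arg` covers "bar" preceded by a single space.
    let mut func = Span { left_offset: Offset::from(""), code_length: Bytes(3) };
    let mut arg = Span { left_offset: Offset::from(" "), code_length: Bytes(3) };
    let total = span_builder![func, arg];
    // 3 bytes of `func` + 1 byte of offset + 3 bytes of `arg`.
    assert_eq!(total.code_length, Bytes(7));
}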
|
||||
|
||||
/// A marker struct for span building. The [`T`] parameter can be one of:
|
||||
/// - [`()`], which means that the structure was not used yet.
|
||||
/// - [`Option<Span<'s>>`], which means that the struct was used to build the span, however, we are
|
||||
/// unsure whether the span is known in all the cases.
|
||||
/// - [`Span<'s>`], which means that the total span can be always computed for the provided
|
||||
/// parameters.
|
||||
#[derive(Default, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Builder<T = ()> {
|
||||
pub span: T,
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
#[allow(non_snake_case)]
|
||||
pub fn Builder<T>(span: T) -> Builder<T> {
|
||||
Builder { span }
|
||||
}
|
||||
|
||||
impl Builder<()> {
|
||||
/// Constructor.
|
||||
pub fn new() -> Self {
|
||||
default()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Builder<T> {
|
||||
/// Add a new span to the builder.
|
||||
#[inline(always)]
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn add<S>(self, elem: &mut S) -> Builder<S::Output>
|
||||
where S: Build<T> {
|
||||
Builder(elem.build(self))
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait defining the behavior of [`Builder`] for different types containing spans.
|
||||
///
|
||||
/// The trait definition is a little bit strange, consuming the builder as a parameter instead of
/// consuming it as self. This is done because otherwise the Rust type checker goes into an
/// infinite loop.
|
||||
#[allow(missing_docs)]
|
||||
pub trait Build<T> {
|
||||
type Output;
|
||||
fn build(&mut self, builder: Builder<T>) -> Self::Output;
|
||||
}
|
||||
|
||||
|
||||
// === Instances ===
|
||||
|
||||
impl<'s> Build<()> for Span<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
|
||||
self.trim_as_first_child()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Span<'s>> for Span<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
builder.span.extended(&*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Option<Span<'s>>> for Span<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
match builder.span {
|
||||
Some(span) => span.extended(&*self),
|
||||
None => self.trim_as_first_child(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<()> for Tree<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<()>) -> Self::Output {
|
||||
Build::build(&mut self.span, builder)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Span<'s>> for Tree<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
builder.span.extended(&self.span)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Build<Option<Span<'s>>> for Tree<'s> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
Build::build(&mut self.span, builder)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<()> for Token<'s, T> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, _builder: Builder<()>) -> Self::Output {
|
||||
self.trim_as_first_child()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for Token<'s, T> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
builder.span.extended(self.span())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for Token<'s, T> {
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
match builder.span {
|
||||
Some(span) => span.extended(self.span()),
|
||||
None => self.trim_as_first_child(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Build<()> for Option<T>
|
||||
where T: Build<()>
|
||||
{
|
||||
type Output = Option<<T as Build<()>>::Output>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<()>) -> Self::Output {
|
||||
self.as_mut().map(|t| Build::build(t, builder))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for Option<T>
|
||||
where T: Build<Option<Span<'s>>>
|
||||
{
|
||||
type Output = Option<<T as Build<Option<Span<'s>>>>::Output>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
self.as_mut().map(|t| Build::build(t, builder))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for Option<T>
|
||||
where T: Build<Span<'s>, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
match self.as_mut() {
|
||||
None => builder.span,
|
||||
Some(t) => Build::build(t, builder),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T, E> Build<S> for Result<T, E>
|
||||
where
|
||||
T: Build<S>,
|
||||
E: Build<S, Output = <T as Build<S>>::Output>,
|
||||
{
|
||||
type Output = <T as Build<S>>::Output;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<S>) -> Self::Output {
|
||||
match self {
|
||||
Ok(t) => Build::build(t, builder),
|
||||
Err(t) => Build::build(t, builder),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T> Build<S> for NonEmptyVec<T>
|
||||
where
|
||||
T: Build<S>,
|
||||
[T]: Build<<T as Build<S>>::Output>,
|
||||
{
|
||||
type Output = <[T] as Build<T::Output>>::Output;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<S>) -> Self::Output {
|
||||
let b = Build::build(self.first_mut(), builder);
|
||||
Build::build(self.tail_mut(), Builder(b))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for Vec<T>
|
||||
where T: Build<Span<'s>, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out))
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for Vec<T>
|
||||
where
|
||||
T: Build<Option<Span<'s>>>,
|
||||
T::Output: Into<Option<Span<'s>>>,
|
||||
{
|
||||
type Output = Option<Span<'s>>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out)).into();
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Span<'s>> for [T]
|
||||
where T: Build<Span<'s>, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Span<'s>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out));
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> Build<Option<Span<'s>>> for [T]
|
||||
where
|
||||
T: Build<Option<Span<'s>>>,
|
||||
T::Output: Into<Option<Span<'s>>>,
|
||||
{
|
||||
type Output = Option<Span<'s>>;
|
||||
#[inline(always)]
|
||||
fn build(&mut self, builder: Builder<Option<Span<'s>>>) -> Self::Output {
|
||||
let mut out = builder.span;
|
||||
for elem in self {
|
||||
out = Build::build(elem, Builder(out)).into();
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
15 lib/rust/parser/src/syntax.rs Normal file
@ -0,0 +1,15 @@
|
||||
//! Syntactic structures, including [`Token`] and [`Tree`], the latter also known as the Abstract
//! Syntax Tree, or AST.
|
||||
|
||||
|
||||
// ==============
|
||||
// === Export ===
|
||||
// ==============
|
||||
|
||||
pub mod item;
|
||||
pub mod token;
|
||||
pub mod tree;
|
||||
|
||||
pub use item::Item;
|
||||
pub use token::Token;
|
||||
pub use tree::Tree;
|
83 lib/rust/parser/src/syntax/item.rs Normal file
@ -0,0 +1,83 @@
|
||||
//! Syntactic structures, including [`Token`] and [`Tree`], the latter also known as the Abstract
//! Syntax Tree, or AST.
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::source::*;
|
||||
use crate::syntax::*;
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Item ===
|
||||
// ============
|
||||
|
||||
/// Abstraction for [`Token`] and [`Tree`]. Some functions, such as the macro resolver, need to
/// distinguish between the two cases and handle both incoming tokens and already constructed
/// [`Tree`] nodes. This structure provides handy utilities to work with such cases.
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Item<'s> {
|
||||
Token(Token<'s>),
|
||||
Tree(Tree<'s>),
|
||||
}
|
||||
|
||||
impl<'s> Item<'s> {
|
||||
/// Check whether the element is the provided token variant. Returns [`false`] if it is a
/// [`Tree`] node.
|
||||
pub fn is_variant(&self, variant: token::variant::VariantMarker) -> bool {
|
||||
match self {
|
||||
Item::Token(token) => token.is(variant),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// [`location::Span`] of the element.
|
||||
pub fn span(&self) -> span::Ref<'_, 's> {
|
||||
match self {
|
||||
Self::Token(t) => t.span(),
|
||||
Self::Tree(t) => t.span.as_ref(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> FirstChildTrim<'s> for Item<'s> {
|
||||
#[inline(always)]
|
||||
fn trim_as_first_child(&mut self) -> Span<'s> {
|
||||
match self {
|
||||
Self::Token(t) => t.trim_as_first_child(),
|
||||
Self::Tree(t) => t.span.trim_as_first_child(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<Token<'s>> for Item<'s> {
|
||||
fn from(t: Token<'s>) -> Self {
|
||||
Item::Token(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<Tree<'s>> for Item<'s> {
|
||||
fn from(t: Tree<'s>) -> Self {
|
||||
Item::Tree(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> TryAsRef<Item<'s>> for Item<'s> {
|
||||
fn try_as_ref(&self) -> Option<&Item<'s>> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === Ref ===
|
||||
// ===========
|
||||
|
||||
/// A borrowed version of [`Item`]. Used mostly by AST visitors.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Ref<'s, 'a> {
|
||||
Token(token::Ref<'s, 'a>),
|
||||
Tree(&'a Tree<'s>),
|
||||
}
|
323
lib/rust/parser/src/syntax/token.rs
Normal file
@ -0,0 +1,323 @@
|
||||
//! A lexical token is a string with an assigned and thus identified meaning. Each token remembers
|
||||
//! its source code and can be printed back. It also contains information about the offset to the
|
||||
//! previous token if any.
|
||||
//!
|
||||
//! The [`Token`] structure has a signature of [`Token<'s, T>`], where [`T`] is the variant type.
|
||||
//!
|
||||
//!
|
||||
//! # Variants
|
||||
//! Each token contains a variant, a structure defining the meaning of the token. All variants are
|
||||
//! defined in the [`variant`] module. Every variant is associated with a constructor of the same
|
||||
//! name (tuple-struct like). For example, the [`variant::Ident`] is defined as:
|
||||
//!
|
||||
//! ```text
|
||||
//! pub mod variant {
|
||||
//! pub struct Ident {
|
||||
//! pub is_free: bool,
|
||||
//! pub lift_level: usize
|
||||
//! }
|
||||
//! pub fn Ident(is_free: bool, lift_level: usize) -> Ident { ... }
|
||||
//! // ... many more variants
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//!
|
||||
//! # Variants as tokens
|
||||
//! The [`Token`] structure can be parametrized with a variant type to form a token variant. This
|
||||
//! module defines type aliases for every such combination. For example, the [`Ident`] token
|
||||
//! variant is defined as:
|
||||
//!
|
||||
//! ```text
|
||||
//! pub type Ident<'s> = Token<'s, variant::Ident>;
|
||||
//! ```
|
||||
//!
|
||||
//! There is a [`From`] conversion defined between any [`Token<'s, T>`] and [`Token<'s>`] for [`T`]
|
||||
//! being one of the variant structs. Moreover, every such type is accompanied by two constructor utils,
|
||||
//! one creating a token variant and one creating a generic token instance. For example, the
|
||||
//! [`Ident`] token variant constructors are defined as:
|
||||
//!
|
||||
//! ```text
|
||||
//! pub fn ident <'s> (is_free: bool, lift_level: usize) -> Ident<'s> { ... }
|
||||
//! pub fn ident_ <'s> (is_free: bool, lift_level: usize) -> Token<'s> { ... }
|
||||
//! ```
|
||||
//!
|
||||
//!
|
||||
//! # The [`Variant`] type.
|
||||
//! There are many variants of tokens; however, some places in the code need to distinguish them,
|
||||
//! while some need to store several variants in the same collection. The [`Variant`] enum
|
||||
//! generalizes the variant types:
|
||||
//!
|
||||
//! ```text
|
||||
//! pub enum Variant {
|
||||
//! Newline (variant::Newline),
|
||||
//! Symbol (variant::Symbol),
|
||||
//! Wildcard (variant::Wildcard),
|
||||
//! Ident (variant::Ident),
|
||||
//! // ... many more
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! There is a [`From`] conversion defined between each variant and the [`Variant`] enum.
//! Moreover, the [`Variant`] enum defines a constructor function for each of its variants. For
|
||||
//! example, the identifier variant constructor is defined as:
|
||||
//!
|
||||
//! ```text
|
||||
//! impl Variant {
|
||||
//! pub fn ident(is_free: bool, lift_level: usize) -> Self {
|
||||
//! Self::Ident(variant::Ident(is_free, lift_level))
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! # Generic token type
|
||||
//! The [`Token`] structure has a default parametrization of [`Token<'s, Variant>`], which is a
//! token containing any of the defined variants.
|
||||
//!
|
||||
//!
|
||||
//!
|
||||
//! # Variant markers
|
||||
//! There is also a special enum, [`VariantMarker`], which can be used to mark which token
|
||||
//! variant is used without keeping any of the variant data. It is defined as:
|
||||
//!
|
||||
//! ```text
|
||||
//! pub enum VariantMarker {
|
||||
//! Newline,
|
||||
//! Symbol,
|
||||
//! Wildcard,
|
||||
//! Ident,
|
||||
//! // ... many more
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! See the definitions and macros below to learn more.
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::source::*;
|
||||
|
||||
use enso_shapely_macros::tagged_enum;
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Token ===
|
||||
// =============
|
||||
|
||||
/// The lexical token definition. See the module docs to learn more about its usage scenarios.
|
||||
#[derive(Clone, Deref, DerefMut, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Token<'s, T = Variant> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
pub variant: T,
|
||||
pub left_offset: Offset<'s>,
|
||||
pub code: Code<'s>,
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
#[inline(always)]
|
||||
#[allow(non_snake_case)]
|
||||
pub fn Token<'s, T>(
|
||||
left_offset: impl Into<Offset<'s>>,
|
||||
code: impl Into<Code<'s>>,
|
||||
variant: T,
|
||||
) -> Token<'s, T> {
|
||||
let left_offset = left_offset.into();
|
||||
let code = code.into();
|
||||
Token { variant, left_offset, code }
|
||||
}
|
||||
|
||||
impl<'s, T> Token<'s, T> {
|
||||
/// Split the token at the provided byte offset. The offset is counted from the [`code`] start
|
||||
/// position, which does not include the [`left_offset`]. It means that `split_at(Bytes(0))`
|
||||
/// will split the token into left offset only and a left-trimmed token.
|
||||
#[inline(always)]
|
||||
pub fn split_at(self, offset: Bytes) -> (Token<'s, ()>, Token<'s, ()>, T) {
|
||||
let left_lexeme_offset = self.left_offset;
|
||||
let right_lexeme_offset = Offset::default();
|
||||
let left = Token(left_lexeme_offset, self.code.slice(Bytes(0)..offset), ());
|
||||
let right = Token(right_lexeme_offset, self.code.slice(offset..), ());
|
||||
(left, right, self.variant)
|
||||
}
|
||||
|
||||
/// A version of [`split_at`] that discards the associated variant.
|
||||
#[inline(always)]
|
||||
pub fn split_at_(self, offset: Bytes) -> (Token<'s, ()>, Token<'s, ()>) {
|
||||
let (left, right, _) = self.split_at(offset);
|
||||
(left, right)
|
||||
}
|
||||
|
||||
/// Modify the associated variant of this token with the provided function.
|
||||
#[inline(always)]
|
||||
pub fn map_variant<S>(self, f: impl FnOnce(T) -> S) -> Token<'s, S> {
|
||||
Token(self.left_offset, self.code, f(self.variant))
|
||||
}
|
||||
|
||||
/// Replace the associated variant in this token.
|
||||
#[inline(always)]
|
||||
pub fn with_variant<S>(self, data: S) -> Token<'s, S> {
|
||||
self.map_variant(|_| data)
|
||||
}
|
||||
|
||||
/// Span of this token.
|
||||
pub fn span<'a>(&'a self) -> span::Ref<'s, 'a> {
|
||||
let code_length = self.code.len();
|
||||
span::Ref { left_offset: &self.left_offset, code_length }
|
||||
}
|
||||
}
|
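// A minimal sketch of how the helpers above compose (the function name and the split offset are
// illustrative): splitting a token and re-tagging both halves with an existing variant.
fn split_into_symbols(token: Token<'_>) -> (Symbol<'_>, Symbol<'_>) {
    let (left, right) = token.split_at_(Bytes(1));
    (left.with_variant(variant::Symbol()), right.with_variant(variant::Symbol()))
}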
||||
|
||||
impl<'s, T: Debug> Debug for Token<'s, T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "[{}:\"{}\"] ", self.left_offset.visible, self.code)?;
|
||||
Debug::fmt(&self.variant, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: PartialEq> PartialEq<Token<'s, T>> for &Token<'s, T> {
|
||||
fn eq(&self, other: &Token<'s, T>) -> bool {
|
||||
<Token<'s, T> as PartialEq<Token<'s, T>>>::eq(*self, other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T> FirstChildTrim<'s> for Token<'s, T> {
|
||||
#[inline(always)]
|
||||
fn trim_as_first_child(&mut self) -> Span<'s> {
|
||||
let left_offset = mem::take(&mut self.left_offset);
|
||||
let code_length = self.code.len();
|
||||
Span { left_offset, code_length }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========
|
||||
// === Ref ===
|
||||
// ===========
|
||||
|
||||
/// A reference to a [`Token`]. It is used mostly by AST visitors.
|
||||
///
|
||||
/// An important question is why we don't just use [`&Token<'s, T>`] instead. The
|
||||
/// reason for that is that sometimes AST nodes contain [`Token<'s, T>`] for a specific [`T`] and
|
||||
/// we want to traverse them for any possible variant, thus converting [`T`] to [`token::Variant`]
|
||||
/// first. However, we do not want to clone the code during such an operation. This struct allows
|
||||
/// viewing any [`Token<'s, T>`] as [`Ref<'s, token::Variant>`].
|
||||
#[derive(Clone, Copy, Deref, DerefMut, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Ref<'s, 'a, T = Variant> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
pub data: T,
|
||||
pub left_offset: &'a Offset<'s>,
|
||||
pub code: &'a Code<'s>,
|
||||
}
|
||||
|
||||
impl<'s, 'a, T, S> From<&'a Token<'s, T>> for Ref<'s, 'a, S>
|
||||
where T: Copy + Into<S>
|
||||
{
|
||||
fn from(token: &'a Token<'s, T>) -> Self {
|
||||
Ref {
|
||||
data: token.variant.into(),
|
||||
left_offset: &token.left_offset,
|
||||
code: &token.code,
|
||||
}
|
||||
}
|
||||
}
|
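// A minimal sketch of the intended use (the function name is illustrative): a strongly-typed
// token can be viewed as a generic `Ref` without cloning its code.
fn as_generic_ref<'s, 'a>(token: &'a Ident<'s>) -> Ref<'s, 'a> {
    token.into()
}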
||||
|
||||
impl<'s, 'a, T: Debug> Debug for Ref<'s, 'a, T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "[off: {}, repr: \"{}\"] ", self.left_offset.visible, self.code)?;
|
||||
Debug::fmt(&self.data, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Variant ===
|
||||
// ===============
|
||||
|
||||
/// Macro providing the [`Token`] type definition. It is used both to define the token [`Variant`]
/// and to define impls for every token type in other modules.
|
||||
#[macro_export]
|
||||
macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)*
|
||||
/// Elements that can be found in the source code.
|
||||
#[tagged_enum]
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Variant {
|
||||
Newline,
|
||||
Symbol,
|
||||
BlockStart,
|
||||
BlockEnd,
|
||||
Wildcard {
|
||||
pub lift_level: usize
|
||||
},
|
||||
Ident {
|
||||
pub is_free: bool,
|
||||
pub lift_level: usize
|
||||
},
|
||||
Operator,
|
||||
Modifier,
|
||||
Comment,
|
||||
DocComment,
|
||||
Number,
|
||||
TextStart,
|
||||
TextEnd,
|
||||
TextSection,
|
||||
TextEscape,
|
||||
}
|
||||
}}}
|
||||
|
||||
macro_rules! generate_token_aliases {
|
||||
(
|
||||
$(#$enum_meta:tt)*
|
||||
pub enum $enum:ident {
|
||||
$(
|
||||
$(#$variant_meta:tt)*
|
||||
$variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })?
|
||||
),* $(,)?
|
||||
}
|
||||
) => { paste!{
|
||||
$(
|
||||
/// Token variant alias.
|
||||
pub type $variant<'s> = Token<'s, variant::$variant>;
|
||||
|
||||
/// Constructor.
|
||||
pub fn [<$variant:snake:lower>]<'s> (
|
||||
left_offset: impl Into<Offset<'s>>,
|
||||
code: impl Into<Code<'s>>,
|
||||
$($($field : $field_ty),*)?
|
||||
) -> $variant<'s> {
|
||||
Token(left_offset, code, variant::$variant($($($field),*)?))
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn [<$variant:snake:lower _>]<'s> (
|
||||
left_offset: impl Into<Offset<'s>>,
|
||||
code: impl Into<Code<'s>>,
|
||||
$($($field : $field_ty),*)?
|
||||
) -> Token<'s> {
|
||||
Token(left_offset, code, variant::$variant($($($field),*)?)).into()
|
||||
}
|
||||
|
||||
impl<'s> From<Token<'s, variant::$variant>> for Token<'s, Variant> {
|
||||
fn from(token: Token<'s, variant::$variant>) -> Self {
|
||||
token.map_variant(|t| t.into())
|
||||
}
|
||||
}
|
||||
)*
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! define_token_type {
|
||||
($($ts:tt)*) => {
|
||||
/// All token variants.
|
||||
pub mod variant {
|
||||
use super::*;
|
||||
$($ts)*
|
||||
}
|
||||
generate_token_aliases! { $($ts)* }
|
||||
};
|
||||
}
|
||||
|
||||
with_token_definition!(define_token_type());
|
||||
pub use variant::Variant;
|
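// For reference, a sketch of what the machinery above generates for a single field-less variant
// such as `Newline` (the real expansion is produced by `generate_token_aliases!`; the offset and
// code argument signatures are abbreviated here):
//
//     pub type Newline<'s> = Token<'s, variant::Newline>;
//     pub fn newline<'s>(left_offset: ..., code: ...) -> Newline<'s> { ... }
//     pub fn newline_<'s>(left_offset: ..., code: ...) -> Token<'s> { ... }
//     impl<'s> From<Token<'s, variant::Newline>> for Token<'s, Variant> { ... }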
574
lib/rust/parser/src/syntax/tree.rs
Normal file
@ -0,0 +1,574 @@
|
||||
//! Implementation of the Syntax Tree, also known as the Abstract Syntax Tree, or AST.
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::source::*;
|
||||
use crate::syntax::*;
|
||||
|
||||
use crate::span_builder;
|
||||
|
||||
use enso_parser_syntax_tree_visitor::Visitor;
|
||||
use enso_shapely_macros::tagged_enum;
|
||||
|
||||
|
||||
|
||||
// ============
|
||||
// === Tree ===
|
||||
// ============
|
||||
|
||||
/// The Abstract Syntax Tree of the language.
|
||||
#[derive(Clone, Deref, DerefMut, Eq, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Tree<'s> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
pub variant: Box<Variant<'s>>,
|
||||
pub span: Span<'s>,
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
#[allow(non_snake_case)]
|
||||
pub fn Tree<'s>(span: Span<'s>, variant: impl Into<Variant<'s>>) -> Tree<'s> {
|
||||
let variant = Box::new(variant.into());
|
||||
Tree { variant, span }
|
||||
}
|
||||
|
||||
impl<'s> Debug for Tree<'s> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let max_code_len = 30;
|
||||
let ellipsis = "...";
|
||||
let mut code = self.code();
|
||||
if code.len() > max_code_len {
|
||||
code = format!("{}{}", &code[..max_code_len - ellipsis.len()], ellipsis);
|
||||
}
|
||||
write!(f, "[{}:{}:\"{}\"] ", self.span.left_offset.visible, self.span.code_length, code)?;
|
||||
Debug::fmt(&self.variant, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> AsRef<Span<'s>> for Tree<'s> {
|
||||
fn as_ref(&self) -> &Span<'s> {
|
||||
&self.span
|
||||
}
|
||||
}
|
||||
|
||||
/// Macro providing the [`Tree`] type definition. It is used both to define the AST [`Variant`]
/// and to define impls for every AST node type in other modules.
|
||||
#[macro_export]
|
||||
macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)*
|
||||
/// [`Tree`] variants definition. See its docs to learn more.
|
||||
#[tagged_enum]
|
||||
#[derive(Clone, Eq, PartialEq, Visitor)]
|
||||
pub enum Variant<'s> {
|
||||
/// Invalid [`Tree`] fragment with an attached [`Error`].
|
||||
Invalid {
|
||||
pub error: Error,
|
||||
pub ast: Tree<'s>,
|
||||
},
|
||||
/// A simple identifier, like `foo` or `bar`.
|
||||
Ident {
|
||||
pub token: token::Ident<'s>,
|
||||
},
|
||||
/// A simple application, like `print "hello"`.
|
||||
App {
|
||||
pub func: Tree<'s>,
|
||||
pub arg: Tree<'s>,
|
||||
},
|
||||
/// Application of an operator, like `a + b`. The left or right operands might be missing,
|
||||
/// thus creating an operator section like `a +`, `+ b`, or simply `+`. See the
|
||||
/// [`OprSectionBoundary`] variant to learn more about operator section scope.
|
||||
OprApp {
|
||||
pub lhs: Option<Tree<'s>>,
|
||||
pub opr: OperatorOrError<'s>,
|
||||
pub rhs: Option<Tree<'s>>,
|
||||
},
|
||||
/// Defines the point where operator sections should be expanded to lambdas. Let's consider
|
||||
/// the expression `map (.sum 1)`. It should be desugared to `map (x -> x.sum 1)`, not to
|
||||
/// `map ((x -> x.sum) 1)`. The expression `.sum` will be parsed as operator section
|
||||
/// ([`OprApp`] with left operand missing), and the [`OprSectionBoundary`] will be placed
|
||||
/// around the whole `.sum 1` expression.
|
||||
OprSectionBoundary {
|
||||
pub ast: Tree<'s>,
|
||||
},
|
||||
/// An application of a multi-segment function, such as `if ... then ... else ...`. Each
|
||||
/// segment starts with a token and contains an expression. Some multi-segment functions can
|
||||
/// have a prefix, an expression that is an argument of the function but is placed before the
/// first token. A lambda is a good example of this. In the expression
|
||||
/// `Vector x y z -> x + y + z`, the `->` token is the beginning of the section, the
|
||||
/// `x + y + z` is the section body, and `Vector x y z` is the prefix of this function
|
||||
/// application.
|
||||
MultiSegmentApp {
|
||||
pub prefix: Option<Tree<'s>>,
|
||||
pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
|
||||
}
|
||||
}
|
||||
}};}
|
||||
|
||||
macro_rules! generate_variant_constructors {
|
||||
(
|
||||
$(#$enum_meta:tt)*
|
||||
pub enum $enum:ident<'s> {
|
||||
$(
|
||||
$(#$variant_meta:tt)*
|
||||
$variant:ident $({ $(pub $field:ident : $field_ty:ty),* $(,)? })?
|
||||
),* $(,)?
|
||||
}
|
||||
) => { paste! {
|
||||
impl<'s> Tree<'s> {
|
||||
$(
|
||||
/// Constructor.
|
||||
pub fn [<$variant:snake:lower>]($($(mut $field : $field_ty),*)?) -> Self {
|
||||
let span = span_builder![$($($field),*)?];
|
||||
Tree(span, $variant($($($field),*)?))
|
||||
}
|
||||
)*
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! generate_ast_definition {
|
||||
($($ts:tt)*) => {
|
||||
$($ts)*
|
||||
generate_variant_constructors!{$($ts)*}
|
||||
};
|
||||
}
|
||||
|
||||
with_ast_definition!(generate_ast_definition());
|
||||
|
||||
|
||||
// === Invalid ===
|
||||
|
||||
/// A parsing error attached to a [`Tree`] node.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Error {
|
||||
pub message: &'static str,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// Constructor.
|
||||
pub fn new(message: &'static str) -> Self {
|
||||
Self { message }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Tree<'s> {
|
||||
/// Constructor.
|
||||
pub fn with_error(self, message: &'static str) -> Self {
|
||||
Tree::invalid(Error::new(message), self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> span::Build<S> for Error {
|
||||
type Output = S;
|
||||
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
|
||||
builder.span
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === OprApp ===
|
||||
|
||||
/// Operator or [`MultipleOperatorError`].
|
||||
pub type OperatorOrError<'s> = Result<token::Operator<'s>, MultipleOperatorError<'s>>;
|
||||
|
||||
/// Error indicating multiple operators found next to each other, like `a + * b`.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Visitor)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MultipleOperatorError<'s> {
|
||||
pub operators: NonEmptyVec<token::Operator<'s>>,
|
||||
}
|
||||
|
||||
impl<'s, S> span::Build<S> for MultipleOperatorError<'s>
|
||||
where NonEmptyVec<token::Operator<'s>>: span::Build<S>
|
||||
{
|
||||
type Output = <NonEmptyVec<token::Operator<'s>> as span::Build<S>>::Output;
|
||||
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
|
||||
self.operators.build(builder)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === MultiSegmentApp ===
|
||||
|
||||
/// A segment of [`MultiSegmentApp`], like `if cond` in the `if cond then ok else fail` expression.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Visitor)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct MultiSegmentAppSegment<'s> {
|
||||
pub header: Token<'s>,
|
||||
pub body: Option<Tree<'s>>,
|
||||
}
|
||||
|
||||
impl<'s, S> span::Build<S> for MultiSegmentAppSegment<'s>
|
||||
where Token<'s>: span::Build<S, Output = Span<'s>>
|
||||
{
|
||||
type Output = Span<'s>;
|
||||
fn build(&mut self, builder: span::Builder<S>) -> Self::Output {
|
||||
builder.add(&mut self.header).add(&mut self.body).span
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Visitors ===
|
||||
// ================
|
||||
|
||||
/// The visitor pattern for [`AST`].
|
||||
///
|
||||
/// # Visitor traits
|
||||
/// There are several visitor traits defined allowing for traversal of specific AST elements, such
|
||||
/// as AST nodes ([`TreeVisitor`]), span information ([`SpanVisitor`]), and AST nodes or tokens
|
||||
/// altogether ([`ItemVisitor`]). A visitor is a struct that is modified when traversing the target
|
||||
/// elements. Visitors are also capable of tracking when they entered or exited a nested
|
||||
/// [`Tree`] structure, and they can control how deep the traversal should be performed. To learn
|
||||
/// more, see the [`RefCollectorVisitor`] implementation, which traverses [`Tree`] and collects
|
||||
/// references to all [`Tree`] nodes in a vector.
|
||||
///
|
||||
/// # Visitable traits
|
||||
/// This macro also defines visitable traits, such as [`TreeVisitable`] or [`SpanVisitable`], which
|
||||
/// provide [`Tree`] elements with such functions as [`visit`], [`visit_mut`], [`visit_span`], or
|
||||
/// [`visit_span_mut`]. These functions let you run visitors. However, as defining a visitor is
|
||||
/// relatively complex, a set of traversal functions are provided, such as [`map`], [`map_mut`],
|
||||
/// [`map_span`], or [`map_span_mut`].
|
||||
///
|
||||
/// # Generalization of the implementation
|
||||
/// The current implementation is based on a few non-generic traits. One might define a much
/// better implementation (causing much less boilerplate), such as:
|
||||
/// ```text
|
||||
/// pub trait Visitor<T> {
|
||||
/// fn visit(&mut self, elem: &T);
|
||||
/// }
|
||||
/// ```
|
||||
/// Such a definition could be implemented for every [`Tree`] node (the [`T`] parameter).
/// Unfortunately, due to a Rust compiler bug, such a definition does not currently compile. We
/// could switch to it as soon as this issue gets resolved:
|
||||
/// https://github.com/rust-lang/rust/issues/96634.
|
||||
#[allow(missing_docs)]
|
||||
pub trait Visitor {
|
||||
fn before_visiting_children(&mut self) {}
|
||||
fn after_visiting_children(&mut self) {}
|
||||
}
|
||||
|
||||
/// The visitor trait allowing for [`Tree`] nodes traversal.
|
||||
#[allow(missing_docs)]
|
||||
pub trait TreeVisitor<'s, 'a>: Visitor {
|
||||
fn visit(&mut self, ast: &'a Tree<'s>) -> bool;
|
||||
}
|
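// A minimal sketch of a custom visitor (illustrative, not used by the parser itself): the
// `before_visiting_children` / `after_visiting_children` hooks track how deeply the traversal
// descends into nested [`Tree`] nodes.
#[derive(Debug, Default)]
struct DepthVisitor {
    depth:     usize,
    max_depth: usize,
}

impl Visitor for DepthVisitor {
    fn before_visiting_children(&mut self) {
        self.depth += 1;
        self.max_depth = self.max_depth.max(self.depth);
    }
    fn after_visiting_children(&mut self) {
        self.depth -= 1;
    }
}

impl<'s, 'a> TreeVisitor<'s, 'a> for DepthVisitor {
    fn visit(&mut self, _ast: &'a Tree<'s>) -> bool {
        // Returning [`true`] lets the traversal continue into the children of this node.
        true
    }
}
// Usage sketch: `let mut v = DepthVisitor::default(); tree.visit(&mut v);` and then read
// `v.max_depth`.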
||||
|
||||
/// The visitor trait allowing for [`Tree`] nodes mutable traversal.
|
||||
#[allow(missing_docs)]
|
||||
pub trait TreeVisitorMut<'s>: Visitor {
|
||||
fn visit_mut(&mut self, ast: &mut Tree<'s>) -> bool;
|
||||
}
|
||||
|
||||
/// The visitor trait allowing for [`Span`] traversal.
|
||||
#[allow(missing_docs)]
|
||||
pub trait SpanVisitor<'s, 'a>: Visitor {
|
||||
fn visit(&mut self, ast: span::Ref<'s, 'a>) -> bool;
|
||||
}
|
||||
|
||||
/// The visitor trait allowing for [`Span`] mutable traversal.
|
||||
#[allow(missing_docs)]
|
||||
pub trait SpanVisitorMut<'s>: Visitor {
|
||||
fn visit_mut(&mut self, ast: span::RefMut<'s, '_>) -> bool;
|
||||
}
|
||||
|
||||
/// The visitor trait allowing for [`Item`] traversal.
|
||||
#[allow(missing_docs)]
|
||||
pub trait ItemVisitor<'s, 'a>: Visitor {
|
||||
fn visit_item(&mut self, ast: item::Ref<'s, 'a>) -> bool;
|
||||
}
|
||||
|
||||
macro_rules! define_visitor {
|
||||
($name:ident, $visit:ident) => {
|
||||
define_visitor_no_mut! {$name, $visit}
|
||||
define_visitor_mut! {$name, $visit}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! define_visitor_no_mut {
|
||||
($name:ident, $visit:ident) => {
|
||||
paste! {
|
||||
define_visitor_internal! {
|
||||
$name,
|
||||
$visit,
|
||||
[[<$name Visitor>]<'s, 'a>],
|
||||
[<$name Visitable>],
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! define_visitor_mut {
|
||||
($name:ident, $visit:ident) => {
|
||||
paste! {
|
||||
define_visitor_internal! {
|
||||
[_mut mut]
|
||||
$name,
|
||||
[<$visit _mut>],
|
||||
[[<$name VisitorMut>]<'s>],
|
||||
[<$name VisitableMut>],
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! define_visitor_internal {
|
||||
(
|
||||
$([$pfx_mod:ident $mod:ident])?
|
||||
$name:ident,
|
||||
$visit:ident,
|
||||
[$($visitor:tt)*],
|
||||
$visitable:ident,
|
||||
) => { paste! {
|
||||
/// The visitable trait. See documentation of [`define_visitor`] to learn more.
|
||||
#[allow(missing_docs)]
|
||||
pub trait $visitable<'s, 'a> {
|
||||
fn $visit<V: $($visitor)*>(&'a $($mod)? self, _visitor: &mut V) {}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for Box<T> {
|
||||
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
|
||||
$visitable::$visit(& $($mod)? **self, visitor)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for Option<T> {
|
||||
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
|
||||
if let Some(elem) = self {
|
||||
$visitable::$visit(elem, visitor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T: $visitable<'s, 'a>, E: $visitable<'s, 'a>> $visitable<'s, 'a>
|
||||
for Result<T, E>
|
||||
{
|
||||
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
|
||||
match self {
|
||||
Ok(elem) => $visitable::$visit(elem, visitor),
|
||||
Err(elem) => $visitable::$visit(elem, visitor),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for Vec<T> {
|
||||
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
|
||||
self.[<iter $($pfx_mod)?>]().map(|t| $visitable::$visit(t, visitor)).for_each(drop);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T: $visitable<'s, 'a>> $visitable<'s, 'a> for NonEmptyVec<T> {
|
||||
fn $visit<V: $($visitor)*>(&'a $($mod)? self, visitor: &mut V) {
|
||||
self.[<iter $($pfx_mod)?>]().map(|t| $visitable::$visit(t, visitor)).for_each(drop);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a> $visitable<'s, 'a> for &str {}
|
||||
impl<'s, 'a> $visitable<'s, 'a> for str {}
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! define_visitor_for_tokens {
|
||||
(
|
||||
$(#$kind_meta:tt)*
|
||||
pub enum $kind:ident {
|
||||
$( $variant:ident $({$($args:tt)*})? ),* $(,)?
|
||||
}
|
||||
) => {
|
||||
impl<'s, 'a> TreeVisitable<'s, 'a> for token::$kind {}
|
||||
impl<'s, 'a> TreeVisitableMut<'s, 'a> for token::$kind {}
|
||||
};
|
||||
}
|
||||
|
||||
define_visitor!(Tree, visit);
|
||||
define_visitor!(Span, visit_span);
|
||||
define_visitor_no_mut!(Item, visit_item);
|
||||
|
||||
crate::with_token_definition!(define_visitor_for_tokens());
|
||||
|
||||
|
||||
// === TreeVisitable special cases ===
|
||||
|
||||
impl<'s, 'a> TreeVisitable<'s, 'a> for Tree<'s> {
|
||||
fn visit<V: TreeVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
|
||||
if visitor.visit(self) {
|
||||
self.variant.visit(visitor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a> TreeVisitableMut<'s, 'a> for Tree<'s> {
|
||||
fn visit_mut<V: TreeVisitorMut<'s>>(&'a mut self, visitor: &mut V) {
|
||||
if visitor.visit_mut(self) {
|
||||
self.variant.visit_mut(visitor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a, T> TreeVisitable<'s, 'a> for Token<'s, T> {}
|
||||
impl<'s, 'a, T> TreeVisitableMut<'s, 'a> for Token<'s, T> {}
|
||||
|
||||
|
||||
// === SpanVisitable special cases ===
|
||||
|
||||
impl<'s, 'a> SpanVisitable<'s, 'a> for Tree<'s> {
|
||||
fn visit_span<V: SpanVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
|
||||
if visitor.visit(span::Ref {
|
||||
left_offset: &self.span.left_offset,
|
||||
code_length: self.span.code_length,
|
||||
}) {
|
||||
self.variant.visit_span(visitor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a> SpanVisitableMut<'s, 'a> for Tree<'s> {
|
||||
fn visit_span_mut<V: SpanVisitorMut<'s>>(&'a mut self, visitor: &mut V) {
|
||||
if visitor.visit_mut(span::RefMut {
|
||||
left_offset: &mut self.span.left_offset,
|
||||
code_length: self.span.code_length,
|
||||
}) {
|
||||
self.variant.visit_span_mut(visitor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 't, 's, T> SpanVisitable<'s, 'a> for Token<'s, T> {
|
||||
fn visit_span<V: SpanVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
|
||||
let code_length = self.code.len();
|
||||
visitor.visit(span::Ref { left_offset: &self.left_offset, code_length });
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 't, 's, T> SpanVisitableMut<'s, 'a> for Token<'s, T> {
|
||||
fn visit_span_mut<V: SpanVisitorMut<'s>>(&'a mut self, visitor: &mut V) {
|
||||
let code_length = self.code.len();
|
||||
visitor.visit_mut(span::RefMut { left_offset: &mut self.left_offset, code_length });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === ItemVisitable special cases ===
|
||||
|
||||
impl<'s, 'a> ItemVisitable<'s, 'a> for Tree<'s> {
|
||||
fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
|
||||
if visitor.visit_item(item::Ref::Tree(self)) {
|
||||
self.variant.visit_item(visitor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s: 'a, 'a, T: 'a> ItemVisitable<'s, 'a> for Token<'s, T>
|
||||
where &'a Token<'s, T>: Into<token::Ref<'s, 'a>>
|
||||
{
|
||||
fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
|
||||
visitor.visit_item(item::Ref::Token(self.into()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==========================
|
||||
// === CodePrinterVisitor ===
|
||||
// ==========================
|
||||
|
||||
/// A visitor collecting the code representation of AST nodes.
|
||||
#[derive(Debug, Default)]
|
||||
#[allow(missing_docs)]
|
||||
struct CodePrinterVisitor {
|
||||
pub code: String,
|
||||
}
|
||||
|
||||
impl Visitor for CodePrinterVisitor {}
|
||||
impl<'s, 'a> ItemVisitor<'s, 'a> for CodePrinterVisitor {
|
||||
fn visit_item(&mut self, item: item::Ref<'s, 'a>) -> bool {
|
||||
match item {
|
||||
item::Ref::Tree(tree) => self.code.push_str(&tree.span.left_offset.code),
|
||||
item::Ref::Token(token) => {
|
||||
self.code.push_str(&token.left_offset.code);
|
||||
self.code.push_str(token.code);
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Tree<'s> {
|
||||
/// Generate the code (source text) representation of this AST.
|
||||
pub fn code(&self) -> String {
|
||||
let mut visitor = CodePrinterVisitor::default();
|
||||
self.visit_item(&mut visitor);
|
||||
visitor.code
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===========================
|
||||
// === RefCollectorVisitor ===
|
||||
// ===========================
|
||||
|
||||
/// A visitor collecting references to all [`Tree`] nodes.
|
||||
#[derive(Debug, Default)]
|
||||
#[allow(missing_docs)]
|
||||
struct RefCollectorVisitor<'s, 'a> {
|
||||
pub vec: Vec<&'a Tree<'s>>,
|
||||
}
|
||||
|
||||
impl<'s, 'a> Visitor for RefCollectorVisitor<'s, 'a> {}
|
||||
impl<'s, 'a> TreeVisitor<'s, 'a> for RefCollectorVisitor<'s, 'a> {
|
||||
fn visit(&mut self, ast: &'a Tree<'s>) -> bool {
|
||||
self.vec.push(ast);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Tree<'s> {
|
||||
/// Collect references to all [`Tree`] nodes and return them in a vector.
|
||||
pub fn collect_vec_ref(&self) -> Vec<&Tree<'s>> {
|
||||
let mut visitor = RefCollectorVisitor::default();
|
||||
self.visit(&mut visitor);
|
||||
visitor.vec
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === FnVisitor ===
|
||||
// =================
|
||||
|
||||
/// A visitor allowing running a function on every [`Tree`] node.
|
||||
#[derive(Debug, Default)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct FnVisitor<F>(pub F);
|
||||
|
||||
impl<F> Visitor for FnVisitor<F> {}
|
||||
impl<'s: 'a, 'a, T, F: Fn(&'a Tree<'s>) -> T> TreeVisitor<'s, 'a> for FnVisitor<F> {
|
||||
fn visit(&mut self, ast: &'a Tree<'s>) -> bool {
|
||||
(self.0)(ast);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T, F: Fn(&mut Tree<'s>) -> T> TreeVisitorMut<'s> for FnVisitor<F> {
|
||||
fn visit_mut(&mut self, ast: &mut Tree<'s>) -> bool {
|
||||
(self.0)(ast);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Tree<'s> {
|
||||
/// Map the provided function over each [`Tree`] node. The function results will be discarded.
|
||||
pub fn map<T>(&self, f: impl Fn(&Tree<'s>) -> T) {
|
||||
let mut visitor = FnVisitor(f);
|
||||
self.visit(&mut visitor);
|
||||
}
|
||||
|
||||
/// Map the provided function over each [`Tree`] node. The function results will be discarded.
|
||||
pub fn map_mut<T>(&mut self, f: impl Fn(&mut Tree<'s>) -> T) {
|
||||
let mut visitor = FnVisitor(f);
|
||||
self.visit_mut(&mut visitor);
|
||||
}
|
||||
}
|
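// A minimal usage sketch of the traversal helpers above (illustrative): counting all nodes of a
// tree by threading a counter through the `Fn` closure via a `Cell`.
fn node_count(tree: &Tree<'_>) -> usize {
    let count = std::cell::Cell::new(0);
    tree.map(|_| count.set(count.get() + 1));
    count.get()
}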
25
lib/rust/parser/src/syntax/tree/builder/Cargo.toml
Normal file
@ -0,0 +1,25 @@
|
||||
[package]
|
||||
name = "enso-parser-syntax-tree-builder"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
description = "Enso Parser AST Builder."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/enso"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../LICENSE"
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = "1.0"
|
||||
enso-macro-utils = { path = "../../../../../macro-utils" }
|
||||
quote = "1.0"
|
||||
|
||||
[dependencies.syn]
|
||||
version = "1.0"
|
||||
features = [
|
||||
'extra-traits', 'visit', 'full'
|
||||
]
|
124
lib/rust/parser/src/syntax/tree/builder/src/lib.rs
Normal file
@ -0,0 +1,124 @@
|
||||
//! Definition of a macro allowing building mock AST structures, mostly useful for testing.
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(clippy::option_map_unit_fn)]
|
||||
#![allow(clippy::precedence)]
|
||||
#![allow(dead_code)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
#![warn(unused_qualifications)]
|
||||
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::quote;
|
||||
use std::mem;
|
||||
|
||||
|
||||
|
||||
/// A macro allowing building mock AST structures, mostly useful for testing.
|
||||
///
|
||||
/// Currently supported syntax:
|
||||
///
|
||||
/// - `a b c` Application of arguments. Arguments are applied in-order, from left to right. Here,
|
||||
/// this expression would be the same as `[[a b] c]`.
|
||||
///
|
||||
/// - `a [b c] d` Grouping syntax that does not produce an AST group expression. Here, `b c` is just
|
||||
/// the first argument passed to `a`.
|
||||
///
|
||||
/// - `{if} a {then} b {else} c` Multi-segment application. All segments should be enclosed in curly
|
||||
/// braces. You can also place segments in quotes, like `{"("} a {")"}`.
|
||||
#[proc_macro]
|
||||
pub fn ast_builder(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||
let output = expr(tokens);
|
||||
let output = quote!(syntax::Tree::opr_section_boundary(#output));
|
||||
output.into()
|
||||
}
|
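// A hedged usage sketch (the expansion refers to `syntax`, `Token`, and a `test` helper module,
// which the generated code expects to be in scope at the call site):
//
//     let mock = ast_builder! { {if} a {then} b {else} c };
//
// This builds a multi-segment application with three segments whose headers are the `if`, `then`,
// and `else` tokens and whose bodies are the `a`, `b`, and `c` identifiers.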
||||
|
||||
|
||||
struct Segment {
|
||||
header: TokenStream,
|
||||
body: TokenStream,
|
||||
}
|
||||
|
||||
impl Segment {
|
||||
fn new(header: TokenStream) -> Self {
|
||||
let body = quote!();
|
||||
Self { header, body }
|
||||
}
|
||||
}
|
||||
|
||||
fn expr(tokens: proc_macro::TokenStream) -> TokenStream {
|
||||
use proc_macro::TokenTree::*;
|
||||
let mut output = quote! {};
|
||||
let mut prefix: Option<TokenStream> = None;
|
||||
let mut segments: Vec<Segment> = vec![];
|
||||
let mut current_segment: Option<Segment> = None;
|
||||
let app_to_output = |output: &mut TokenStream, tok| {
|
||||
if output.is_empty() {
|
||||
*output = tok;
|
||||
} else {
|
||||
*output = quote! {syntax::Tree::app(#output,#tok)};
|
||||
}
|
||||
};
|
||||
for token in tokens {
|
||||
match token {
|
||||
// a b c ...
|
||||
Ident(ident) => {
|
||||
let ident = ident.to_string();
|
||||
app_to_output(&mut output, quote! {test::ident(#ident)});
|
||||
}
|
||||
// {if} a {then} b {else} c
|
||||
// {"("} a {")"}
|
||||
Group(group) if group.delimiter() == proc_macro::Delimiter::Brace => {
|
||||
if let Some(mut current_segment) = mem::take(&mut current_segment) {
|
||||
current_segment.body = mem::take(&mut output);
|
||||
segments.push(current_segment);
|
||||
} else if !output.is_empty() {
|
||||
prefix = Some(mem::take(&mut output));
|
||||
}
|
||||
let body = group.stream().to_string();
|
||||
current_segment = Some(Segment::new(quote! {Token::ident(#body)})); // Token::symbol
|
||||
}
|
||||
// a [b c] d
|
||||
Group(group) if group.delimiter() == proc_macro::Delimiter::Bracket => {
|
||||
app_to_output(&mut output, expr(group.stream()));
|
||||
}
|
||||
_ => panic!("Unsupported token {:?}", token),
|
||||
}
|
||||
}
|
||||
if let Some(mut current_segment) = current_segment {
|
||||
current_segment.body = mem::take(&mut output);
|
||||
segments.push(current_segment);
|
||||
let segments: Vec<TokenStream> = segments
|
||||
.into_iter()
|
||||
.map(|t| {
|
||||
let header = t.header;
|
||||
let body = t.body;
|
||||
let body = if !body.is_empty() {
|
||||
quote!(Some(syntax::Tree::opr_section_boundary(#body)))
|
||||
} else {
|
||||
quote!(None)
|
||||
};
|
||||
quote! { syntax::tree::MultiSegmentAppSegment { header: #header, body: #body } }
|
||||
})
|
||||
.collect();
|
||||
let pfx = prefix
|
||||
.map(|t| quote! {Some(Box::new(syntax::Tree::opr_section_boundary(#t)))})
|
||||
.unwrap_or_else(|| quote! {None});
|
||||
let segments = quote! {NonEmptyVec::try_from(vec![#(#segments),*]).unwrap()};
|
||||
output = quote! {
|
||||
span::With::new_no_left_offset_no_start(
|
||||
Bytes::from(0),
|
||||
syntax::tree::Type::MultiSegmentApp(Box::new(syntax::tree::MultiSegmentApp {prefix: #pfx, segments: #segments}))
|
||||
)
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
25
lib/rust/parser/src/syntax/tree/visitor/Cargo.toml
Normal file
@ -0,0 +1,25 @@
|
||||
[package]
|
||||
name = "enso-parser-syntax-tree-visitor"
|
||||
version = "0.1.0"
|
||||
authors = ["Enso Team <enso-dev@enso.org>"]
|
||||
edition = "2021"
|
||||
|
||||
description = "Enso Parser AST Visitor."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/enso"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../LICENSE"
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = "1.0"
|
||||
enso-macro-utils = { path = "../../../../../macro-utils" }
|
||||
quote = "1.0"
|
||||
|
||||
[dependencies.syn]
|
||||
version = "1.0"
|
||||
features = [
|
||||
'extra-traits', 'visit', 'full'
|
||||
]
|
179
lib/rust/parser/src/syntax/tree/visitor/src/lib.rs
Normal file
@ -0,0 +1,179 @@
|
||||
//! Definition of the [`Visitor`] derive macro. It implements the visitor pattern for the AST.
|
||||
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(clippy::option_map_unit_fn)]
|
||||
#![allow(clippy::precedence)]
|
||||
#![allow(dead_code)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
#![warn(unused_qualifications)]
|
||||
|
||||
|
||||
|
||||
extern crate proc_macro;
|
||||
|
||||
use enso_macro_utils::field_names;
|
||||
use enso_macro_utils::identifier_sequence;
|
||||
use enso_macro_utils::index_sequence;
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::quote;
|
||||
use syn::Data;
|
||||
use syn::DataEnum;
|
||||
use syn::DataStruct;
|
||||
use syn::DeriveInput;
|
||||
use syn::Fields;
|
||||
use syn::Variant;
|
||||
|
||||
|
||||
|
||||
// ======================
// === Derive Visitor ===
// ======================
|
||||
use quote::ToTokens;
|
||||
|
||||
/// Implements [`TreeVisitable`], [`TreeVisitableMut`], [`SpanVisitable`], and [`SpanVisitableMut`].
|
||||
/// These traits are defined in the [`crate::ast`] module. Macros in this module hardcode the names
|
||||
/// of the traits and are not implemented in a generic way because the current Rust compiler
/// cannot handle such a generic definition. See the [`crate::ast`] module to learn more about the
|
||||
/// design and the Rust compiler issue.
|
||||
#[proc_macro_derive(Visitor)]
|
||||
pub fn derive_visitor(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||
let decl = syn::parse_macro_input!(input as DeriveInput);
|
||||
let ident = &decl.ident;
|
||||
let (impl_generics, ty_generics, _inherent_where_clause_opt) = &decl.generics.split_for_impl();
|
||||
let body = gen_body(quote!(TreeVisitable::visit), &decl.data, false);
|
||||
let body_mut = gen_body(quote!(TreeVisitableMut::visit_mut), &decl.data, true);
|
||||
let body_span = gen_body(quote!(SpanVisitable::visit_span), &decl.data, false);
|
||||
let body_span_mut = gen_body(quote!(SpanVisitableMut::visit_span_mut), &decl.data, true);
|
||||
let body_item = gen_body(quote!(ItemVisitable::visit_item), &decl.data, false);
|
||||
|
||||
let impl_generics_vec: Vec<_> = impl_generics.to_token_stream().into_iter().collect();
|
||||
let impl_generics_len = impl_generics_vec.len();
|
||||
let mut impl_generics;
|
||||
if impl_generics_len > 0 {
|
||||
let v: Vec<_> = impl_generics_vec.into_iter().take(impl_generics_len - 1).skip(1).collect();
|
||||
impl_generics = quote!(#(#v)*);
|
||||
if !v.is_empty() {
|
||||
impl_generics = quote!(#impl_generics,);
|
||||
}
|
||||
} else {
|
||||
impl_generics = quote!('s,);
|
||||
}
|
||||
let impl_generics = quote!(<#impl_generics 'a>);
|
||||
|
||||
let output = quote! {
|
||||
impl #impl_generics TreeVisitable #impl_generics for #ident #ty_generics {
|
||||
fn visit<T: TreeVisitor #impl_generics>(&'a self, visitor:&mut T) {
|
||||
visitor.before_visiting_children();
|
||||
#body
|
||||
visitor.after_visiting_children();
|
||||
}
|
||||
}
|
||||
|
||||
impl #impl_generics TreeVisitableMut #impl_generics for #ident #ty_generics {
|
||||
fn visit_mut<T: TreeVisitorMut<'s>>(&'a mut self, visitor:&mut T) {
|
||||
visitor.before_visiting_children();
|
||||
#body_mut
|
||||
visitor.after_visiting_children();
|
||||
}
|
||||
}
|
||||
|
||||
impl #impl_generics SpanVisitable #impl_generics for #ident #ty_generics {
|
||||
fn visit_span<T: SpanVisitor #impl_generics>(&'a self, visitor:&mut T) {
|
||||
visitor.before_visiting_children();
|
||||
#body_span
|
||||
visitor.after_visiting_children();
|
||||
}
|
||||
}
|
||||
|
||||
impl #impl_generics SpanVisitableMut #impl_generics for #ident #ty_generics {
|
||||
fn visit_span_mut<T: SpanVisitorMut<'s>>(&'a mut self, visitor:&mut T) {
|
||||
visitor.before_visiting_children();
|
||||
#body_span_mut
|
||||
visitor.after_visiting_children();
|
||||
}
|
||||
}
|
||||
|
||||
impl #impl_generics ItemVisitable #impl_generics for #ident #ty_generics {
|
||||
fn visit_item<T: ItemVisitor #impl_generics>(&'a self, visitor:&mut T) {
|
||||
visitor.before_visiting_children();
|
||||
#body_item
|
||||
visitor.after_visiting_children();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// #[allow(missing_docs)]
|
||||
// pub trait ItemVisitable<'s, 'a> {
|
||||
// fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, _visitor: &mut V) {}
|
||||
// }
|
||||
|
||||
output.into()
|
||||
}
|
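// A hedged usage sketch (`MyNode` is an illustrative name, not a type from the parser):
//
//     #[derive(Visitor)]
//     pub struct MyNode<'s> {
//         pub first:  Tree<'s>,
//         pub second: Option<Tree<'s>>,
//     }
//
// expands to `TreeVisitable`, `TreeVisitableMut`, `SpanVisitable`, `SpanVisitableMut`, and
// `ItemVisitable` impls that simply forward the visit to `first` and `second`, as generated above.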
||||
|
||||
fn gen_body(f: TokenStream, data: &Data, is_mut: bool) -> TokenStream {
|
||||
match data {
|
||||
Data::Struct(t) => body_for_struct(&f, t, is_mut),
|
||||
Data::Enum(t) => body_for_enum(&f, t),
|
||||
Data::Union(_) => panic!("Untagged union types not supported."),
|
||||
}
|
||||
}
|
||||
|
||||
fn body_for_struct(f: &TokenStream, data: &DataStruct, is_mut: bool) -> TokenStream {
|
||||
match &data.fields {
|
||||
Fields::Unit => quote!({}),
|
||||
Fields::Unnamed(fields) => {
|
||||
let indices = index_sequence(fields.unnamed.len());
|
||||
if is_mut {
|
||||
quote!(#( #f(&mut self.#indices, visitor); )*)
|
||||
} else {
|
||||
quote!(#( #f(&self.#indices, visitor); )*)
|
||||
}
|
||||
}
|
||||
Fields::Named(fields) => {
|
||||
let names = field_names(fields);
|
||||
if is_mut {
|
||||
quote!(#( #f(&mut self.#names, visitor); )*)
|
||||
} else {
|
||||
quote!(#( #f(&self.#names, visitor); )*)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepares a match arm for a single variant, visiting each of the variant's fields.
|
||||
fn arm_for_variant(f: &TokenStream, variant: &Variant) -> TokenStream {
|
||||
let variant_ident = &variant.ident;
|
||||
match &variant.fields {
|
||||
Fields::Unit => {
|
||||
quote!(Self::#variant_ident => {})
|
||||
}
|
||||
Fields::Named(fields) => {
|
||||
let names = field_names(fields);
|
||||
quote!(Self::#variant_ident { #(#names),* } => {
|
||||
#( #f(#names, visitor); )*
|
||||
})
|
||||
}
|
||||
Fields::Unnamed(fields) => {
|
||||
let names = identifier_sequence(fields.unnamed.len());
|
||||
quote!(Self::#variant_ident(#(#names),*) => {
|
||||
#( #f(#names, visitor); )*
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn body_for_enum(f: &TokenStream, data: &DataEnum) -> TokenStream {
|
||||
let make_arm = |variant| arm_for_variant(f, variant);
|
||||
let arms = data.variants.iter().map(make_arm);
|
||||
let body = quote!(match self { #(#arms)* });
|
||||
body
|
||||
}
|
@ -6,8 +6,8 @@ edition = "2021"
|
||||
|
||||
description = "An augmented standard library in the vein of Haskell's prelude."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/rust-lib/src/-prelude"
|
||||
repository = "https://github.com/enso-org/rust-lib"
|
||||
homepage = "https://github.com/enso-org/enso"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../LICENSE"
|
||||
|
||||
keywords = ["prelude", "standard-library"]
|
||||
@ -24,7 +24,6 @@ enso-shapely = { version = "^0.2.0", path = "../shapely" }
|
||||
anyhow = "1.0.37"
|
||||
backtrace = "0.3.53"
|
||||
boolinator = "2.4.0"
|
||||
bumpalo = "3.4.0"
|
||||
cfg-if = "1.0.0"
|
||||
colored = "2.0.0"
|
||||
derivative = "2.2.0"
|
||||
@ -42,6 +41,9 @@ shrinkwraprs = "0.3.0"
|
||||
serde = { version = "1.0.126", features = ["derive", "rc"], optional = true }
|
||||
serde_json = { version = "1.0", optional = true }
|
||||
smallvec = "1.0.0"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.3"
|
||||
tracing-wasm = "0.2"
|
||||
wasm-bindgen = { version = "0.2.78" , features = ["nightly"], optional = true }
|
||||
weak-table = "0.3.0"
|
||||
nalgebra = { version = "0.26.2", optional = true }
|
||||
|
@ -14,24 +14,14 @@ use std::vec::Splice;
|
||||
|
||||
/// A version of [`std::vec::Vec`] that can't be empty.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Deref, DerefMut)]
|
||||
pub struct NonEmptyVec<T> {
|
||||
elems: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T> Deref for NonEmptyVec<T> {
|
||||
type Target = Vec<T>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.elems
|
||||
}
|
||||
pub elems: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T> NonEmptyVec<T> {
|
||||
/// Construct a new non-empty vector.
|
||||
///
|
||||
/// The vector will not allocate more than the space required to contain `first` and `rest`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
@ -45,6 +35,20 @@ impl<T> NonEmptyVec<T> {
|
||||
NonEmptyVec { elems }
|
||||
}
|
||||
|
||||
/// Construct a new non-empty vector.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// #![allow(unused_mut)]
|
||||
/// use enso_prelude::NonEmptyVec;
|
||||
/// let mut vec: NonEmptyVec<usize> = NonEmptyVec::new_with_last(vec![], 0);
|
||||
/// ```
|
||||
pub fn new_with_last(mut elems: Vec<T>, last: T) -> NonEmptyVec<T> {
|
||||
elems.push(last);
|
||||
NonEmptyVec { elems }
|
||||
}
|
||||
|
||||
/// Construct a `NonEmptyVec` containing a single element.
|
||||
///
|
||||
/// # Examples
|
||||
@ -56,7 +60,8 @@ impl<T> NonEmptyVec<T> {
|
||||
/// assert_eq!(vec.len(), 1);
|
||||
/// ```
|
||||
pub fn singleton(first: T) -> NonEmptyVec<T> {
|
||||
NonEmptyVec::new(first, vec![])
|
||||
let elems = vec![first];
|
||||
Self { elems }
|
||||
}
|
||||
|
||||
/// Construct a new, `NonEmptyVec<T>` containing the provided element and with the provided
|
||||
@ -157,21 +162,26 @@ impl<T> NonEmptyVec<T> {
|
||||
|
||||
/// Remove an element from the back of the collection, returning it.
|
||||
///
|
||||
/// Will not pop any item if there is only one item left in the vector.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use enso_prelude::NonEmptyVec;
|
||||
/// let mut vec = NonEmptyVec::new(0, vec![1]);
|
||||
/// assert!(vec.pop().is_some());
|
||||
/// assert!(vec.pop().is_none());
|
||||
/// assert!(vec.pop_if_has_more_than_1_elem().is_some());
|
||||
/// assert!(vec.pop_if_has_more_than_1_elem().is_none());
|
||||
/// assert_eq!(vec.len(), 1);
|
||||
/// ```
|
||||
pub fn pop(&mut self) -> Option<T> {
|
||||
pub fn pop_if_has_more_than_1_elem(&mut self) -> Option<T> {
|
||||
(self.len() > 1).and_option_from(|| self.elems.pop())
|
||||
}
|
||||
|
||||
/// Remove an element from the back of the collection, returning it and a new possibly empty
|
||||
/// vector.
|
||||
pub fn pop(mut self) -> (T, Vec<T>) {
|
||||
let first = self.elems.pop().unwrap();
|
||||
(first, self.elems)
|
||||
}
|
||||
|
||||
/// Obtain a mutable reference to the element in the vector at the specified `index`.
|
||||
///
|
||||
/// # Examples
|
||||
@ -213,6 +223,16 @@ impl<T> NonEmptyVec<T> {
|
||||
self.elems.first_mut().expect("The NonEmptyVec always has an item in it.")
|
||||
}
|
||||
|
||||
/// Get the tail reference.
|
||||
pub fn tail(&mut self) -> &[T] {
|
||||
&self.elems[1..]
|
||||
}
|
||||
|
||||
/// Get the mutable tail reference.
|
||||
pub fn tail_mut(&mut self) -> &mut [T] {
|
||||
&mut self.elems[1..]
|
||||
}
|
||||
|
||||
/// Obtain an immutable reference to the last element in the `NonEmptyVec`.
|
||||
///
|
||||
/// # Examples
|
||||
@ -307,6 +327,17 @@ impl<T> NonEmptyVec<T> {
|
||||
{
|
||||
self.elems.splice(range, replace_with)
|
||||
}
|
||||
|
||||
/// Convert this non-empty vector into a [`Vec`].
|
||||
pub fn into_vec(self) -> Vec<T> {
|
||||
self.elems
|
||||
}
|
||||
|
||||
/// Consume this non-empty vector, map each element with a function, and produce a new one.
|
||||
pub fn mapped<S>(self, f: impl FnMut(T) -> S) -> NonEmptyVec<S> {
|
||||
let elems = self.elems.into_iter().map(f).collect();
|
||||
NonEmptyVec { elems }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -317,3 +348,40 @@ impl<T: Default> Default for NonEmptyVec<T> {
|
||||
Self::singleton(default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> TryFrom<Vec<T>> for NonEmptyVec<T> {
|
||||
type Error = ();
|
||||
fn try_from(elems: Vec<T>) -> Result<Self, Self::Error> {
|
||||
(!elems.is_empty()).as_result_from(|| NonEmptyVec { elems }, || ())
|
||||
}
|
||||
}
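// A minimal usage sketch of the new conversion (illustrative): falling back to a default element
// when the source vector is empty.
fn non_empty_or_default(elems: Vec<usize>) -> NonEmptyVec<usize> {
    NonEmptyVec::try_from(elems).unwrap_or_else(|_| NonEmptyVec::singleton(0))
}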
|
||||
|
||||
impl<T> From<NonEmptyVec<T>> for Vec<T> {
|
||||
fn from(v: NonEmptyVec<T>) -> Self {
|
||||
v.elems
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> IntoIterator for NonEmptyVec<T> {
|
||||
type Item = T;
|
||||
type IntoIter = std::vec::IntoIter<T>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.elems.into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> IntoIterator for &'a NonEmptyVec<T> {
|
||||
type Item = &'a T;
|
||||
type IntoIter = slice::Iter<'a, T>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.elems.iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> IntoIterator for &'a mut NonEmptyVec<T> {
|
||||
type Item = &'a mut T;
|
||||
type IntoIter = slice::IterMut<'a, T>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.elems.iter_mut()
|
||||
}
|
||||
}
|
||||
|
@ -103,6 +103,33 @@ pub mod serde_reexports {
|
||||
pub use serde::Serialize;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Tracing ===
|
||||
// ===============
|
||||
|
||||
pub mod tracing {
|
||||
pub use tracing::*;
|
||||
pub use tracing_subscriber::*;
|
||||
}
|
||||
pub use ::tracing::event;
|
||||
pub use ::tracing::span as log_span;
|
||||
|
||||
pub const ERROR: tracing::Level = tracing::Level::ERROR;
|
||||
pub const WARN: tracing::Level = tracing::Level::WARN;
|
||||
pub const INFO: tracing::Level = tracing::Level::INFO;
|
||||
pub const DEBUG: tracing::Level = tracing::Level::DEBUG;
|
||||
pub const TRACE: tracing::Level = tracing::Level::TRACE;
|
||||
|
||||
pub fn init_tracing(level: tracing::Level) {
|
||||
let subscriber =
|
||||
tracing::fmt().compact().with_target(false).with_max_level(level).without_time().finish();
|
||||
tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize logger.");
|
||||
}
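// A minimal usage sketch: initialize the subscriber once at program startup and then emit events
// through the re-exported macros (the message text is illustrative).
fn setup_logging_example() {
    init_tracing(WARN);
    event!(INFO, "parser initialized");
}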
|
||||
|
||||
|
||||
|
||||
// =================
|
||||
// === Immutable ===
|
||||
// =================
|
||||
|
@ -36,6 +36,18 @@ pub fn with<T, F: FnOnce(T) -> Out, Out>(t: T, f: F) -> Out {
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === TryAsRef ===
|
||||
// ================
|
||||
|
||||
/// Just like [`AsRef`], but might return [`None`] for some data.
|
||||
#[allow(missing_docs)]
|
||||
pub trait TryAsRef<T> {
|
||||
fn try_as_ref(&self) -> Option<&T>;
|
||||
}
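// A minimal sketch of a [`TryAsRef`] implementation (the type is illustrative): a value that can
// sometimes, but not always, be viewed as a `String`.
enum NumberOrText {
    Number(i32),
    Text(String),
}

impl TryAsRef<String> for NumberOrText {
    fn try_as_ref(&self) -> Option<&String> {
        match self {
            NumberOrText::Text(text) => Some(text),
            NumberOrText::Number(_) => None,
        }
    }
}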
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === ToRef ===
|
||||
// =============
|
||||
|
@ -6,8 +6,8 @@ edition = "2021"
|
||||
|
||||
description = "Automated typeclass derivation."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/rust-lib/src/shapely"
|
||||
repository = "https://github.com/enso-org/rust-lib"
|
||||
homepage = "https://github.com/enso-org/enso/lib/rust/shapely"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../../LICENSE"
|
||||
|
||||
keywords = ["typeclass", "deriving"]
|
||||
|
@ -6,8 +6,8 @@ edition = "2021"
|
||||
|
||||
description = "Automated typeclass derivation."
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/enso-org/rust-lib/src/shapely/macros"
|
||||
repository = "https://github.com/enso-org/rust-lib"
|
||||
homepage = "https://github.com/enso-org/enso/lib/rust/shapely/macros"
|
||||
repository = "https://github.com/enso-org/enso"
|
||||
license-file = "../../../LICENSE"
|
||||
|
||||
keywords = ["typeclass", "deriving", "macro"]
|
||||
@ -22,7 +22,7 @@ proc-macro = true
|
||||
default = []
|
||||
|
||||
[dependencies]
|
||||
enso-macro-utils = { version = "^0.2.0", path = "../../macro-utils" }
|
||||
enso-macro-utils = { path = "../../macro-utils" }
|
||||
proc-macro2 = "1.0"
|
||||
quote = "1.0"
|
||||
Inflector = "0.11.4"
|
||||
|
@ -26,6 +26,7 @@ mod derive_entry_point;
|
||||
mod derive_iterator;
|
||||
mod derive_no_clone;
|
||||
mod overlappable;
|
||||
mod tagged_enum;
|
||||
|
||||
mod prelude {
|
||||
pub use enso_macro_utils::repr;
|
||||
@ -131,3 +132,14 @@ pub fn overlappable(
|
||||
) -> proc_macro::TokenStream {
|
||||
overlappable::overlappable(attrs, input)
|
||||
}
|
||||
|
||||
/// Transforms Rust enums into enums where each variant is a separate type. It also implements
|
||||
/// several traits (such as conversions between variants and the enum type) and defines utility
|
||||
/// functions, such as constructors. See [`tagged_enum::run`] to learn more.
|
||||
#[proc_macro_attribute]
|
||||
pub fn tagged_enum(
|
||||
attr: proc_macro::TokenStream,
|
||||
input: proc_macro::TokenStream,
|
||||
) -> proc_macro::TokenStream {
|
||||
tagged_enum::run(attr, input)
|
||||
}
|
||||
|
320
lib/rust/shapely/macros/src/tagged_enum.rs
Normal file
@ -0,0 +1,320 @@
|
||||
use crate::prelude::*;
|
||||
|
||||
use inflector::cases::snakecase::to_snake_case;
|
||||
use syn::Data;
|
||||
use syn::DeriveInput;
|
||||
use syn::Fields;
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Entry Point ===
|
||||
// ===================
|
||||
|
||||
/// Transforms Rust enums into enums where each variant is a separate type. It also implements
|
||||
/// several traits (such as conversions between variants and the enum type) and defines utility
|
||||
/// functions, such as constructors.
|
||||
///
|
||||
/// To learn more about the generated code, parts of the code generation below are annotated with
/// comments showing the output of applying this macro to the following structure:
|
||||
///
/// ```text
/// #[tagged_enum(boxed)]
/// pub enum Ast<'s> {
/// Ident {
/// token: Token<'s>
/// }
/// App {
/// func: Ast<'s>,
/// arg: Ast<'s>,
/// }
/// }
/// ```
///
/// # Attributes
/// All attributes defined before the `#[tagged_enum]` one will be applied to the enum only, while
/// all other attributes will be applied to both the enum and all the variant structs.
pub fn run(
attr: proc_macro::TokenStream,
input: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let mut is_boxed = false;
let attrs: Vec<_> = attr.into_iter().collect();
if attrs.len() == 1 && &attrs[0].to_string() == "boxed" {
is_boxed = true;
} else if !attrs.is_empty() {
panic!("Unsupported attributes: {:?}", attrs);
}
let decl = syn::parse_macro_input!(input as DeriveInput);
let (impl_generics, ty_generics, inherent_where_clause_opt) = &decl.generics.split_for_impl();
let mut where_clause = enso_macro_utils::new_where_clause(vec![]);
for inherent_where_clause in inherent_where_clause_opt {
where_clause.predicates.extend(inherent_where_clause.predicates.iter().cloned())
}

let data = match &decl.data {
Data::Enum(data) => data,
_ => panic!("This macro is meant for enum structs only."),
};

let mut output = vec![];



// ========================
// === Main Enum Struct ===
// ========================

// pub enum Ast<'s> {
// Ident(Box<Ident<'s>>),
// App(Box<App<'s>>)
// }
//
// impl<'s> Debug for Ast<'s> {
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// match self {
// Self::Ident(t) => Debug::fmt(&t,f),
// Self::App(t) => Debug::fmt(&t,f),
// }
// }
// }
let vis = &decl.vis;
let enum_name = &decl.ident;
let enum_attrs = &decl.attrs;
let variant_names: Vec<_> = data.variants.iter().map(|v| &v.ident).collect();
let variant_bodies = variant_names.iter().map(|v| {
if is_boxed {
quote!(Box<#v #ty_generics>)
} else {
quote!(#v #ty_generics)
}
});
output.push(quote! {
#(#enum_attrs)*
#[allow(missing_docs)]
#vis enum #enum_name #ty_generics #where_clause {
#(#variant_names(#variant_bodies)),*
}

impl #impl_generics Debug for #enum_name #ty_generics #where_clause {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
#(Self::#variant_names(t) => Debug::fmt(&t,f)),*
}
}
}
});



// ==========================
// === Marker Enum Struct ===
// ==========================

// #[derive(Clone, Copy, Debug, PartialEq, Eq)]
// pub enum AstMarker {
// Ident,
// App
// }
//
// impl<'s> From<&Ast<'s>> for AstMarker {
// fn from(t:&Ast<'s>) -> Self {
// match t {
// Ast::Ident(_) => AstMarker::Ident,
// Ast::App(_) => AstMarker::App,
// }
// }
// }
//
// impl<'s> Ast<'s> {
// pub fn marker(&self) -> AstMarker {
// self.into()
// }
//
// pub fn is(&self, marker: AstMarker) -> bool {
// self.marker() == marker
// }
// }
let enum_marker_name = quote::format_ident!("{}Marker", enum_name);
output.push(quote! {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(missing_docs)]
#vis enum #enum_marker_name {
#(#variant_names),*
}

impl #impl_generics From<&#enum_name #ty_generics> for #enum_marker_name #where_clause {
fn from(t:&#enum_name #ty_generics) -> Self {
match t {
#(#enum_name::#variant_names(_) => Self::#variant_names),*
}
}
}

impl #impl_generics #enum_name #ty_generics #where_clause {
/// Abstract variant representation of this struct.
#[inline(always)]
pub fn marker(&self) -> #enum_marker_name {
self.into()
}

/// Check whether this struct is the given variant.
#[inline(always)]
pub fn is(&self, marker: #enum_marker_name) -> bool {
self.marker() == marker
}
}
});

for variant in &data.variants {
// =======================
// === Variant Structs ===
// =======================

// pub struct Ident<'s> {
// pub token: Token<'s>
// }
// pub struct App<'s> {
// pub func: Ast<'s>,
// pub args: Ast<'s>,
// }
let variant_attrs = &variant.attrs;
let variant_name = &variant.ident;
let fields = &variant.fields;
let fields = if fields.is_empty() { quote!({}) } else { quote!(#fields) };
output.push(quote! {
#(#enum_attrs)*
#(#variant_attrs)*
#[derive(Debug)]
#[allow(missing_docs)]
#vis struct #variant_name #ty_generics #fields #where_clause
});



// ====================
// === Constructors ===
// ====================

// impl<'s> Ast<'s> {
// pub fn ident(name: String) -> Self {
// Self::Ident(Box::new(Ident{name}))
// }
// pub fn app(func: Ast, args: Ast) -> Self {
// Self::App(Box::new(App{func, args}))
// }
// }
let variant_snake_name = to_snake_case(&variant_name.to_string());
let variant_snake_ident = quote::format_ident!("{}", variant_snake_name);
let (names, types) = match &variant.fields {
Fields::Unit => (vec![], vec![]),
Fields::Named(fields) => {
let names = fields.named.iter().map(|f| f.ident.as_ref().unwrap()).collect();
let types = fields.named.iter().map(|f| &f.ty).collect();
(names, types)
}
_ => panic!(),
};
let cons = if is_boxed {
quote!(Box::new(#variant_name { #(#names),* }))
} else {
quote!(#variant_name { #(#names),* })
};
output.push(quote! {
impl #impl_generics #enum_name #ty_generics #where_clause {
/// Constructor.
#[inline(always)]
pub fn #variant_snake_ident(#(#names: #types),*) -> Self {
Self::#variant_name (#cons)
}
}
});



// ========================================
// === Unnamed Struct Like Constructors ===
// ========================================

// pub fn Ident<'s>(token: Token<'s>) -> Ident<'s> {
// Ident {token}
// }
// pub fn App<'s>(func: Ast<'s>, args: Ast<'s>) -> App<'s> {
// App {func, args}
// }
output.push(quote! {
/// Constructor.
#[inline(always)]
#[allow(non_snake_case)]
pub fn #variant_name #impl_generics (#(#names: #types),*)
-> #variant_name #ty_generics #where_clause {
#variant_name { #(#names),* }
}
});



// ======================
// === Variant Checks ===
// ======================

// impl<'s> Ast<'s> {
// pub fn is_ident(&self) -> bool {
// self.is(AstMarker::Ident)
// }
//
// pub fn is_app(&self) -> bool {
// self.is(AstMarker::App)
// }
// }
let variant_check_ident = quote::format_ident!("is_{}", variant_snake_name);
output.push(quote! {
impl #impl_generics #enum_name #ty_generics #where_clause {
/// Check if this struct is the given variant.
#[inline(always)]
pub fn #variant_check_ident(&self) -> bool {
self.is(#enum_marker_name::#variant_name)
}
}
});


// ===================
// === Conversions ===
// ===================

// impl<'s> From<Ident<'s>> for Ast<'s> {
// fn from(variant: Ident<'s>) -> Self {
// Self::Ident(Box::new(variant))
// }
// }
//
// impl<'s> From<App<'s>> for Ast<'s> {
// fn from(variant: App<'s>) -> Self {
// Self::App(Box::new(variant))
// }
// }
let cons = if is_boxed { quote!(Box::new(variant)) } else { quote!(variant) };
output.push(quote! {
impl #impl_generics From<#variant_name #ty_generics> for #enum_name #ty_generics
#where_clause {
#[inline(always)]
fn from(variant: #variant_name #ty_generics) -> Self {
Self::#variant_name(#cons)
}
}
});
}



// =============================
// === Final Code Generation ===
// =============================

let output = quote! {
#(#output)*
};

output.into()
}
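Taken together, for the hypothetical `Shape` example from the earlier sketch, the generated items are expected to allow code along these lines (illustrative only; it relies on the expansion described above):

// Assumes `#[tagged_enum(boxed)] pub enum Shape { Circle { radius: f32 }, Rect { width: f32, height: f32 } }`.
fn shape_demo() {
    let s = Shape::rect(2.0, 3.0);             // generated snake_case constructor
    assert!(s.is_rect());                      // generated variant check
    assert_eq!(s.marker(), ShapeMarker::Rect); // generated marker enum
    let c: Shape = Circle(1.0).into();         // free-function constructor + From<Circle> for Shape
    assert!(c.is_circle());
}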
@@ -40,4 +40,5 @@ pub use topology::*;
/// Common traits.
pub mod traits {
pub use super::topology::traits::*;
pub use super::unit2::traits::*;
}
@@ -7,14 +7,20 @@
//! and rules of how the result inference should be performed.

use paste::paste;
use std::borrow::Cow;
use std::marker::PhantomData;



/// Common traits for built-in units.
pub mod traits {
pub use super::BytesCowOps;
pub use super::BytesOps;
pub use super::BytesStrOps;
pub use super::DurationNumberOps;
pub use super::DurationOps;
pub use super::IntoUncheckedRawRange;
pub use super::UncheckedFrom;
}

mod ops {
@@ -25,27 +31,40 @@ mod ops {


// =====================
// === UncheckedInto ===
// === UncheckedFrom ===
// =====================

/// Unchecked unit conversion. You should use it only for unit conversion definition, never in
/// unit-usage code.
#[allow(missing_docs)]
pub trait UncheckedFrom<T> {
fn unchecked_from(t: T) -> Self;
}

impl<T> const UncheckedFrom<T> for T {
fn unchecked_from(t: T) -> Self {
t
}
}

impl<V, R> const UncheckedFrom<R> for UnitData<V, R> {
fn unchecked_from(repr: R) -> Self {
let variant = PhantomData;
UnitData { repr, variant }
}
}

/// Unchecked unit conversion. See [`UncheckedFrom`] to learn more.
#[allow(missing_docs)]
pub trait UncheckedInto<T> {
fn unchecked_into(self) -> T;
}

impl<T> const UncheckedInto<T> for T {
impl<T, S> const UncheckedInto<T> for S
where T: ~const UncheckedFrom<S>
{
fn unchecked_into(self) -> T {
self
}
}

impl<V, R> const UncheckedInto<UnitData<V, R>> for R {
fn unchecked_into(self) -> UnitData<V, R> {
let repr = self;
let variant = PhantomData;
UnitData { repr, variant }
T::unchecked_from(self)
}
}
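A hedged sketch of how these unchecked conversions are meant to be used in unit-definition code (the `Bytes` unit referenced here is defined later in this file; user-facing code should go through `From` impls or constructors instead):

// Unit-definition code: wrap a raw representation into a unit type.
fn bytes_from_raw(raw: usize) -> Bytes {
    Bytes::unchecked_from(raw)
}
// Equivalently, via the blanket `UncheckedInto` impl:
// let b: Bytes = raw.unchecked_into();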
@@ -74,6 +93,7 @@ pub trait Variant {
}

/// Internal representation of every unit.
#[repr(transparent)]
pub struct UnitData<V, R> {
repr: R,
variant: PhantomData<V>,
@@ -165,6 +185,8 @@ impl<V, R: PartialEq> PartialEq for UnitData<V, R> {
}
}

impl<V> Eq for UnitData<V, usize> {}



// ===========
@@ -177,6 +199,12 @@ impl<V, R: PartialOrd> PartialOrd for UnitData<V, R> {
}
}

impl<V> Ord for UnitData<V, usize> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.repr.cmp(&other.repr)
}
}



// ============
@@ -193,6 +221,33 @@ where R: Copy



// =============================
// === IntoUncheckedRawRange ===
// =============================

/// Allows transmuting [`Range<UnitData<V,R>>`] to [`Range<R>`].
#[allow(missing_docs)]
pub trait IntoUncheckedRawRange {
type Output;
fn into_unchecked_raw_range(self) -> Self::Output;
}

impl<V, R> IntoUncheckedRawRange for ops::Range<UnitData<V, R>> {
type Output = ops::Range<R>;
fn into_unchecked_raw_range(self) -> Self::Output {
self.start.repr..self.end.repr
}
}

impl<V, R> IntoUncheckedRawRange for ops::RangeFrom<UnitData<V, R>> {
type Output = ops::RangeFrom<R>;
fn into_unchecked_raw_range(self) -> Self::Output {
self.start.repr..
}
}
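A short usage sketch: a range of unit values can be turned back into a raw range, which is what the byte-based `str` slicing further below relies on (`Bytes` is defined later in this file):

fn raw_span(start: Bytes, end: Bytes) -> std::ops::Range<usize> {
    // `Bytes` is represented as `usize`, so the unit range maps to a plain `usize` range.
    (start..end).into_unchecked_raw_range()
}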
// ===============
// === gen_ops ===
// ===============
@@ -288,6 +343,34 @@ macro_rules! gen_ops {
}
}

// Please note that this impl is not as generic as the following ones because Rust compiler
// is unable to compile the more generic version.
impl<V, R> const ops::$trait<UnitData<V, R>> for f64
where
R: Copy,
V: $rev_trait<f64>,
f64: ~const ops::$trait<R>,
{
type Output = UnitData<<V as $rev_trait<f64>>::Output, <f64 as ops::$trait<R>>::Output>;
fn $op(self, rhs: UnitData<V, R>) -> Self::Output {
self.$op(rhs.repr).unchecked_into()
}
}

// Please note that this impl is not as generic as the following ones because Rust compiler
// is unable to compile the more generic version.
impl<V> const ops::$trait<UnitData<V, usize>> for usize
where
V: $rev_trait<usize>,
usize: ~const ops::$trait<usize>,
{
type Output =
UnitData<<V as $rev_trait<usize>>::Output, <usize as ops::$trait<usize>>::Output>;
fn $op(self, rhs: UnitData<V, usize>) -> Self::Output {
self.$op(rhs.repr).unchecked_into()
}
}

impl<V, R, T> const ops::$trait<T> for UnitData<V, R>
where
UnitData<V, R>: $trait<T>,
@@ -331,6 +414,29 @@ macro_rules! gen_ops_mut {
self.$op(rhs.repr)
}
}

impl<V, R> const ops::$trait_mut<UnitData<V, R>> for f64
where
f64: ~const ops::$trait_mut<R>,
R: Copy,
UnitData<V, R>: $rev_trait<f32>,
{
fn $op(&mut self, rhs: UnitData<V, R>) {
self.$op(rhs.repr)
}
}

impl<V, R> const ops::$trait_mut<UnitData<V, R>> for usize
where
usize: ~const ops::$trait_mut<R>,
R: Copy,
UnitData<V, R>: $rev_trait<f32>,
{
fn $op(&mut self, rhs: UnitData<V, R>) {
self.$op(rhs.repr)
}
}

impl<V, R, T> const ops::$trait_mut<T> for UnitData<V, R>
where
T: IsNotUnit,
@@ -341,6 +447,7 @@ macro_rules! gen_ops_mut {
self.repr.$op(rhs)
}
}

impl<V1, V2, R1, R2> const ops::$trait_mut<UnitData<V2, R2>> for UnitData<V1, R1>
where
R1: ~const ops::$trait_mut<R2>,
@@ -479,6 +586,9 @@ macro_rules! define_single_op_switch {
(f64 $op:tt $rhs:ident = $out:ident) => {
$crate::define_single_rev_op! {f64 $op $rhs = $out}
};
(usize $op:tt $rhs:ident = $out:ident) => {
$crate::define_single_rev_op! {usize $op $rhs = $out}
};
($lhs:ident $op:tt $rhs:ident = $out:ident) => {
$crate::define_single_op! {$lhs $op $rhs = $out}
};
@@ -488,25 +598,25 @@ macro_rules! define_single_op_switch {
#[macro_export]
macro_rules! define_single_op {
($lhs:ident + $rhs:ident = $out:ident) => {
impl Add<$rhs> for $lhs {
impl $crate::unit2::Add<$rhs> for $lhs {
type Output = $out;
}
};

($lhs:ident - $rhs:ident = $out:ident) => {
impl Sub<$rhs> for $lhs {
impl $crate::unit2::Sub<$rhs> for $lhs {
type Output = $out;
}
};

($lhs:ident * $rhs:ident = $out:ident) => {
impl Mul<$rhs> for $lhs {
impl $crate::unit2::Mul<$rhs> for $lhs {
type Output = $out;
}
};

($lhs:ident / $rhs:ident = $out:ident) => {
impl Div<$rhs> for $lhs {
impl $crate::unit2::Div<$rhs> for $lhs {
type Output = $out;
}
};
@@ -522,25 +632,25 @@ macro_rules! define_single_op {
#[macro_export]
macro_rules! define_single_rev_op {
($lhs:ident + $rhs:ident = $out:ident) => {
impl RevAdd<$rhs> for $lhs {
impl $crate::unit2::RevAdd<$rhs> for $lhs {
type Output = $out;
}
};

($lhs:ident - $rhs:ident = $out:ident) => {
impl RevSub<$rhs> for $lhs {
impl $crate::unit2::RevSub<$rhs> for $lhs {
type Output = $out;
}
};

($lhs:ident * $rhs:ident = $out:ident) => {
impl RevMul<$rhs> for $lhs {
impl $crate::unit2::RevMul<$rhs> for $lhs {
type Output = $out;
}
};

($lhs:ident / $rhs:ident = $out:ident) => {
impl RevDiv<$rhs> for $lhs {
impl $crate::unit2::RevDiv<$rhs> for $lhs {
type Output = $out;
}
};
@@ -574,7 +684,7 @@ define_ops![
f32 * Duration = Duration,
];

/// Methods for the [`Duration`] unit.
/// Methods of the [`Duration`] unit.
#[allow(missing_docs)]
pub trait DurationOps {
fn ms(t: f32) -> Duration;
@@ -653,3 +763,106 @@ impl From<Duration> for std::time::Duration {
std::time::Duration::from_millis(duration.as_ms() as u64)
}
}
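A brief usage sketch of the `Duration` unit; only `ms` and `as_ms` are visible in this hunk, so treat the exact API surface as an assumption:

fn half_second() -> std::time::Duration {
    let d = Duration::ms(500.0); // construct from milliseconds (DurationOps::ms)
    d.into()                     // uses the `From<Duration>` impl shown above
}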
// =============
// === Bytes ===
// =============

define! {
/// Number of bytes.
Bytes: usize = 0
}
define_ops![
Bytes [+,-] Bytes = Bytes,
Bytes * usize = Bytes,
usize * Bytes = Bytes,
];

/// Constructor.
#[allow(non_snake_case)]
pub fn Bytes(size: usize) -> Bytes {
Bytes::from(size)
}

impl From<usize> for Bytes {
fn from(t: usize) -> Self {
Bytes::unchecked_from(t)
}
}

/// Additional methods for [`Bytes`].
pub trait BytesOps {
/// Check whether this bytes value is zero.
fn is_zero(&self) -> bool;

/// Check whether this bytes value is positive.
fn is_positive(&self) -> bool;

/// Check whether this bytes value is negative.
fn is_negative(&self) -> bool;
}

impl BytesOps for Bytes {
fn is_zero(&self) -> bool {
*self == Bytes::from(0)
}

fn is_positive(&self) -> bool {
*self > Bytes::from(0)
}

fn is_negative(&self) -> bool {
*self < Bytes::from(0)
}
}
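A quick sketch of the resulting `Bytes` API: the constructor above, arithmetic generated by `define_ops!`, and the predicates just defined (note that with a `usize` representation, `is_negative` can never return true):

fn bytes_demo() {
    let a = Bytes(10) + Bytes(2); // Bytes [+,-] Bytes = Bytes
    let b = a * 2_usize;          // Bytes * usize = Bytes
    assert!(b.is_positive() && !b.is_zero());
}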
/// Methods of the [`Bytes`] unit as extensions for the [`str`] type.
#[allow(missing_docs)]
pub trait BytesStrOps<Range> {
/// Slice the provided string.
///
/// # Panics
/// Panics if the range start or end is not on a UTF-8 code point boundary, or if it is past the
/// end of the last code point of the string slice.
fn slice(&self, range: Range) -> &str;
}

impl BytesStrOps<ops::Range<Bytes>> for str {
#[inline(always)]
fn slice(&self, range: ops::Range<Bytes>) -> &str {
&self[range.into_unchecked_raw_range()]
}
}

impl BytesStrOps<ops::RangeFrom<Bytes>> for str {
#[inline(always)]
fn slice(&self, range: ops::RangeFrom<Bytes>) -> &str {
&self[range.into_unchecked_raw_range()]
}
}
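A usage sketch of byte-based slicing on `str` (ASCII input, so every byte offset is a valid character boundary; assumes `traits::*` is in scope):

fn str_slice_demo() {
    let s = "parser";
    assert_eq!(s.slice(Bytes(1)..Bytes(4)), "ars"); // ops::Range<Bytes>
    assert_eq!(s.slice(Bytes(3)..), "ser");         // ops::RangeFrom<Bytes>
}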
/// Methods of the [`Bytes`] unit as extensions for the [`Cow`] type.
#[allow(missing_docs)]
pub trait BytesCowOps<'t, Range> {
fn slice(&self, range: Range) -> Cow<'t, str>;
}

impl<'t> BytesCowOps<'t, ops::Range<Bytes>> for Cow<'t, str> {
fn slice(&self, range: ops::Range<Bytes>) -> Cow<'t, str> {
match self {
Cow::Borrowed(t) => Cow::Borrowed(t.slice(range)),
Cow::Owned(t) => Cow::Owned(t.slice(range).to_owned()),
}
}
}

impl<'t> BytesCowOps<'t, ops::RangeFrom<Bytes>> for Cow<'t, str> {
fn slice(&self, range: ops::RangeFrom<Bytes>) -> Cow<'t, str> {
match self {
Cow::Borrowed(t) => Cow::Borrowed(t.slice(range)),
Cow::Owned(t) => Cow::Owned(t.slice(range).to_owned()),
}
}
}
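The `Cow` variant mirrors the `str` one: borrowed input stays borrowed, owned input is re-sliced into a fresh allocation. A small sketch (assumes `traits::*` is in scope):

fn cow_slice_demo() {
    let c: Cow<str> = Cow::Borrowed("parser");
    let sliced = c.slice(Bytes(0)..Bytes(3)); // stays Cow::Borrowed
    assert_eq!(sliced, "par");
}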