Preparation for more parser work (#1363)

Ara Adkins 2020-12-18 14:25:30 +00:00 committed by GitHub
parent 0840ff546b
commit 4cc36e8c81
21 changed files with 576 additions and 72 deletions

View File

@@ -6,6 +6,7 @@ members = [
"lib/rust/lexer/definition",
"lib/rust/lexer/generation",
"lib/rust/parser",
"lib/rust/parser-jni",
]
# These patch versions exist to allow local development of these libraries alongside Enso. It

View File

@@ -113,7 +113,7 @@ change made to the lexer. The lexer benchmarks are written using
[criterion.rs](https://github.com/bheisler/criterion.rs), and include both
examples of whole program definitions and more specific benchmark examples.
**Baseline Commit:** TBC (use head of this branch for now).
**Baseline Commit:** `e5695e6f5d44cba4094380545036a3a5cbbf6973`
The benchmarking process for the lexer is as follows:

View File

@@ -40,6 +40,9 @@ used in the Enso parser itself.
- [Macro Resolution Errors](#macro-resolution-errors)
- [Macro Errors as Parser Errors](#macro-errors-as-parser-errors)
- [User-Defined Macros](#user-defined-macros)
- [Benchmarking](#benchmarking)
- [Running a Subset of the Benchmarks](#running-a-subset-of-the-benchmarks)
- [Changing the Macro Resolver](#changing-the-macro-resolver)
<!-- /MarkdownTOC -->
@@ -255,3 +258,55 @@ define syntactic macros for their programs, similar to how Rust exposes the
> - Determine what this should look like in surface Enso.
> - Determine exactly how the round-trip mechanic between the runtime and parser
> should function.

## Benchmarking

All components of the macro resolver are accompanied by comprehensive
benchmarks in order to protect the performance-crucial code against
regressions. These benchmarks are written using
[criterion.rs](https://github.com/bheisler/criterion.rs), and cover all of the
performance-critical functionality of the macro resolver.

**Baseline Commit:** TBC (use the latest for now)

The benchmarking process for the macro resolver is as follows:

1. Check out the current _baseline commit_, listed above.
2. In each of the benchmark files, change the configuration line reading
   `.retain_baseline` to `.save_baseline`. This will save the current baseline
   (taken on your machine).
3. Run the benchmarks using `cargo bench`.
4. Once the baseline run has completed, change the above-mentioned lines back
   to `.retain_baseline`. This disables overwriting the saved baseline, and
   makes criterion report regressions against it.
5. Make your changes.
6. Run the benchmark suite again. It will report any performance regressions in
   the benchmark report, measured against your saved baseline.

Unfortunately, the use of time-based benchmarks means that we can't commit the
baseline to the repository. There is far too much variance between machines for
this to be useful.
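
For concreteness, the sketch below shows the shape of a benchmark file set up
for baseline comparison. This is a minimal sketch, assuming criterion 0.3; the
group name, measurement time, and benchmark body are illustrative rather than
the project's real settings.

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::time::Duration;

fn bench_config() -> Criterion {
    Criterion::default()
        .measurement_time(Duration::from_secs(10))
        // For the baseline run, change this line to
        // `.save_baseline("baseline".into())`, run `cargo bench`, and then
        // change it back.
        .retain_baseline("baseline".into())
}

fn resolver_benches(c:&mut Criterion) {
    // Placeholder benchmark body; real benchmarks exercise the resolver.
    c.bench_function("example", |b| b.iter(|| black_box(1 + 1)));
}

criterion_group! {
    name    = benches;
    config  = bench_config();
    targets = resolver_benches
}
criterion_main!(benches);
```
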
### Running a Subset of the Benchmarks

The benchmarks are very comprehensive, testing all performance-critical paths
of the macro resolver. However, it can often be useful to run a subset of
these. There are two main tuning points for this:

1. The _sizes_ of inputs being executed on, where relevant.
2. The benchmarks being executed, which criterion allows you to filter by name
   (`cargo bench -- <filter>`).

While it is _possible_ to tune the benchmarking config to decrease benchmarking
time, this is not recommended. The current settings are tuned to provide
reliable results.

### Changing the Macro Resolver

When changing the macro resolver, the _full_ benchmark suite must be run
against the current baseline before the changes can be merged. This suite run
must use the provided settings for the benchmarking library, and should be
performed using the process described above.

View File

@@ -15,7 +15,7 @@ use crate::lexeme;
// =============
/// A lexer token.
#[derive(Clone,Debug,Eq,PartialEq)]
#[derive(Clone,Debug,Eq,Hash,PartialEq)]
pub struct Token {
/// The shape of the token.
pub shape : Shape,
@@ -294,7 +294,7 @@ impl Token {
// =================
/// The type for an Enso Block token.
#[derive(Copy,Clone,Debug,PartialEq,Eq)]
#[derive(Copy,Clone,Debug,Eq,Hash,PartialEq)]
pub enum BlockType {
/// A block made up of arguments to a function.
Continuous,
@@ -309,7 +309,7 @@ pub enum BlockType {
// ==================
/// The type of newline associated with the line.
#[derive(Copy,Clone,Debug,Display,PartialEq,Eq)]
#[derive(Copy,Clone,Debug,Display,Eq,Hash,PartialEq)]
pub enum LineEnding {
/// There is no newline.
None,
@@ -348,7 +348,7 @@ impl Default for LineEnding {
// =================
/// The style of the text literal.
#[derive(Copy,Clone,Debug,Eq,PartialEq)]
#[derive(Copy,Clone,Debug,Eq,Hash,PartialEq)]
pub enum TextStyle {
// === Line ===
@@ -424,7 +424,7 @@ impl TextStyle {
// ===================
/// A description of the style of escape sequence seen.
#[derive(Clone,Copy,Debug,Eq,PartialEq)]
#[derive(Clone,Copy,Debug,Eq,Hash,PartialEq)]
pub enum EscapeStyle {
/// A \xNN-style byte escape.
Byte,
@@ -474,7 +474,7 @@ impl EscapeStyle {
/// This is a very small set of shapes, because the [`Token`] type only deals with the tokens that
/// the lexer works with, not the full complexity of Enso's syntax.
#[allow(missing_docs)]
#[derive(Clone,Debug,PartialEq,Eq)]
#[derive(Clone,Debug,Eq,Hash,PartialEq)]
pub enum Shape {
// === Identifiers ===
@@ -761,6 +761,11 @@ impl Stream {
pub fn tokens_len(&self) -> usize {
self.tokens.iter().map(|token|token.length + token.offset).sum()
}
/// Get a consuming iterator over the token stream.
pub fn into_iter(self) -> std::vec::IntoIter<Token> {
self.tokens.into_iter()
}
}
impl Deref for Stream {

View File

@@ -2,9 +2,7 @@
//! benchmarking the Enso lexer.
use criterion::{black_box, Criterion, Throughput};
use enso_flexer::prelude::Reader;
use enso_flexer::prelude::reader::decoder::DecoderUTF8;
use lexer::generated::engine::EnsoLexer;
use lexer;
use std::time::Duration;
@@ -51,9 +49,7 @@ pub fn run_bench_sizes(name:&str, input:&str, add_newline:bool, c:&mut Criterion
group.bench_function(
*size_name,
|b| b.iter(|| {
let mut lexer = EnsoLexer::new();
let reader = Reader::new(input.as_str().as_bytes(),DecoderUTF8());
lexer.run(black_box(reader));
lexer::run(black_box(input.as_str()));
})
);
})

View File

@@ -0,0 +1,21 @@
//! A driver for the Enso lexer.
use crate::prelude::*;
use enso_flexer::LexingResult;
use crate::library::token;
use crate::generated::engine::EnsoLexer;
use crate::prelude::reader::decoder::DecoderUTF8;
// ====================
// === Lexer Driver ===
// ====================
/// Execute the lexer on the provided `input`, assuming UTF-8 encoding.
pub fn run(input:impl AsRef<str>) -> LexingResult<token::Stream> {
let mut lexer = EnsoLexer::new();
let reader = Reader::new(input.as_ref().as_bytes(),DecoderUTF8());
lexer.run(reader)
}
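
A usage sketch for the driver follows. The input program is illustrative, and
the `tokens` field on `LexingResult` (from `enso_flexer`) is an assumption
about that type's public interface.

```rust
fn main() {
    let result = lexer::run("x = 1 + 2");
    // `tokens_len` sums each token's length and offset, i.e. the amount of
    // input consumed. The `tokens` field is assumed, as noted above.
    println!("consumed: {} bytes", result.tokens.tokens_len());
}
```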

View File

@@ -10,12 +10,15 @@
//! This module exports the interface to the generated Enso lexer.
pub mod lexer;
pub mod generated;
pub use crate::lexer::*;
/// Support libraries for the lexer definition.
///
/// This is an intentional re-export in this crate's namespace.
mod library {
pub mod library {
pub use lexer_definition::library::*;
}

View File

@@ -5,9 +5,7 @@
use enso_flexer::*;
use lexer_definition::library::*;
use enso_flexer::prelude::reader::decoder::DecoderUTF8;
use enso_flexer::prelude::Reader;
use lexer::generated::engine::EnsoLexer;
use lexer;
use lexer_definition::library::token::Token;
@@ -36,9 +34,7 @@ pub fn assert_lexes(input:impl AsRef<str>, expected:token::Stream) {
/// Lex the provided string.
pub fn lex(input:impl AsRef<str>) -> LexingResult<token::Stream> {
let mut lexer = EnsoLexer::new();
let reader = Reader::new(input.as_ref().as_bytes(),DecoderUTF8());
lexer.run(reader)
lexer::run(input)
}
/// Asserts that the input is a block and has a length equal to `length`.

View File

@@ -0,0 +1,25 @@
[package]
name = "parser-jni"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2018"
description = "A parser for the Enso language"
readme = "README.md"
homepage = "https://github.com/enso-org/enso/lib/rust/parser"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
keywords = ["parser"]
categories = ["parsing"]
publish = false
[lib]
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
jni = { version = "0.17.0" }
ast = { version = "0.1.0", path = "../ast" }

View File

@@ -0,0 +1,47 @@
#![feature(test)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]
//! This module exports the implementation of the parser for the Enso language.
mod jni;
pub use crate::jni::*;
use ast::AnyAst;
use ast::Ast;
// =======================
// === Parser Rust API ===
// =======================
/// Parse the content of a single source file.
pub fn parse_str(input:String) -> AnyAst {
Ast::new(ast::txt::Text{text:input})
}
/// Parse a single source file.
pub fn parse_file(filename:String) -> AnyAst {
parse_str(filename)
}
// === Tokens ===
/// Lex the content of a single source file.
pub fn lexe_str(input:String) -> AnyAst {
parse_str(input)
}
/// Lex a single source file.
pub fn lexe_file(filename:String) -> AnyAst {
parse_str(filename)
}

View File

@@ -4,23 +4,21 @@ version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2018"
description = "A parser for the Enso language"
readme = "README.md"
homepage = "https://github.com/enso-org/enso/lib/rust/parser"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
keywords = ["parser"]
categories = ["parsing"]
publish = false
[lib]
name = "parser"
crate-type = ["cdylib", "rlib"]
test = true
bench = true
[dependencies]
jni = { version = "0.17.0" }
ast = { version = "0.1.0", path = "../ast" }
enso-data = { version = "0.1.3" }
enso-logger = { version = "0.1.3" }
enso-prelude = { version = "0.1.8" }
lexer = { version = "0.1.0", path = "../lexer/generation" }
itertools = { version = "0.9.0" }
[build-dependencies]
[dev-dependencies]
criterion = "0.3"

View File

@@ -1,47 +1,30 @@
#![feature(test)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]
//! This library contains the implementation of the Enso parser.
//! This module exports the implementation of the parser for the Enso language.
pub mod macros;
pub mod operator;
pub mod parser;
mod jni;
pub use crate::parser::*;
pub use crate::jni::*;
/// The prelude for the parser.
pub mod prelude {
pub use enso_prelude::*;
pub use enso_logger::AnyLogger;
use ast::AnyAst;
use ast::Ast;
// =======================
// === Parser Rust API ===
// =======================
/// Parse a content of a single source file.
pub fn parse_str(input:String) -> AnyAst {
Ast::new(ast::txt::Text{text:input})
/// The Enso logging library.
pub mod logger {
pub use enso_logger::*;
pub use enso_logger::disabled::Logger as Disabled;
pub use enso_logger::enabled::Logger as Enabled;
}
/// Parse a single source file.
pub fn parse_file(filename:String) -> AnyAst {
parse_str(filename)
/// The lexer types.
pub mod lexer {
pub use ::lexer::*;
/// The lexer tokens.
pub mod token {
pub use lexer::library::token::*;
}
// === Tokens ===
/// Parse a content of single source file.
pub fn lexe_str(input:String) -> AnyAst {
parse_str(input)
}
/// Parse a single source file.
pub fn lexe_file(filename:String) -> AnyAst {
parse_str(filename)
}

View File

@@ -0,0 +1,41 @@
//! The macro system for the Enso parser.
pub mod definition;
pub mod literal;
pub mod registry;
use crate::prelude::*;
use crate::prelude::logger::*;
use crate::macros::definition::Definition;
use crate::macros::registry::Registry;
// ================
// === Resolver ===
// ================
/// The Enso macro resolver.
#[derive(Clone,Debug,PartialEq)]
#[allow(missing_docs)]
pub struct Resolver<Logger> {
registry : Registry,
logger : Logger
}
impl<Logger> Resolver<Logger>
where Logger : AnyLogger<Owned=Logger> {
/// Constructor.
pub fn new(macros:Vec<Definition>, parent_logger:&Logger) -> Self {
let logger = <Logger>::sub(parent_logger,"Resolver");
let registry = Registry::from(macros);
Self{registry,logger}
}
/// Define the macro described by `definition` in the macro resolver `self`.
pub fn define_macro(&mut self, definition:Definition) {
debug!(self.logger,"Define Macro: {&definition:?}.");
self.registry.insert(definition)
}
}
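
A usage sketch for the resolver follows. It assumes the `Disabled` logger and
the `AnyLogger::new` constructor re-exported through the prelude's `logger`
module; the macro definition itself is illustrative.

```rust
use crate::macros::definition::{Definition, Section};
use crate::macros::literal::Literal;
use crate::prelude::logger::{AnyLogger, Disabled};

fn example() {
    // `Disabled::new` is assumed to construct a no-op logger from a path.
    let logger       = Disabled::new("Parser");
    let mut resolver = Resolver::new(vec![], &logger);
    resolver.define_macro(Definition::new("if_then", vec![
        Section::new(Literal::variable("if")),
        Section::new(Literal::variable("then")),
    ]));
}
```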

View File

@@ -0,0 +1,69 @@
//! Macro definitions in Enso.
use crate::prelude::*;
use crate::macros::literal::Literal;
use itertools::Itertools;
// ==================
// === Definition ===
// ==================
/// A macro definition.
///
/// A macro definition consists of a name, which identifies the macro to users, and a list of
/// [sections](`Section`). The sections are the most important portion of the macro definition, as
/// they define the literal portions of the token stream on which the macro will match.
#[derive(Clone,Debug,Default,Eq,PartialEq)]
#[allow(missing_docs)]
pub struct Definition {
pub name : String,
pub sections : Vec<Section>
}
impl Definition {
/// Constructor.
pub fn new(name:impl Str, sections:Vec<Section>) -> Self {
let name = name.into();
Self{name,sections}
}
/// Get the path for the definition.
///
/// The definition's path consists of the headers of each of the sections that make it up, and
/// describes the literals that must be matched for the macro to match.
pub fn path(&self) -> Vec<Literal> {
self.sections.iter().map(|s| s.start_symbol.clone()).collect_vec()
}
}
// ===============
// === Section ===
// ===============
/// A section in a macro, representing both a literal section header to match against, and the
/// tokens that the section contains.
///
/// The literal is the _most_ important portion of a section, as these constants allow the macro
/// resolver to divide up the input token stream.
#[derive(Clone,Debug,Eq,PartialEq)]
#[allow(missing_docs)]
pub struct Section {
start_symbol : Literal
// TODO Pattern
}
impl Section {
/// Constructor.
pub fn new(symbol:Literal) -> Self {
Self{start_symbol:symbol}
}
/// Get a reference to the literal that heads the section.
pub fn start_symbol(&self) -> &Literal {
&self.start_symbol
}
}
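
A test-style sketch of the relationship between a definition's sections and
its path; the definition here is illustrative.

```rust
#[test]
fn path_is_the_section_heads() {
    let if_then = Definition::new("if_then", vec![
        Section::new(Literal::variable("if")),
        Section::new(Literal::variable("then")),
    ]);
    // The path is exactly the head literal of each section, in order.
    let expected = vec![Literal::variable("if"), Literal::variable("then")];
    assert_eq!(if_then.path(), expected);
}
```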

View File

@@ -0,0 +1,95 @@
//! This file contains the literal matchers that are used to head up macro sections.
use crate::prelude::*;
use crate::prelude::lexer::token;
// ===============
// === Literal ===
// ===============
/// The kinds of literal that can be the head of a macro section.
///
/// For more detailed descriptions of the various literal types, please see the documentation of the
/// tokens in the Lexer.
#[derive(Clone,Debug,Eq,Hash,Ord,PartialEq,PartialOrd)]
pub enum Literal {
Referent(String),
Variable(String),
External(String),
Blank,
Operator(String),
Annotation(String)
}
impl Literal {
/// Construct a referent identifier literal.
pub fn referent(lit:impl Str) -> Literal {
Literal::Referent(lit.into())
}
/// Construct a variable identifier literal.
pub fn variable(lit:impl Str) -> Literal {
Literal::Variable(lit.into())
}
/// Construct an external identifier literal.
pub fn external(lit:impl Str) -> Literal {
Literal::External(lit.into())
}
/// Construct a blank identifier literal.
pub fn blank() -> Literal {
Literal::Blank
}
/// Construct an operator identifier literal.
pub fn operator(lit:impl Str) -> Literal {
Literal::Operator(lit.into())
}
/// Construct an annotation identifier literal.
pub fn annotation(lit:impl Str) -> Literal {
Literal::Annotation(lit.into())
}
}
// === Trait Impls ===
impl From<&Literal> for Literal {
fn from(lit:&Literal) -> Self {
lit.clone()
}
}
impl From<Literal> for token::Shape {
fn from(lit:Literal) -> Self {
match lit {
Literal::Referent(str) => token::Shape::Referent(str),
Literal::Variable(str) => token::Shape::Variable(str),
Literal::External(str) => token::Shape::External(str),
Literal::Blank => token::Shape::Blank,
Literal::Operator(str) => token::Shape::Operator(str),
Literal::Annotation(str) => token::Shape::Annotation(str),
}
}
}
impl TryFrom<token::Shape> for Literal {
type Error = token::Shape;
fn try_from(shape:token::Shape) -> Result<Self, Self::Error> {
match shape {
token::Shape::Referent(name) => Ok(Literal::Referent(name)),
token::Shape::Variable(name) => Ok(Literal::Variable(name)),
token::Shape::External(name) => Ok(Literal::External(name)),
token::Shape::Blank => Ok(Literal::Blank),
token::Shape::Operator(name) => Ok(Literal::Operator(name)),
token::Shape::Annotation(name) => Ok(Literal::Annotation(name)),
_ => Err(shape)
}
}
}
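
A test-style sketch of the round trip these impls provide; it uses nothing
beyond the conversions defined above.

```rust
#[test]
fn literal_shape_round_trip() {
    use std::convert::TryFrom;
    let lit   = Literal::variable("if");
    // Convert the literal into a token shape and back again.
    let shape = token::Shape::from(lit.clone());
    assert_eq!(Literal::try_from(shape), Ok(lit));
}
```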

View File

@@ -0,0 +1,145 @@
//! The macro registry that can be queried during the process of macro resolution.
use crate::prelude::*;
use enso_data::hash_map_tree::*;
use crate::macros::definition::Definition;
use crate::macros::literal::Literal;
// ================
// === Registry ===
// ================
/// The type of the tree that underlies the registry.
pub type Tree = HashMapTree<Literal,Option<Definition>>;
/// The registry is responsible for the registration of macro definitions, and the querying of said
/// definitions.
#[derive(Clone,Debug,Default,PartialEq)]
#[allow(missing_docs)]
pub struct Registry {
tree : Tree
}
impl Registry {
/// Insert `definition` into the macro registry.
pub fn insert(&mut self, definition:Definition) {
self.tree.set(definition.path(),Some(definition));
}
/// Get a reference to the root of the registry.
pub fn root(&self) -> &Tree {
&self.tree
}
/// Query the registry for a tree.
pub fn subtree<P>(&self, path:P) -> Option<&Tree>
where P:IntoIterator, P::Item:Into<Literal> {
self.tree.get_node(path)
}
/// Query the registry for a tree, assuming such a tree is present.
///
/// # Panics
/// If no tree exists at `path`.
pub fn unsafe_subtree<P>(&self, path:P) -> &Tree
where P:IntoIterator, P::Item:Into<Literal> {
self.subtree(path).expect("A tree exists at the input path.")
}
/// Query the registry for a definition.
pub fn definition<P>(&self, path:P) -> Option<&Definition>
where P:IntoIterator, P::Item:Into<Literal> {
match self.tree.get(path) {
Some(Some(def)) => Some(def),
_ => None
}
}
/// Query the registry for a definition, assuming such a definition is present.
///
/// # Panics
/// If no definition exists at `path`.
pub fn unsafe_definition<P>(&self, path:P) -> &Definition
where P:IntoIterator, P::Item:Into<Literal> {
self.definition(path).expect("A definition exists at the input path.")
}
}
// === Trait Impls ===
impl From<Vec<Definition>> for Registry {
fn from(defs:Vec<Definition>) -> Self {
let mut registry:Registry = default();
defs.into_iter().for_each(|def| registry.insert(def));
registry
}
}
// =============
// === Tests ===
// =============
#[cfg(test)]
mod tests {
use super::*;
use crate::macros::definition::Section;
#[test]
fn insert_query() {
let mut registry = Registry::default();
let definition = Definition::new("Test",vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("then")),
Section::new(Literal::variable("else")),
]);
let path_1 = &[Literal::variable("if"),Literal::variable("then"),Literal::variable("else")];
let path_2 = &[Literal::variable("if"),Literal::variable("then")];
registry.insert(definition.clone());
let result_1 = registry.definition(path_1);
let result_2 = registry.definition(path_2);
assert!(result_1.is_some());
assert_eq!(result_1.unwrap(),&definition);
assert_eq!(result_2,None);
}
#[test]
fn from_defs() {
let definitions = vec![
Definition::new("if_then_else", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("then")),
Section::new(Literal::variable("else")),
]),
Definition::new("if_then", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("then")),
]),
Definition::new("if_let", vec![
Section::new(Literal::variable("if")),
Section::new(Literal::variable("let")),
]),
];
let registry = Registry::from(definitions.clone());
let path_1 = &[Literal::variable("if"),Literal::variable("then"),Literal::variable("else")];
let path_2 = &[Literal::variable("if"),Literal::variable("then")];
let path_3 = &[Literal::variable("if"),Literal::variable("let")];
let path_4 = &[Literal::variable("if")];
let result_1 = registry.definition(path_1);
let result_2 = registry.definition(path_2);
let result_3 = registry.definition(path_3);
let result_4 = registry.definition(path_4);
assert!(result_1.is_some());
assert!(result_2.is_some());
assert!(result_3.is_some());
assert!(result_4.is_none());
assert_eq!(result_1,definitions.get(0));
assert_eq!(result_2,definitions.get(1));
assert_eq!(result_3,definitions.get(2));
}
}
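
One further test-style sketch, showing how `subtree` narrows the registry one
literal at a time during resolution; the definition is illustrative.

```rust
#[test]
fn subtree_narrowing() {
    let mut registry = Registry::default();
    registry.insert(Definition::new("if_then", vec![
        Section::new(Literal::variable("if")),
        Section::new(Literal::variable("then")),
    ]));
    // A path that matches a registered prefix yields a subtree to continue
    // resolving in; an unknown literal yields nothing.
    assert!(registry.subtree(&[Literal::variable("if")]).is_some());
    assert!(registry.subtree(&[Literal::variable("for")]).is_none());
}
```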

View File

@@ -0,0 +1,4 @@
//! The logic for working with operators in the Enso parser.
pub mod associativity;
pub mod precedence;

View File

@@ -0,0 +1 @@
//! Associativity inference for Enso.

View File

@@ -0,0 +1 @@
//! Operator precedence levels.

View File

@@ -0,0 +1,18 @@
//! The driver for the Enso parser.
// ==============
// === Parser ===
// ==============
/// The Enso parser itself.
#[derive(Clone,Debug,Default,Eq,PartialEq)]
pub struct Parser;
impl Parser {
/// Constructor.
pub fn new() -> Self {
Self
}
}