Parser: don't panic for any standard library files (#3609)

This commit is contained in:
Kaz Wesley 2022-07-28 10:17:33 -07:00 committed by GitHub
parent 26018e4969
commit c525b201b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 855 additions and 229 deletions

5
.github/CODEOWNERS vendored
View File

@ -1,6 +1,9 @@
# Catch All
* @4e6 @MichaelMauderer @PabloBuchu @jdunkerley
# Github metadata
/.github/ @4e6 @MichaelMauderer @PabloBuchu @jdunkerley @wdanilo
# Change log
CHANGELOG.md
@ -11,6 +14,8 @@ Cargo.lock @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw
Cargo.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw
/lib/rust/ @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw
/lib/rust/ensogl/ @MichaelMauderer @wdanilo @farmaazon
/lib/rust/metamodel/ @kazcw @wdanilo @jaroslavtulach
/lib/rust/parser/ @kazcw @wdanilo @jaroslavtulach
/lib/rust/profiler/ @kazcw @MichaelMauderer @wdanilo
/integration-test/ @MichaelMauderer @wdanilo @farmaazon @kazcw
/tools/build-performance/ @kazcw @mwu-tow @wdanilo

13
Cargo.lock generated
View File

@ -2088,6 +2088,7 @@ dependencies = [
"lexpr",
"rand 0.8.5",
"rand_chacha 0.3.1",
"rand_distr 0.4.3",
"serde",
]
@ -4486,7 +4487,7 @@ dependencies = [
"num-rational 0.2.4",
"num-traits",
"rand 0.7.3",
"rand_distr",
"rand_distr 0.2.2",
"serde",
"serde_derive",
"simba 0.1.5",
@ -5482,6 +5483,16 @@ dependencies = [
"rand 0.7.3",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand 0.8.5",
]
[[package]]
name = "rand_hc"
version = "0.1.0"

View File

@ -67,6 +67,8 @@ pub struct DeserializerBuilder {
materializers: BTreeMap<FieldId, Materializer>,
#[derivative(Debug = "ignore")]
mappers: BTreeMap<FieldId, Mapper>,
#[derivative(Debug = "ignore")]
pre_hooks: Vec<Hook>,
support: String,
either_type: String,
}
@ -81,7 +83,8 @@ impl DeserializerBuilder {
let mappers = Default::default();
let support = support.into();
let either_type = either_type.into();
Self { root, materializers, mappers, support, either_type }
let pre_hooks = Default::default();
Self { root, materializers, mappers, support, either_type, pre_hooks }
}
/// Configure the specified field to be produced according to an expression, instead of by
@ -98,6 +101,12 @@ impl DeserializerBuilder {
self.mappers.insert(field, Box::new(mapper));
}
/// Register a hook that generates code to be run in the deserialization function, before any
/// deserialization begins.
///
/// The closure receives a [`HookInput`] and returns the generated code as a `String`. Each call
/// appends one more hook to the builder; hooks are kept in the order they were registered.
pub fn pre_hook<F>(&mut self, f: F)
where F: for<'a> FnOnce(HookInput<'a>) -> String + 'static {
// Boxed so that closures of different concrete types can share one `Vec`.
self.pre_hooks.push(Box::new(f));
}
/// Generate the deserialization method.
pub fn build(mut self, graph: &TypeGraph) -> Method {
let method = match graph[self.root].abstract_ {
@ -110,6 +119,7 @@ impl DeserializerBuilder {
type Materializer = Box<dyn for<'a> FnOnce(MaterializerInput<'a>) -> String>;
type Mapper = Box<dyn for<'a, 'b> FnOnce(MapperInput<'a, 'b>) -> String>;
type Hook = Box<dyn for<'a> FnOnce(HookInput<'a>) -> String>;
/// Input to a function that produces an expression that deserializes a field.
#[derive(Debug)]
@ -127,6 +137,13 @@ pub struct MapperInput<'a, 'b> {
pub value: &'b str,
}
/// Input to a function that produces statement(s) to be run.
#[derive(Debug)]
pub struct HookInput<'a> {
/// Identifier of the serialized message object.
pub message: &'a str,
}
// === Product Types ===
@ -143,6 +160,9 @@ impl DeserializerBuilder {
next_temp_variable_number += 1;
result
};
for hook in self.pre_hooks.drain(..) {
body.push_str(&(hook)(HookInput { message }));
}
let fields = class_fields(graph, class);
for field in &fields {
let ty_name = quote_type(graph, &field.data);

View File

@ -138,7 +138,7 @@ pub fn quote_params<'a>(
/// Given a model of a field ([`Field`]), create a representation of the Java syntax defining a
/// class field with name, type, and attributes as specified in the model.
fn quote_field(graph: &TypeGraph, field: &Field) -> syntax::Field {
let Field { name, data, id: _ } = field;
let Field { name, data, id: _, hide_in_tostring: _ } = field;
let type_ = quote_type(graph, data);
let name = name.clone();
let final_ = true;
@ -267,9 +267,13 @@ fn implement_equals(graph: &TypeGraph, class: &Class) -> syntax::Method {
/// [2]: https://openjdk.org/jeps/395
fn implement_to_string(graph: &TypeGraph, class: &Class) -> syntax::Method {
let string_builder = "stringBuilder";
let stringify =
|field: &Field| format!("{string_builder}.append(String.valueOf({}));", field.name);
let fields: Vec<_> = class_fields(graph, class).into_iter().map(stringify).collect();
let fields_ = class_fields(graph, class);
let mut fields = Vec::with_capacity(fields_.len());
for field in fields_ {
if !field.hide_in_tostring {
fields.push(format!("{string_builder}.append(String.valueOf({}));", field.name));
}
}
let mut body = String::new();
let ty_name = &class.name;
writeln!(body, "StringBuilder {string_builder} = new StringBuilder();").unwrap();

View File

@ -156,10 +156,11 @@ fn standard_methods() -> Vec<Method> {
#[derive(Debug, PartialEq, Eq)]
pub struct Field {
#[allow(missing_docs)]
pub name: String,
pub name: String,
#[allow(missing_docs)]
pub data: FieldData,
id: FieldId,
pub data: FieldData,
id: FieldId,
hide_in_tostring: bool,
}
impl Field {
@ -168,7 +169,8 @@ impl Field {
let name = name.into();
let data = FieldData::Object { type_, non_null };
let id = Default::default();
Self { name, data, id }
let hide_in_tostring = Default::default();
Self { name, data, id, hide_in_tostring }
}
/// Create a field holding primitive data.
@ -176,13 +178,19 @@ impl Field {
let name = name.into();
let data = FieldData::Primitive(primitive);
let id = Default::default();
Self { name, data, id }
let hide_in_tostring = Default::default();
Self { name, data, id, hide_in_tostring }
}
#[allow(missing_docs)]
pub fn id(&self) -> FieldId {
self.id
}
/// Omit this field from any containing class's `toString` implementation.
pub fn hide_in_tostring(&mut self) {
self.hide_in_tostring = true;
}
}
/// A field's data contents.

View File

@ -25,3 +25,4 @@ enso-metamodel-lexpr = { path = "../metamodel/lexpr" }
lexpr = "0.2.6"
rand = "0.8.5"
rand_chacha = "0.3.1"
rand_distr = "0.4.3"

View File

@ -1,6 +1,7 @@
package org.enso.syntax2;
import org.enso.syntax2.serialization.Message;
import org.enso.syntax2.UnsupportedSyntaxException;
import java.io.File;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
@ -25,7 +26,7 @@ public final class Parser implements AutoCloseable {
var state = allocState();
return new Parser(state);
}
public final Tree parse(String input) {
public final Tree parse(String input) throws UnsupportedSyntaxException {
try {
byte[] inputBytes = input.getBytes("UTF-8");
ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
@ -33,7 +34,12 @@ public final class Parser implements AutoCloseable {
var serializedTree = parseInput(state, inputBuf);
var base = getLastInputBase(state);
serializedTree.order(ByteOrder.LITTLE_ENDIAN);
return Tree.deserialize(new Message(serializedTree, inputBuf, base));
var message = new Message(serializedTree, inputBuf, base);
var result = Tree.deserialize(message);
if (message.getEncounteredUnsupportedSyntax()) {
throw new UnsupportedSyntaxException(result);
}
return result;
} catch (java.io.UnsupportedEncodingException e) {
throw new RuntimeException(e);
}

View File

@ -0,0 +1,14 @@
package org.enso.syntax2;
/**
 * Checked exception carrying a {@link Tree} that was found to contain unsupported syntax.
 *
 * <p>The tree supplied at construction time is retained and can be retrieved with
 * {@link #getTree()}.
 */
public final class UnsupportedSyntaxException extends Exception {
  /** Fixed detail message used for every instance. */
  private static final String MESSAGE =
      "Tree contains unsupported syntax. Details are in an `Unsupported` node in the tree.";

  /** The tree this exception was raised for. */
  private final Tree tree;

  /**
   * Package-private constructor.
   *
   * @param tree the tree to attach to this exception
   */
  UnsupportedSyntaxException(Tree tree) {
    super(MESSAGE);
    this.tree = tree;
  }

  /**
   * @return the tree that was passed to the constructor
   */
  public final Tree getTree() {
    return this.tree;
  }
}

View File

@ -4,6 +4,7 @@ public final class Message {
private final java.nio.ByteBuffer buffer;
private final java.nio.ByteBuffer context;
private final int base;
private boolean encounteredUnsupportedSyntax;
public Message(java.nio.ByteBuffer bufferIn, java.nio.ByteBuffer contextIn, long baseIn) {
buffer = bufferIn;
@ -53,4 +54,16 @@ public final class Message {
assert tmp >= 0;
return tmp;
}
/**
 * Describe where in the message reading currently stands.
 *
 * @return a string of the form {@code Message[buffer=N]}, where {@code N} is the current
 *     position of the underlying buffer
 */
final String getLocation() {
return "Message[buffer=" + buffer.position() + "]";
}
/**
 * @return {@code true} if {@link #markEncounteredUnsupportedSyntax()} has been called on this
 *     message; {@code false} otherwise
 */
public final boolean getEncounteredUnsupportedSyntax() {
return encounteredUnsupportedSyntax;
}
/**
 * Record that unsupported syntax was encountered. The flag is only ever set, never cleared, so
 * calling this more than once has no additional effect.
 */
public final void markEncounteredUnsupportedSyntax() {
encounteredUnsupportedSyntax = true;
}
}

View File

@ -38,12 +38,14 @@ fn main() {
let ast = enso_parser::syntax::Tree::reflect();
let tree = enso_parser::syntax::Tree::reflect().id;
let token = enso_parser::syntax::Token::<enso_parser::syntax::token::Variant>::reflect().id;
let unsupported = enso_parser::syntax::tree::Unsupported::reflect().id;
let (graph, rust_to_meta) = rust::to_meta(ast);
let (graph, meta_to_java) = java::from_meta(&graph, enso_parser_generate_java::EITHER_TYPE);
let mut graph = java::transform::optional_to_null(graph);
let rust_to_java = |id| meta_to_java[&rust_to_meta[&id]];
let (tree, token) = (rust_to_java(tree), rust_to_java(token));
serialization::derive(&mut graph, tree, token);
let (tree, token, unsupported) =
(rust_to_java(tree), rust_to_java(token), rust_to_java(unsupported));
serialization::derive(&mut graph, tree, token, unsupported);
let graph = java::to_syntax(&graph, enso_parser_generate_java::PACKAGE);
let mut args = std::env::args();
args.next().unwrap();

View File

@ -24,9 +24,9 @@ const TOKEN_OFFSET_BEGIN: &str = "leftOffsetCodeReprBegin";
//const TOKEN_OFFSET_LEN: &str = "leftOffsetCodeReprLen";
/// Derive deserialization for all types in the typegraph.
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId, unsupported: ClassId) {
let source = "source";
impl_deserialize(graph, tree, token, source);
impl_deserialize(graph, tree, token, unsupported, source);
graph[token].methods.push(impl_getter(CODE_GETTER, source, TOKEN_BEGIN, TOKEN_LEN));
graph[tree].methods.push(impl_getter(CODE_GETTER, source, TREE_BEGIN, TREE_LEN));
}
@ -34,14 +34,22 @@ pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
// === Deserialization Methods ===
fn impl_deserialize(graph: &mut TypeGraph, tree: ClassId, token: ClassId, source: &str) {
fn impl_deserialize(
graph: &mut TypeGraph,
tree: ClassId,
token: ClassId,
unsupported: ClassId,
source: &str,
) {
// Add source field to parent types.
let buffer = Class::builtin("java.nio.ByteBuffer", vec![]);
let buffer = graph.classes.insert(buffer);
let tree_source_ = Field::object(source, buffer, true);
let mut tree_source_ = Field::object(source, buffer, true);
tree_source_.hide_in_tostring();
let tree_source = tree_source_.id();
graph[tree].fields.push(tree_source_);
let token_source_ = Field::object(source, buffer, true);
let mut token_source_ = Field::object(source, buffer, true);
token_source_.hide_in_tostring();
let token_source = token_source_.id();
graph[token].fields.push(token_source_);
let tree_begin = graph[tree].find_field(TREE_BEGIN).unwrap().id();
@ -52,6 +60,11 @@ fn impl_deserialize(graph: &mut TypeGraph, tree: ClassId, token: ClassId, source
let class = &graph[id];
let mut deserialization =
bincode::DeserializerBuilder::new(id, crate::SERIALIZATION_SUPPORT, crate::EITHER_TYPE);
if id == unsupported {
deserialization.pre_hook(|bincode::HookInput { message }| {
format!("{message}.markEncounteredUnsupportedSyntax();\n")
});
}
if class.parent == Some(tree) {
deserialization.materialize(tree_source, context_materializer());
}

View File

@ -96,6 +96,7 @@ pub struct LexerState {
pub last_spaces_visible_offset: VisibleOffset,
pub current_block_indent: VisibleOffset,
pub block_indent_stack: Vec<VisibleOffset>,
pub internal_error: Option<String>,
}
impl<'s> Lexer<'s> {
@ -516,8 +517,9 @@ impl<'s> Lexer<'s> {
match current {
'.' => this.take_while_1_('.'),
'=' => this.take_while_1_('='),
':' => (),
',' => (),
':' | ',' => {
this.take_next();
}
_ => this.take_while_1_(is_operator_body_char),
};
}
@ -525,13 +527,18 @@ impl<'s> Lexer<'s> {
if let Some(token) = token {
if token.code == "+-" {
let (left, right) = token.split_at_(Bytes(1));
self.submit_token(left.with_variant(token::Variant::operator()));
self.submit_token(right.with_variant(token::Variant::operator()));
let (prec, binary, unary) = compute_precedence(&left.code);
self.submit_token(left.with_variant(token::Variant::operator(prec, binary, unary)));
self.submit_token(right.with_variant(token::Variant::operator(0, false, true)));
} else {
let only_eq = token.code.chars().all(|t| t == '=');
let is_mod = token.code.ends_with('=') && !only_eq;
let tp =
if is_mod { token::Variant::modifier() } else { token::Variant::operator() };
let tp = if is_mod {
token::Variant::modifier()
} else {
let (prec, binary, unary) = compute_precedence(&token.code);
token::Variant::operator(prec, binary, unary)
};
let token = token.with_variant(tp);
self.submit_token(token);
}
@ -540,6 +547,40 @@ impl<'s> Lexer<'s> {
}
// === Precedence ===
// FIXME: Compute precedences according to spec. Issue: #182497344
/// Look up the precedence information for an operator token.
///
/// The result is a `(precedence, binary, unary)` triple; callers in this file forward the three
/// components, in that order, to `token::Variant::operator`. Tokens that are not present in the
/// table produce `(0, false, false)`.
fn compute_precedence(token: &str) -> (usize, bool, bool) {
    // Special handling for tokens that can be unary: they do not fit the uniform
    // `(level, true, false)` shape produced by the table below.
    if token == "~" {
        return (0, false, true);
    }
    if token == "-" {
        return (14, true, true);
    }
    let level: Option<usize> = match token {
        // "There are a few operators with the lowest precedence possible."
        "=" => Some(1),
        ":" => Some(2),
        "->" => Some(3),
        "|" | "\\\\" | "&" => Some(4),
        ">>" | "<<" => Some(5),
        "|>" | "|>>" | "<|" | "<<|" => Some(6),
        // "The precedence of all other operators is determined by the operator's Precedence
        // Character:"
        "!" => Some(10),
        "||" => Some(11),
        "^" => Some(12),
        "&&" => Some(13),
        "+" | "++" => Some(14),
        "*" | "/" | "%" => Some(15),
        // FIXME: Not sure about these:
        "==" => Some(1),
        "," => Some(1),
        "@" => Some(20),
        "." => Some(21),
        _ => None,
    };
    // Every table entry is binary-only; anything unknown gets the all-zero/false triple.
    level.map_or((0, false, false), |precedence| (precedence, true, false))
}
// ==============
// === Symbol ===
@ -587,44 +628,45 @@ fn is_inline_text_body(t: char) -> bool {
impl<'s> Lexer<'s> {
/// Parse a text literal.
// FIXME: This impl is not yet finished and not all cases are covered (also, tests missing).
fn text(&mut self) {
let token = self.token(|this| this.take_1('"'));
if let Some(token) = token {
self.submit_token(token.with_variant(token::Variant::text_start()));
let line_empty = self.current_char.map(is_newline_char).unwrap_or(true);
if line_empty {
todo!()
} else {
let mut parsed_element;
loop {
parsed_element = false;
// FIXME: Handle this case; test this function. Issue: #182496940
let char = self.current_char;
self.internal_error.get_or_insert_with(|| format!("text: line_empty ({:?})", char));
return;
}
let mut parsed_element;
loop {
parsed_element = false;
let section = self.token(|this| this.take_while_1(is_inline_text_body));
if let Some(tok) = section {
parsed_element = true;
self.submit_token(tok.with_variant(token::Variant::text_section()));
}
let section = self.token(|this| this.take_while_1(is_inline_text_body));
if let Some(tok) = section {
parsed_element = true;
self.submit_token(tok.with_variant(token::Variant::text_section()));
}
let escape = self.token(|this| {
if this.take_1('\\') {
this.take_1('"');
}
});
if let Some(token) = escape {
parsed_element = true;
self.submit_token(token.with_variant(token::Variant::text_escape()));
let escape = self.token(|this| {
if this.take_1('\\') {
this.take_1('"');
}
});
if let Some(token) = escape {
parsed_element = true;
self.submit_token(token.with_variant(token::Variant::text_escape()));
}
let end = self.token(|this| this.take_1('"'));
if let Some(token) = end {
self.submit_token(token.with_variant(token::Variant::text_end()));
break;
}
let end = self.token(|this| this.take_1('"'));
if let Some(token) = end {
self.submit_token(token.with_variant(token::Variant::text_end()));
break;
}
if !parsed_element {
break;
}
if !parsed_element {
break;
}
}
}
@ -732,13 +774,13 @@ const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[
impl<'s> Lexer<'s> {
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
/// hierarchy).
pub fn run(self) -> Vec<Item<'s>> {
build_block_hierarchy(self.run_flat())
pub fn run(self) -> ParseResult<Vec<Item<'s>>> {
self.run_flat().map(build_block_hierarchy)
}
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run_flat(mut self) -> Vec<Token<'s>> {
pub fn run_flat(mut self) -> ParseResult<Vec<Token<'s>>> {
self.spaces_after_lexeme();
let mut any_parser_matched = true;
while any_parser_matched {
@ -750,28 +792,30 @@ impl<'s> Lexer<'s> {
}
}
}
if self.current_char != None {
panic!("Internal error. Lexer did not consume all input. State: {self:?}");
}
while self.end_block().is_some() {
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
}
let tokens = self.output;
event!(TRACE, "Tokens:\n{:#?}", tokens);
tokens
let mut internal_error = self.internal_error.take();
if self.current_char != None {
let message = format!("Lexer did not consume all input. State: {self:?}");
internal_error.get_or_insert(message);
}
let value = self.output;
event!(TRACE, "Tokens:\n{:#?}", value);
ParseResult { value, internal_error }
}
}
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run_flat(input: &'_ str) -> Vec<Token<'_>> {
pub fn run_flat(input: &'_ str) -> ParseResult<Vec<Token<'_>>> {
Lexer::new(input).run_flat()
}
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
/// hierarchy).
pub fn run(input: &'_ str) -> Vec<Item<'_>> {
pub fn run(input: &'_ str) -> ParseResult<Vec<Item<'_>>> {
Lexer::new(input).run()
}
@ -828,7 +872,8 @@ pub mod test {
/// Constructor.
pub fn operator_<'s>(left_offset: &'s str, code: &'s str) -> Token<'s> {
Token(left_offset, code, token::Variant::operator())
let (precedence, binary, unary) = compute_precedence(code);
Token(left_offset, code, token::Variant::operator(precedence, binary, unary))
}
}
@ -844,7 +889,7 @@ mod tests {
}
fn test_lexer<'s>(input: &'s str, expected: Vec<Token<'s>>) {
assert_eq!(run_flat(input), expected);
assert_eq!(run_flat(input).unwrap(), expected);
}
fn lexer_case_idents<'s>(idents: &[&'s str]) -> Vec<(&'s str, Vec<Token<'s>>)> {
@ -975,8 +1020,9 @@ mod tests {
#[test]
fn test_case_operators() {
test_lexer_many(lexer_case_operators(&["+", "-", "=", "==", "==="]));
test_lexer_many(vec![("+-", vec![operator_("", "+"), operator_("", "-")])]);
test_lexer_many(lexer_case_operators(&["+", "-", "=", "==", "===", ":", ","]));
let unary_minus = Token("", "-", token::Variant::operator(0, false, true));
test_lexer_many(vec![("+-", vec![operator_("", "+"), unary_minus])]);
}
/// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt.
@ -1165,7 +1211,7 @@ mod benches {
b.iter(move || {
let lexer = Lexer::new(&str);
assert_eq!(lexer.run().len(), reps);
assert_eq!(lexer.run().unwrap().len(), reps);
});
}
}

View File

@ -124,6 +124,37 @@ pub mod prelude {
pub use enso_reflect::Reflect;
pub use enso_types::traits::*;
pub use enso_types::unit2::Bytes;
/// Wraps return value for functions whose implementations don't handle all cases yet. When the
/// parser is complete, this type will be eliminated.
pub type WipResult<T> = Result<T, String>;
/// Result wrapper for operations that can only fail because of an implementation bug.
///
/// Unlike `Result`, a value is always present; a detected bug is reported alongside it rather
/// than instead of it.
#[derive(Debug, Default)]
pub struct ParseResult<T> {
    /// The computed value. When `internal_error` is `Some`, this is only a best-effort value
    /// and cannot be assumed to be accurate; otherwise, it should be correct.
    pub value:          T,
    /// Description of an internal error encountered while computing `value`, if any.
    pub internal_error: Option<String>,
}

impl<T> ParseResult<T> {
    /// Transform the contained value with `f`, carrying `internal_error` over unchanged.
    ///
    /// `f` is always invoked, whether or not an internal error is present.
    pub fn map<U, F>(self, f: F) -> ParseResult<U>
    where F: FnOnce(T) -> U {
        ParseResult { value: f(self.value), internal_error: self.internal_error }
    }

    /// Return the contained value.
    ///
    /// # Panics
    ///
    /// Panics if an internal error was recorded.
    pub fn unwrap(self) -> T {
        let Self { value, internal_error } = self;
        assert_eq!(internal_error, None);
        value
    }
}
}
@ -150,7 +181,12 @@ impl Parser {
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
let tokens = lexer::run(code);
let resolver = macros::resolver::Resolver::new_root();
resolver.run(&self.macros, tokens)
let result = tokens.map(|tokens| resolver.run(&self.macros, tokens));
let value = result.value;
if let Some(error) = result.internal_error {
return value.with_error(format!("Internal error: {}", error));
}
value
}
}
@ -169,12 +205,19 @@ impl Default for Parser {
/// interpreted as a variable assignment or method definition.
fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
let tree_ = match &mut *tree.variant {
Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast,
let mut left_offset = source::span::Offset::default();
let tree_ = match &mut tree {
Tree { variant: box Variant::OprSectionBoundary(OprSectionBoundary { ast }), span } => {
left_offset += &span.left_offset;
ast
}
_ => &mut tree,
};
let opr_app = match &mut *tree_.variant {
Variant::OprApp(opr_app) => opr_app,
let opr_app = match tree_ {
Tree { variant: box Variant::OprApp(opr_app), span } => {
left_offset += &span.left_offset;
opr_app
}
_ => return tree,
};
if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.code == "=" {
@ -188,12 +231,18 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
if args.is_empty() && let Some(rhs) = rhs && !is_body_block(rhs) {
// If the LHS has no arguments, and there is a RHS, and the RHS is not a body block,
// this is a variable assignment.
return Tree::assignment(mem::take(lhs), mem::take(opr), mem::take(rhs))
let mut result = Tree::assignment(mem::take(lhs), mem::take(opr), mem::take(rhs));
left_offset += result.span.left_offset;
result.span.left_offset = left_offset;
return result;
}
if let Variant::Ident(Ident { token }) = &mut *lhs.variant {
// If this is not a variable assignment, and the leftmost leaf of the `App` tree is
// an identifier, this is a function definition.
return Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs))
let mut result = Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs));
left_offset += result.span.left_offset;
result.span.left_offset = left_offset;
return result;
}
}
tree
@ -261,6 +310,41 @@ mod benches {
str.push('\n');
}
let parser = Parser::new();
bencher.bytes = str.len() as u64;
bencher.iter(move || {
parser.run(&str);
});
}
#[bench]
fn bench_expressions(bencher: &mut Bencher) {
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
let lines = 100;
let avg_group_len = 20;
let avg_groups_per_line = 20;
let mut str = String::new();
let mut rng = ChaCha8Rng::seed_from_u64(0);
let normal = rand_distr::StandardNormal;
for _ in 0..lines {
let operators = ['=', '+', '-', '*', ':'];
let groups: f64 = normal.sample(&mut rng);
let groups = (groups * avg_groups_per_line as f64) as usize;
for _ in 0..groups {
let len: f64 = normal.sample(&mut rng);
let len = (len * avg_group_len as f64) as usize;
str.push('x');
for _ in 0..len {
let i = rng.gen_range(0..operators.len());
str.push(operators[i]);
str.push('x');
}
str.push(' ');
}
str.push('\n');
}
let parser = Parser::new();
bencher.bytes = str.len() as u64;
bencher.iter(move || {
parser.run(&str);
});

View File

@ -44,42 +44,135 @@ pub fn type_def<'s>() -> Definition<'s> {
identifier() / "name" % "type name" >>
many(identifier() % "type parameter" / "param") % "type parameters" >>
block(
many(identifier() / "constructor") % "type constructors" >>
everything()
everything() / "statements"
) % "type definition body";
// let pattern2 = Everything;
crate::macro_definition! {
("type", pattern)
type_def_body
}
}
// TODO: The comments in the code were left in order to allow easy debugging of this struct. They
// should be removed in the future.
fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
let segment = matched_segments.to_vec().pop().unwrap();
// println!(">>>");
// println!("{:#?}", segment);
// println!(">>>");
// FIXME: This implementation of parsing constructors works for correct inputs, but doesn't
// handle incorrect syntax ideally. Issue: #182745069
let segment = matched_segments.pop().0;
let match_tree = segment.result.into_var_map();
// println!("{:#?}", match_tree);
// println!("\n\n------------- 1");
let mut v = match_tree.view();
let name = &v.query("name").unwrap()[0];
let name = operator::resolve_operator_precedence_if_non_empty(name.clone()).unwrap();
// println!("{:#?}", name);
// println!("\n\n------------- 2");
let no_params = vec![];
let name = v.query("name").map(|name| name[0].clone()).unwrap_or_default();
let name = operator::resolve_operator_precedence_if_non_empty(name);
let no_params = [];
let params = v.nested().query("param").unwrap_or(&no_params);
// println!("{:#?}", params);
// println!("\n\n------------- 3");
let params = params
.iter()
.map(|tokens| operator::resolve_operator_precedence_if_non_empty(tokens.clone()).unwrap())
.map(|tokens| {
operator::resolve_operator_precedence_if_non_empty(tokens.iter().cloned()).unwrap()
})
.collect_vec();
// println!("{:#?}", params);
syntax::Tree::type_def(segment.header, name, params)
let mut constructors = default();
let mut body = default();
if let Some(items) = v.query("statements") {
let items = items[0].iter().cloned();
let mut builder = TypeDefBodyBuilder::default();
for syntax::tree::block::Line { newline, expression } in syntax::tree::block::lines(items) {
builder.line(newline, expression);
}
let (constructors_, body_) = builder.finish();
constructors = constructors_;
body = body_;
}
match name {
Some(name) => syntax::Tree::type_def(segment.header, name, params, constructors, body),
None => {
let name = syntax::Tree::ident(syntax::token::ident("", "", false, 0));
let result = syntax::Tree::type_def(segment.header, name, params, constructors, body);
result.with_error("Expected identifier after `type` keyword.")
}
}
}
/// Accumulates the lines of a type definition's block, keeping the lines interpreted as
/// constructor definitions separate from the lines of the remaining body.
#[derive(Default)]
struct TypeDefBodyBuilder<'s> {
// Lines collected so far as constructor lines.
constructors: Vec<syntax::tree::TypeConstructorLine<'s>>,
// Lines collected so far as ordinary body lines.
body: Vec<syntax::tree::block::Line<'s>>,
}
impl<'s> TypeDefBodyBuilder<'s> {
/// Apply the line to the state.
///
/// As long as no body line has been recorded, a line is stored among the constructors if it has
/// no expression, or if its expression is accepted by [`Self::to_constructor_line`]. The first
/// line whose expression is rejected is stored in the body instead; from then on every line goes
/// to the body. Expressions stored in the body are first passed through
/// `crate::expression_to_statement`.
pub fn line(
&mut self,
newline: syntax::token::Newline<'s>,
expression: Option<syntax::Tree<'s>>,
) {
// An empty body means we are still in the leading run of constructor lines.
if self.body.is_empty() {
if let Some(expression) = expression {
match Self::to_constructor_line(expression) {
Ok(expression) => {
let expression = Some(expression);
let line = syntax::tree::TypeConstructorLine { newline, expression };
self.constructors.push(line);
}
// Not a constructor: `to_constructor_line` hands the expression back, and this line
// becomes the first body line.
Err(expression) => {
let expression = crate::expression_to_statement(expression);
let expression = Some(expression);
self.body.push(syntax::tree::block::Line { newline, expression });
}
}
} else {
// A line with no expression does not end the constructor section.
self.constructors.push(newline.into());
}
} else {
let expression = expression.map(crate::expression_to_statement);
self.body.push(syntax::tree::block::Line { newline, expression });
}
}
/// Consume the builder and return the collected sequences as a `(constructors, body)` pair.
pub fn finish(
self,
) -> (Vec<syntax::tree::TypeConstructorLine<'s>>, Vec<syntax::tree::block::Line<'s>>) {
(self.constructors, self.body)
}
/// Interpret the given expression as a `TypeConstructorDef`, if its syntax is compatible.
///
/// Two shapes are accepted:
/// - An `ArgumentBlockApplication` whose `lhs` is an `Ident`: the identifier's token becomes the
///   constructor, the application's `arguments` become the definition's `block`, and the
///   definition's `arguments` are left at their default value.
/// - An `Ident`, optionally wrapped in any number of nested `App` nodes: the identifier's token
///   becomes the constructor, each `App`'s `arg` becomes one argument, and `block` is left at its
///   default value.
///
/// In both cases the left offsets of the spans that are stripped away are added to the
/// constructor token's own left offset. Any other expression is returned in `Err`.
fn to_constructor_line(
expression: syntax::Tree<'_>,
) -> Result<syntax::tree::TypeConstructorDef<'_>, syntax::Tree<'_>> {
use syntax::tree::*;
// Shape 1: identifier applied to an argument block. This pattern takes ownership of the parts
// of `expression` it binds.
if let Tree {
variant:
box Variant::ArgumentBlockApplication(ArgumentBlockApplication {
lhs: Some(Tree { variant: box Variant::Ident(ident), span: span_ }),
arguments,
}),
span,
} = expression
{
let mut constructor = ident.token;
// Combine, outermost first: application span, identifier span, then the token's own offset.
let mut left_offset = span.left_offset;
left_offset += &span_.left_offset;
left_offset += constructor.left_offset;
constructor.left_offset = left_offset;
let block = arguments;
let arguments = default();
return Ok(TypeConstructorDef { constructor, arguments, block });
}
// Shape 2: identifier followed by zero or more applied arguments. This part only borrows
// `expression`, so it can still be returned unchanged in the `Err` case.
let mut arguments = vec![];
let mut lhs = &expression;
let mut left_offset = crate::source::span::Offset::default();
// Walk down the `func` side of nested `App` nodes, collecting each `arg` on the way.
while let Tree { variant: box Variant::App(App { func, arg }), span } = lhs {
left_offset += &span.left_offset;
lhs = func;
arguments.push(arg.clone());
}
if let Tree { variant: box Variant::Ident(Ident { token }), span } = lhs {
let mut constructor = token.clone();
left_offset += &span.left_offset;
left_offset += constructor.left_offset;
constructor.left_offset = left_offset;
// Arguments were collected from the outermost application inward; reverse that order.
arguments.reverse();
let block = default();
return Ok(TypeConstructorDef { constructor, arguments, block });
}
Err(expression)
}
}

View File

@ -158,27 +158,21 @@ impl<'s> Match<'s> {
fn build_var_map<V: Default + Validator>(self, tree: &mut VarMap<'s, V>, validator: &V) {
match self {
Self::Everything(_) => {}
Self::Nothing => {}
Self::Identifier(_) => {}
Self::Expected(_, _) => {}
Self::NotBlock(_) => {}
Self::Or(t) => match *t {
OrMatch::First(first) => first.build_var_map(tree, validator),
OrMatch::Second(second) => second.build_var_map(tree, validator),
},
Self::Everything(_)
| Self::Nothing
| Self::Identifier(_)
| Self::Expected(_, _)
| Self::NotBlock(_) => {}
Self::Or(box OrMatch::First(item) | box OrMatch::Second(item)) =>
item.build_var_map(tree, validator),
Self::Seq(first, second) => {
first.build_var_map(tree, validator);
second.build_var_map(tree, validator);
}
Self::Many(matches) => {
if tree.nested.is_none() {
let nested = VarMap::<'s, V>::default();
tree.nested = Some(Box::new(nested));
}
let nested_validator = V::default();
nested_validator.set_parent(validator);
let nested = tree.nested.as_mut().unwrap();
let nested = tree.nested.get_or_insert(default());
for m in matches {
m.build_var_map(nested, &nested_validator);
}
@ -310,7 +304,7 @@ impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
/// Query for a variable.
pub fn query(&mut self, name: &str) -> Option<&'t Vec<Vec<syntax::Item<'s>>>> {
pub fn query(&mut self, name: &str) -> Option<&'t [Vec<syntax::Item<'s>>]> {
self.tree.and_then(|t| {
t.map.get(name).map(|entry| {
match &self.resolved_validator {
@ -342,7 +336,7 @@ impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
self.resolved_validator = Some(resolved_validator);
}
}
&entry.tokens
&entry.tokens[..]
})
})
}

View File

@ -217,23 +217,30 @@ impl<'s> Match<'s> {
Self::Named(label.into(), Box::new(second))
}
/// Get all tokens of the match.
pub fn tokens(self) -> Vec<syntax::Item<'s>> {
/// Get all tokens of the match; append them to an existing sequence.
pub fn get_tokens(self, out: &mut Vec<syntax::Item<'s>>) {
match self {
Self::Everything(tokens) => tokens.into(),
Self::Nothing => default(),
Self::Seq(fst, snd) => fst.tokens().extended(snd.tokens()),
Self::Many(t) => t.into_iter().flat_map(|s| s.tokens()).collect(),
Self::Identifier(ident) => vec![ident],
Self::Expected(_, item) => item.tokens(),
Self::Named(_, item) => item.tokens(),
Self::NotBlock(item) => vec![item],
Self::Or(t) => match *t {
OrMatch::First(fst) => fst.tokens(),
OrMatch::Second(snd) => snd.tokens(),
},
Self::Nothing => (),
Self::Identifier(item) | Self::NotBlock(item) => out.push(item),
Self::Everything(tokens) => out.extend(tokens),
Self::Seq(fst, snd) => {
fst.get_tokens(out);
snd.get_tokens(out);
}
Self::Expected(_, box item)
| Self::Named(_, box item)
| Self::Or(box OrMatch::First(item) | box OrMatch::Second(item)) =>
item.get_tokens(out),
Self::Many(matches) => matches.into_iter().for_each(|match_| match_.get_tokens(out)),
}
}
/// Get all tokens of the match; return them in a newly-allocated vector.
pub fn tokens(self) -> Vec<syntax::Item<'s>> {
let mut out = vec![];
self.get_tokens(&mut out);
out
}
}
@ -349,8 +356,7 @@ impl Pattern {
},
},
PatternData::Block(body) => match input.pop_front() {
Some(syntax::Item::Block(tokens)) =>
body.resolve(tokens.into_iter().rev().map_into().collect()),
Some(syntax::Item::Block(tokens)) => body.resolve(tokens.into()),
Some(t) => {
input.push_front(t);
Err(input)

View File

@ -504,7 +504,9 @@ impl<'s> Resolver<'s> {
let out = (macro_def.body)(pattern_matched_segments);
(out, not_used_items_of_last_segment)
} else {
todo!("Macro was not matched with any known macro definition. This should return an AST node indicating invalid match.")
let message = format!("Macro was not matched with any known macro definition.\nResolved segments: {resolved_segments:?}");
let error = syntax::tree::Error::new(message);
(syntax::tree::Tree::invalid(error, default()), default())
}
}

View File

@ -73,8 +73,9 @@ where D: serde::Deserializer<'de> {
pub(crate) struct Error(String);
impl From<Error> for crate::syntax::tree::Error {
fn from(_: Error) -> Self {
crate::syntax::tree::Error { message: "" }
fn from(error: Error) -> Self {
let message = error.0.into();
crate::syntax::tree::Error { message }
}
}

View File

@ -47,7 +47,13 @@ impl<'s> Item<'s> {
Item::Token(token) => match token.variant {
token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
token::Variant::Number(number) => Tree::number(token.with_variant(number)),
_ => todo!("{token:?}"),
token::Variant::Comment(comment) => Tree::comment(token.with_variant(comment)),
token::Variant::TextSection(text) => Tree::text_section(token.with_variant(text)),
_ => {
let message = format!("to_ast: Item::Token({token:?})");
let value = Tree::ident(token.with_variant(token::variant::Ident(false, 0)));
Tree::with_unsupported(value, message)
}
},
Item::Tree(ast) => ast,
Item::Block(items) => build_block(items),
@ -86,24 +92,8 @@ impl<'s> TryAsRef<Item<'s>> for Item<'s> {
/// Given a sequence of [`Item`]s belonging to one block, create an AST block node, of a type
/// determined by the syntax of the lines in the block.
fn build_block<'s>(items: impl IntoIterator<Item = Item<'s>>) -> Tree<'s> {
let mut line = vec![];
let mut block_builder = tree::block::Builder::new();
let mut newline = None;
for item in items {
match item {
Item::Token(Token { variant: token::Variant::Newline(_), left_offset, code }) => {
let newline = mem::replace(&mut newline, Some(token::newline(left_offset, code)));
if let Some(newline) = newline {
let line: Vec<_> = line.drain(..).collect();
let expression = operator::resolve_operator_precedence_if_non_empty(line);
block_builder.push(newline, expression);
}
}
_ => line.push(item),
}
}
if let Some(newline) = newline {
let expression = operator::resolve_operator_precedence_if_non_empty(line);
for tree::block::Line { newline, expression } in tree::block::lines(items) {
block_builder.push(newline, expression);
}
block_builder.build()

View File

@ -12,35 +12,6 @@ use crate::syntax::token::Token;
// === Precedence ===
// ==================
// FIXME: The current implementation hard-codes precedence values and does not support precedence
// computations for any operator (according to the spec)
fn precedence_of(operator: &str) -> usize {
match operator {
"=" => 1,
"+" => 3,
"-" => 3,
"*" => 7,
_ => panic!("Operator not supported: {}", operator),
}
}
/// An item with an assigned precedence.
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
struct WithPrecedence<T> {
#[deref]
#[deref_mut]
elem: T,
precedence: usize,
}
impl<T> WithPrecedence<T> {
/// Constructor.
pub fn new(precedence: usize, elem: T) -> Self {
Self { elem, precedence }
}
}
/// Annotate expressions that should use spacing, because otherwise they are misleading. For
/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion
/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is
@ -54,23 +25,20 @@ fn annotate_tokens_that_need_spacing(item: syntax::Item) -> syntax::Item {
})
}
/// If the input sequence is non-empty, return the result of applying
/// [`resolve_operator_precedence`] to it.
pub fn resolve_operator_precedence_if_non_empty(
items: Vec<syntax::Item<'_>>,
) -> Option<syntax::Tree<'_>> {
match NonEmptyVec::try_from(items) {
Ok(items) => Some(resolve_operator_precedence(items)),
_ => None,
}
}
/// Take [`Item`] stream, resolve operator precedence and return the final AST.
///
/// The precedence resolution algorithm is based on the Shunting yard algorithm[1], extended to
/// handle operator sections.
/// [1]: https://en.wikipedia.org/wiki/Shunting_yard_algorithm
pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) -> syntax::Tree<'s> {
pub fn resolve_operator_precedence(items: NonEmptyVec<syntax::Item<'_>>) -> syntax::Tree<'_> {
resolve_operator_precedence_if_non_empty(items).unwrap()
}
/// If the input sequence is non-empty, return the result of applying
/// [`resolve_operator_precedence`] to it.
pub fn resolve_operator_precedence_if_non_empty<'s>(
items: impl IntoIterator<Item = syntax::Item<'s>>,
) -> Option<syntax::Tree<'s>> {
type Tokens<'s> = Vec<syntax::Item<'s>>;
let mut flattened: Tokens<'s> = default();
let mut no_space_group: Tokens<'s> = default();
@ -80,7 +48,7 @@ pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) ->
flattened.extend(tokens);
} else {
let tokens = tokens.map(annotate_tokens_that_need_spacing);
let ast = resolve_operator_precedence_internal(tokens);
let ast = resolve_operator_precedence_internal(tokens).unwrap();
flattened.push(ast.into());
}
};
@ -107,7 +75,7 @@ pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) ->
fn resolve_operator_precedence_internal<'s>(
items: impl IntoIterator<Item = syntax::Item<'s>>,
) -> syntax::Tree<'s> {
) -> Option<syntax::Tree<'s>> {
// Reverse-polish notation encoding.
/// Classify an item as an operator-token, or other data; we track this state information
/// because whenever consecutive operators or consecutive non-operators occur, we merge them
@ -120,40 +88,55 @@ fn resolve_operator_precedence_internal<'s>(
use ItemType::*;
let mut was_section_used = false;
let mut output: Vec<syntax::Item> = default();
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
let mut operator_stack: Vec<Vec<token::Operator>> = default();
let mut unary_operator: Option<token::Operator> = default();
let mut prev_type = None;
let mut precedence_error = None;
for item in items {
if let syntax::Item::Token(
Token { variant: token::Variant::Operator(opr), left_offset, code }) = item {
// Item is an operator.
if let Some(unsatisified_opr) = unary_operator.take() {
output.push(syntax::Tree::unary_opr_app(unsatisified_opr, None).into());
prev_type = Some(Ast);
}
let prev_type = mem::replace(&mut prev_type, Some(Opr));
let prec = precedence_of(&code);
if opr.can_be_binary_infix {
} else if opr.can_be_unary_prefix {
if prev_type == Some(Ast) {
operator_stack.push(default());
}
let opr = Token(left_offset, code, opr);
unary_operator = Some(opr);
continue;
} else {
precedence_error.get_or_insert_with(|| format!("Precedence of: {:?}", code));
};
let prec = opr.precedence;
let opr = Token(left_offset, code, opr);
if prev_type == Some(Opr) && let Some(prev_opr) = operator_stack.last_mut() {
// Error. Multiple operators next to each other.
match &mut prev_opr.elem {
Err(err) => err.operators.push(opr),
Ok(prev) => {
let operators = NonEmptyVec::new(prev.clone(),vec![opr]);
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
}
}
prev_opr.push(opr);
} else {
// Application has the highest precedence.
const APP_PREC: usize = std::usize::MAX;
while let Some(prev_opr) = operator_stack.last()
&& prev_opr.precedence >= prec
&& prev_opr.first().map(|opr| opr.precedence).unwrap_or(APP_PREC) >= prec
&& let Some(prev_opr) = operator_stack.pop()
&& let Some(rhs) = output.pop()
{
// Prev operator in the [`operator_stack`] has a higher precedence.
let lhs = output.pop().map(|t| t.to_ast());
if lhs.is_none() { was_section_used = true; }
let ast = syntax::tree::apply_operator(lhs, prev_opr.elem, Some(rhs.to_ast()));
let ast = syntax::tree::apply_operator(lhs, prev_opr, Some(rhs.to_ast()));
output.push(ast.into());
}
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
operator_stack.push(vec![opr]);
}
} else if let Some(opr) = unary_operator.take() {
let rhs = Some(item.to_ast());
output.push(syntax::Tree::unary_opr_app(opr, rhs).into());
prev_type = Some(Ast);
} else if prev_type == Some(Ast) && let Some(lhs) = output.pop() {
// Multiple non-operators next to each other.
let lhs = lhs.to_ast();
@ -166,25 +149,31 @@ fn resolve_operator_precedence_internal<'s>(
output.push(item);
}
}
if let Some(unsatisified_opr) = unary_operator.take() {
output.push(syntax::Tree::unary_opr_app(unsatisified_opr, None).into());
prev_type = Some(Ast);
}
let mut opt_rhs = (prev_type == Some(Ast)).and_option_from(|| output.pop().map(|t| t.to_ast()));
while let Some(opr) = operator_stack.pop() {
let opt_lhs = output.pop().map(|t| t.to_ast());
if opt_lhs.is_none() || opt_rhs.is_none() {
was_section_used = true;
}
opt_rhs = Some(syntax::tree::apply_operator(opt_lhs, opr.elem, opt_rhs));
opt_rhs = Some(syntax::tree::apply_operator(opt_lhs, opr, opt_rhs));
}
if !output.is_empty() {
panic!("Internal error. Not all tokens were consumed while constructing the expression.");
}
// This unwrap is safe because:
// - resolve_operator_precedence only calls this function with non-empty sequences as inputs.
// - Given a non-empty input, we will always have at least one output.
let out = opt_rhs.unwrap();
if was_section_used {
syntax::Tree::opr_section_boundary(out)
let out = if was_section_used {
// This can't fail: `was_section_used` won't be true unless we had at least one input,
// and if we have at least one input, we have output.
let out = opt_rhs.unwrap();
Some(syntax::Tree::opr_section_boundary(out))
} else {
out
opt_rhs
};
if let Some(error) = precedence_error {
return Some(syntax::Tree::with_unsupported(out.unwrap(), error));
}
out
}

View File

@ -170,7 +170,7 @@ impl<'s, T> Token<'s, T> {
impl<'s, T: Debug> Debug for Token<'s, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "[{}:\"{}\"] ", self.left_offset.visible, self.code)?;
write!(f, "[{}:{:?}] ", self.left_offset.visible, self.code)?;
Debug::fmt(&self.variant, f)
}
}
@ -262,7 +262,11 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
pub is_free: bool,
pub lift_level: usize
},
Operator,
Operator {
pub precedence: usize,
pub can_be_binary_infix: bool,
pub can_be_unary_prefix: bool,
},
Modifier,
Comment,
DocComment,

View File

@ -79,6 +79,11 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub error: Error,
pub ast: Tree<'s>,
},
/// Indicates a subtree in which an unimplemented case was reached.
Unsupported {
pub error: String,
pub ast: Tree<'s>,
},
/// A sequence of lines introduced by a line ending in an operator.
BodyBlock {
/// The lines of the block.
@ -110,6 +115,14 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
Number {
pub token: token::Number<'s>,
},
/// A comment.
Comment {
pub token: token::Comment<'s>,
},
/// A text section.
TextSection {
pub token: token::TextSection<'s>,
},
/// A simple application, like `print "hello"`.
App {
pub func: Tree<'s>,
@ -123,6 +136,12 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub opr: OperatorOrError<'s>,
pub rhs: Option<Tree<'s>>,
},
/// Application of a unary operator, like `-a` or `~handler`. It is a syntax error for `rhs`
/// to be `None`.
UnaryOprApp {
pub opr: token::Operator<'s>,
pub rhs: Option<Tree<'s>>,
},
/// Defines the point where operator sections should be expanded to lambdas. Let's consider
/// the expression `map (.sum 1)`. It should be desugared to `map (x -> x.sum 1)`, not to
/// `map ((x -> x.sum) 1)`. The expression `.sum` will be parsed as operator section
@ -141,10 +160,17 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
MultiSegmentApp {
pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
},
/// A type definition; introduced by a line consisting of the keyword `type`, an identifier
/// to be used as the name of the type, and zero or more specifications of type parameters.
/// The following indented block contains two types of lines:
/// - First zero or more type constructors, and their subordinate blocks.
/// - Then a block of statements, which may define methods or type methods.
TypeDef {
pub keyword: Token<'s>,
pub name: Tree<'s>,
pub params: Vec<Tree<'s>>,
pub constructors: Vec<TypeConstructorLine<'s>>,
pub block: Vec<block::Line<'s>>,
},
/// A variable assignment, like `foo = bar 23`.
Assignment {
@ -205,27 +231,34 @@ with_ast_definition!(generate_ast_definition());
// === Invalid ===
/// Error of parsing attached to an [`Tree`] node.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
#[allow(missing_docs)]
#[reflect(transparent)]
#[serde(from = "crate::serialization::Error")]
pub struct Error {
#[serde(skip_deserializing)]
pub message: &'static str,
pub message: Cow<'static, str>,
}
impl Error {
/// Constructor.
pub fn new(message: &'static str) -> Self {
pub fn new(message: impl Into<Cow<'static, str>>) -> Self {
let message = message.into();
Self { message }
}
}
impl<'s> Tree<'s> {
/// Constructor.
pub fn with_error(self, message: &'static str) -> Self {
pub fn with_error(self, message: impl Into<Cow<'static, str>>) -> Self {
Tree::invalid(Error::new(message), self)
}
/// Constructor.
pub fn with_unsupported(self, message: String) -> Self {
eprintln!("Unsupported syntax: {}", &message);
Tree::unsupported(message, self)
}
}
impl<'s> span::Builder<'s> for Error {
@ -235,6 +268,47 @@ impl<'s> span::Builder<'s> for Error {
}
// === Type Definitions ===
/// A line within a type definition, containing a type constructor definition.
///
/// Each line keeps the newline token that introduced it. A line on which no constructor is
/// defined is still represented, with `expression` set to `None`.
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
pub struct TypeConstructorLine<'s> {
/// The token beginning the line.
pub newline: token::Newline<'s>,
/// The type constructor definition, unless this is an empty line.
pub expression: Option<TypeConstructorDef<'s>>,
}
impl<'s> span::Builder<'s> for TypeConstructorLine<'s> {
    /// Extend `span` with the newline token first, then with the optional constructor definition.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        let Self { newline, expression } = self;
        span.add(newline).add(expression)
    }
}
impl<'s> From<token::Newline<'s>> for TypeConstructorLine<'s> {
    /// Build a line that consists only of its newline token, with no constructor definition.
    fn from(newline: token::Newline<'s>) -> Self {
        let expression = None;
        Self { newline, expression }
    }
}
/// A type constructor definition within a type definition.
///
/// Arguments may be given on the same line as the constructor name (`arguments`), on
/// following lines of their own (`block`), or not at all, in which case both collections are
/// empty.
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
pub struct TypeConstructorDef<'s> {
/// The identifier naming the type constructor.
pub constructor: token::Ident<'s>,
/// The arguments the type constructor accepts, specified inline.
pub arguments: Vec<Tree<'s>>,
/// The arguments the type constructor accepts, specified on their own lines.
pub block: Vec<block::Line<'s>>,
}
impl<'s> span::Builder<'s> for TypeConstructorDef<'s> {
    /// Extend `span` with the constructor name, then the inline arguments, then the block of
    /// arguments, in that order.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        let Self { constructor, arguments, block } = self;
        span.add(constructor).add(arguments).add(block)
    }
}
// === OprApp ===
/// Operator or [`MultipleOperatorError`].
@ -324,9 +398,14 @@ pub fn apply<'s>(func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
/// expression.
pub fn apply_operator<'s>(
lhs: Option<Tree<'s>>,
opr: OperatorOrError<'s>,
opr: Vec<token::Operator<'s>>,
mut rhs: Option<Tree<'s>>,
) -> Tree<'s> {
let opr = match opr.len() {
0 => return apply(lhs.unwrap(), rhs.unwrap()),
1 => Ok(opr.into_iter().next().unwrap()),
_ => Err(MultipleOperatorError { operators: NonEmptyVec::try_from(opr).unwrap() }),
};
if let Some(rhs_) = rhs.as_mut() {
if let Variant::ArgumentBlockApplication(block) = &mut *rhs_.variant {
if block.lhs.is_none() {
@ -604,6 +683,31 @@ where &'a Token<'s, T>: Into<token::Ref<'s, 'a>>
}
// === String ===
// `String` fields inside tree nodes get empty visitor impls, so they rely entirely on the default
// methods of each trait (presumably no-ops; confirm against the trait definitions).
impl<'s, 'a> TreeVisitable<'s, 'a> for String {}
impl<'s, 'a> TreeVisitableMut<'s, 'a> for String {}
impl<'s, 'a> SpanVisitable<'s, 'a> for String {}
impl<'s, 'a> SpanVisitableMut<'s, 'a> for String {}
impl<'s, 'a> ItemVisitable<'s, 'a> for String {}

impl<'s> span::Builder<'s> for String {
    /// A `String` contributes nothing to the span; the incoming span is returned unchanged.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        span
    }
}
// `Cow<'static, str>` fields inside tree nodes get empty visitor impls, so they rely entirely on
// the default methods of each trait (presumably no-ops; confirm against the trait definitions).
impl<'s, 'a> TreeVisitable<'s, 'a> for Cow<'static, str> {}
impl<'s, 'a> TreeVisitableMut<'s, 'a> for Cow<'static, str> {}
impl<'s, 'a> SpanVisitable<'s, 'a> for Cow<'static, str> {}
impl<'s, 'a> SpanVisitableMut<'s, 'a> for Cow<'static, str> {}
impl<'s, 'a> ItemVisitable<'s, 'a> for Cow<'static, str> {}

impl<'s> span::Builder<'s> for Cow<'static, str> {
    /// A `Cow<'static, str>` contributes nothing to the span; the incoming span is returned
    /// unchanged.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        span
    }
}
// ==========================
// === CodePrinterVisitor ===

View File

@ -2,6 +2,7 @@
use crate::syntax::token;
use crate::syntax::tree::*;
@ -250,3 +251,72 @@ impl<'s> Default for Builder<'s> {
Self::new()
}
}
// =============
// === Lines ===
// =============
/// Split a sequence of [`Item`]s into [`Line`]s.
///
/// The returned iterator is lazy: it cuts the input at newline tokens and parses the items
/// collected for each line with [`operator::resolve_operator_precedence_if_non_empty`]. It starts
/// out with a default newline token, an empty item buffer, and the `finished` flag cleared.
pub fn lines<'s, I, J>(items: I) -> Lines<'s, J>
where
    I: IntoIterator<IntoIter = J>,
    J: Iterator<Item = Item<'s>>,
{
    Lines { items: items.into_iter(), newline: default(), line: default(), finished: default() }
}
/// An iterator of [`Line`]s.
///
/// Wraps an iterator of [`Item`]s and yields one [`Line`] per newline-delimited run of items.
#[derive(Debug)]
pub struct Lines<'s, I> {
/// The remaining, not yet consumed input items.
items: I,
/// The newline token that will begin the next line to be emitted.
newline: token::Newline<'s>,
/// Items collected for the current line since the last newline token.
line: Vec<Item<'s>>,
/// Set once the input is exhausted and the final line has been emitted.
finished: bool,
}
impl<'s, I> Lines<'s, I> {
    /// Turn the buffered items into a [`Line`] that starts with `newline`.
    ///
    /// The buffer is drained, so it is empty afterwards. The resulting expression is whatever
    /// [`operator::resolve_operator_precedence_if_non_empty`] returns for the drained items.
    fn parse_current_line(&mut self, newline: token::Newline<'s>) -> Line<'s> {
        let expression = operator::resolve_operator_precedence_if_non_empty(self.line.drain(..));
        Line { newline, expression }
    }
}
impl<'s, I> Iterator for Lines<'s, I>
where I: Iterator<Item = Item<'s>>
{
    type Item = Line<'s>;

    /// Produce the next line, or `None` once the final line has been returned.
    ///
    /// Items are buffered until a newline token is seen. That token is stored as the start of the
    /// following line, and the buffered items are emitted together with the previously stored
    /// newline. When the input runs out, whatever is buffered is emitted as the last line.
    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }
        while let Some(item) = self.items.next() {
            if let Item::Token(Token { variant: token::Variant::Newline(_), left_offset, code }) =
                item
            {
                let upcoming = token::newline(left_offset, code);
                let current = mem::replace(&mut self.newline, upcoming);
                // The block started with a real newline; ignore the implicit newline.
                let is_implicit_leading_newline = current.code.is_empty() && self.line.is_empty();
                if !is_implicit_leading_newline {
                    return Some(self.parse_current_line(current));
                }
            } else {
                self.line.push(item);
            }
        }
        // Input exhausted: flush the remaining items as the last line and stop afterwards.
        self.finished = true;
        let last = mem::take(&mut self.newline);
        Some(self.parse_current_line(last))
    }
}

View File

@ -77,16 +77,91 @@ fn parentheses_nested() {
}
#[test]
fn type_definition() {
test("type Bool", block![(TypeDef (Ident type) (Ident Bool) #())]);
test("type Option a", block![(TypeDef (Ident type) (Ident Option) #((Ident a)))]);
/// A whole-line `#` comment is expected to parse to a single `Comment` node holding its text.
fn comments() {
// Basic, full-line comment.
test("# a b c", block![(Comment "# a b c")]);
}
// === Type Definitions ===
/// A `type` line without an indented body is expected to yield a `TypeDef` whose constructor and
/// body lists are both empty; type parameters, if any, are kept.
#[test]
fn type_definition_no_body() {
test("type Bool", block![(TypeDef (Ident type) (Ident Bool) #() #() #())]);
test("type Option a", block![(TypeDef (Ident type) (Ident Option) #((Ident a)) #() #())]);
}
/// Constructors in a type body: one with arguments on their own lines (`Circle`), one with inline
/// arguments (`Rectangle`), and one without arguments (`Point`). The method block is expected to
/// stay empty.
#[test]
fn type_constructors() {
let code = [
"type Geo",
" Circle",
" radius",
" 4",
" Rectangle width height",
" Point",
];
#[rustfmt::skip]
let expected = block![
(TypeDef (Ident type) (Ident Geo) #()
#(((Circle #() #((Ident radius) (Number 4))))
((Rectangle #((Ident width) (Ident height)) #()))
((Point #() #())))
#())
];
test(&code.join("\n"), expected);
}
/// A type body containing only method definitions: the constructor list is expected to be empty
/// and both functions are expected in the body block.
#[test]
fn type_methods() {
let code = ["type Geo", " number =", " 23", " area self = 1 + 1"];
#[rustfmt::skip]
let expected = block![
(TypeDef (Ident type) (Ident Geo) #() #()
#((Function number #() "=" (BodyBlock #((Number 23))))
(Function area #((Ident self)) "=" (OprApp (Number 1) (Ok "+") (Number 1)))))
];
test(&code.join("\n"), expected);
}
/// A type definition with constructors, a blank separator line, and methods. The blank line is
/// expected to appear as an empty entry at the end of the constructor list, followed by the two
/// functions in the body block.
#[test]
fn type_def_full() {
let code = [
"type Geo",
" Circle",
" radius : float",
" 4",
" Rectangle width height",
" Point",
"",
" number =",
" 23",
" area self = 1 + 1",
];
#[rustfmt::skip]
let expected = block![
(TypeDef (Ident type) (Ident Geo) #()
#(((Circle #() #((OprApp (Ident radius) (Ok ":") (Ident float)) (Number 4))))
((Rectangle #((Ident width) (Ident height)) #()))
((Point #() #()))
(()))
#((Function number #() "=" (BodyBlock #((Number 23))))
(Function area #((Ident self)) "=" (OprApp (Number 1) (Ok "+") (Number 1)))))
];
test(&code.join("\n"), expected);
}
// === Variable Assignment ===
/// `name = value` with a single identifier on the left is expected to parse to an `Assignment`.
#[test]
fn assignment_simple() {
test("foo = 23", block![(Assignment (Ident foo) "=" (Number 23))]);
}
// === Functions ===
#[test]
fn function_inline_simple_args() {
test("foo a = 23", block![(Function foo #((Ident a)) "=" (Number 23))]);
@ -106,6 +181,9 @@ fn function_block_simple_args() {
test("foo a b c =", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" ())]);
}
// === Code Blocks ===
#[test]
fn code_block_body() {
let code = ["main =", " 4"];
@ -219,6 +297,71 @@ fn code_block_with_following_statement() {
}
// === Binary Operators ===
/// Adjacent operators between two operands are expected to produce one `OprApp` whose operator
/// is an `Err` listing every operator in source order.
#[test]
fn multiple_operator_error() {
let code = ["4 + + 1"];
let expected = block![
(OprApp (Number 4) (Err (#("+" "+"))) (Number 1))
];
test(&code.join("\n"), expected);
let code = ["4 + + + 1"];
let expected = block![
(OprApp (Number 4) (Err (#("+" "+" "+"))) (Number 1))
];
test(&code.join("\n"), expected);
}
/// `*` is expected to bind tighter than `+`: `1 * 2 + 3` groups as `(1 * 2) + 3`.
#[test]
fn precedence() {
let code = ["1 * 2 + 3"];
let expected = block![
(OprApp (OprApp (Number 1) (Ok "*") (Number 2)) (Ok "+") (Number 3))
];
test(&code.join("\n"), expected);
}
// === Unary Operators ===
/// A `~`-prefixed function argument is expected to parse to a `UnaryOprApp` wrapping the
/// identifier.
#[test]
fn unevaluated_argument() {
let code = ["main ~foo = 4"];
let expected = block![
(Function main #((UnaryOprApp "~" (Ident foo))) "=" (Number 4))
];
test(&code.join("\n"), expected);
}
/// A unary operator followed directly by another operator is expected to yield a `UnaryOprApp`
/// with an empty operand instead of failing.
#[test]
fn unary_operator_missing_operand() {
let code = ["main ~ = 4"];
let expected = block![
(Function main #((UnaryOprApp "~" ())) "=" (Number 4))
];
test(&code.join("\n"), expected);
}
/// A unary operator at the very end of the input is expected to yield a `UnaryOprApp` with an
/// empty operand, applied as the argument of the preceding expression.
#[test]
fn unary_operator_at_end_of_expression() {
let code = ["foo ~"];
let expected = block![
(App (Ident foo) (UnaryOprApp "~" ()))
];
test(&code.join("\n"), expected);
}
/// In `4+-1` the `-` is expected to be read as a unary prefix on `1`, so the right-hand side of
/// `+` is a `UnaryOprApp`, not a multiple-operator error.
#[test]
fn plus_negative() {
let code = ["x = 4+-1"];
let expected = block![
(Assignment (Ident x) "=" (OprApp (Number 4) (Ok "+") (UnaryOprApp "-" (Number 1))))
];
test(&code.join("\n"), expected);
}
// ====================
// === Test Support ===
@ -268,6 +411,7 @@ where T: serde::Serialize + Reflect {
let mut to_s_expr = ToSExpr::new(&graph);
to_s_expr.mapper(ast_ty, strip_hidden_fields);
let ident_token = rust_to_meta[&token::variant::Ident::reflect().id];
let comment_token = rust_to_meta[&token::variant::Comment::reflect().id];
let operator_token = rust_to_meta[&token::variant::Operator::reflect().id];
let symbol_token = rust_to_meta[&token::variant::Symbol::reflect().id];
let number_token = rust_to_meta[&token::variant::Number::reflect().id];
@ -283,6 +427,8 @@ where T: serde::Serialize + Reflect {
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(comment_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(operator_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(symbol_token, move |token| Value::string(token_to_str_(token)));