mirror of
https://github.com/enso-org/enso.git
synced 2024-12-27 05:34:07 +03:00
Parser: don't panic for any standard library files (#3609)
This commit is contained in:
parent
26018e4969
commit
c525b201b9
5
.github/CODEOWNERS
vendored
5
.github/CODEOWNERS
vendored
@ -1,6 +1,9 @@
|
||||
# Catch All
|
||||
* @4e6 @MichaelMauderer @PabloBuchu @jdunkerley
|
||||
|
||||
# Github metadata
|
||||
/.github/ @4e6 @MichaelMauderer @PabloBuchu @jdunkerley @wdanilo
|
||||
|
||||
# Change log
|
||||
CHANGELOG.md
|
||||
|
||||
@ -11,6 +14,8 @@ Cargo.lock @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw
|
||||
Cargo.toml @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw
|
||||
/lib/rust/ @MichaelMauderer @4e6 @mwu-tow @farmaazon @wdanilo @kazcw
|
||||
/lib/rust/ensogl/ @MichaelMauderer @wdanilo @farmaazon
|
||||
/lib/rust/metamodel/ @kazcw @wdanilo @jaroslavtulach
|
||||
/lib/rust/parser/ @kazcw @wdanilo @jaroslavtulach
|
||||
/lib/rust/profiler/ @kazcw @MichaelMauderer @wdanilo
|
||||
/integration-test/ @MichaelMauderer @wdanilo @farmaazon @kazcw
|
||||
/tools/build-performance/ @kazcw @mwu-tow @wdanilo
|
||||
|
13
Cargo.lock
generated
13
Cargo.lock
generated
@ -2088,6 +2088,7 @@ dependencies = [
|
||||
"lexpr",
|
||||
"rand 0.8.5",
|
||||
"rand_chacha 0.3.1",
|
||||
"rand_distr 0.4.3",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@ -4486,7 +4487,7 @@ dependencies = [
|
||||
"num-rational 0.2.4",
|
||||
"num-traits",
|
||||
"rand 0.7.3",
|
||||
"rand_distr",
|
||||
"rand_distr 0.2.2",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"simba 0.1.5",
|
||||
@ -5482,6 +5483,16 @@ dependencies = [
|
||||
"rand 0.7.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_distr"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.1.0"
|
||||
|
@ -67,6 +67,8 @@ pub struct DeserializerBuilder {
|
||||
materializers: BTreeMap<FieldId, Materializer>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
mappers: BTreeMap<FieldId, Mapper>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pre_hooks: Vec<Hook>,
|
||||
support: String,
|
||||
either_type: String,
|
||||
}
|
||||
@ -81,7 +83,8 @@ impl DeserializerBuilder {
|
||||
let mappers = Default::default();
|
||||
let support = support.into();
|
||||
let either_type = either_type.into();
|
||||
Self { root, materializers, mappers, support, either_type }
|
||||
let pre_hooks = Default::default();
|
||||
Self { root, materializers, mappers, support, either_type, pre_hooks }
|
||||
}
|
||||
|
||||
/// Configure the specified field to be produced according to an expression, instead of by
|
||||
@ -98,6 +101,12 @@ impl DeserializerBuilder {
|
||||
self.mappers.insert(field, Box::new(mapper));
|
||||
}
|
||||
|
||||
/// Generate code to be run in the deserialization function, before any deserialization begins.
|
||||
pub fn pre_hook<F>(&mut self, f: F)
|
||||
where F: for<'a> FnOnce(HookInput<'a>) -> String + 'static {
|
||||
self.pre_hooks.push(Box::new(f));
|
||||
}
|
||||
|
||||
/// Generate the deserialization method.
|
||||
pub fn build(mut self, graph: &TypeGraph) -> Method {
|
||||
let method = match graph[self.root].abstract_ {
|
||||
@ -110,6 +119,7 @@ impl DeserializerBuilder {
|
||||
|
||||
type Materializer = Box<dyn for<'a> FnOnce(MaterializerInput<'a>) -> String>;
|
||||
type Mapper = Box<dyn for<'a, 'b> FnOnce(MapperInput<'a, 'b>) -> String>;
|
||||
type Hook = Box<dyn for<'a> FnOnce(HookInput<'a>) -> String>;
|
||||
|
||||
/// Input to a function that produces an expression that deserializes a field.
|
||||
#[derive(Debug)]
|
||||
@ -127,6 +137,13 @@ pub struct MapperInput<'a, 'b> {
|
||||
pub value: &'b str,
|
||||
}
|
||||
|
||||
/// Input to a function that produces statement(s) to be run.
///
/// Hooks registered with `DeserializerBuilder::pre_hook` receive a value of this type and return
/// the generated code as a `String`.
|
||||
#[derive(Debug)]
|
||||
pub struct HookInput<'a> {
|
||||
/// Identifier of the serialized message object, for use in the code the hook generates.
|
||||
pub message: &'a str,
|
||||
}
|
||||
|
||||
|
||||
// === Product Types ===
|
||||
|
||||
@ -143,6 +160,9 @@ impl DeserializerBuilder {
|
||||
next_temp_variable_number += 1;
|
||||
result
|
||||
};
|
||||
for hook in self.pre_hooks.drain(..) {
|
||||
body.push_str(&(hook)(HookInput { message }));
|
||||
}
|
||||
let fields = class_fields(graph, class);
|
||||
for field in &fields {
|
||||
let ty_name = quote_type(graph, &field.data);
|
||||
|
@ -138,7 +138,7 @@ pub fn quote_params<'a>(
|
||||
/// Given a model of a field ([`Field`]), create a representation of the Java syntax defining a
|
||||
/// class field with name, type, and attributes as specified in the model.
|
||||
fn quote_field(graph: &TypeGraph, field: &Field) -> syntax::Field {
|
||||
let Field { name, data, id: _ } = field;
|
||||
let Field { name, data, id: _, hide_in_tostring: _ } = field;
|
||||
let type_ = quote_type(graph, data);
|
||||
let name = name.clone();
|
||||
let final_ = true;
|
||||
@ -267,9 +267,13 @@ fn implement_equals(graph: &TypeGraph, class: &Class) -> syntax::Method {
|
||||
/// [2]: https://openjdk.org/jeps/395
|
||||
fn implement_to_string(graph: &TypeGraph, class: &Class) -> syntax::Method {
|
||||
let string_builder = "stringBuilder";
|
||||
let stringify =
|
||||
|field: &Field| format!("{string_builder}.append(String.valueOf({}));", field.name);
|
||||
let fields: Vec<_> = class_fields(graph, class).into_iter().map(stringify).collect();
|
||||
let fields_ = class_fields(graph, class);
|
||||
let mut fields = Vec::with_capacity(fields_.len());
|
||||
for field in fields_ {
|
||||
if !field.hide_in_tostring {
|
||||
fields.push(format!("{string_builder}.append(String.valueOf({}));", field.name));
|
||||
}
|
||||
}
|
||||
let mut body = String::new();
|
||||
let ty_name = &class.name;
|
||||
writeln!(body, "StringBuilder {string_builder} = new StringBuilder();").unwrap();
|
||||
|
@ -156,10 +156,11 @@ fn standard_methods() -> Vec<Method> {
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Field {
|
||||
#[allow(missing_docs)]
|
||||
pub name: String,
|
||||
pub name: String,
|
||||
#[allow(missing_docs)]
|
||||
pub data: FieldData,
|
||||
id: FieldId,
|
||||
pub data: FieldData,
|
||||
id: FieldId,
|
||||
hide_in_tostring: bool,
|
||||
}
|
||||
|
||||
impl Field {
|
||||
@ -168,7 +169,8 @@ impl Field {
|
||||
let name = name.into();
|
||||
let data = FieldData::Object { type_, non_null };
|
||||
let id = Default::default();
|
||||
Self { name, data, id }
|
||||
let hide_in_tostring = Default::default();
|
||||
Self { name, data, id, hide_in_tostring }
|
||||
}
|
||||
|
||||
/// Create a field holding primitive data.
|
||||
@ -176,13 +178,19 @@ impl Field {
|
||||
let name = name.into();
|
||||
let data = FieldData::Primitive(primitive);
|
||||
let id = Default::default();
|
||||
Self { name, data, id }
|
||||
let hide_in_tostring = Default::default();
|
||||
Self { name, data, id, hide_in_tostring }
|
||||
}
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub fn id(&self) -> FieldId {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Omit this field from any containing class's `toString` implementation.
///
/// This only sets the field's `hide_in_tostring` flag; nothing in this method clears it again.
|
||||
pub fn hide_in_tostring(&mut self) {
|
||||
self.hide_in_tostring = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// A field's data contents.
|
||||
|
@ -25,3 +25,4 @@ enso-metamodel-lexpr = { path = "../metamodel/lexpr" }
|
||||
lexpr = "0.2.6"
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
rand_distr = "0.4.3"
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.enso.syntax2;
|
||||
|
||||
import org.enso.syntax2.serialization.Message;
|
||||
import org.enso.syntax2.UnsupportedSyntaxException;
|
||||
import java.io.File;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
@ -25,7 +26,7 @@ public final class Parser implements AutoCloseable {
|
||||
var state = allocState();
|
||||
return new Parser(state);
|
||||
}
|
||||
public final Tree parse(String input) {
|
||||
public final Tree parse(String input) throws UnsupportedSyntaxException {
|
||||
try {
|
||||
byte[] inputBytes = input.getBytes("UTF-8");
|
||||
ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
|
||||
@ -33,7 +34,12 @@ public final class Parser implements AutoCloseable {
|
||||
var serializedTree = parseInput(state, inputBuf);
|
||||
var base = getLastInputBase(state);
|
||||
serializedTree.order(ByteOrder.LITTLE_ENDIAN);
|
||||
return Tree.deserialize(new Message(serializedTree, inputBuf, base));
|
||||
var message = new Message(serializedTree, inputBuf, base);
|
||||
var result = Tree.deserialize(message);
|
||||
if (message.getEncounteredUnsupportedSyntax()) {
|
||||
throw new UnsupportedSyntaxException(result);
|
||||
}
|
||||
return result;
|
||||
} catch (java.io.UnsupportedEncodingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
@ -0,0 +1,14 @@
|
||||
package org.enso.syntax2;
|
||||
|
||||
public final class UnsupportedSyntaxException extends Exception {
|
||||
private final Tree tree;
|
||||
|
||||
UnsupportedSyntaxException(Tree treeIn) {
|
||||
super("Tree contains unsupported syntax. Details are in an `Unsupported` node in the tree.");
|
||||
tree = treeIn;
|
||||
}
|
||||
|
||||
public final Tree getTree() {
|
||||
return tree;
|
||||
}
|
||||
}
|
@ -4,6 +4,7 @@ public final class Message {
|
||||
private final java.nio.ByteBuffer buffer;
|
||||
private final java.nio.ByteBuffer context;
|
||||
private final int base;
|
||||
private boolean encounteredUnsupportedSyntax;
|
||||
|
||||
public Message(java.nio.ByteBuffer bufferIn, java.nio.ByteBuffer contextIn, long baseIn) {
|
||||
buffer = bufferIn;
|
||||
@ -53,4 +54,16 @@ public final class Message {
|
||||
assert tmp >= 0;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
final String getLocation() {
|
||||
return "Message[buffer=" + buffer.position() + "]";
|
||||
}
|
||||
|
||||
/**
 * Returns the flag that is set by {@link #markEncounteredUnsupportedSyntax()}.
 *
 * @return the current value of the {@code encounteredUnsupportedSyntax} field
 */
public final boolean getEncounteredUnsupportedSyntax() {
|
||||
return encounteredUnsupportedSyntax;
|
||||
}
|
||||
|
||||
/**
 * Records that unsupported syntax was encountered by setting the
 * {@code encounteredUnsupportedSyntax} flag. This method never clears the flag.
 */
public final void markEncounteredUnsupportedSyntax() {
|
||||
encounteredUnsupportedSyntax = true;
|
||||
}
|
||||
}
|
||||
|
@ -38,12 +38,14 @@ fn main() {
|
||||
let ast = enso_parser::syntax::Tree::reflect();
|
||||
let tree = enso_parser::syntax::Tree::reflect().id;
|
||||
let token = enso_parser::syntax::Token::<enso_parser::syntax::token::Variant>::reflect().id;
|
||||
let unsupported = enso_parser::syntax::tree::Unsupported::reflect().id;
|
||||
let (graph, rust_to_meta) = rust::to_meta(ast);
|
||||
let (graph, meta_to_java) = java::from_meta(&graph, enso_parser_generate_java::EITHER_TYPE);
|
||||
let mut graph = java::transform::optional_to_null(graph);
|
||||
let rust_to_java = |id| meta_to_java[&rust_to_meta[&id]];
|
||||
let (tree, token) = (rust_to_java(tree), rust_to_java(token));
|
||||
serialization::derive(&mut graph, tree, token);
|
||||
let (tree, token, unsupported) =
|
||||
(rust_to_java(tree), rust_to_java(token), rust_to_java(unsupported));
|
||||
serialization::derive(&mut graph, tree, token, unsupported);
|
||||
let graph = java::to_syntax(&graph, enso_parser_generate_java::PACKAGE);
|
||||
let mut args = std::env::args();
|
||||
args.next().unwrap();
|
||||
|
@ -24,9 +24,9 @@ const TOKEN_OFFSET_BEGIN: &str = "leftOffsetCodeReprBegin";
|
||||
//const TOKEN_OFFSET_LEN: &str = "leftOffsetCodeReprLen";
|
||||
|
||||
/// Derive deserialization for all types in the typegraph.
|
||||
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
|
||||
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId, unsupported: ClassId) {
|
||||
let source = "source";
|
||||
impl_deserialize(graph, tree, token, source);
|
||||
impl_deserialize(graph, tree, token, unsupported, source);
|
||||
graph[token].methods.push(impl_getter(CODE_GETTER, source, TOKEN_BEGIN, TOKEN_LEN));
|
||||
graph[tree].methods.push(impl_getter(CODE_GETTER, source, TREE_BEGIN, TREE_LEN));
|
||||
}
|
||||
@ -34,14 +34,22 @@ pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
|
||||
|
||||
// === Deserialization Methods ===
|
||||
|
||||
fn impl_deserialize(graph: &mut TypeGraph, tree: ClassId, token: ClassId, source: &str) {
|
||||
fn impl_deserialize(
|
||||
graph: &mut TypeGraph,
|
||||
tree: ClassId,
|
||||
token: ClassId,
|
||||
unsupported: ClassId,
|
||||
source: &str,
|
||||
) {
|
||||
// Add source field to parent types.
|
||||
let buffer = Class::builtin("java.nio.ByteBuffer", vec![]);
|
||||
let buffer = graph.classes.insert(buffer);
|
||||
let tree_source_ = Field::object(source, buffer, true);
|
||||
let mut tree_source_ = Field::object(source, buffer, true);
|
||||
tree_source_.hide_in_tostring();
|
||||
let tree_source = tree_source_.id();
|
||||
graph[tree].fields.push(tree_source_);
|
||||
let token_source_ = Field::object(source, buffer, true);
|
||||
let mut token_source_ = Field::object(source, buffer, true);
|
||||
token_source_.hide_in_tostring();
|
||||
let token_source = token_source_.id();
|
||||
graph[token].fields.push(token_source_);
|
||||
let tree_begin = graph[tree].find_field(TREE_BEGIN).unwrap().id();
|
||||
@ -52,6 +60,11 @@ fn impl_deserialize(graph: &mut TypeGraph, tree: ClassId, token: ClassId, source
|
||||
let class = &graph[id];
|
||||
let mut deserialization =
|
||||
bincode::DeserializerBuilder::new(id, crate::SERIALIZATION_SUPPORT, crate::EITHER_TYPE);
|
||||
if id == unsupported {
|
||||
deserialization.pre_hook(|bincode::HookInput { message }| {
|
||||
format!("{message}.markEncounteredUnsupportedSyntax();\n")
|
||||
});
|
||||
}
|
||||
if class.parent == Some(tree) {
|
||||
deserialization.materialize(tree_source, context_materializer());
|
||||
}
|
||||
|
@ -96,6 +96,7 @@ pub struct LexerState {
|
||||
pub last_spaces_visible_offset: VisibleOffset,
|
||||
pub current_block_indent: VisibleOffset,
|
||||
pub block_indent_stack: Vec<VisibleOffset>,
|
||||
pub internal_error: Option<String>,
|
||||
}
|
||||
|
||||
impl<'s> Lexer<'s> {
|
||||
@ -516,8 +517,9 @@ impl<'s> Lexer<'s> {
|
||||
match current {
|
||||
'.' => this.take_while_1_('.'),
|
||||
'=' => this.take_while_1_('='),
|
||||
':' => (),
|
||||
',' => (),
|
||||
':' | ',' => {
|
||||
this.take_next();
|
||||
}
|
||||
_ => this.take_while_1_(is_operator_body_char),
|
||||
};
|
||||
}
|
||||
@ -525,13 +527,18 @@ impl<'s> Lexer<'s> {
|
||||
if let Some(token) = token {
|
||||
if token.code == "+-" {
|
||||
let (left, right) = token.split_at_(Bytes(1));
|
||||
self.submit_token(left.with_variant(token::Variant::operator()));
|
||||
self.submit_token(right.with_variant(token::Variant::operator()));
|
||||
let (prec, binary, unary) = compute_precedence(&left.code);
|
||||
self.submit_token(left.with_variant(token::Variant::operator(prec, binary, unary)));
|
||||
self.submit_token(right.with_variant(token::Variant::operator(0, false, true)));
|
||||
} else {
|
||||
let only_eq = token.code.chars().all(|t| t == '=');
|
||||
let is_mod = token.code.ends_with('=') && !only_eq;
|
||||
let tp =
|
||||
if is_mod { token::Variant::modifier() } else { token::Variant::operator() };
|
||||
let tp = if is_mod {
|
||||
token::Variant::modifier()
|
||||
} else {
|
||||
let (prec, binary, unary) = compute_precedence(&token.code);
|
||||
token::Variant::operator(prec, binary, unary)
|
||||
};
|
||||
let token = token.with_variant(tp);
|
||||
self.submit_token(token);
|
||||
}
|
||||
@ -540,6 +547,40 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
|
||||
|
||||
// === Precedence ===
|
||||
|
||||
// FIXME: Compute precedences according to spec. Issue: #182497344
/// Look up the operator properties of `token`.
///
/// Returns `(precedence, binary, unary)`, the three values that callers pass on to
/// `token::Variant::operator`. Tokens that are not listed here yield `(0, false, false)`.
fn compute_precedence(token: &str) -> (usize, bool, bool) {
    // Special handling for tokens that can be unary.
    if token == "~" {
        return (0, false, true);
    }
    if token == "-" {
        return (14, true, true);
    }
    let binary_precedence = match token {
        // "There are a few operators with the lowest precedence possible."
        "=" => Some(1),
        ":" => Some(2),
        "->" => Some(3),
        "|" | "\\\\" | "&" => Some(4),
        ">>" | "<<" => Some(5),
        "|>" | "|>>" | "<|" | "<<|" => Some(6),
        // "The precedence of all other operators is determined by the operator's Precedence
        // Character:"
        "!" => Some(10),
        "||" => Some(11),
        "^" => Some(12),
        "&&" => Some(13),
        "+" | "++" => Some(14),
        "*" | "/" | "%" => Some(15),
        // FIXME: Not sure about these:
        "==" => Some(1),
        "," => Some(1),
        "@" => Some(20),
        "." => Some(21),
        _ => None,
    };
    match binary_precedence {
        Some(precedence) => (precedence, true, false),
        None => (0, false, false),
    }
}
|
||||
|
||||
|
||||
|
||||
// ==============
|
||||
// === Symbol ===
|
||||
@ -587,44 +628,45 @@ fn is_inline_text_body(t: char) -> bool {
|
||||
|
||||
impl<'s> Lexer<'s> {
|
||||
/// Parse a text literal.
|
||||
// FIXME: This impl is not yet finished and not all cases are covered (also, tests missing).
|
||||
fn text(&mut self) {
|
||||
let token = self.token(|this| this.take_1('"'));
|
||||
if let Some(token) = token {
|
||||
self.submit_token(token.with_variant(token::Variant::text_start()));
|
||||
let line_empty = self.current_char.map(is_newline_char).unwrap_or(true);
|
||||
if line_empty {
|
||||
todo!()
|
||||
} else {
|
||||
let mut parsed_element;
|
||||
loop {
|
||||
parsed_element = false;
|
||||
// FIXME: Handle this case; test this function. Issue: #182496940
|
||||
let char = self.current_char;
|
||||
self.internal_error.get_or_insert_with(|| format!("text: line_empty ({:?})", char));
|
||||
return;
|
||||
}
|
||||
let mut parsed_element;
|
||||
loop {
|
||||
parsed_element = false;
|
||||
|
||||
let section = self.token(|this| this.take_while_1(is_inline_text_body));
|
||||
if let Some(tok) = section {
|
||||
parsed_element = true;
|
||||
self.submit_token(tok.with_variant(token::Variant::text_section()));
|
||||
}
|
||||
let section = self.token(|this| this.take_while_1(is_inline_text_body));
|
||||
if let Some(tok) = section {
|
||||
parsed_element = true;
|
||||
self.submit_token(tok.with_variant(token::Variant::text_section()));
|
||||
}
|
||||
|
||||
let escape = self.token(|this| {
|
||||
if this.take_1('\\') {
|
||||
this.take_1('"');
|
||||
}
|
||||
});
|
||||
if let Some(token) = escape {
|
||||
parsed_element = true;
|
||||
self.submit_token(token.with_variant(token::Variant::text_escape()));
|
||||
let escape = self.token(|this| {
|
||||
if this.take_1('\\') {
|
||||
this.take_1('"');
|
||||
}
|
||||
});
|
||||
if let Some(token) = escape {
|
||||
parsed_element = true;
|
||||
self.submit_token(token.with_variant(token::Variant::text_escape()));
|
||||
}
|
||||
|
||||
let end = self.token(|this| this.take_1('"'));
|
||||
if let Some(token) = end {
|
||||
self.submit_token(token.with_variant(token::Variant::text_end()));
|
||||
break;
|
||||
}
|
||||
let end = self.token(|this| this.take_1('"'));
|
||||
if let Some(token) = end {
|
||||
self.submit_token(token.with_variant(token::Variant::text_end()));
|
||||
break;
|
||||
}
|
||||
|
||||
if !parsed_element {
|
||||
break;
|
||||
}
|
||||
if !parsed_element {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -732,13 +774,13 @@ const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[
|
||||
impl<'s> Lexer<'s> {
|
||||
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
|
||||
/// hierarchy).
|
||||
pub fn run(self) -> Vec<Item<'s>> {
|
||||
build_block_hierarchy(self.run_flat())
|
||||
pub fn run(self) -> ParseResult<Vec<Item<'s>>> {
|
||||
self.run_flat().map(build_block_hierarchy)
|
||||
}
|
||||
|
||||
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
|
||||
/// as start and end tokens).
|
||||
pub fn run_flat(mut self) -> Vec<Token<'s>> {
|
||||
pub fn run_flat(mut self) -> ParseResult<Vec<Token<'s>>> {
|
||||
self.spaces_after_lexeme();
|
||||
let mut any_parser_matched = true;
|
||||
while any_parser_matched {
|
||||
@ -750,28 +792,30 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.current_char != None {
|
||||
panic!("Internal error. Lexer did not consume all input. State: {self:?}");
|
||||
}
|
||||
while self.end_block().is_some() {
|
||||
let block_end = self.marker_token(token::Variant::block_end());
|
||||
self.submit_token(block_end);
|
||||
}
|
||||
let tokens = self.output;
|
||||
event!(TRACE, "Tokens:\n{:#?}", tokens);
|
||||
tokens
|
||||
let mut internal_error = self.internal_error.take();
|
||||
if self.current_char != None {
|
||||
let message = format!("Lexer did not consume all input. State: {self:?}");
|
||||
internal_error.get_or_insert(message);
|
||||
}
|
||||
let value = self.output;
|
||||
event!(TRACE, "Tokens:\n{:#?}", value);
|
||||
ParseResult { value, internal_error }
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
|
||||
/// as start and end tokens).
|
||||
pub fn run_flat(input: &'_ str) -> Vec<Token<'_>> {
|
||||
pub fn run_flat(input: &'_ str) -> ParseResult<Vec<Token<'_>>> {
|
||||
Lexer::new(input).run_flat()
|
||||
}
|
||||
|
||||
/// Run the lexer. Return hierarchical list of tokens (the token groups will be represented as a
|
||||
/// hierarchy).
|
||||
pub fn run(input: &'_ str) -> Vec<Item<'_>> {
|
||||
pub fn run(input: &'_ str) -> ParseResult<Vec<Item<'_>>> {
|
||||
Lexer::new(input).run()
|
||||
}
|
||||
|
||||
@ -828,7 +872,8 @@ pub mod test {
|
||||
|
||||
/// Constructor.
|
||||
pub fn operator_<'s>(left_offset: &'s str, code: &'s str) -> Token<'s> {
|
||||
Token(left_offset, code, token::Variant::operator())
|
||||
let (precedence, binary, unary) = compute_precedence(code);
|
||||
Token(left_offset, code, token::Variant::operator(precedence, binary, unary))
|
||||
}
|
||||
}
|
||||
|
||||
@ -844,7 +889,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn test_lexer<'s>(input: &'s str, expected: Vec<Token<'s>>) {
|
||||
assert_eq!(run_flat(input), expected);
|
||||
assert_eq!(run_flat(input).unwrap(), expected);
|
||||
}
|
||||
|
||||
fn lexer_case_idents<'s>(idents: &[&'s str]) -> Vec<(&'s str, Vec<Token<'s>>)> {
|
||||
@ -975,8 +1020,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_case_operators() {
|
||||
test_lexer_many(lexer_case_operators(&["+", "-", "=", "==", "==="]));
|
||||
test_lexer_many(vec![("+-", vec![operator_("", "+"), operator_("", "-")])]);
|
||||
test_lexer_many(lexer_case_operators(&["+", "-", "=", "==", "===", ":", ","]));
|
||||
let unary_minus = Token("", "-", token::Variant::operator(0, false, true));
|
||||
test_lexer_many(vec![("+-", vec![operator_("", "+"), unary_minus])]);
|
||||
}
|
||||
|
||||
/// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt.
|
||||
@ -1165,7 +1211,7 @@ mod benches {
|
||||
|
||||
b.iter(move || {
|
||||
let lexer = Lexer::new(&str);
|
||||
assert_eq!(lexer.run().len(), reps);
|
||||
assert_eq!(lexer.run().unwrap().len(), reps);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -124,6 +124,37 @@ pub mod prelude {
|
||||
pub use enso_reflect::Reflect;
|
||||
pub use enso_types::traits::*;
|
||||
pub use enso_types::unit2::Bytes;
|
||||
|
||||
/// Wraps return value for functions whose implementations don't handle all cases yet. When the
|
||||
/// parser is complete, this type will be eliminated.
|
||||
pub type WipResult<T> = Result<T, String>;
|
||||
|
||||
/// Return type for functions that will only fail in case of a bug in the implementation.
#[derive(Debug, Default)]
pub struct ParseResult<T> {
    /// The value computed by the operation. It should be correct when `internal_error` is `None`;
    /// when `internal_error` is set, it is only a best-effort value and cannot be assumed to be
    /// accurate.
    pub value: T,
    /// Description of an internal error encountered while computing `value`, if there was one.
    pub internal_error: Option<String>,
}

impl<T> ParseResult<T> {
    /// Apply `f` to the contained value, producing a new [`ParseResult`] that carries the
    /// transformed value together with the unchanged `internal_error` of the input.
    pub fn map<U, F>(self, f: F) -> ParseResult<U>
    where F: FnOnce(T) -> U {
        ParseResult { value: f(self.value), internal_error: self.internal_error }
    }

    /// Return the contained value.
    ///
    /// # Panics
    ///
    /// Panics if the result contains an internal error.
    pub fn unwrap(self) -> T {
        let Self { value, internal_error } = self;
        assert_eq!(internal_error, None);
        value
    }
}
|
||||
}
|
||||
|
||||
|
||||
@ -150,7 +181,12 @@ impl Parser {
|
||||
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
|
||||
let tokens = lexer::run(code);
|
||||
let resolver = macros::resolver::Resolver::new_root();
|
||||
resolver.run(&self.macros, tokens)
|
||||
let result = tokens.map(|tokens| resolver.run(&self.macros, tokens));
|
||||
let value = result.value;
|
||||
if let Some(error) = result.internal_error {
|
||||
return value.with_error(format!("Internal error: {}", error));
|
||||
}
|
||||
value
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,12 +205,19 @@ impl Default for Parser {
|
||||
/// interpreted as a variable assignment or method definition.
|
||||
fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
|
||||
use syntax::tree::*;
|
||||
let tree_ = match &mut *tree.variant {
|
||||
Variant::OprSectionBoundary(OprSectionBoundary { ast }) => ast,
|
||||
let mut left_offset = source::span::Offset::default();
|
||||
let tree_ = match &mut tree {
|
||||
Tree { variant: box Variant::OprSectionBoundary(OprSectionBoundary { ast }), span } => {
|
||||
left_offset += &span.left_offset;
|
||||
ast
|
||||
}
|
||||
_ => &mut tree,
|
||||
};
|
||||
let opr_app = match &mut *tree_.variant {
|
||||
Variant::OprApp(opr_app) => opr_app,
|
||||
let opr_app = match tree_ {
|
||||
Tree { variant: box Variant::OprApp(opr_app), span } => {
|
||||
left_offset += &span.left_offset;
|
||||
opr_app
|
||||
}
|
||||
_ => return tree,
|
||||
};
|
||||
if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.code == "=" {
|
||||
@ -188,12 +231,18 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
|
||||
if args.is_empty() && let Some(rhs) = rhs && !is_body_block(rhs) {
|
||||
// If the LHS has no arguments, and there is a RHS, and the RHS is not a body block,
|
||||
// this is a variable assignment.
|
||||
return Tree::assignment(mem::take(lhs), mem::take(opr), mem::take(rhs))
|
||||
let mut result = Tree::assignment(mem::take(lhs), mem::take(opr), mem::take(rhs));
|
||||
left_offset += result.span.left_offset;
|
||||
result.span.left_offset = left_offset;
|
||||
return result;
|
||||
}
|
||||
if let Variant::Ident(Ident { token }) = &mut *lhs.variant {
|
||||
// If this is not a variable assignment, and the leftmost leaf of the `App` tree is
|
||||
// an identifier, this is a function definition.
|
||||
return Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs))
|
||||
let mut result = Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs));
|
||||
left_offset += result.span.left_offset;
|
||||
result.span.left_offset = left_offset;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
tree
|
||||
@ -261,6 +310,41 @@ mod benches {
|
||||
str.push('\n');
|
||||
}
|
||||
let parser = Parser::new();
|
||||
bencher.bytes = str.len() as u64;
|
||||
bencher.iter(move || {
|
||||
parser.run(&str);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_expressions(bencher: &mut Bencher) {
|
||||
use rand::prelude::*;
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
let lines = 100;
|
||||
let avg_group_len = 20;
|
||||
let avg_groups_per_line = 20;
|
||||
let mut str = String::new();
|
||||
let mut rng = ChaCha8Rng::seed_from_u64(0);
|
||||
let normal = rand_distr::StandardNormal;
|
||||
for _ in 0..lines {
|
||||
let operators = ['=', '+', '-', '*', ':'];
|
||||
let groups: f64 = normal.sample(&mut rng);
|
||||
let groups = (groups * avg_groups_per_line as f64) as usize;
|
||||
for _ in 0..groups {
|
||||
let len: f64 = normal.sample(&mut rng);
|
||||
let len = (len * avg_group_len as f64) as usize;
|
||||
str.push('x');
|
||||
for _ in 0..len {
|
||||
let i = rng.gen_range(0..operators.len());
|
||||
str.push(operators[i]);
|
||||
str.push('x');
|
||||
}
|
||||
str.push(' ');
|
||||
}
|
||||
str.push('\n');
|
||||
}
|
||||
let parser = Parser::new();
|
||||
bencher.bytes = str.len() as u64;
|
||||
bencher.iter(move || {
|
||||
parser.run(&str);
|
||||
});
|
||||
|
@ -44,42 +44,135 @@ pub fn type_def<'s>() -> Definition<'s> {
|
||||
identifier() / "name" % "type name" >>
|
||||
many(identifier() % "type parameter" / "param") % "type parameters" >>
|
||||
block(
|
||||
many(identifier() / "constructor") % "type constructors" >>
|
||||
everything()
|
||||
everything() / "statements"
|
||||
) % "type definition body";
|
||||
// let pattern2 = Everything;
|
||||
crate::macro_definition! {
|
||||
("type", pattern)
|
||||
type_def_body
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: The comments in the code were left in order to allow easy debugging of this struct. They
|
||||
// should be removed in the future.
|
||||
fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
|
||||
let segment = matched_segments.to_vec().pop().unwrap();
|
||||
// println!(">>>");
|
||||
// println!("{:#?}", segment);
|
||||
// println!(">>>");
|
||||
// FIXME: This implementation of parsing constructors works for correct inputs, but doesn't
|
||||
// handle incorrect syntax ideally. Issue: #182745069
|
||||
let segment = matched_segments.pop().0;
|
||||
let match_tree = segment.result.into_var_map();
|
||||
// println!("{:#?}", match_tree);
|
||||
// println!("\n\n------------- 1");
|
||||
|
||||
let mut v = match_tree.view();
|
||||
let name = &v.query("name").unwrap()[0];
|
||||
let name = operator::resolve_operator_precedence_if_non_empty(name.clone()).unwrap();
|
||||
// println!("{:#?}", name);
|
||||
// println!("\n\n------------- 2");
|
||||
|
||||
let no_params = vec![];
|
||||
let name = v.query("name").map(|name| name[0].clone()).unwrap_or_default();
|
||||
let name = operator::resolve_operator_precedence_if_non_empty(name);
|
||||
let no_params = [];
|
||||
let params = v.nested().query("param").unwrap_or(&no_params);
|
||||
// println!("{:#?}", params);
|
||||
// println!("\n\n------------- 3");
|
||||
|
||||
let params = params
|
||||
.iter()
|
||||
.map(|tokens| operator::resolve_operator_precedence_if_non_empty(tokens.clone()).unwrap())
|
||||
.map(|tokens| {
|
||||
operator::resolve_operator_precedence_if_non_empty(tokens.iter().cloned()).unwrap()
|
||||
})
|
||||
.collect_vec();
|
||||
// println!("{:#?}", params);
|
||||
syntax::Tree::type_def(segment.header, name, params)
|
||||
let mut constructors = default();
|
||||
let mut body = default();
|
||||
if let Some(items) = v.query("statements") {
|
||||
let items = items[0].iter().cloned();
|
||||
let mut builder = TypeDefBodyBuilder::default();
|
||||
for syntax::tree::block::Line { newline, expression } in syntax::tree::block::lines(items) {
|
||||
builder.line(newline, expression);
|
||||
}
|
||||
let (constructors_, body_) = builder.finish();
|
||||
constructors = constructors_;
|
||||
body = body_;
|
||||
}
|
||||
match name {
|
||||
Some(name) => syntax::Tree::type_def(segment.header, name, params, constructors, body),
|
||||
None => {
|
||||
let name = syntax::Tree::ident(syntax::token::ident("", "", false, 0));
|
||||
let result = syntax::Tree::type_def(segment.header, name, params, constructors, body);
|
||||
result.with_error("Expected identifier after `type` keyword.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Accumulates the lines of a type definition's block, keeping constructor lines separate from
/// the remaining body lines.
#[derive(Default)]
|
||||
struct TypeDefBodyBuilder<'s> {
|
||||
// Lines collected as constructor lines.
constructors: Vec<syntax::tree::TypeConstructorLine<'s>>,
|
||||
// Lines collected as ordinary body lines.
body: Vec<syntax::tree::block::Line<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> TypeDefBodyBuilder<'s> {
|
||||
/// Apply the line to the state.
|
||||
pub fn line(
|
||||
&mut self,
|
||||
newline: syntax::token::Newline<'s>,
|
||||
expression: Option<syntax::Tree<'s>>,
|
||||
) {
|
||||
if self.body.is_empty() {
|
||||
if let Some(expression) = expression {
|
||||
match Self::to_constructor_line(expression) {
|
||||
Ok(expression) => {
|
||||
let expression = Some(expression);
|
||||
let line = syntax::tree::TypeConstructorLine { newline, expression };
|
||||
self.constructors.push(line);
|
||||
}
|
||||
Err(expression) => {
|
||||
let expression = crate::expression_to_statement(expression);
|
||||
let expression = Some(expression);
|
||||
self.body.push(syntax::tree::block::Line { newline, expression });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.constructors.push(newline.into());
|
||||
}
|
||||
} else {
|
||||
let expression = expression.map(crate::expression_to_statement);
|
||||
self.body.push(syntax::tree::block::Line { newline, expression });
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the constructor/body sequences.
|
||||
pub fn finish(
|
||||
self,
|
||||
) -> (Vec<syntax::tree::TypeConstructorLine<'s>>, Vec<syntax::tree::block::Line<'s>>) {
|
||||
(self.constructors, self.body)
|
||||
}
|
||||
|
||||
/// Interpret the given expression as an `TypeConstructorDef`, if its syntax is compatible.
|
||||
fn to_constructor_line(
|
||||
expression: syntax::Tree<'_>,
|
||||
) -> Result<syntax::tree::TypeConstructorDef<'_>, syntax::Tree<'_>> {
|
||||
use syntax::tree::*;
|
||||
if let Tree {
|
||||
variant:
|
||||
box Variant::ArgumentBlockApplication(ArgumentBlockApplication {
|
||||
lhs: Some(Tree { variant: box Variant::Ident(ident), span: span_ }),
|
||||
arguments,
|
||||
}),
|
||||
span,
|
||||
} = expression
|
||||
{
|
||||
let mut constructor = ident.token;
|
||||
let mut left_offset = span.left_offset;
|
||||
left_offset += &span_.left_offset;
|
||||
left_offset += constructor.left_offset;
|
||||
constructor.left_offset = left_offset;
|
||||
let block = arguments;
|
||||
let arguments = default();
|
||||
return Ok(TypeConstructorDef { constructor, arguments, block });
|
||||
}
|
||||
let mut arguments = vec![];
|
||||
let mut lhs = &expression;
|
||||
let mut left_offset = crate::source::span::Offset::default();
|
||||
while let Tree { variant: box Variant::App(App { func, arg }), span } = lhs {
|
||||
left_offset += &span.left_offset;
|
||||
lhs = func;
|
||||
arguments.push(arg.clone());
|
||||
}
|
||||
if let Tree { variant: box Variant::Ident(Ident { token }), span } = lhs {
|
||||
let mut constructor = token.clone();
|
||||
left_offset += &span.left_offset;
|
||||
left_offset += constructor.left_offset;
|
||||
constructor.left_offset = left_offset;
|
||||
arguments.reverse();
|
||||
let block = default();
|
||||
return Ok(TypeConstructorDef { constructor, arguments, block });
|
||||
}
|
||||
Err(expression)
|
||||
}
|
||||
}
|
||||
|
@ -158,27 +158,21 @@ impl<'s> Match<'s> {
|
||||
|
||||
fn build_var_map<V: Default + Validator>(self, tree: &mut VarMap<'s, V>, validator: &V) {
|
||||
match self {
|
||||
Self::Everything(_) => {}
|
||||
Self::Nothing => {}
|
||||
Self::Identifier(_) => {}
|
||||
Self::Expected(_, _) => {}
|
||||
Self::NotBlock(_) => {}
|
||||
Self::Or(t) => match *t {
|
||||
OrMatch::First(first) => first.build_var_map(tree, validator),
|
||||
OrMatch::Second(second) => second.build_var_map(tree, validator),
|
||||
},
|
||||
Self::Everything(_)
|
||||
| Self::Nothing
|
||||
| Self::Identifier(_)
|
||||
| Self::Expected(_, _)
|
||||
| Self::NotBlock(_) => {}
|
||||
Self::Or(box OrMatch::First(item) | box OrMatch::Second(item)) =>
|
||||
item.build_var_map(tree, validator),
|
||||
Self::Seq(first, second) => {
|
||||
first.build_var_map(tree, validator);
|
||||
second.build_var_map(tree, validator);
|
||||
}
|
||||
Self::Many(matches) => {
|
||||
if tree.nested.is_none() {
|
||||
let nested = VarMap::<'s, V>::default();
|
||||
tree.nested = Some(Box::new(nested));
|
||||
}
|
||||
let nested_validator = V::default();
|
||||
nested_validator.set_parent(validator);
|
||||
let nested = tree.nested.as_mut().unwrap();
|
||||
let nested = tree.nested.get_or_insert(default());
|
||||
for m in matches {
|
||||
m.build_var_map(nested, &nested_validator);
|
||||
}
|
||||
@ -310,7 +304,7 @@ impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
|
||||
|
||||
impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
|
||||
/// Query for a variable.
|
||||
pub fn query(&mut self, name: &str) -> Option<&'t Vec<Vec<syntax::Item<'s>>>> {
|
||||
pub fn query(&mut self, name: &str) -> Option<&'t [Vec<syntax::Item<'s>>]> {
|
||||
self.tree.and_then(|t| {
|
||||
t.map.get(name).map(|entry| {
|
||||
match &self.resolved_validator {
|
||||
@ -342,7 +336,7 @@ impl<'t, 's, V: Validator> VarMapView<'t, 's, V> {
|
||||
self.resolved_validator = Some(resolved_validator);
|
||||
}
|
||||
}
|
||||
&entry.tokens
|
||||
&entry.tokens[..]
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -217,23 +217,30 @@ impl<'s> Match<'s> {
|
||||
Self::Named(label.into(), Box::new(second))
|
||||
}
|
||||
|
||||
/// Get all tokens of the match.
|
||||
pub fn tokens(self) -> Vec<syntax::Item<'s>> {
|
||||
/// Get all tokens of the match; append them to an existing sequence.
|
||||
pub fn get_tokens(self, out: &mut Vec<syntax::Item<'s>>) {
|
||||
match self {
|
||||
Self::Everything(tokens) => tokens.into(),
|
||||
Self::Nothing => default(),
|
||||
Self::Seq(fst, snd) => fst.tokens().extended(snd.tokens()),
|
||||
Self::Many(t) => t.into_iter().flat_map(|s| s.tokens()).collect(),
|
||||
Self::Identifier(ident) => vec![ident],
|
||||
Self::Expected(_, item) => item.tokens(),
|
||||
Self::Named(_, item) => item.tokens(),
|
||||
Self::NotBlock(item) => vec![item],
|
||||
Self::Or(t) => match *t {
|
||||
OrMatch::First(fst) => fst.tokens(),
|
||||
OrMatch::Second(snd) => snd.tokens(),
|
||||
},
|
||||
Self::Nothing => (),
|
||||
Self::Identifier(item) | Self::NotBlock(item) => out.push(item),
|
||||
Self::Everything(tokens) => out.extend(tokens),
|
||||
Self::Seq(fst, snd) => {
|
||||
fst.get_tokens(out);
|
||||
snd.get_tokens(out);
|
||||
}
|
||||
Self::Expected(_, box item)
|
||||
| Self::Named(_, box item)
|
||||
| Self::Or(box OrMatch::First(item) | box OrMatch::Second(item)) =>
|
||||
item.get_tokens(out),
|
||||
Self::Many(matches) => matches.into_iter().for_each(|match_| match_.get_tokens(out)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get all tokens of the match; return them in a newly-allocated vector.
|
||||
pub fn tokens(self) -> Vec<syntax::Item<'s>> {
|
||||
let mut out = vec![];
|
||||
self.get_tokens(&mut out);
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -349,8 +356,7 @@ impl Pattern {
|
||||
},
|
||||
},
|
||||
PatternData::Block(body) => match input.pop_front() {
|
||||
Some(syntax::Item::Block(tokens)) =>
|
||||
body.resolve(tokens.into_iter().rev().map_into().collect()),
|
||||
Some(syntax::Item::Block(tokens)) => body.resolve(tokens.into()),
|
||||
Some(t) => {
|
||||
input.push_front(t);
|
||||
Err(input)
|
||||
|
@ -504,7 +504,9 @@ impl<'s> Resolver<'s> {
|
||||
let out = (macro_def.body)(pattern_matched_segments);
|
||||
(out, not_used_items_of_last_segment)
|
||||
} else {
|
||||
todo!("Macro was not matched with any known macro definition. This should return an AST node indicating invalid match.")
|
||||
let message = format!("Macro was not matched with any known macro definition.\nResolved segments: {resolved_segments:?}");
|
||||
let error = syntax::tree::Error::new(message);
|
||||
(syntax::tree::Tree::invalid(error, default()), default())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -73,8 +73,9 @@ where D: serde::Deserializer<'de> {
|
||||
pub(crate) struct Error(String);
|
||||
|
||||
impl From<Error> for crate::syntax::tree::Error {
|
||||
fn from(_: Error) -> Self {
|
||||
crate::syntax::tree::Error { message: "" }
|
||||
fn from(error: Error) -> Self {
|
||||
let message = error.0.into();
|
||||
crate::syntax::tree::Error { message }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,13 @@ impl<'s> Item<'s> {
|
||||
Item::Token(token) => match token.variant {
|
||||
token::Variant::Ident(ident) => Tree::ident(token.with_variant(ident)),
|
||||
token::Variant::Number(number) => Tree::number(token.with_variant(number)),
|
||||
_ => todo!("{token:?}"),
|
||||
token::Variant::Comment(comment) => Tree::comment(token.with_variant(comment)),
|
||||
token::Variant::TextSection(text) => Tree::text_section(token.with_variant(text)),
|
||||
_ => {
|
||||
let message = format!("to_ast: Item::Token({token:?})");
|
||||
let value = Tree::ident(token.with_variant(token::variant::Ident(false, 0)));
|
||||
Tree::with_unsupported(value, message)
|
||||
}
|
||||
},
|
||||
Item::Tree(ast) => ast,
|
||||
Item::Block(items) => build_block(items),
|
||||
@ -86,24 +92,8 @@ impl<'s> TryAsRef<Item<'s>> for Item<'s> {
|
||||
/// Given a sequence of [`Item`]s belonging to one block, create an AST block node, of a type
|
||||
/// determined by the syntax of the lines in the block.
|
||||
fn build_block<'s>(items: impl IntoIterator<Item = Item<'s>>) -> Tree<'s> {
|
||||
let mut line = vec![];
|
||||
let mut block_builder = tree::block::Builder::new();
|
||||
let mut newline = None;
|
||||
for item in items {
|
||||
match item {
|
||||
Item::Token(Token { variant: token::Variant::Newline(_), left_offset, code }) => {
|
||||
let newline = mem::replace(&mut newline, Some(token::newline(left_offset, code)));
|
||||
if let Some(newline) = newline {
|
||||
let line: Vec<_> = line.drain(..).collect();
|
||||
let expression = operator::resolve_operator_precedence_if_non_empty(line);
|
||||
block_builder.push(newline, expression);
|
||||
}
|
||||
}
|
||||
_ => line.push(item),
|
||||
}
|
||||
}
|
||||
if let Some(newline) = newline {
|
||||
let expression = operator::resolve_operator_precedence_if_non_empty(line);
|
||||
for tree::block::Line { newline, expression } in tree::block::lines(items) {
|
||||
block_builder.push(newline, expression);
|
||||
}
|
||||
block_builder.build()
|
||||
|
@ -12,35 +12,6 @@ use crate::syntax::token::Token;
|
||||
// === Precedence ===
|
||||
// ==================
|
||||
|
||||
// FIXME: The current implementation hard-codes precedence values and does not support precedence
|
||||
// computations for any operator (according to the spec)
|
||||
fn precedence_of(operator: &str) -> usize {
|
||||
match operator {
|
||||
"=" => 1,
|
||||
"+" => 3,
|
||||
"-" => 3,
|
||||
"*" => 7,
|
||||
_ => panic!("Operator not supported: {}", operator),
|
||||
}
|
||||
}
|
||||
|
||||
/// An item with an assigned precedence.
|
||||
#[derive(Clone, Copy, Debug, Deref, DerefMut)]
|
||||
struct WithPrecedence<T> {
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
elem: T,
|
||||
precedence: usize,
|
||||
}
|
||||
|
||||
impl<T> WithPrecedence<T> {
|
||||
/// Constructor.
|
||||
pub fn new(precedence: usize, elem: T) -> Self {
|
||||
Self { elem, precedence }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Annotate expressions that should use spacing, because otherwise they are misleading. For
|
||||
/// example, `if cond then.x else.y` is parsed as `if cond then .x else .y`, which after expansion
|
||||
/// translates to `if cond then (\t -> t.x) else (\t -> t.y)`. However, for some macros spacing is
|
||||
@ -54,23 +25,20 @@ fn annotate_tokens_that_need_spacing(item: syntax::Item) -> syntax::Item {
|
||||
})
|
||||
}
|
||||
|
||||
/// If the input sequence is non-empty, return the result of applying
|
||||
/// [`resolve_operator_precedence`] to it.
|
||||
pub fn resolve_operator_precedence_if_non_empty(
|
||||
items: Vec<syntax::Item<'_>>,
|
||||
) -> Option<syntax::Tree<'_>> {
|
||||
match NonEmptyVec::try_from(items) {
|
||||
Ok(items) => Some(resolve_operator_precedence(items)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Take [`Item`] stream, resolve operator precedence and return the final AST.
|
||||
///
|
||||
/// The precedence resolution algorithm is based on the Shunting yard algorithm[1], extended to
|
||||
/// handle operator sections.
|
||||
/// [1]: https://en.wikipedia.org/wiki/Shunting_yard_algorithm
|
||||
pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) -> syntax::Tree<'s> {
|
||||
pub fn resolve_operator_precedence(items: NonEmptyVec<syntax::Item<'_>>) -> syntax::Tree<'_> {
|
||||
resolve_operator_precedence_if_non_empty(items).unwrap()
|
||||
}
|
||||
|
||||
/// If the input sequence is non-empty, return the result of applying
|
||||
/// [`resolve_operator_precedence`] to it.
|
||||
pub fn resolve_operator_precedence_if_non_empty<'s>(
|
||||
items: impl IntoIterator<Item = syntax::Item<'s>>,
|
||||
) -> Option<syntax::Tree<'s>> {
|
||||
type Tokens<'s> = Vec<syntax::Item<'s>>;
|
||||
let mut flattened: Tokens<'s> = default();
|
||||
let mut no_space_group: Tokens<'s> = default();
|
||||
@ -80,7 +48,7 @@ pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) ->
|
||||
flattened.extend(tokens);
|
||||
} else {
|
||||
let tokens = tokens.map(annotate_tokens_that_need_spacing);
|
||||
let ast = resolve_operator_precedence_internal(tokens);
|
||||
let ast = resolve_operator_precedence_internal(tokens).unwrap();
|
||||
flattened.push(ast.into());
|
||||
}
|
||||
};
|
||||
@ -107,7 +75,7 @@ pub fn resolve_operator_precedence<'s>(items: NonEmptyVec<syntax::Item<'s>>) ->
|
||||
|
||||
fn resolve_operator_precedence_internal<'s>(
|
||||
items: impl IntoIterator<Item = syntax::Item<'s>>,
|
||||
) -> syntax::Tree<'s> {
|
||||
) -> Option<syntax::Tree<'s>> {
|
||||
// Reverse-polish notation encoding.
|
||||
/// Classify an item as an operator-token, or other data; we track this state information
|
||||
/// because whenever consecutive operators or consecutive non-operators occur, we merge them
|
||||
@ -120,40 +88,55 @@ fn resolve_operator_precedence_internal<'s>(
|
||||
use ItemType::*;
|
||||
let mut was_section_used = false;
|
||||
let mut output: Vec<syntax::Item> = default();
|
||||
let mut operator_stack: Vec<WithPrecedence<syntax::tree::OperatorOrError>> = default();
|
||||
let mut operator_stack: Vec<Vec<token::Operator>> = default();
|
||||
let mut unary_operator: Option<token::Operator> = default();
|
||||
let mut prev_type = None;
|
||||
let mut precedence_error = None;
|
||||
for item in items {
|
||||
if let syntax::Item::Token(
|
||||
Token { variant: token::Variant::Operator(opr), left_offset, code }) = item {
|
||||
// Item is an operator.
|
||||
if let Some(unsatisified_opr) = unary_operator.take() {
|
||||
output.push(syntax::Tree::unary_opr_app(unsatisified_opr, None).into());
|
||||
prev_type = Some(Ast);
|
||||
}
|
||||
let prev_type = mem::replace(&mut prev_type, Some(Opr));
|
||||
|
||||
let prec = precedence_of(&code);
|
||||
if opr.can_be_binary_infix {
|
||||
} else if opr.can_be_unary_prefix {
|
||||
if prev_type == Some(Ast) {
|
||||
operator_stack.push(default());
|
||||
}
|
||||
let opr = Token(left_offset, code, opr);
|
||||
unary_operator = Some(opr);
|
||||
continue;
|
||||
} else {
|
||||
precedence_error.get_or_insert_with(|| format!("Precedence of: {:?}", code));
|
||||
};
|
||||
let prec = opr.precedence;
|
||||
let opr = Token(left_offset, code, opr);
|
||||
|
||||
if prev_type == Some(Opr) && let Some(prev_opr) = operator_stack.last_mut() {
|
||||
// Error. Multiple operators next to each other.
|
||||
match &mut prev_opr.elem {
|
||||
Err(err) => err.operators.push(opr),
|
||||
Ok(prev) => {
|
||||
let operators = NonEmptyVec::new(prev.clone(),vec![opr]);
|
||||
prev_opr.elem = Err(syntax::tree::MultipleOperatorError{operators});
|
||||
}
|
||||
}
|
||||
prev_opr.push(opr);
|
||||
} else {
|
||||
// Application has the highest precedence.
|
||||
const APP_PREC: usize = std::usize::MAX;
|
||||
while let Some(prev_opr) = operator_stack.last()
|
||||
&& prev_opr.precedence >= prec
|
||||
&& prev_opr.first().map(|opr| opr.precedence).unwrap_or(APP_PREC) >= prec
|
||||
&& let Some(prev_opr) = operator_stack.pop()
|
||||
&& let Some(rhs) = output.pop()
|
||||
{
|
||||
// Prev operator in the [`operator_stack`] has a higher precedence.
|
||||
let lhs = output.pop().map(|t| t.to_ast());
|
||||
if lhs.is_none() { was_section_used = true; }
|
||||
let ast = syntax::tree::apply_operator(lhs, prev_opr.elem, Some(rhs.to_ast()));
|
||||
let ast = syntax::tree::apply_operator(lhs, prev_opr, Some(rhs.to_ast()));
|
||||
output.push(ast.into());
|
||||
}
|
||||
operator_stack.push(WithPrecedence::new(prec, Ok(opr)));
|
||||
operator_stack.push(vec![opr]);
|
||||
}
|
||||
} else if let Some(opr) = unary_operator.take() {
|
||||
let rhs = Some(item.to_ast());
|
||||
output.push(syntax::Tree::unary_opr_app(opr, rhs).into());
|
||||
prev_type = Some(Ast);
|
||||
} else if prev_type == Some(Ast) && let Some(lhs) = output.pop() {
|
||||
// Multiple non-operators next to each other.
|
||||
let lhs = lhs.to_ast();
|
||||
@ -166,25 +149,31 @@ fn resolve_operator_precedence_internal<'s>(
|
||||
output.push(item);
|
||||
}
|
||||
}
|
||||
if let Some(unsatisified_opr) = unary_operator.take() {
|
||||
output.push(syntax::Tree::unary_opr_app(unsatisified_opr, None).into());
|
||||
prev_type = Some(Ast);
|
||||
}
|
||||
let mut opt_rhs = (prev_type == Some(Ast)).and_option_from(|| output.pop().map(|t| t.to_ast()));
|
||||
while let Some(opr) = operator_stack.pop() {
|
||||
let opt_lhs = output.pop().map(|t| t.to_ast());
|
||||
if opt_lhs.is_none() || opt_rhs.is_none() {
|
||||
was_section_used = true;
|
||||
}
|
||||
opt_rhs = Some(syntax::tree::apply_operator(opt_lhs, opr.elem, opt_rhs));
|
||||
opt_rhs = Some(syntax::tree::apply_operator(opt_lhs, opr, opt_rhs));
|
||||
}
|
||||
if !output.is_empty() {
|
||||
panic!("Internal error. Not all tokens were consumed while constructing the expression.");
|
||||
}
|
||||
|
||||
// This unwrap is safe because:
|
||||
// - resolve_operator_precedence only calls this function with non-empty sequences as inputs.
|
||||
// - Given a non-empty input, we will always have at least one output.
|
||||
let out = opt_rhs.unwrap();
|
||||
if was_section_used {
|
||||
syntax::Tree::opr_section_boundary(out)
|
||||
let out = if was_section_used {
|
||||
// This can't fail: `was_section_used` won't be true unless we had at least one input,
|
||||
// and if we have at least one input, we have output.
|
||||
let out = opt_rhs.unwrap();
|
||||
Some(syntax::Tree::opr_section_boundary(out))
|
||||
} else {
|
||||
out
|
||||
opt_rhs
|
||||
};
|
||||
if let Some(error) = precedence_error {
|
||||
return Some(syntax::Tree::with_unsupported(out.unwrap(), error));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
@ -170,7 +170,7 @@ impl<'s, T> Token<'s, T> {
|
||||
|
||||
impl<'s, T: Debug> Debug for Token<'s, T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "[{}:\"{}\"] ", self.left_offset.visible, self.code)?;
|
||||
write!(f, "[{}:{:?}] ", self.left_offset.visible, self.code)?;
|
||||
Debug::fmt(&self.variant, f)
|
||||
}
|
||||
}
|
||||
@ -262,7 +262,11 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
|
||||
pub is_free: bool,
|
||||
pub lift_level: usize
|
||||
},
|
||||
Operator,
|
||||
Operator {
|
||||
pub precedence: usize,
|
||||
pub can_be_binary_infix: bool,
|
||||
pub can_be_unary_prefix: bool,
|
||||
},
|
||||
Modifier,
|
||||
Comment,
|
||||
DocComment,
|
||||
|
@ -79,6 +79,11 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub error: Error,
|
||||
pub ast: Tree<'s>,
|
||||
},
|
||||
/// Indicates a subtree in which an unimplemented case was reached.
|
||||
Unsupported {
|
||||
pub error: String,
|
||||
pub ast: Tree<'s>,
|
||||
},
|
||||
/// A sequence of lines introduced by a line ending in an operator.
|
||||
BodyBlock {
|
||||
/// The lines of the block.
|
||||
@ -110,6 +115,14 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
Number {
|
||||
pub token: token::Number<'s>,
|
||||
},
|
||||
/// A comment.
|
||||
Comment {
|
||||
pub token: token::Comment<'s>,
|
||||
},
|
||||
/// A text section.
|
||||
TextSection {
|
||||
pub token: token::TextSection<'s>,
|
||||
},
|
||||
/// A simple application, like `print "hello"`.
|
||||
App {
|
||||
pub func: Tree<'s>,
|
||||
@ -123,6 +136,12 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub opr: OperatorOrError<'s>,
|
||||
pub rhs: Option<Tree<'s>>,
|
||||
},
|
||||
/// Application of a unary operator, like `-a` or `~handler`. It is a syntax error for `rhs`
|
||||
/// to be `None`.
|
||||
UnaryOprApp {
|
||||
pub opr: token::Operator<'s>,
|
||||
pub rhs: Option<Tree<'s>>,
|
||||
},
|
||||
/// Defines the point where operator sections should be expanded to lambdas. Let's consider
|
||||
/// the expression `map (.sum 1)`. It should be desugared to `map (x -> x.sum 1)`, not to
|
||||
/// `map ((x -> x.sum) 1)`. The expression `.sum` will be parsed as operator section
|
||||
@ -141,10 +160,17 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
MultiSegmentApp {
|
||||
pub segments: NonEmptyVec<MultiSegmentAppSegment<'s>>,
|
||||
},
|
||||
/// A type definition; introduced by a line consisting of the keyword `type`, an identifier
|
||||
/// to be used as the name of the type, and zero or more specifications of type parameters.
|
||||
/// The following indented block contains two types of lines:
|
||||
/// - First zero or more type constructors, and their subordinate blocks.
|
||||
/// - Then a block of statements, which may define methods or type methods.
|
||||
TypeDef {
|
||||
pub keyword: Token<'s>,
|
||||
pub name: Tree<'s>,
|
||||
pub params: Vec<Tree<'s>>,
|
||||
pub constructors: Vec<TypeConstructorLine<'s>>,
|
||||
pub block: Vec<block::Line<'s>>,
|
||||
},
|
||||
/// A variable assignment, like `foo = bar 23`.
|
||||
Assignment {
|
||||
@ -205,27 +231,34 @@ with_ast_definition!(generate_ast_definition());
|
||||
// === Invalid ===
|
||||
|
||||
/// Error of parsing attached to an [`Tree`] node.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
|
||||
#[allow(missing_docs)]
|
||||
#[reflect(transparent)]
|
||||
#[serde(from = "crate::serialization::Error")]
|
||||
pub struct Error {
|
||||
#[serde(skip_deserializing)]
|
||||
pub message: &'static str,
|
||||
pub message: Cow<'static, str>,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// Constructor.
|
||||
pub fn new(message: &'static str) -> Self {
|
||||
pub fn new(message: impl Into<Cow<'static, str>>) -> Self {
|
||||
let message = message.into();
|
||||
Self { message }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Tree<'s> {
|
||||
/// Constructor.
|
||||
pub fn with_error(self, message: &'static str) -> Self {
|
||||
pub fn with_error(self, message: impl Into<Cow<'static, str>>) -> Self {
|
||||
Tree::invalid(Error::new(message), self)
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
pub fn with_unsupported(self, message: String) -> Self {
|
||||
eprintln!("Unsupported syntax: {}", &message);
|
||||
Tree::unsupported(message, self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> span::Builder<'s> for Error {
|
||||
@ -235,6 +268,47 @@ impl<'s> span::Builder<'s> for Error {
|
||||
}
|
||||
|
||||
|
||||
// === Type Definitions ===
|
||||
|
||||
/// A line within a type definition, containing a type constructor definition.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
|
||||
pub struct TypeConstructorLine<'s> {
|
||||
/// The token beginning the line.
|
||||
pub newline: token::Newline<'s>,
|
||||
/// The type constructor definition, unless this is an empty line.
|
||||
pub expression: Option<TypeConstructorDef<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> span::Builder<'s> for TypeConstructorLine<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span.add(&mut self.newline).add(&mut self.expression)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<token::Newline<'s>> for TypeConstructorLine<'s> {
|
||||
fn from(newline: token::Newline<'s>) -> Self {
|
||||
Self { newline, expression: None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A type constructor definition within a type definition.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
|
||||
pub struct TypeConstructorDef<'s> {
|
||||
/// The identifier naming the type constructor.
|
||||
pub constructor: token::Ident<'s>,
|
||||
/// The arguments the type constructor accepts, specified inline.
|
||||
pub arguments: Vec<Tree<'s>>,
|
||||
/// The arguments the type constructor accepts, specified on their own lines.
|
||||
pub block: Vec<block::Line<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> span::Builder<'s> for TypeConstructorDef<'s> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span.add(&mut self.constructor).add(&mut self.arguments).add(&mut self.block)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === OprApp ===
|
||||
|
||||
/// Operator or [`MultipleOperatorError`].
|
||||
@ -324,9 +398,14 @@ pub fn apply<'s>(func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
|
||||
/// expression.
|
||||
pub fn apply_operator<'s>(
|
||||
lhs: Option<Tree<'s>>,
|
||||
opr: OperatorOrError<'s>,
|
||||
opr: Vec<token::Operator<'s>>,
|
||||
mut rhs: Option<Tree<'s>>,
|
||||
) -> Tree<'s> {
|
||||
let opr = match opr.len() {
|
||||
0 => return apply(lhs.unwrap(), rhs.unwrap()),
|
||||
1 => Ok(opr.into_iter().next().unwrap()),
|
||||
_ => Err(MultipleOperatorError { operators: NonEmptyVec::try_from(opr).unwrap() }),
|
||||
};
|
||||
if let Some(rhs_) = rhs.as_mut() {
|
||||
if let Variant::ArgumentBlockApplication(block) = &mut *rhs_.variant {
|
||||
if block.lhs.is_none() {
|
||||
@ -604,6 +683,31 @@ where &'a Token<'s, T>: Into<token::Ref<'s, 'a>>
|
||||
}
|
||||
|
||||
|
||||
// === String ===
|
||||
|
||||
impl<'s, 'a> TreeVisitable<'s, 'a> for String {}
|
||||
impl<'s, 'a> TreeVisitableMut<'s, 'a> for String {}
|
||||
impl<'a, 't, 's> SpanVisitable<'s, 'a> for String {}
|
||||
impl<'a, 't, 's> SpanVisitableMut<'s, 'a> for String {}
|
||||
impl<'a, 't, 's> ItemVisitable<'s, 'a> for String {}
|
||||
impl<'s> span::Builder<'s> for String {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 'a> TreeVisitable<'s, 'a> for Cow<'static, str> {}
|
||||
impl<'s, 'a> TreeVisitableMut<'s, 'a> for Cow<'static, str> {}
|
||||
impl<'a, 't, 's> SpanVisitable<'s, 'a> for Cow<'static, str> {}
|
||||
impl<'a, 't, 's> SpanVisitableMut<'s, 'a> for Cow<'static, str> {}
|
||||
impl<'a, 't, 's> ItemVisitable<'s, 'a> for Cow<'static, str> {}
|
||||
impl<'s> span::Builder<'s> for Cow<'static, str> {
|
||||
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ==========================
|
||||
// === CodePrinterVisitor ===
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
|
||||
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::tree::*;
|
||||
|
||||
|
||||
@ -250,3 +251,72 @@ impl<'s> Default for Builder<'s> {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// =============
|
||||
// === Lines ===
|
||||
// =============
|
||||
|
||||
/// Given an iterable of [`Item`]s, return an iterator of the [`Line`]s produced by dividing the
|
||||
/// input at newline tokens, and parsing the expressions with
|
||||
/// [`operator::resolve_operator_precedence`].
|
||||
pub fn lines<'s, I, J>(items: I) -> Lines<'s, J>
|
||||
where
|
||||
I: IntoIterator<IntoIter = J>,
|
||||
J: Iterator<Item = Item<'s>>, {
|
||||
let items = items.into_iter();
|
||||
let newline = default();
|
||||
let line = default();
|
||||
let finished = default();
|
||||
Lines { items, newline, line, finished }
|
||||
}
|
||||
|
||||
/// An iterator of [`Line`]s.
|
||||
#[derive(Debug)]
|
||||
pub struct Lines<'s, I> {
|
||||
items: I,
|
||||
newline: token::Newline<'s>,
|
||||
line: Vec<Item<'s>>,
|
||||
finished: bool,
|
||||
}
|
||||
|
||||
impl<'s, I> Lines<'s, I> {
|
||||
fn parse_current_line(&mut self, newline: token::Newline<'s>) -> Line<'s> {
|
||||
let line = self.line.drain(..);
|
||||
let expression = operator::resolve_operator_precedence_if_non_empty(line);
|
||||
Line { newline, expression }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, I> Iterator for Lines<'s, I>
|
||||
where I: Iterator<Item = Item<'s>>
|
||||
{
|
||||
type Item = Line<'s>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.finished {
|
||||
return None;
|
||||
}
|
||||
while let Some(item) = self.items.next() {
|
||||
match item {
|
||||
Item::Token(Token { variant: token::Variant::Newline(_), left_offset, code }) => {
|
||||
let token = token::newline(left_offset, code);
|
||||
let newline = mem::replace(&mut self.newline, token);
|
||||
if newline.code.is_empty() && self.line.is_empty() {
|
||||
// The block started with a real newline; ignore the implicit newline.
|
||||
continue;
|
||||
}
|
||||
return self.parse_current_line(newline).into();
|
||||
}
|
||||
_ => {
|
||||
self.line.push(item);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.finished = true;
|
||||
let newline = mem::take(&mut self.newline);
|
||||
self.parse_current_line(newline).into()
|
||||
}
|
||||
}
|
||||
|
@ -77,16 +77,91 @@ fn parentheses_nested() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn type_definition() {
|
||||
test("type Bool", block![(TypeDef (Ident type) (Ident Bool) #())]);
|
||||
test("type Option a", block![(TypeDef (Ident type) (Ident Option) #((Ident a)))]);
|
||||
fn comments() {
|
||||
// Basic, full-line comment.
|
||||
test("# a b c", block![(Comment "# a b c")]);
|
||||
}
|
||||
|
||||
|
||||
// === Type Definitions ===
|
||||
|
||||
/// Type definitions consisting of the header line only: no constructors, no body.
#[test]
fn type_definition_no_body() {
    let without_params = block![(TypeDef (Ident type) (Ident Bool) #() #() #())];
    test("type Bool", without_params);
    let with_one_param = block![(TypeDef (Ident type) (Ident Option) #((Ident a)) #() #())];
    test("type Option a", with_one_param);
}
|
||||
|
||||
/// Constructors with block arguments, inline arguments, and no arguments.
#[test]
fn type_constructors() {
    // Indentation is significant: `radius` and `4` must be indented deeper than `Circle` so
    // that they form its argument block, as the expected tree below requires.
    let code = [
        "type Geo",
        "    Circle",
        "        radius",
        "        4",
        "    Rectangle width height",
        "    Point",
    ];
    #[rustfmt::skip]
    let expected = block![
        (TypeDef (Ident type) (Ident Geo) #()
         #(((Circle #() #((Ident radius) (Number 4))))
           ((Rectangle #((Ident width) (Ident height)) #()))
           ((Point #() #())))
         #())
    ];
    test(&code.join("\n"), expected);
}
|
||||
|
||||
/// A type definition whose body contains only methods (no constructors).
#[test]
fn type_methods() {
    // Indentation is significant: `23` must be indented deeper than `number =` so that it
    // forms the function's body block, as the expected tree below requires.
    let code = ["type Geo", "    number =", "        23", "    area self = 1 + 1"];
    #[rustfmt::skip]
    let expected = block![
        (TypeDef (Ident type) (Ident Geo) #() #()
         #((Function number #() "=" (BodyBlock #((Number 23))))
           (Function area #((Ident self)) "=" (OprApp (Number 1) (Ok "+") (Number 1)))))
    ];
    test(&code.join("\n"), expected);
}
|
||||
|
||||
/// A type definition with constructors, an empty separator line, and methods.
#[test]
fn type_def_full() {
    // Indentation is significant: constructor arguments and the function body must be
    // indented deeper than the lines that introduce them, as the expected tree below requires.
    let code = [
        "type Geo",
        "    Circle",
        "        radius : float",
        "        4",
        "    Rectangle width height",
        "    Point",
        "",
        "    number =",
        "        23",
        "    area self = 1 + 1",
    ];
    #[rustfmt::skip]
    let expected = block![
        (TypeDef (Ident type) (Ident Geo) #()
         #(((Circle #() #((OprApp (Ident radius) (Ok ":") (Ident float)) (Number 4))))
           ((Rectangle #((Ident width) (Ident height)) #()))
           ((Point #() #()))
           (()))
         #((Function number #() "=" (BodyBlock #((Number 23))))
           (Function area #((Ident self)) "=" (OprApp (Number 1) (Ok "+") (Number 1)))))
    ];
    test(&code.join("\n"), expected);
}
|
||||
|
||||
|
||||
// === Variable Assignment ===
|
||||
|
||||
/// A single-line assignment of a number to an identifier.
#[test]
fn assignment_simple() {
    let expected = block![(Assignment (Ident foo) "=" (Number 23))];
    test("foo = 23", expected);
}
|
||||
|
||||
|
||||
// === Functions ===
|
||||
|
||||
#[test]
|
||||
fn function_inline_simple_args() {
|
||||
test("foo a = 23", block![(Function foo #((Ident a)) "=" (Number 23))]);
|
||||
@ -106,6 +181,9 @@ fn function_block_simple_args() {
|
||||
test("foo a b c =", block![(Function foo #((Ident a) (Ident b) (Ident c)) "=" ())]);
|
||||
}
|
||||
|
||||
|
||||
// === Code Blocks ===
|
||||
|
||||
#[test]
|
||||
fn code_block_body() {
|
||||
let code = ["main =", " 4"];
|
||||
@ -219,6 +297,71 @@ fn code_block_with_following_statement() {
|
||||
}
|
||||
|
||||
|
||||
// === Binary Operators ===
|
||||
|
||||
/// Runs of adjacent binary operators: the expected tree keeps a single `OprApp` whose operator
/// slot is an `Err` listing every operator of the run.
#[test]
fn multiple_operator_error() {
    // Two operators in a row.
    let two_operators = block![
        (OprApp (Number 4) (Err (#("+" "+"))) (Number 1))
    ];
    test("4 + + 1", two_operators);

    // Three operators in a row.
    let three_operators = block![
        (OprApp (Number 4) (Err (#("+" "+" "+"))) (Number 1))
    ];
    test("4 + + + 1", three_operators);
}
|
||||
|
||||
/// Mixed `*` and `+`: the multiplication is expected to be the inner (left) operand of the
/// addition.
#[test]
fn precedence() {
    let ast = block![
        (OprApp (OprApp (Number 1) (Ok "*") (Number 2)) (Ok "+") (Number 3))
    ];
    test("1 * 2 + 3", ast);
}
|
||||
|
||||
|
||||
// === Unary Operators ===
|
||||
|
||||
/// A function argument prefixed with `~`: expected to appear in the argument list as a
/// `UnaryOprApp` wrapping the argument's identifier.
#[test]
fn unevaluated_argument() {
    let ast = block![
        (Function main #((UnaryOprApp "~" (Ident foo))) "=" (Number 4))
    ];
    test("main ~foo = 4", ast);
}
|
||||
|
||||
/// A `~` in argument position with nothing after it: the expected tree still contains a
/// `UnaryOprApp`, with an empty `()` in the operand slot.
#[test]
fn unary_operator_missing_operand() {
    let ast = block![
        (Function main #((UnaryOprApp "~" ())) "=" (Number 4))
    ];
    test("main ~ = 4", ast);
}
|
||||
|
||||
/// A trailing `~` at the end of an expression: expected to be applied to `foo` as an argument,
/// in the form of a `UnaryOprApp` with an empty `()` operand.
#[test]
fn unary_operator_at_end_of_expression() {
    let ast = block![
        (App (Ident foo) (UnaryOprApp "~" ()))
    ];
    test("foo ~", ast);
}
|
||||
|
||||
/// `+-` written without spaces: expected to be read as a binary `+` whose right operand is a
/// unary `-` applied to `1`.
#[test]
fn plus_negative() {
    let ast = block![
        (Assignment (Ident x) "=" (OprApp (Number 4) (Ok "+") (UnaryOprApp "-" (Number 1))))
    ];
    test("x = 4+-1", ast);
}
|
||||
|
||||
|
||||
|
||||
// ====================
|
||||
// === Test Support ===
|
||||
@ -268,6 +411,7 @@ where T: serde::Serialize + Reflect {
|
||||
let mut to_s_expr = ToSExpr::new(&graph);
|
||||
to_s_expr.mapper(ast_ty, strip_hidden_fields);
|
||||
let ident_token = rust_to_meta[&token::variant::Ident::reflect().id];
|
||||
let comment_token = rust_to_meta[&token::variant::Comment::reflect().id];
|
||||
let operator_token = rust_to_meta[&token::variant::Operator::reflect().id];
|
||||
let symbol_token = rust_to_meta[&token::variant::Symbol::reflect().id];
|
||||
let number_token = rust_to_meta[&token::variant::Number::reflect().id];
|
||||
@ -283,6 +427,8 @@ where T: serde::Serialize + Reflect {
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(comment_token, move |token| Value::string(token_to_str_(token)));
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(operator_token, move |token| Value::string(token_to_str_(token)));
|
||||
let token_to_str_ = token_to_str.clone();
|
||||
to_s_expr.mapper(symbol_token, move |token| Value::string(token_to_str_(token)));
|
||||
|
Loading…
Reference in New Issue
Block a user