Ensure all spans have document offsets (#8039)

- Validate spans during existing lexer and parser unit tests, and in `enso_parser_debug`.
- Fix lost span info causing failures of updated tests.

# Important Notes
- [x] Output of `parse_all_enso_files.sh` is unchanged since before #7881 (modulo libs changes since then).
- When the parser encounters an input with the first line indented, it now creates a sub-block for lines at that indent level, and emits a syntax error (every indented block must have a parent).
- When the parser encounters a number with a base but no digits (e.g. `0x`), it now emits a `Number` with `None` in the digits field rather than a 0-length digits token.
This commit is contained in:
Kaz Wesley 2023-10-19 05:36:42 -07:00 committed by GitHub
parent 24b9a1179e
commit 2edd2bd7ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 799 additions and 251 deletions

View File

@ -71,6 +71,127 @@ exports[`Parsing ' foo bar
}
`;
exports[`Parsing '2
+ 3
+ 4' 1`] = `
{
"childrenLengthInCodeParsed": 11,
"statements": [
{
"expression": {
"childrenLengthInCodeParsed": 11,
"excess": [],
"expressions": [
{
"expression": {
"expression": {
"base": undefined,
"childrenLengthInCodeParsed": 1,
"fractionalDigits": undefined,
"integer": {
"base": undefined,
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 5,
"type": "Digits",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 5,
},
"type": "Number",
"whitespaceLengthInCodeParsed": 1,
"whitespaceStartInCodeParsed": 4,
},
"operator": {
"ok": true,
"value": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 3,
"type": "Operator",
"whitespaceLengthInCodeBuffer": 1,
"whitespaceStartInCodeBuffer": 2,
},
},
},
"newline": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 1,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 1,
},
},
{
"expression": {
"expression": {
"base": undefined,
"childrenLengthInCodeParsed": 1,
"fractionalDigits": undefined,
"integer": {
"base": undefined,
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 10,
"type": "Digits",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 10,
},
"type": "Number",
"whitespaceLengthInCodeParsed": 1,
"whitespaceStartInCodeParsed": 9,
},
"operator": {
"ok": true,
"value": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 8,
"type": "Operator",
"whitespaceLengthInCodeBuffer": 1,
"whitespaceStartInCodeBuffer": 7,
},
},
},
"newline": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 6,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 6,
},
},
],
"lhs": {
"base": undefined,
"childrenLengthInCodeParsed": 1,
"fractionalDigits": undefined,
"integer": {
"base": undefined,
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 0,
"type": "Digits",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 0,
},
"type": "Number",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"type": "OperatorBlockApplication",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"newline": {
"lengthInCodeBuffer": 0,
"startInCodeBuffer": 0,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 0,
},
},
],
"type": "BodyBlock",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
}
`;
exports[`Parsing 'Data.read
2 + 2' 1`] = `
{
@ -199,6 +320,173 @@ exports[`Parsing 'Data.read
}
`;
exports[`Parsing 'Data.read "File"
2 + 3' 1`] = `
{
"childrenLengthInCodeParsed": 22,
"statements": [
{
"expression": {
"arg": {
"childrenLengthInCodeParsed": 6,
"close": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 15,
"type": "TextEnd",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 15,
},
"elements": [
{
"text": {
"lengthInCodeBuffer": 4,
"startInCodeBuffer": 11,
"type": "TextSection",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 11,
},
"type": "Section",
},
],
"newline": undefined,
"open": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 10,
"type": "TextStart",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 10,
},
"type": "TextLiteral",
"whitespaceLengthInCodeParsed": 1,
"whitespaceStartInCodeParsed": 9,
},
"childrenLengthInCodeParsed": 16,
"func": {
"childrenLengthInCodeParsed": 9,
"lhs": {
"childrenLengthInCodeParsed": 4,
"token": {
"isFree": false,
"isOperatorLexically": false,
"isTypeOrConstructor": true,
"lengthInCodeBuffer": 4,
"liftLevel": 0,
"startInCodeBuffer": 0,
"type": "Ident",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 0,
},
"type": "Ident",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"opr": {
"ok": true,
"value": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 4,
"type": "Operator",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 4,
},
},
"rhs": {
"childrenLengthInCodeParsed": 4,
"token": {
"isFree": false,
"isOperatorLexically": false,
"isTypeOrConstructor": false,
"lengthInCodeBuffer": 4,
"liftLevel": 0,
"startInCodeBuffer": 5,
"type": "Ident",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 5,
},
"type": "Ident",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 5,
},
"type": "OprApp",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"type": "App",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"newline": {
"lengthInCodeBuffer": 0,
"startInCodeBuffer": 0,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 0,
},
},
{
"expression": {
"childrenLengthInCodeParsed": 5,
"lhs": {
"base": undefined,
"childrenLengthInCodeParsed": 1,
"fractionalDigits": undefined,
"integer": {
"base": undefined,
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 17,
"type": "Digits",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 17,
},
"type": "Number",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 17,
},
"opr": {
"ok": true,
"value": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 19,
"type": "Operator",
"whitespaceLengthInCodeBuffer": 1,
"whitespaceStartInCodeBuffer": 18,
},
},
"rhs": {
"base": undefined,
"childrenLengthInCodeParsed": 1,
"fractionalDigits": undefined,
"integer": {
"base": undefined,
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 21,
"type": "Digits",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 21,
},
"type": "Number",
"whitespaceLengthInCodeParsed": 1,
"whitespaceStartInCodeParsed": 20,
},
"type": "OprApp",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 17,
},
"newline": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 16,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 16,
},
},
],
"type": "BodyBlock",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
}
`;
exports[`Parsing 'Data.read File
2 + 3' 1`] = `
{
@ -350,6 +638,77 @@ exports[`Parsing 'Data.read File
}
`;
exports[`Parsing 'foo bar
' 1`] = `
{
"childrenLengthInCodeParsed": 8,
"statements": [
{
"expression": {
"arg": {
"childrenLengthInCodeParsed": 3,
"token": {
"isFree": false,
"isOperatorLexically": false,
"isTypeOrConstructor": false,
"lengthInCodeBuffer": 3,
"liftLevel": 0,
"startInCodeBuffer": 4,
"type": "Ident",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 4,
},
"type": "Ident",
"whitespaceLengthInCodeParsed": 1,
"whitespaceStartInCodeParsed": 3,
},
"childrenLengthInCodeParsed": 7,
"func": {
"childrenLengthInCodeParsed": 3,
"token": {
"isFree": false,
"isOperatorLexically": false,
"isTypeOrConstructor": false,
"lengthInCodeBuffer": 3,
"liftLevel": 0,
"startInCodeBuffer": 0,
"type": "Ident",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 0,
},
"type": "Ident",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"type": "App",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
},
"newline": {
"lengthInCodeBuffer": 0,
"startInCodeBuffer": 0,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 0,
},
},
{
"expression": undefined,
"newline": {
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 7,
"type": "Newline",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 7,
},
},
],
"type": "BodyBlock",
"whitespaceLengthInCodeParsed": 0,
"whitespaceStartInCodeParsed": 0,
}
`;
exports[`Parsing 'foo bar=baz' 1`] = `
{
"childrenLengthInCodeParsed": 11,

View File

@ -18,10 +18,10 @@ export function parseEnso(code: string): Tree {
export function parseEnsoLine(code: string): Tree {
const block = parseEnso(code)
assert(block.type === Tree.Type.BodyBlock)
const statemets = block.statements[Symbol.iterator]()
const firstLine = statemets.next()
const statements = block.statements[Symbol.iterator]()
const firstLine = statements.next()
assert(!firstLine.done)
assert(!!statemets.next().done)
assert(!!statements.next().done)
assert(firstLine.value.expression != null)
return firstLine.value.expression
}
@ -95,14 +95,13 @@ function treePath(obj: LazyObject, pred: (node: Tree) => boolean): Tree[] {
if (import.meta.vitest) {
const { test, expect } = import.meta.vitest
// Not working cases commented.
const parseCases = [
' foo bar\n',
'foo bar\n',
'Data.read\n2 + 2',
'Data.read File\n2 + 3',
// 'Data.read "File"\n2 + 3',
'Data.read "File"\n2 + 3',
'foo bar=baz',
// '2\n + 3\n + 4',
'2\n + 3\n + 4',
]
test.each(parseCases)("Parsing '%s'", (code) => {

View File

@ -3,7 +3,7 @@ import { assert } from '@/util/assert'
import { parseEnsoLine, readAstSpan, readTokenSpan } from '@/util/ast'
import type { Result } from '@/util/result'
/** An operand of one of the applications inside `GenralOprApp` */
/** An operand of one of the applications inside `GeneralOprApp` */
export type GeneralOperand =
| Operand
// A part of `GeneralOprApp`, consisting of lhs and first `statements` of applications.
@ -66,7 +66,7 @@ export class GeneralOprApp {
expectedOpr = oprCode
}
if (matchingOprs === this.apps.length) {
// If all operatros matched, the lhs may be a continuation of this chain.
// If all operators matched, the lhs may be a continuation of this chain.
if (this.lhs != null) yield* operandsOfLeftAssocOprChain(this.lhs, code, expectedOpr)
else yield null
} else {
@ -203,15 +203,14 @@ if (import.meta.vitest) {
{
code: '2\n * 3\n + 44',
result: [
{ type: 'partOfOprBlockApp', repr: '2\n * 3\n + 4', statemets: 1 },
{ type: 'partOfOprBlockApp', repr: '2\n * 3\n + 44', statements: 1 },
{ type: 'ast', repr: '44' },
],
},
// There is a bug in AST spans in some OperatorBlockApplications. Fix this test once fixed
{
code: '2\n + 3\n * 4\n + 55',
result: [
{ type: 'partOfOprBlockApp', repr: '2\n + 3\n * 4\n + 5', statemets: 2 },
{ type: 'partOfOprBlockApp', repr: '2\n + 3\n * 4\n + 55', statements: 2 },
{ type: 'ast', repr: '55' },
],
},
@ -241,7 +240,7 @@ if (import.meta.vitest) {
}: {
code: string
opr?: string
result: { type: string; repr: string; statemets?: number }[]
result: { type: string; repr: string; statements?: number }[]
}) => {
const ast = parseEnsoLine(code)
const actual = operandsOfLeftAssocOprChain(ast, code, opr)
@ -258,7 +257,7 @@ if (import.meta.vitest) {
} else {
assert(actual?.type == 'partOfOprBlockApp')
expect(readAstSpan(actual.ast, code)).toStrictEqual(expected?.repr)
expect(actual.statements).toStrictEqual(expected?.statemets)
expect(actual.statements).toStrictEqual(expected?.statements)
}
}
}

View File

@ -339,14 +339,14 @@ type XML_Element
Value (java_element:Element) (~children_cache:(Vector (XML_Element | Text)))
type XML_Error
# An error that indicates that the XML data could not be parsed.
## An error that indicates that the XML data could not be parsed.
Arguments:
- line_number: the line on which the parse failed.
- column_number: the column at which the parse failed.
Arguments:
- line_number: the line on which the parse failed.
- column_number: the column at which the parse failed.
Parse_Error (line_number : Integer) (column_number : Integer)
# Any other XML-related Java exception.
## Any other XML-related Java exception.
Other (error : Text)
## PRIVATE

View File

@ -29,7 +29,7 @@ public class JsInteropTest extends TestBase {
public void testDefaultJSPrint() {
var src = """
from Standard.Base import Json
main =
json = Json.parse <| '''
{
@ -38,7 +38,7 @@ public class JsInteropTest extends TestBase {
}
}
json.get "inner"
""";
""";
Value res = evalModule(ctx, src);
assertEquals("{\"a\":1}", res.toString());
}

View File

@ -65,7 +65,7 @@ public class NestedPatternCompilationBenchmarks {
list_of_6 =
List.Cons 1 (List.Cons 2 (List.Cons 3 (List.Cons 4 (List.Cons 5 (List.Cons 6 List.Nil)))))
""";
""";
}
@Benchmark

View File

@ -185,3 +185,47 @@ fn tuplify(value: Value) -> Value {
let cdr = tuplify(cdr);
Value::Cons(lexpr::Cons::new(car, cdr))
}
// ========================
// === Span consistency ===
// ========================
/// Recursively verify that the spans recorded in a `Tree` are contiguous and complete.
///
/// Beginning with the left-offset of `tree` itself, each item yielded by `visit_items` is appended
/// to a running range: a token contributes its left-offset range followed by its code range, and a
/// child tree contributes its left-offset range joined with its span range. Every appended range
/// must start exactly where the running range ends. Child trees are validated recursively against
/// the range computed for them. Tokens whose left-offset and code are both empty are skipped.
///
/// # Panics
///
/// Panics if two consecutive ranges are not adjacent, or if the accumulated range differs from
/// `expected_span`. When `expected_span` is empty, the accumulated range is only required to be
/// empty as well.
pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std::ops::Range<u32>) {
    use enso_parser::syntax::item::Ref;
    use std::fmt::Debug;
    use std::ops::Range;

    /// Join `next` onto `prev` (if there is one), asserting that `next` begins where `prev` ends.
    fn concat<T>(prev: &Option<Range<T>>, next: &Range<T>) -> Range<T>
    where T: PartialEq + Debug + Copy {
        if let Some(prev) = prev {
            assert_eq!(prev.end, next.start);
            prev.start..next.end
        } else {
            next.clone()
        }
    }

    // The running range starts out as the whitespace preceding this tree.
    let mut sum_span = Some(tree.span.left_offset.code.range_utf16());
    tree.visit_items(|item| match item {
        Ref::Tree(child) => {
            let child_offset = Some(child.span.left_offset.code.range_utf16());
            let children_span = concat(&child_offset, &child.span.range_utf16());
            // The child must be internally consistent before it is counted towards the parent.
            validate_spans(child, children_span.clone());
            sum_span = Some(concat(&sum_span, &children_span));
        }
        Ref::Token(token) => {
            let is_zero_width = token.left_offset.is_empty() && token.code.is_empty();
            if !is_zero_width {
                sum_span = Some(concat(&sum_span, &token.left_offset.code.range_utf16()));
                sum_span = Some(concat(&sum_span, &token.code.range_utf16()));
            }
        }
    });
    if expected_span.is_empty() {
        assert!(sum_span.map_or(true, |range| range.is_empty()));
    } else {
        assert_eq!(sum_span.unwrap(), expected_span);
    }
}

View File

@ -40,6 +40,8 @@ fn check_file(path: &str, mut code: &str) {
code = code_;
}
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
for (parsed, original) in ast.code().lines().zip(code.lines()) {
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {path}");
}

View File

@ -10,7 +10,7 @@ parse_json1 = Json.parse 3
main =
## The file contains three different sheets relating to operations of an
online store.
online store.
operator2 = Enso_Project.data / 3
## Read the customers table.
operator3 = operator2.read_xlsx 3
@ -19,10 +19,10 @@ operator4 = operator2.read_xlsx 3
## Read the orders history.
operator5 = operator2.read_xlsx 3
## Index the items table on `Item ID`. This will allow this data to be
joined to other tables that also contain Item IDs.
joined to other tables that also contain Item IDs.
operator7 = operator4.set_index 3
## Join the item data to the order history, to get information on item
prices in the orders table.
prices in the orders table.
operator8 = operator5.join operator7 3
operator1 = operator8.at 3
operator9 = operator8.at 3
@ -30,20 +30,20 @@ operator9 = operator8.at 3
product1 = operator1 * operator9
operator10 = operator8.set 3 product1
## Group all orders by the Customer ID, to compute the total value of orders
placed by each client.
placed by each client.
operator11 = operator10.group by=3
operator12 = operator11.at 3
## Compute the lifetime value of each client.
operator13 = operator12.sum
operator14 = operator13.rename 3
## Index the customers table by Customer ID. This will allow this table
to be joined to other tables that also contain Customer IDs.
to be joined to other tables that also contain Customer IDs.
operator15 = operator3.set_index 3
## Join the customer data into orders table, to include names in the final
ranking.
ranking.
operator16 = operator14.join operator15
## Sort the customers by their lifetime value, with the most valuable
customers at the start of the table.
customers at the start of the table.
operator17 = operator16.sort by=3 order=Sort_Order.Descending

View File

@ -135,22 +135,17 @@ fn doc_comments() {
(Function (Ident id) #((() (Ident x) () ())) "=" (Ident x)))]);
#[rustfmt::skip]
let lines = vec![
" ## Test indent handling",
" foo",
];
#[rustfmt::skip]
test!(&lines.join("\n"), (Documented (#((Section " Test indent handling")) #(())) (Ident foo)));
#[rustfmt::skip]
let lines = vec![
"type Foo",
" ## Test indent handling",
" ",
" foo",
];
#[rustfmt::skip]
test!(&lines.join("\n"),
(Documented
(#((Section " Test indent handling")) #(() ()))
(Ident foo)));
(TypeDef type Foo #() #(
(Documented
(#((Section " Test indent handling")) #(() ()))
(Ident foo)))));
}
@ -329,7 +324,7 @@ fn assignment_simple() {
#[test]
fn function_inline_simple_args() {
test(" foo a = x", block![(Function (Ident foo) #((() (Ident a) () ())) "=" (Ident x))]);
test("foo a = x", block![(Function (Ident foo) #((() (Ident a) () ())) "=" (Ident x))]);
#[rustfmt::skip]
test("foo a b = x",
block![(Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ())) "=" (Ident x))]);
@ -340,7 +335,7 @@ fn function_inline_simple_args() {
#((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ()))
"=" (Ident x))],
);
test(" foo _ = x", block![(Function (Ident foo) #((() (Wildcard -1) () ())) "=" (Ident x))]);
test("foo _ = x", block![(Function (Ident foo) #((() (Wildcard -1) () ())) "=" (Ident x))]);
}
#[test]
@ -578,6 +573,11 @@ fn operator_section_in_operator_block() {
test(&code.join("\n"), expected);
}
/// An input whose very first line is indented has no parent for the resulting block, so the
/// parse is expected to contain an invalid node (checked via `expect_invalid_node`).
#[test]
fn first_line_indented() {
expect_invalid_node(" a");
}
// === Binary Operators ===
@ -710,24 +710,21 @@ fn unary_operator_at_end_of_expression() {
#[test]
fn unspaced_operator_sequence() {
let cases = [
// Add a negated value.
("x = y+-z", block![
(Assignment (Ident x) "=" (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" (Ident z))))]),
// Create an operator section that adds a negated value to its input.
("x = +-z", block![
(Assignment (Ident x) "=" (OprSectionBoundary 1
(OprApp () (Ok "+") (UnaryOprApp "-" (Ident z)))))]),
// Create an operator section that adds its input, negated, to a value.
("x = y+-", block![
(Assignment (Ident x) "=" (OprSectionBoundary 1
(OprApp (Ident y) (Ok "+") (UnaryOprApp "-" ()))))]),
// Assign a negative number to x.
("x=-1", block![(Assignment (Ident x) "=" (UnaryOprApp "-" (Number () "1" ())))]),
// Assign a negated value to x.
("x=-y", block![(Assignment (Ident x) "=" (UnaryOprApp "-" (Ident y)))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
// Add a negated value.
test!("x = y+-z",
(Assignment (Ident x) "=" (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" (Ident z)))));
// Create an operator section that adds a negated value to its input.
test!("x = +-z",
(Assignment (Ident x) "=" (OprSectionBoundary 1
(OprApp () (Ok "+") (UnaryOprApp "-" (Ident z))))));
// Create an operator section that adds its input, negated, to a value.
test!("x = y+-",
(Assignment (Ident x) "=" (OprSectionBoundary 1
(OprApp (Ident y) (Ok "+") (UnaryOprApp "-" ())))));
// Assign a negative number to x.
test!("x=-1", (Assignment (Ident x) "=" (UnaryOprApp "-" (Number () "1" ()))));
// Assign a negated value to x.
test!("x=-y", (Assignment (Ident x) "=" (UnaryOprApp "-" (Ident y))));
}
#[test]
@ -891,7 +888,7 @@ fn metadata_raw() {
fn metadata_parsing() {
let code = metadata::ORDERS_WITH_METADATA;
let (meta, code) = enso_parser::metadata::parse(code).unwrap();
let _ast = enso_parser::Parser::new().run(code);
let _ast = parse(code);
let _meta: enso_parser::metadata::Metadata = meta.unwrap();
}
@ -989,8 +986,7 @@ x"#;
(Ident x)
];
test(code, expected);
let code = " x = \"\"\"\n Indented multiline\n x";
let code = "x = \"\"\"\n Indented multiline\nx";
#[rustfmt::skip]
let expected = block![
(Assignment (Ident x) "=" (TextLiteral #((Section "Indented multiline"))))
@ -1153,6 +1149,27 @@ fn case_expression() {
test(&code.join("\n"), expected);
}
/// Documentation comments (`##`) placed before `case ... of` branches are expected to appear in
/// the `CaseOf` node as their own entries, each carrying only the documentation section, and
/// interleaved with the entries for the branches they precede.
#[test]
fn case_documentation() {
#[rustfmt::skip]
let code = [
"case a of",
" ## The Some case",
" Some -> x",
" ## The Int case",
" Int -> x",
];
// Expected s-expression: a documentation-only entry before each pattern/arrow/body entry.
#[rustfmt::skip]
let expected = block![
(CaseOf (Ident a) #(
(((#((Section " The Some case")) #()) () () ()))
((() (Ident Some) "->" (Ident x)))
(((#((Section " The Int case")) #()) () () ()))
((() (Ident Int) "->" (Ident x)))))
];
test(&code.join("\n"), expected);
}
#[test]
fn case_by_type() {
macro_rules! test_case {
@ -1247,34 +1264,50 @@ fn tuple_literals() {
// === Numeric literals ===
#[test]
fn numbers() {
test!("1 . 0", (OprApp (Number () "1" ()) (Ok ".") (Number () "0" ())));
test!("1 .0",
(App (Number () "1" ()) (OprSectionBoundary 1 (OprApp () (Ok ".") (Number () "0" ())))));
test!("1. 0",
(OprSectionBoundary 1 (App (OprApp (Number () "1" ()) (Ok ".") ()) (Number () "0" ()))));
test!("0b10101010", (Number "0b" "10101010" ()));
test!("0o122137", (Number "0o" "122137" ()));
test!("0xAE2F14", (Number "0x" "AE2F14" ()));
test!("pi = 3.14", (Assignment (Ident pi) "=" (Number () "3" ("." "14"))));
test!("0.0.x", (OprApp (Number () "0" ("." "0")) (Ok ".") (Ident x)));
}
#[cfg(test)]
mod numbers {
use super::*;
#[test]
// This syntax cannot be used until we remove old-nondecimal number support, which is
// needed for compatibility until the old parser is fully replaced.
#[ignore]
fn new_delimited_numbers() {
test!("100_000", (Number () "100_000" ()));
test!("10_000.99", (Number () "10_000" ("." "99")));
}
#[test]
fn with_decimal() {
test!("1 . 0", (OprApp (Number () "1" ()) (Ok ".") (Number () "0" ())));
test!("1 .0",
(App (Number () "1" ()) (OprSectionBoundary 1 (OprApp () (Ok ".") (Number () "0" ())))));
test!("1. 0",
(OprSectionBoundary 1 (App (OprApp (Number () "1" ()) (Ok ".") ()) (Number () "0" ()))));
test!("pi = 3.14", (Assignment (Ident pi) "=" (Number () "3" ("." "14"))));
test!("0.0.x", (OprApp (Number () "0" ("." "0")) (Ok ".") (Ident x)));
}
#[test]
fn old_nondecimal_numbers() {
test!("2_01101101", (Number "2_" "01101101" ()));
test!("-2_01101101", (UnaryOprApp "-" (Number "2_" "01101101" ())));
test!("16_17ffffffffffffffa", (Number "16_" "17ffffffffffffffa" ()));
#[test]
fn with_base() {
test!("0b10101010", (Number "0b" "10101010" ()));
test!("0o122137", (Number "0o" "122137" ()));
test!("0xAE2F14", (Number "0x" "AE2F14" ()));
}
#[test]
fn base_only() {
test!("0x", (Number "0x" () ()));
test!("0b", (Number "0b" () ()));
test!("0o", (Number "0o" () ()));
}
#[test]
// This syntax cannot be used until we remove old-nondecimal number support, which is
// needed for compatibility until the old parser is fully replaced.
#[ignore]
fn new_delimited() {
test!("100_000", (Number () "100_000" ()));
test!("10_000.99", (Number () "10_000" ("." "99")));
}
#[test]
fn old_nondecimal() {
test!("2_01101101", (Number "2_" "01101101" ()));
test!("-2_01101101", (UnaryOprApp "-" (Number "2_" "01101101" ())));
test!("16_17ffffffffffffffa", (Number "16_" "17ffffffffffffffa" ()));
}
}
@ -1538,12 +1571,19 @@ fn expect_tree_representing_code(code: &str, ast: &enso_parser::syntax::Tree) {
/// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look
/// like `sexp![foo]`.
fn test(code: &str, expect: lexpr::Value) {
let ast = enso_parser::Parser::new().run(code);
let ast = parse(code);
let ast_s_expr = to_s_expr(&ast, code);
assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast);
expect_tree_representing_code(code, &ast);
}
/// Parse `code` and check the resulting tree's spans before handing it back.
///
/// The spans are validated against the full extent of the input, measured in UTF-16 code units
/// (`0..len`); `validate_spans` panics if they are inconsistent.
fn parse(code: &str) -> enso_parser::syntax::tree::Tree {
    let utf16_len = code.encode_utf16().count() as u32;
    let tree = enso_parser::Parser::new().run(code);
    enso_parser_debug::validate_spans(&tree, 0..utf16_len);
    tree
}
// === Testing inputs containing syntax errors ===
@ -1555,7 +1595,7 @@ struct Errors {
impl Errors {
fn collect(code: &str) -> Self {
let ast = enso_parser::Parser::new().run(code);
let ast = parse(code);
expect_tree_representing_code(code, &ast);
let errors = core::cell::Cell::new(Errors::default());
ast.map(|tree| match &*tree.variant {

View File

@ -657,7 +657,7 @@ impl<'s> Lexer<'s> {
match token.code.as_ref() {
// Special-case: Split into multiple operators.
"+-" => {
let (left, right) = token.split_at_(Bytes(1));
let (left, right) = token.split_at(code::Length::of("+"));
let lhs = analyze_operator(&left.code);
self.submit_token(left.with_variant(token::Variant::operator(lhs)));
// The `-` in this case is not identical to a free `-`: It is only allowed a
@ -886,23 +886,25 @@ impl<'s> Lexer<'s> {
if let Some(token) = token {
if let Some(base) = base {
self.submit_token(token.with_variant(token::Variant::number_base()));
let token = match base {
if let Some(digits) = match base {
token::Base::Binary => self.token(|this| this.take_while(is_binary_digit)),
token::Base::Octal => self.token(|this| this.take_while(is_octal_digit)),
token::Base::Hexadecimal =>
self.token(|this| this.take_while(is_hexadecimal_digit)),
};
let joiner = token::OperatorProperties::new()
.with_binary_infix_precedence(u32::MAX)
.as_token_joiner();
self.submit_token(Token(
Code::empty_without_offset(),
Code::empty_without_offset(),
token::Variant::operator(joiner),
));
// Every number has a digits-token, even if it's zero-length.
let token = token.unwrap_or_default();
self.submit_token(token.with_variant(token::Variant::digits(Some(base))));
} {
// The base and the digits are separate tokens so that they can have separate
// spans. A pseudo-token binds them together tightly so that the parser can
// assemble them into one number node.
let joiner = token::OperatorProperties::new()
.with_binary_infix_precedence(u32::MAX)
.as_token_joiner();
self.submit_token(Token(
Code::empty(self.current_offset.utf16),
Code::empty(self.current_offset.utf16),
token::Variant::operator(joiner),
));
self.submit_token(digits.with_variant(token::Variant::digits(Some(base))));
}
} else {
self.submit_token(token.with_variant(token::Variant::digits(None)));
}
@ -1076,11 +1078,19 @@ impl<'s> Lexer<'s> {
}
if let Some(indent) = new_indent {
if indent <= *block_indent {
self.output.push(Token::from(token::text_end(
Code::empty_without_offset(),
Code::empty_without_offset(),
)));
self.end_blocks(indent);
let text_end = {
let location = newlines
.first()
.as_ref()
.unwrap()
.left_offset
.code
.position_before();
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::text_end())
};
self.output.push(text_end);
self.end_blocks(indent, newlines.first().as_ref().unwrap());
self.output.extend(newlines);
if self.current_offset == text_start.0 {
self.last_spaces_visible_offset = text_start.1.visible;
@ -1152,7 +1162,10 @@ impl<'s> Lexer<'s> {
let close_quote_end = self.mark();
self.make_token(text_end, close_quote_end, token::Variant::text_end())
} else {
Token::from(token::text_end(Code::empty_without_offset(), Code::empty_without_offset()))
Token::from(token::text_end(
Code::empty(self.current_offset.utf16),
Code::empty(self.current_offset.utf16),
))
};
self.output.push(end_token);
TextEndedAt::End
@ -1327,20 +1340,24 @@ impl<'s> Lexer<'s> {
while let Some(token) = self.line_break() {
newlines.push(token.with_variant(token::Variant::newline()));
}
if !newlines.is_empty() {
if let Some(last) = newlines.last() {
let block_indent = self.last_spaces_visible_offset;
if block_indent > self.current_block_indent {
let block_start = self.marker_token(token::Variant::block_start());
let block_start = {
let location = last.left_offset.code.position_before();
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::block_start())
};
self.submit_token(block_start);
self.start_block(block_indent);
}
self.end_blocks(block_indent);
self.end_blocks(block_indent, newlines.first().as_ref().unwrap());
newlines.drain(..).for_each(|token| self.submit_token(token));
}
self.token_storage.set_from(newlines);
}
fn end_blocks(&mut self, block_indent: VisibleOffset) {
fn end_blocks(&mut self, block_indent: VisibleOffset, newline: &Token<'s>) {
while block_indent < self.current_block_indent {
let Some(previous_indent) = self.block_indent_stack.last().copied() else {
// If the file starts at indent > 0, we treat that as the root indent level
@ -1355,7 +1372,11 @@ impl<'s> Lexer<'s> {
break;
}
self.end_block();
let block_end = self.marker_token(token::Variant::block_end());
let block_end = {
let location = newline.left_offset.code.position_before();
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::block_end())
};
self.submit_token(block_end);
}
}
@ -1385,22 +1406,23 @@ impl<'s> Lexer<'s> {
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run(mut self) -> ParseResult<Vec<Token<'s>>> {
// If the first line is indented, open a block for it.
self.spaces_after_lexeme();
self.current_block_indent = self.last_spaces_visible_offset;
let mut any_parser_matched = true;
while any_parser_matched {
any_parser_matched = false;
for f in PARSERS {
if self.run_and_check_if_progressed(f) {
any_parser_matched = true;
break;
}
}
let first_block_indent = self.last_spaces_visible_offset;
if first_block_indent.width_in_spaces != 0 {
self.submit_token(token::block_start(Code::empty(0), Code::empty(0)).into());
self.start_block(first_block_indent);
self.submit_token(token::newline(Code::empty(0), Code::empty(0)).into());
}
// Main parsing loop.
while PARSERS.iter().any(|f| self.run_and_check_if_progressed(f)) {}
// If any blocks were still open at EOF, close them.
while self.end_block().is_some() {
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
}
// If the last line ended in whitespace, ensure it is represented; we'll attach it to a
// phantom newline token.
if self.last_spaces_visible_offset != VisibleOffset(0) {
let left_offset_start = self.current_offset - self.last_spaces_offset;
let offset_code = self.input.slice(left_offset_start.utf8..self.current_offset.utf8);
@ -1412,13 +1434,14 @@ impl<'s> Lexer<'s> {
let eof = token::variant::Variant::Newline(token::variant::Newline());
self.submit_token(Token(offset, Code::empty(self.current_offset.utf16), eof));
}
// Sanity check.
let mut internal_error = self.internal_error.take();
if self.current_char.is_some() {
let message = format!("Lexer did not consume all input. State: {self:?}");
internal_error.get_or_insert(message);
}
let value = self.output;
trace!("Tokens:\n{:#?}", value);
ParseResult { value, internal_error }
}
}
@ -1491,9 +1514,30 @@ mod tests {
}
}
/// Lex `input`, assert that the token spans tile the whole input, and return the tokens with
/// their span offsets removed.
///
/// Each token contributes its left-offset range followed by its code range. Every range must
/// start where the previous one ended, and the total must equal `0..n`, where `n` is the length
/// of `input` in UTF-16 code units. An empty token list is treated as covering `0..0`.
fn lex_and_validate_spans(input: &str) -> Vec<Token> {
    /// Join `next` onto `prev` (if there is one), asserting that `next` begins where `prev` ends.
    fn concat<T: PartialEq + Debug + Copy>(prev: &Option<Range<T>>, next: &Range<T>) -> Range<T> {
        if let Some(prev) = prev {
            assert_eq!(prev.end, next.start);
            prev.start..next.end
        } else {
            next.clone()
        }
    }

    let tokens: Vec<_> = run(input).unwrap();
    // Accumulate the covered range token by token: whitespace first, then the token's own code.
    let sum_span = tokens.iter().fold(None, |covered, token| {
        let with_offset = Some(concat(&covered, &token.left_offset.code.range_utf16()));
        Some(concat(&with_offset, &token.code.range_utf16()))
    });
    let input_len = input.encode_utf16().count() as u32;
    assert_eq!(sum_span.unwrap_or_default(), 0..input_len);
    tokens.into_iter().map(|token| token.without_offsets()).collect()
}
fn test_lexer<'s>(input: &'s str, expected: Vec<Token<'s>>) {
let result: Vec<_> =
run(input).unwrap().into_iter().map(|token| token.without_offsets()).collect();
let result = lex_and_validate_spans(input);
let expected: Vec<_> = expected.into_iter().map(|token| token.without_offsets()).collect();
assert_eq!(result, expected);
}
@ -1517,23 +1561,21 @@ mod tests {
#[test]
fn test_case_block() {
let newline = newline_(empty(), test_code("\n"));
test_lexer_many(vec![
("\n", vec![newline_(empty(), test_code("\n"))]),
("\n foo\n bar", vec![
block_start_(empty(), empty()),
newline.clone(),
ident_(" ", "foo"),
newline.clone(),
ident_(" ", "bar"),
block_end_(empty(), empty()),
]),
("foo\n +", vec![
ident_("", "foo"),
block_start_(empty(), empty()),
newline,
operator_(" ", "+"),
block_end_(empty(), empty()),
]),
test_lexer("\n", vec![newline_(empty(), test_code("\n"))]);
test_lexer("\n foo\n bar", vec![
block_start_(empty(), empty()),
newline.clone(),
ident_(" ", "foo"),
newline.clone(),
ident_(" ", "bar"),
block_end_(empty(), empty()),
]);
test_lexer("foo\n +", vec![
ident_("", "foo"),
block_start_(empty(), empty()),
newline,
operator_(" ", "+"),
block_end_(empty(), empty()),
]);
}
@ -1541,21 +1583,29 @@ mod tests {
fn test_case_block_bad_indents() {
let newline = newline_(empty(), test_code("\n"));
#[rustfmt::skip]
test_lexer_many(vec![
("\n foo\n bar\nbaz", vec![
block_start_(empty(), empty()),
newline.clone(), ident_(" ", "foo"),
newline.clone(), ident_(" ", "bar"),
block_end_(empty(), empty()),
newline.clone(), ident_("", "baz"),
]),
("\n foo\n bar\n baz", vec![
block_start_(empty(), empty()),
newline.clone(), ident_(" ", "foo"),
newline.clone(), ident_(" ", "bar"),
newline, ident_(" ", "baz"),
block_end_(empty(), empty()),
]),
test_lexer(" foo\n bar\nbaz", vec![
block_start_(empty(), empty()),
newline_(empty(), empty()),
ident_(" ", "foo"),
newline.clone(), ident_(" ", "bar"),
block_end_(empty(), empty()),
newline.clone(), ident_("", "baz"),
]);
#[rustfmt::skip]
test_lexer("\n foo\n bar\nbaz", vec![
block_start_(empty(), empty()),
newline.clone(), ident_(" ", "foo"),
newline.clone(), ident_(" ", "bar"),
block_end_(empty(), empty()),
newline.clone(), ident_("", "baz"),
]);
#[rustfmt::skip]
test_lexer("\n foo\n bar\n baz", vec![
block_start_(empty(), empty()),
newline.clone(), ident_(" ", "foo"),
newline.clone(), ident_(" ", "bar"),
newline, ident_(" ", "baz"),
block_end_(empty(), empty()),
]);
}
@ -1594,12 +1644,10 @@ mod tests {
#[test]
fn test_case_idents() {
test_lexer_many(vec![
("", vec![]),
("_", vec![wildcard_("", "_")]),
("_'", vec![wildcard_("", "_'")]),
("_''", vec![wildcard_("", "_''")]),
]);
test_lexer("", vec![]);
test_lexer("_", vec![wildcard_("", "_")]);
test_lexer("_'", vec![wildcard_("", "_'")]);
test_lexer("_''", vec![wildcard_("", "_''")]);
test_lexer_many(lexer_case_idents(&[
"a",
"a'",
@ -1629,7 +1677,7 @@ mod tests {
/// Run the lexer test cases generated for a set of operators, and check that the input `+-`
/// produces exactly two tokens.
#[test]
fn test_case_operators() {
test_lexer_many(lexer_case_operators(&["+", "-", "=", "==", "===", ":", ","]));
assert_eq!(run("+-").unwrap().len(), 2);
assert_eq!(lex_and_validate_spans("+-").len(), 2);
}
/// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt.
@ -1777,6 +1825,12 @@ mod tests {
/* 5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = */ "<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"
}
}
/// Check that a `##` line followed by an assignment lexes into tokens with consistent spans.
#[test]
fn test_doc_comment() {
    // Assemble the two-line input: the `##` line, a newline, then the assignment.
    let mut code = String::from("## Foo.");
    code.push_str("\n");
    code.push_str("main = 23");
    lex_and_validate_spans(&code);
}
}

View File

@ -201,7 +201,6 @@ impl Default for Parser {
/// interpreted as a variable assignment or method definition.
fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
let mut left_offset = tree.span.left_offset.position_before();
if let Tree { variant: box Variant::Annotated(annotated), .. } = &mut tree {
annotated.expression = annotated.expression.take().map(expression_to_statement);
return tree;
@ -214,14 +213,22 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
documented.expression = documented.expression.take().map(expression_to_statement);
return tree;
}
if let Tree { variant: box Variant::TypeAnnotated(annotated), span } = tree {
let colon = annotated.operator;
let type_ = annotated.type_;
let variable = annotated.expression;
let mut tree = Tree::type_signature(variable, colon, type_);
tree.span.left_offset += span.left_offset;
if let Tree { variant: box Variant::TypeAnnotated(annotated), .. } = tree {
let TypeAnnotated { expression, operator, type_ } = annotated;
tree.variant = Box::new(Variant::TypeSignature(TypeSignature {
variable: expression,
operator,
type_,
}));
return tree;
}
if matches!(&tree, Tree {
variant: box Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, .. }),
..
}) {
return tree.with_error("Expected expression before indented block.");
}
let mut left_offset = tree.span.left_offset.position_before();
let tree_ = &mut tree;
let opr_app = match tree_ {
Tree { variant: box Variant::OprApp(opr_app), span } => {

View File

@ -82,7 +82,10 @@ fn import_body<'s>(
let field = match header.code.as_ref() {
"polyglot" => {
body = Some(
precedence.resolve(tokens).map(expect_ident).unwrap_or_else(expected_nonempty),
precedence
.resolve(tokens)
.map(expect_ident)
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut polyglot
}
@ -91,7 +94,7 @@ fn import_body<'s>(
precedence
.resolve(tokens)
.map(expect_qualified)
.unwrap_or_else(expected_nonempty),
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut from
}
@ -112,14 +115,17 @@ fn import_body<'s>(
}
"as" => {
body = Some(
precedence.resolve(tokens).map(expect_ident).unwrap_or_else(expected_nonempty),
precedence
.resolve(tokens)
.map(expect_ident)
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut as_
}
"hiding" => {
body = Some(
sequence_tree(precedence, tokens, expect_ident)
.unwrap_or_else(expected_nonempty),
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut hiding
}
@ -175,7 +181,7 @@ fn export_body<'s>(
precedence
.resolve(tokens)
.map(expect_qualified)
.unwrap_or_else(expected_nonempty),
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut from
}
@ -196,14 +202,17 @@ fn export_body<'s>(
}
"as" => {
body = Some(
precedence.resolve(tokens).map(expect_ident).unwrap_or_else(expected_nonempty),
precedence
.resolve(tokens)
.map(expect_ident)
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut as_
}
"hiding" => {
body = Some(
sequence_tree(precedence, tokens, expect_ident)
.unwrap_or_else(expected_nonempty),
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut hiding
}
@ -438,10 +447,9 @@ fn case_body<'s>(
_ => initial_case.push(item),
}
}
if let Some(_first) = initial_case.first() {
// FIXME: Create 0-length span at offset preceding `_first`.
let newline =
syntax::token::newline(Code::empty_without_offset(), Code::empty_without_offset());
if !initial_case.is_empty() {
let location = of_.code.position_after();
let newline = syntax::token::newline(location.clone(), location);
case_builder.push(syntax::item::Line { newline, items: initial_case });
}
block.into_iter().for_each(|line| case_builder.push(line));
@ -825,10 +833,10 @@ fn expect_qualified(tree: syntax::Tree) -> syntax::Tree {
}
}
fn expected_nonempty<'s>() -> syntax::Tree<'s> {
fn expected_nonempty(location: Code) -> syntax::Tree {
let empty = syntax::Tree::ident(syntax::token::ident(
Code::empty_without_offset(),
Code::empty_without_offset(),
location.clone(),
location,
false,
0,
false,

View File

@ -142,24 +142,14 @@ pub struct Resolver<'s> {
impl<'s> Resolver<'s> {
/// Create a new resolver, in statement context.
pub fn new_statement() -> Self {
let scopes = default();
let open_blocks = vec![syntax::item::Line {
newline: token::newline(Code::empty(0), Code::empty(0)),
items: default(),
}];
let macro_stack = default();
let segments = default();
let items = default();
let context = Context::Statement;
let precedence = syntax::operator::Precedence::new();
Self {
blocks: scopes,
lines: open_blocks,
macros: macro_stack,
segments,
items,
context,
precedence,
context: Context::Statement,
precedence: syntax::operator::Precedence::new(),
blocks: default(),
lines: default(),
macros: default(),
segments: default(),
items: default(),
}
}
@ -169,6 +159,10 @@ impl<'s> Resolver<'s> {
root_macro_map: &MacroMap,
tokens: impl IntoIterator<Item = Token<'s>>,
) -> syntax::Tree<'s> {
self.lines.push(syntax::item::Line {
newline: token::newline(Code::empty(0), Code::empty(0)),
items: default(),
});
tokens.into_iter().for_each(|t| self.push(root_macro_map, t));
self.finish_current_line();
let lines = self.lines.drain(..).map(|syntax::item::Line { newline, items }| {
@ -233,9 +227,11 @@ impl<'s> Resolver<'s> {
/// Append a token to the state.
fn push(&mut self, root_macro_map: &MacroMap, token: Token<'s>) {
match token.variant {
token::Variant::Newline(_) => {
self.finish_current_line();
let newline = token::newline(token.left_offset, token.code);
token::Variant::Newline(newline) => {
if !self.lines.is_empty() {
self.finish_current_line();
}
let newline = token.with_variant(newline);
self.lines.push(syntax::item::Line { newline, items: default() });
self.context = Context::Statement;
}

View File

@ -75,21 +75,24 @@ impl<'s> Code<'s> {
self.utf16
}
/// Split the UTF-8 code at the given byte offset.
pub fn split_at(&self, offset: usize) -> (Self, Self) {
let (left, right) = self.repr.split_at(offset);
let left_utf16 = left.chars().map(|c| c.len_utf16() as u32).sum();
let right_utf16 = self.utf16 - left_utf16;
/// Return the half-open range, in UTF-16 code units, that this element occupies: it begins at
/// the element's UTF-16 offset and extends for the element's UTF-16 length.
pub fn range_utf16(&self) -> Range<u32> {
    let start = self.offset_utf16;
    Range { start, end: start + self.utf16 }
}
/// Split the code at the given location.
pub fn split_at(&self, split: Length) -> (Self, Self) {
let (left, right) = self.repr.split_at(split.utf8);
(
Self {
repr: StrRef(left),
offset_utf16: self.offset_utf16,
utf16: left_utf16,
utf16: split.utf16,
},
Self {
repr: StrRef(right),
offset_utf16: self.offset_utf16 + left_utf16,
utf16: right_utf16,
offset_utf16: self.offset_utf16 + split.utf16,
utf16: self.utf16 - split.utf16,
},
)
}
@ -209,6 +212,12 @@ pub struct Length {
}
impl Length {
/// Measure `s`, recording both its size in UTF-8 bytes and its size in UTF-16 code units.
#[inline(always)]
pub fn of(s: &str) -> Self {
    let utf8 = s.len();
    let utf16 = s.encode_utf16().count() as u32;
    Self { utf8, utf16 }
}
/// Returns true if the code is empty.
#[inline(always)]
pub fn is_zero(&self) -> bool {
@ -220,6 +229,12 @@ impl Length {
pub fn utf8_bytes(&self) -> usize {
self.utf8
}
/// Return the length in UTF-16 code units.
///
/// This is the UTF-16 counterpart of [`Self::utf8_bytes`].
#[inline(always)]
pub fn utf16_len(&self) -> u32 {
self.utf16
}
}
impl Add for Length {

View File

@ -101,7 +101,7 @@ impl<'s> Offset<'s> {
/// Return a 0-length `Offset` representing the position after the end of this `Offset`. The
/// result has a default `visible` component.
pub fn position_after(&self) -> Self {
Self { visible: default(), code: self.code.position_before() }
Self { visible: default(), code: self.code.position_after() }
}
/// Return this value with its start position removed (set to 0). This can be used to compare
@ -184,6 +184,18 @@ impl<'s> Span<'s> {
pub fn add<T: Builder<'s>>(self, elem: &mut T) -> Self {
Builder::add_to_span(elem, self)
}
/// Return the half-open UTF-16 range of this element's code. The range begins at the position
/// just after the left offset and extends for the span's code length.
pub fn range_utf16(&self) -> Range<u32> {
    let code_start = self.left_offset.position_after().code.range_utf16().start;
    code_start..(code_start + self.code_length.utf16_len())
}
/// Return the total length of this span: the length of the left-offset code (the leading
/// whitespace) plus the length of the code itself.
pub fn length_including_whitespace(&self) -> code::Length {
    let whitespace = self.left_offset.code.length();
    whitespace + self.code_length
}
}
impl<'s> AsRef<Span<'s>> for Span<'s> {
@ -204,6 +216,11 @@ where
self.left_offset += other.left_offset;
self.code_length = other.code_length;
} else {
debug_assert_eq!(
self.left_offset.code.position_after().range_utf16().end
+ self.code_length.utf16_len(),
other.left_offset.code.position_before().range_utf16().start
);
self.code_length += other.left_offset.code.length() + other.code_length;
}
}

View File

@ -137,10 +137,10 @@ impl<'s> ExpressionBuilder<'s> {
pub fn operand(&mut self, operand: Operand<syntax::Tree<'s>>) {
if self.prev_type == Some(ItemType::Ast) {
if let Some(Operand { value: syntax::Tree { variant: box
syntax::tree::Variant::TextLiteral(ref mut lhs), .. }, .. }) = self.output.last_mut()
syntax::tree::Variant::TextLiteral(ref mut lhs), span: lhs_span }, .. }) = self.output.last_mut()
&& !lhs.closed
&& let box syntax::tree::Variant::TextLiteral(mut rhs) = operand.value.variant {
syntax::tree::join_text_literals(lhs, &mut rhs, operand.value.span);
syntax::tree::join_text_literals(lhs, &mut rhs, lhs_span, operand.value.span);
if let syntax::tree::TextLiteral { open: Some(open), newline: None, elements, closed: true, close: None } = lhs
&& open.code.starts_with('#') {
let elements = mem::take(elements);

View File

@ -135,19 +135,13 @@ impl<'s, T> Token<'s, T> {
/// position, which does not include the [`left_offset`]. It means that splitting at a
/// zero-length position will split the token into left offset only and a left-trimmed token.
#[inline(always)]
pub fn split_at(self, offset: Bytes) -> (Token<'s, ()>, Token<'s, ()>, T) {
pub fn split_at(self, split: code::Length) -> (Token<'s, ()>, Token<'s, ()>) {
let left_lexeme_offset = self.left_offset;
let right_lexeme_offset = self.code.position_after();
let (left_code, right_code) = self.code.split_at(offset.unchecked_raw());
let right_lexeme_offset =
Code::empty(self.code.position_before().range_utf16().end + split.utf16_len());
let (left_code, right_code) = self.code.split_at(split);
let left = Token(left_lexeme_offset, left_code, ());
let right = Token(right_lexeme_offset, right_code, ());
(left, right, self.variant)
}
/// A version of [`split_at`] that discards the associated variant.
#[inline(always)]
pub fn split_at_(self, offset: Bytes) -> (Token<'s, ()>, Token<'s, ()>) {
let (left, right, _) = self.split_at(offset);
(left, right)
}

View File

@ -608,7 +608,7 @@ impl<'s> span::Builder<'s> for ArgumentType<'s> {
// === CaseOf ===
/// A that may contain a case-expression in a case-of expression.
/// A line that may contain a case-expression in a case-of expression.
#[derive(Clone, Debug, Default, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
pub struct CaseLine<'s> {
/// The token beginning the line. This will always be present, unless the first case-expression
@ -661,7 +661,10 @@ impl<'s> Case<'s> {
impl<'s> span::Builder<'s> for Case<'s> {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
span.add(&mut self.pattern).add(&mut self.arrow).add(&mut self.expression)
span.add(&mut self.documentation)
.add(&mut self.pattern)
.add(&mut self.arrow)
.add(&mut self.expression)
}
}
@ -755,20 +758,23 @@ impl<'s> span::Builder<'s> for OperatorDelimitedTree<'s> {
pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
match (&mut *func.variant, &mut *arg.variant) {
(Variant::Annotated(func_ @ Annotated { argument: None, .. }), _) => {
func.span.code_length += arg.span.length_including_whitespace();
func_.argument = maybe_apply(mem::take(&mut func_.argument), arg).into();
func
}
(Variant::AnnotatedBuiltin(func_), _) => {
func.span.code_length += arg.span.length_including_whitespace();
func_.expression = maybe_apply(mem::take(&mut func_.expression), arg).into();
func
}
(Variant::OprApp(OprApp { lhs: Some(_), opr: Ok(_), rhs }),
Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, arguments }))
if rhs.is_none() => {
(Variant::OprApp(OprApp { lhs: Some(_), opr: Ok(_), rhs: rhs @ None }),
Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, arguments })) => {
func.span.code_length += arg.span.length_including_whitespace();
*rhs = block::body_from_lines(mem::take(arguments)).into();
func
}
(_, Variant::ArgumentBlockApplication(block)) if block.lhs.is_none() => {
arg.span.code_length += arg.span.left_offset.code.length() + func.span.code_length;
let func_left_offset = func.span.left_offset.take_as_prefix();
let arg_left_offset = mem::replace(&mut arg.span.left_offset, func_left_offset);
if let Some(first) = block.arguments.first_mut() {
@ -778,6 +784,7 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
arg
}
(_, Variant::OperatorBlockApplication(block)) if block.lhs.is_none() => {
arg.span.code_length += arg.span.left_offset.code.length() + func.span.code_length;
let func_left_offset = func.span.left_offset.take_as_prefix();
let arg_left_offset = mem::replace(&mut arg.span.left_offset, func_left_offset);
if let Some(first) = block.expressions.first_mut() {
@ -822,8 +829,10 @@ fn maybe_apply<'s>(f: Option<Tree<'s>>, x: Tree<'s>) -> Tree<'s> {
pub fn join_text_literals<'s>(
lhs: &mut TextLiteral<'s>,
rhs: &mut TextLiteral<'s>,
lhs_span: &mut Span<'s>,
rhs_span: Span<'s>,
) {
lhs_span.code_length += rhs_span.length_including_whitespace();
match rhs.elements.first_mut() {
Some(TextElement::Section { text }) => text.left_offset += rhs_span.left_offset,
Some(TextElement::Escape { token }) => token.left_offset += rhs_span.left_offset,
@ -863,6 +872,7 @@ pub fn apply_operator<'s>(
Variant::Number(Number { base: None, integer, fractional_digits })) => {
func_.integer = mem::take(integer);
func_.fractional_digits = mem::take(fractional_digits);
lhs_.span.code_length += rhs_.span.code_length;
lhs.take().unwrap()
}
_ => {
@ -901,6 +911,7 @@ pub fn apply_operator<'s>(
{
let dot = opr.clone();
let digits = digits.clone();
lhs.span.code_length += dot.code.length() + rhs.span.code_length;
lhs_.fractional_digits = Some(FractionalDigits { dot, digits });
return lhs.clone();
}
@ -912,8 +923,7 @@ pub fn apply_operator<'s>(
}
let ArgumentBlockApplication { lhs: _, arguments } = block;
let arguments = mem::take(arguments);
let rhs_ = block::body_from_lines(arguments);
rhs = Some(rhs_);
*rhs_ = block::body_from_lines(arguments);
}
}
}

View File

@ -88,7 +88,7 @@ where I: Iterator<Item = Line<'s>>
match line.expression.map(Prefix::try_from) {
Some(Ok(prefix)) => {
match self.prefixes.last_mut() {
Some(prefix) => prefix.newlines().push(line.newline),
Some(prefix) => prefix.push_newline(line.newline),
None => self.newline = Some(line.newline),
};
self.prefixes.push(prefix);
@ -96,7 +96,7 @@ where I: Iterator<Item = Line<'s>>
Some(Err(mut statement)) => {
return Some(match self.prefixes.last_mut() {
Some(prefix) => {
prefix.newlines().push(line.newline);
prefix.push_newline(line.newline);
for prefix in self.prefixes.drain(..).rev() {
statement = prefix.apply_to(statement);
}
@ -108,7 +108,7 @@ where I: Iterator<Item = Line<'s>>
}
None => {
match self.prefixes.last_mut() {
Some(prefix) => prefix.newlines().push(line.newline),
Some(prefix) => prefix.push_newline(line.newline),
None => return Some(line.newline.into()),
};
}
@ -154,23 +154,27 @@ impl<'s> TryFrom<Tree<'s>> for Prefix<'s> {
}
impl<'s> Prefix<'s> {
fn newlines(&mut self) -> &mut Vec<token::Newline<'s>> {
match self {
Prefix::Annotation { node: Annotated { newlines, .. }, .. }
| Prefix::BuiltinAnnotation { node: AnnotatedBuiltin { newlines, .. }, .. }
fn push_newline(&mut self, newline: token::Newline<'s>) {
let (newlines, span) = match self {
Prefix::Annotation { node: Annotated { newlines, .. }, span }
| Prefix::BuiltinAnnotation { node: AnnotatedBuiltin { newlines, .. }, span }
| Prefix::Documentation {
node: Documented { documentation: DocComment { newlines, .. }, .. },
..
} => newlines,
}
span,
} => (newlines, span),
};
span.code_length += newline.left_offset.code.length() + newline.code.length();
newlines.push(newline);
}
fn apply_to(mut self, expression: Tree<'s>) -> Tree<'s> {
*(match &mut self {
Prefix::Annotation { node, .. } => &mut node.expression,
Prefix::BuiltinAnnotation { node, .. } => &mut node.expression,
Prefix::Documentation { node, .. } => &mut node.expression,
}) = Some(expression);
let (expr, span) = match &mut self {
Prefix::Annotation { node, span } => (&mut node.expression, span),
Prefix::BuiltinAnnotation { node, span } => (&mut node.expression, span),
Prefix::Documentation { node, span } => (&mut node.expression, span),
};
span.code_length += expression.span.left_offset.code.length() + expression.span.code_length;
*expr = Some(expression);
self.into()
}
}