line:col positions in parser (#8203)

Add `line:column` information to source code references produced by the parser. This information will be used by GUI2 as part of the solution to #8134.

# Important Notes
- `parse_all_enso_files.sh` has been used to ensure this doesn't affect tree structures.
- `parse_all_enso_files.sh` now checks emitted locations for consistency, and has been used to verify that all line:col references match the values found by an independent scan of the source up to the given UTF-8 position.
This commit is contained in:
Kaz Wesley 2023-11-08 08:53:39 -08:00 committed by GitHub
parent f21e09bb65
commit ce042569b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1211 additions and 275 deletions

View File

@ -64,7 +64,7 @@ debug-assertions = false
[profile.test]
opt-level = 0
lto = false
debug = 1
debug = 2
debug-assertions = true
[profile.integration-test]

View File

@ -20,14 +20,14 @@ const RENAME = new Map([
['type', 'typeNode'],
// Rename source references to reflect our usage:
// - In `Tree`s:
['spanLeftOffsetCodeOffsetUtf16', 'whitespaceStartInCodeParsed'],
['spanLeftOffsetCodeUtf16', 'whitespaceLengthInCodeParsed'],
['spanLeftOffsetCodeStartUtf16', 'whitespaceStartInCodeParsed'],
['spanLeftOffsetCodeLenUtf16', 'whitespaceLengthInCodeParsed'],
['spanCodeLengthUtf16', 'childrenLengthInCodeParsed'],
// - In `Token`s:
['leftOffsetCodeOffsetUtf16', 'whitespaceStartInCodeBuffer'],
['leftOffsetCodeUtf16', 'whitespaceLengthInCodeBuffer'],
['codeUtf16', 'lengthInCodeBuffer'],
['codeOffsetUtf16', 'startInCodeBuffer'],
['leftOffsetCodeStartUtf16', 'whitespaceStartInCodeBuffer'],
['leftOffsetCodeLenUtf16', 'whitespaceLengthInCodeBuffer'],
['codeLenUtf16', 'lengthInCodeBuffer'],
['codeStartUtf16', 'startInCodeBuffer'],
])
export function mapIdent(ident: string): string {

View File

@ -29,5 +29,5 @@ pub fn main() {
use std::io::Read;
let mut input = String::new();
std::io::stdin().read_to_string(&mut input).unwrap();
println!("{:#?}", enso_parser::lexer::run(&input));
println!("{:#?}", enso_parser::lexer::debug::lex_and_validate_spans(&input));
}

View File

@ -23,6 +23,7 @@
#![warn(unused_qualifications)]
use enso_metamodel_lexpr::ToSExpr;
use enso_parser::source::code::debug::LocationCheck;
use enso_reflect::Reflect;
use lexpr::Value;
use std::collections::HashSet;
@ -122,10 +123,18 @@ fn strip_hidden_fields(tree: Value) -> Value {
":spanLeftOffsetVisible",
":spanLeftOffsetCodeReprBegin",
":spanLeftOffsetCodeReprLen",
":spanLeftOffsetCodeUtf16",
":spanLeftOffsetCodeOffsetUtf16",
":spanLeftOffsetCodeLenUtf8",
":spanLeftOffsetCodeLenUtf16",
":spanLeftOffsetCodeLenNewlines",
":spanLeftOffsetCodeLenLineChars16",
":spanLeftOffsetCodeStartUtf8",
":spanLeftOffsetCodeStartUtf16",
":spanLeftOffsetCodeStartLine",
":spanLeftOffsetCodeStartCol16",
":spanCodeLengthUtf8",
":spanCodeLengthUtf16",
":spanCodeLengthNewlines",
":spanCodeLengthLineChars16",
];
let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
@ -194,7 +203,11 @@ fn tuplify(value: Value) -> Value {
/// Check the internal consistency of the `Tree` and `Token` spans from the given root, and validate
/// that every character in the given range is covered exactly once in the token spans.
pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std::ops::Range<u32>) {
pub fn validate_spans(
tree: &enso_parser::syntax::tree::Tree,
expected_span: std::ops::Range<u32>,
locations: &mut LocationCheck,
) {
let mut sum_span = None;
fn concat<T: PartialEq + std::fmt::Debug + Copy>(
a: &Option<std::ops::Range<T>>,
@ -208,24 +221,33 @@ pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std
None => b.clone(),
}
}
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range()));
tree.visit_items(|item| match item {
enso_parser::syntax::item::Ref::Token(token) => {
if !(token.left_offset.is_empty() && token.code.is_empty()) {
sum_span = Some(concat(&sum_span, &token.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.left_offset.code.range()));
sum_span = Some(concat(&sum_span, &token.code.range()));
}
let left_offset = token.left_offset.code.range();
let code = token.code.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
enso_parser::syntax::item::Ref::Tree(tree) => {
let children_span =
concat(&Some(tree.span.left_offset.code.range_utf16()), &tree.span.range_utf16());
validate_spans(tree, children_span.clone());
concat(&Some(tree.span.left_offset.code.range()), &tree.span.range());
let children_span_ = children_span.start.utf16..children_span.end.utf16;
validate_spans(tree, children_span_, locations);
sum_span = Some(concat(&sum_span, &children_span));
let left_offset = tree.span.left_offset.code.range();
let code = tree.span.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
});
if expected_span.is_empty() {
assert!(sum_span.map_or(true, |range| range.is_empty()));
} else {
assert_eq!(sum_span.unwrap(), expected_span);
let sum_span = sum_span.unwrap_or_default();
let sum_span = sum_span.start.utf16..sum_span.end.utf16;
assert_eq!(sum_span, expected_span);
}
}

View File

@ -41,10 +41,12 @@ fn check_file(path: &str, mut code: &str) {
}
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
for (parsed, original) in ast.code().lines().zip(code.lines()) {
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {path}");
}
locations.check(code);
let s_expr = enso_parser_debug::to_s_expr(&ast, code);
println!("{s_expr}");
}

View File

@ -482,6 +482,13 @@ fn dot_operator_blocks() {
#[test]
fn code_block_argument_list() {
#[rustfmt::skip]
let code = [
"foo",
" bar",
];
test!(&code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar))));
#[rustfmt::skip]
let code = [
"value = foo",
@ -492,7 +499,6 @@ fn code_block_argument_list() {
];
test(&code.join("\n"), expect);
#[rustfmt::skip]
let code = [
"value = foo",
@ -1012,28 +1018,19 @@ x"#;
#[test]
fn interpolated_literals_in_inline_text() {
#[rustfmt::skip]
let cases = [
(r#"'Simple case.'"#, block![(TextLiteral #((Section "Simple case.")))]),
(r#"'With a `splice`.'"#, block![(TextLiteral
#((Section "With a ")
(Splice (Ident splice))
(Section ".")))]),
(r#"'` SpliceWithLeadingWhitespace`'"#, block![(TextLiteral
#((Splice (Ident SpliceWithLeadingWhitespace))))]),
(r#"'String with \n escape'"#, block![
(TextLiteral
#((Section "String with ") (Escape '\n') (Section " escape")))]),
(r#"'\x0Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\u000Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\u{0000A}escape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\U0000000Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
test!(r#"'Simple case.'"#, (TextLiteral #((Section "Simple case."))));
test!(r#"'With a `splice`.'"#, (TextLiteral
#((Section "With a ")
(Splice (Ident splice))
(Section "."))));
test!(r#"'` SpliceWithLeadingWhitespace`'"#,
(TextLiteral #((Splice (Ident SpliceWithLeadingWhitespace)))));
test!(r#"'String with \n escape'"#,
(TextLiteral #((Section "String with ") (Escape '\n') (Section " escape"))));
test!(r#"'\x0Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\u000Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\u{0000A}escape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\U0000000Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
}
#[test]
@ -1580,7 +1577,9 @@ fn test(code: &str, expect: lexpr::Value) {
fn parse(code: &str) -> enso_parser::syntax::tree::Tree {
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
locations.check(code);
ast
}

View File

@ -15,7 +15,12 @@
set -e
cargo build -p enso-parser-debug --bin enso-parser-debug
cargo build -p enso-parser-debug --bin lexer
ENSO_FILES=$(find distribution/ test/ -name '*.enso' -print | sort)
for x in $ENSO_FILES; do echo -n "$x "; target/rust/debug/enso-parser-debug <$x; done
for x in $ENSO_FILES; do
echo -n "$x "
target/rust/debug/lexer <$x >/dev/null
target/rust/debug/enso-parser-debug <$x
done

View File

@ -19,6 +19,9 @@ const CODE_GETTER: &str = "codeRepr";
const WHITESPACE_GETTER: &str = "getWhitespace";
const TREE_BEGIN: &str = "fieldSpanLeftOffsetCodeReprBegin";
const TREE_LEN: &str = "fieldSpanLeftOffsetCodeReprLen";
const TREE_WHITESPACE: &str = "fieldSpanLeftOffsetCodeLenUtf16";
const TOKEN_WHITESPACE: &str = "fieldLeftOffsetCodeLenUtf16";
const TOKEN_CODE_LENGTH: &str = "fieldCodeLenUtf16";
/// Derive deserialization for all types in the typegraph.
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
@ -151,16 +154,16 @@ fn start_whitespace() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'sta
|MaterializerInput { message }| format!("{message}.position()")
}
fn start_code_tree() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldSpanLeftOffsetCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TREE_WHITESPACE})")
}
fn end_code_tree() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.position()")
}
fn start_code_token() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldLeftOffsetCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TOKEN_WHITESPACE})")
}
fn end_code_token() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TOKEN_CODE_LENGTH})")
}

View File

@ -8,10 +8,11 @@ use crate::prelude::*;
use crate::source::*;
use crate::syntax::*;
use crate::source::code::Length;
use crate::source::code::Location;
use std::str;
// =================
// === Constants ===
// =================
@ -89,35 +90,13 @@ pub struct Lexer<'s> {
token_storage: VecAllocation<Token<'s>>,
}
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct StrOffset {
utf8: Bytes,
utf16: u32,
}
impl Sub for StrOffset {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
Self { utf8: self.utf8 - rhs.utf8, utf16: self.utf16 - rhs.utf16 }
}
}
impl Add for StrOffset {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
Self { utf8: self.utf8 + rhs.utf8, utf16: self.utf16 + rhs.utf16 }
}
}
/// Internal state of the [`Lexer`].
#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct LexerState {
current_char: Option<char>,
current_offset: StrOffset,
last_spaces_offset: StrOffset,
current_offset: Location,
last_spaces_offset: Location,
last_spaces_visible_offset: VisibleOffset,
current_block_indent: VisibleOffset,
block_indent_stack: Vec<VisibleOffset>,
@ -139,7 +118,11 @@ enum State {
},
}
type Mark<'s> = (StrOffset, Offset<'s>);
#[derive(Debug, Clone)]
struct Mark<'s> {
location: Location,
offset: Offset<'s>,
}
impl<'s> Lexer<'s> {
/// Constructor.
@ -163,17 +146,24 @@ impl<'s> Lexer<'s> {
fn next_input_char(&mut self) -> bool {
let next = self.iterator.next();
if let Some((current_offset, current_char)) = next {
self.current_offset = StrOffset {
utf8: Bytes(current_offset),
utf16: self.current_offset.utf16
+ self.current_char.map_or(0, |c| c.len_utf16() as u32),
let prev = self.current_offset;
let char_len16 = self.current_char.map_or(0, |c| c.len_utf16() as u32);
self.current_offset = Location {
utf8: u32_from(current_offset),
utf16: prev.utf16 + char_len16,
line: prev.line,
col16: prev.col16 + char_len16,
};
self.current_char = Some(current_char);
true
} else if let Some(c) = self.current_char {
self.current_offset = StrOffset {
utf8: Bytes(self.input.len()),
utf16: self.current_offset.utf16 + c.len_utf16() as u32,
let prev = self.current_offset;
let char_len16 = c.len_utf16() as u32;
self.current_offset = Location {
utf8: u32_from(self.input.len()),
utf16: prev.utf16 + char_len16,
line: prev.line,
col16: prev.col16 + char_len16,
};
self.current_char = None;
true
@ -182,34 +172,28 @@ impl<'s> Lexer<'s> {
}
}
/// Run the provided function and compute how much input it consumed.
#[inline(always)]
fn run_and_get_offset<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> (T, StrOffset) {
let start_offset = self.current_offset;
let out = f(self);
let len = self.current_offset - start_offset;
(out, len)
}
/// Run the provided function and check if it consumed any input.
#[inline(always)]
fn run_and_check_if_progressed(&mut self, f: impl FnOnce(&mut Self)) -> bool {
self.run_and_get_offset(f).1.utf8.is_positive()
let start = self.current_offset;
f(self);
let end = self.current_offset;
end != start
}
/// Consume spaces after parsing a [`Token`] and update the internal spacing info.
#[inline(always)]
fn spaces_after_lexeme(&mut self) {
(self.last_spaces_visible_offset, self.last_spaces_offset) =
self.run_and_get_offset(|this| this.spaces());
self.last_spaces_offset = self.current_offset;
self.last_spaces_visible_offset = self.spaces();
}
/// Consume spaces after parsing a [`Token`] and update the internal spacing info. Doesn't
/// consume more than the specified [`VisibleOffset`] of spaces.
#[inline(always)]
fn spaces_after_lexeme_with_limit(&mut self, limit: VisibleOffset) {
(self.last_spaces_visible_offset, self.last_spaces_offset) =
self.run_and_get_offset(|this| this.spaces_with_limit(limit));
self.last_spaces_offset = self.current_offset;
self.last_spaces_visible_offset = self.spaces_with_limit(limit);
}
/// Run the provided function. If it consumed any chars, return the [`Token`] containing the
@ -217,21 +201,19 @@ impl<'s> Lexer<'s> {
#[inline(always)]
fn token<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> Option<Token<'s, T>> {
let start = self.current_offset;
let (elem, len) = self.run_and_get_offset(f);
len.utf8.is_positive().as_some_from(|| {
let end = start + len;
let left_offset_start = start - self.last_spaces_offset;
let (offset_code, code) = self
.input
.slice(left_offset_start.utf8..end.utf8)
.split_at(self.last_spaces_offset.utf8.unchecked_raw());
let elem = f(self);
let end = self.current_offset;
(end != start).as_some_from(|| {
let left_offset_start = self.last_spaces_offset;
let (offset_code, code) = self.input
[usize_from(left_offset_start.utf8)..usize_from(end.utf8)]
.split_at(usize_from(start.utf8 - left_offset_start.utf8));
let visible_offset = self.last_spaces_visible_offset;
let offset = Offset(
visible_offset,
Code::from_str_at_offset(offset_code, left_offset_start.utf16),
);
let offset =
Offset(visible_offset, Code::from_str_at_location(offset_code, left_offset_start));
self.spaces_after_lexeme();
Token(offset, Code::from_str_at_offset(code, start.utf16), elem)
debug_assert_eq!(left_offset_start + Length::of(offset_code), start);
Token(offset, Code::from_str_at_location(code, start), elem)
})
}
@ -240,9 +222,9 @@ impl<'s> Lexer<'s> {
#[inline(always)]
fn marker_token<T>(&mut self, elem: T) -> Token<'s, T> {
let visible_offset = VisibleOffset(0);
let start = self.current_offset - self.last_spaces_offset;
let offset = Offset(visible_offset, Code::empty(start.utf16));
Token(offset, Code::empty(start.utf16), elem)
let start = self.last_spaces_offset;
let offset = Offset(visible_offset, Code::empty(start));
Token(offset, Code::empty(start), elem)
}
/// Push the [`token`] to the result stream.
@ -657,7 +639,7 @@ impl<'s> Lexer<'s> {
match token.code.as_ref() {
// Special-case: Split into multiple operators.
"+-" => {
let (left, right) = token.split_at(code::Length::of("+"));
let (left, right) = token.split_at(Length::of("+"));
let lhs = analyze_operator(&left.code);
self.submit_token(left.with_variant(token::Variant::operator(lhs)));
// The `-` in this case is not identical to a free `-`: It is only allowed a
@ -886,6 +868,7 @@ impl<'s> Lexer<'s> {
if let Some(token) = token {
if let Some(base) = base {
self.submit_token(token.with_variant(token::Variant::number_base()));
let after_base = self.current_offset;
if let Some(digits) = match base {
token::Base::Binary => self.token(|this| this.take_while(is_binary_digit)),
token::Base::Octal => self.token(|this| this.take_while(is_octal_digit)),
@ -899,8 +882,8 @@ impl<'s> Lexer<'s> {
.with_binary_infix_precedence(u32::MAX)
.as_token_joiner();
self.submit_token(Token(
Code::empty(self.current_offset.utf16),
Code::empty(self.current_offset.utf16),
Code::empty(after_base),
Code::empty(after_base),
token::Variant::operator(joiner),
));
self.submit_token(digits.with_variant(token::Variant::digits(Some(base))));
@ -937,12 +920,12 @@ impl<'s> Lexer<'s> {
};
let indent = self.current_block_indent;
let open_quote_start = self.mark();
self.last_spaces_visible_offset = VisibleOffset(0);
self.last_spaces_offset = default();
self.take_next();
self.last_spaces_visible_offset = VisibleOffset(0);
self.last_spaces_offset = self.current_offset;
// At least two quote characters.
if let Some(char) = self.current_char && char == quote_char {
let close_quote_start = self.mark();
let close_quote_start = self.mark_without_whitespace();
self.take_next();
let mut multiline = false;
// If more than two quote characters: Start a multiline quote.
@ -955,7 +938,7 @@ impl<'s> Lexer<'s> {
return;
} else {
// Exactly two quote characters: Open and shut case.
let close_quote_end = self.mark();
let close_quote_end = self.mark_without_whitespace();
let token = self.make_token(open_quote_start, close_quote_start.clone(),
token::Variant::text_start());
self.output.push(token);
@ -965,7 +948,7 @@ impl<'s> Lexer<'s> {
}
} else {
// One quote followed by non-quote character: Inline quote.
let open_quote_end = self.mark();
let open_quote_end = self.mark_without_whitespace();
let token = self.make_token(open_quote_start, open_quote_end,
token::Variant::text_start());
self.output.push(token);
@ -980,7 +963,7 @@ impl<'s> Lexer<'s> {
block_indent: VisibleOffset,
text_type: TextType,
) {
let open_quote_end = self.mark();
let open_quote_end = self.mark_without_whitespace();
let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start());
self.output.push(token);
let mut initial_indent = None;
@ -1002,9 +985,9 @@ impl<'s> Lexer<'s> {
}
fn end_splice(&mut self, state: State) {
let splice_quote_start = self.mark();
let splice_quote_start = self.mark_without_whitespace();
self.take_next();
let splice_quote_end = self.mark();
let splice_quote_end = self.mark_without_whitespace();
let token =
self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol());
self.output.push(token);
@ -1038,8 +1021,8 @@ impl<'s> Lexer<'s> {
let mut newlines = vec![];
let mut new_indent = None;
loop {
let mut before_newline = self.mark();
if before_newline.0 == text_start.0 {
let mut before_newline = self.mark_without_whitespace();
if before_newline.location == text_start.location {
before_newline = text_start.clone();
}
let mut newline = self.take_1('\r');
@ -1059,7 +1042,8 @@ impl<'s> Lexer<'s> {
} else {
before_newline = text_start;
}
let newline_end = self.mark();
self.advance_line_pos();
let newline_end = self.mark_without_whitespace();
let token =
self.make_token(before_newline, newline_end, token::Variant::newline());
newlines.push(token);
@ -1092,12 +1076,9 @@ impl<'s> Lexer<'s> {
self.output.push(text_end);
self.end_blocks(indent, newlines.first().as_ref().unwrap());
self.output.extend(newlines);
if self.current_offset == text_start.0 {
self.last_spaces_visible_offset = text_start.1.visible;
self.last_spaces_offset = StrOffset {
utf8: text_start.1.code.len(),
utf16: text_start.1.code.len_utf16(),
};
if self.current_offset == text_start.location {
self.last_spaces_visible_offset = text_start.offset.visible;
self.last_spaces_offset = text_start.offset.code.range().start;
}
return TextEndedAt::End;
}
@ -1109,7 +1090,7 @@ impl<'s> Lexer<'s> {
}
}
if interpolate && char == '\\' {
let mut backslash_start = self.mark();
let mut backslash_start = self.mark_without_whitespace();
self.take_next();
if let Some(char) = self.current_char {
let token = self.make_token(
@ -1122,13 +1103,15 @@ impl<'s> Lexer<'s> {
} else {
self.output.push(token);
}
self.last_spaces_offset = self.current_offset;
text_start = self.text_escape(backslash_start, char);
continue;
}
self.last_spaces_offset = self.current_offset;
continue;
}
if interpolate && char == '`' {
let mut splice_quote_start = self.mark();
let mut splice_quote_start = self.mark_without_whitespace();
let token = self.make_token(
text_start.clone(),
splice_quote_start.clone(),
@ -1140,7 +1123,7 @@ impl<'s> Lexer<'s> {
self.output.push(token);
}
self.take_next();
let splice_quote_end = self.mark();
let splice_quote_end = self.mark_without_whitespace();
let token = self.make_token(
splice_quote_start,
splice_quote_end,
@ -1148,23 +1131,24 @@ impl<'s> Lexer<'s> {
);
self.output.push(token);
self.stack.push(state);
self.last_spaces_offset = self.current_offset;
return TextEndedAt::Splice;
}
self.take_next();
}
let text_end = self.mark();
let text_end = self.mark_without_whitespace();
let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section());
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.output.push(token);
}
let end_token = if self.current_char == closing_char {
self.take_next();
let close_quote_end = self.mark();
let close_quote_end = self.mark_without_whitespace();
self.make_token(text_end, close_quote_end, token::Variant::text_end())
} else {
Token::from(token::text_end(
Code::empty(self.current_offset.utf16),
Code::empty(self.current_offset.utf16),
Code::empty(self.current_offset),
Code::empty(self.current_offset),
))
};
self.output.push(end_token);
@ -1197,7 +1181,7 @@ impl<'s> Lexer<'s> {
if delimited && self.current_char == Some('}') {
self.take_next();
}
let sequence_end = self.mark();
let sequence_end = self.mark_without_whitespace();
let token = self.make_token(
backslash_start,
sequence_end.clone(),
@ -1223,7 +1207,7 @@ impl<'s> Lexer<'s> {
_ => None,
};
self.take_next();
let escape_end = self.mark();
let escape_end = self.mark_without_whitespace();
let token = self.make_token(
backslash_start,
escape_end.clone(),
@ -1236,23 +1220,30 @@ impl<'s> Lexer<'s> {
fn mark(&mut self) -> Mark<'s> {
let start = self.current_offset;
let left_offset_start = start - self.last_spaces_offset;
let offset_code = self.input.slice(left_offset_start.utf8..start.utf8);
let visible_offset = self.last_spaces_visible_offset;
self.last_spaces_visible_offset = VisibleOffset(0);
self.last_spaces_offset = default();
(
start,
Offset(visible_offset, Code::from_str_at_offset(offset_code, left_offset_start.utf16)),
)
let visible_offset = mem::take(&mut self.last_spaces_visible_offset);
let left_offset_start = mem::replace(&mut self.last_spaces_offset, start);
let offset_code = &self.input[usize_from(left_offset_start.utf8)..usize_from(start.utf8)];
Mark {
location: start,
offset: Offset(
visible_offset,
Code::from_str_at_location(offset_code, left_offset_start),
),
}
}
fn mark_without_whitespace(&mut self) -> Mark<'s> {
let start = self.current_offset;
self.last_spaces_offset = start;
self.mark()
}
fn make_token(&self, from: Mark<'s>, to: Mark<'s>, variant: token::Variant) -> Token<'s> {
let (start, offset) = from;
let end = to.0;
let start8 = start.utf8.unchecked_raw();
let end8 = end.utf8.unchecked_raw();
Token(offset, Code::from_str_at_offset(&self.input[start8..end8], start.utf16), variant)
let Mark { location: start, offset } = from;
let end = to.location;
let start8 = usize_from(start.utf8);
let end8 = usize_from(end.utf8);
Token(offset, Code::from_str_at_location(&self.input[start8..end8], start), variant)
}
}
@ -1319,15 +1310,26 @@ impl<'s> Lexer<'s> {
// =============
impl<'s> Lexer<'s> {
#[allow(clippy::collapsible_if)]
fn line_break(&mut self) -> Option<Token<'s, ()>> {
self.token(|this| {
if !this.take_1('\n') {
if this.take_1('\r') {
this.take_1('\n');
}
let token = self.token(|this| {
let matched = if this.take_1('\n') {
true
} else if this.take_1('\r') {
this.take_1('\n');
true
} else {
false
};
if matched {
this.advance_line_pos()
}
})
});
token
}
fn advance_line_pos(&mut self) {
self.current_offset.line += 1;
self.current_offset.col16 = 0;
}
fn newlines(&mut self) {
@ -1340,11 +1342,11 @@ impl<'s> Lexer<'s> {
while let Some(token) = self.line_break() {
newlines.push(token.with_variant(token::Variant::newline()));
}
if let Some(last) = newlines.last() {
if let Some(first) = newlines.first() {
let block_indent = self.last_spaces_visible_offset;
if block_indent > self.current_block_indent {
let block_start = {
let location = last.left_offset.code.position_before();
let location = first.left_offset.code.position_before();
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::block_start())
};
@ -1410,9 +1412,10 @@ impl<'s> Lexer<'s> {
self.spaces_after_lexeme();
let first_block_indent = self.last_spaces_visible_offset;
if first_block_indent.width_in_spaces != 0 {
self.submit_token(token::block_start(Code::empty(0), Code::empty(0)).into());
let start = Location::default();
self.submit_token(token::block_start(Code::empty(start), Code::empty(start)).into());
self.start_block(first_block_indent);
self.submit_token(token::newline(Code::empty(0), Code::empty(0)).into());
self.submit_token(token::newline(Code::empty(start), Code::empty(start)).into());
}
// Main parsing loop.
while PARSERS.iter().any(|f| self.run_and_check_if_progressed(f)) {}
@ -1424,15 +1427,14 @@ impl<'s> Lexer<'s> {
// If the last line ended in whitespace, ensure it is represented; we'll attach it to a
// phantom newline token.
if self.last_spaces_visible_offset != VisibleOffset(0) {
let left_offset_start = self.current_offset - self.last_spaces_offset;
let offset_code = self.input.slice(left_offset_start.utf8..self.current_offset.utf8);
let left_offset_start = self.last_spaces_offset;
let offset_code = &self.input
[usize_from(left_offset_start.utf8)..usize_from(self.current_offset.utf8)];
let visible_offset = self.last_spaces_visible_offset;
let offset = Offset(
visible_offset,
Code::from_str_at_offset(offset_code, left_offset_start.utf16),
);
let offset =
Offset(visible_offset, Code::from_str_at_location(offset_code, left_offset_start));
let eof = token::variant::Variant::Newline(token::variant::Newline());
self.submit_token(Token(offset, Code::empty(self.current_offset.utf16), eof));
self.submit_token(Token(offset, Code::empty(self.current_offset), eof));
}
// Sanity check.
let mut internal_error = self.internal_error.take();
@ -1464,7 +1466,7 @@ pub mod test {
pub use token::*;
fn test_code(code: &str) -> Code {
Code::from_str_without_offset(code)
Code::from_str_without_location(code)
}
/// Constructor.
@ -1488,36 +1490,28 @@ pub mod test {
/// Constructor.
pub fn operator_<'s>(left_offset: &'s str, code: &'s str) -> Token<'s> {
let variant = token::Variant::operator(analyze_operator(code));
let variant = Variant::operator(analyze_operator(code));
let left_offset = test_code(left_offset);
let code = test_code(code);
Token(left_offset, code, variant)
}
}
#[cfg(test)]
mod tests {
use super::test::*;
fn usize_from(x: u32) -> usize {
usize::try_from(x).unwrap()
}
fn u32_from(x: usize) -> u32 {
u32::try_from(x).unwrap()
}
/// Testing/debugging helpers.
pub mod debug {
use super::*;
fn empty<'a>() -> Code<'a> {
Code::empty_without_offset()
}
fn test_code(code: &str) -> Code {
Code::from_str_without_offset(code)
}
fn test_lexer_many<'s>(inputs: Vec<(&'s str, Vec<Token<'s>>)>) {
for (input, output) in inputs {
test_lexer(input, output)
}
}
/// Lex the input, check the spans for consistency, and return the tokens with the span offsets
/// stripped.
fn lex_and_validate_spans(input: &str) -> Vec<Token> {
let result: Vec<_> = run(input).unwrap();
/// Lex the input and check the spans for consistency.
pub fn lex_and_validate_spans(input: &str) -> Vec<Token> {
let tokens: Vec<_> = run(input).unwrap();
let mut sum_span = None;
fn concat<T: PartialEq + Debug + Copy>(a: &Option<Range<T>>, b: &Range<T>) -> Range<T> {
match a {
@ -1528,16 +1522,48 @@ mod tests {
None => b.clone(),
}
}
for token in &result {
sum_span = Some(concat(&sum_span, &token.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.code.range_utf16()));
let mut locations = code::debug::LocationCheck::new();
for token in &tokens {
let left_offset = token.left_offset.code.range();
let code = token.code.range();
sum_span = Some(concat(&sum_span, &left_offset));
sum_span = Some(concat(&sum_span, &code));
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
let sum_span = sum_span.unwrap_or_default();
let sum_span = sum_span.start.utf16..sum_span.end.utf16;
assert_eq!(sum_span, 0..(input.encode_utf16().count() as u32));
locations.check(input);
tokens
}
}
#[cfg(test)]
mod tests {
use super::debug::*;
use super::test::*;
use super::*;
fn strip_offsets<'s>(tokens: impl IntoIterator<Item = Token<'s>>) -> Vec<Token<'s>> {
tokens.into_iter().map(|token| token.without_offsets()).collect()
}
fn empty<'a>() -> Code<'a> {
Code::empty_without_location()
}
fn test_code(code: &str) -> Code {
Code::from_str_without_location(code)
}
fn test_lexer_many<'s>(inputs: Vec<(&'s str, Vec<Token<'s>>)>) {
for (input, output) in inputs {
test_lexer(input, output)
}
assert_eq!(sum_span.unwrap_or_default(), 0..(input.encode_utf16().count() as u32));
result.into_iter().map(|token| token.without_offsets()).collect()
}
fn test_lexer<'s>(input: &'s str, expected: Vec<Token<'s>>) {
let result = lex_and_validate_spans(input);
let result = strip_offsets(lex_and_validate_spans(input));
let expected: Vec<_> = expected.into_iter().map(|token| token.without_offsets()).collect();
assert_eq!(result, expected);
}
@ -1831,6 +1857,39 @@ mod tests {
let code = ["## Foo.", "main = 23"].join("\n");
lex_and_validate_spans(&code);
}
#[test]
fn test_comment() {
let code = ["# comment", "main = 23"].join("\n");
lex_and_validate_spans(&code);
}
#[test]
fn test_text() {
lex_and_validate_spans("f 'foo' 'bar'");
lex_and_validate_spans(r#"'String with \' escape'"#);
lex_and_validate_spans("'String with `splice`.'");
lex_and_validate_spans(&["## a", "", " b"].join("\n"));
}
#[test]
fn test_indented_doc_after_blank_line() {
let code = ["type Redshift_Error_Mapper", "", " A"].join("\n");
lex_and_validate_spans(&code);
}
#[test]
fn test_based_numbers() {
lex_and_validate_spans("0x23");
lex_and_validate_spans("2_010101");
}
#[test]
fn test_line_endings() {
lex_and_validate_spans("Windows\r\n...");
lex_and_validate_spans("Linux\n...");
lex_and_validate_spans("Classic Mac OS\r...");
}
}

View File

@ -159,8 +159,9 @@ impl<'s> Resolver<'s> {
root_macro_map: &MacroMap,
tokens: impl IntoIterator<Item = Token<'s>>,
) -> syntax::Tree<'s> {
let start = crate::source::code::Location::default();
self.lines.push(syntax::item::Line {
newline: token::newline(Code::empty(0), Code::empty(0)),
newline: token::newline(Code::empty(start), Code::empty(start)),
items: default(),
});
tokens.into_iter().for_each(|t| self.push(root_macro_map, t));

View File

@ -13,98 +13,133 @@ use crate::prelude::*;
#[derive(Debug, Clone, Default, Eq, PartialEq, Deref)]
pub struct StrRef<'s>(pub &'s str);
/// A code representation. It can either be a borrowed source code or a modified owned one.
/// Identifies a location in source code.
///
/// All offsets are relative to the start of the document. Multiple unit
/// systems are carried in parallel so that consumers can use whichever is
/// convenient (UTF-8 offsets for byte-oriented tooling, UTF-16 and
/// line/column for editor-style references).
#[derive(
    Copy,
    Clone,
    Debug,
    Default,
    Eq,
    PartialEq,
    Serialize,
    Reflect,
    Deserialize,
    PartialOrd,
    Ord
)]
pub struct Location {
    /// Offset from the beginning, in UTF-8 code units (bytes).
    #[reflect(hide)]
    pub utf8: u32,
    /// Offset from the beginning, in UTF-16 code units (two-byte words).
    #[reflect(hide)]
    pub utf16: u32,
    /// Line number, starting from 0. The recognized line terminators are CR, LF, or CRLF.
    #[reflect(hide)]
    pub line: u32,
    /// Offset from start of line, in UTF-16 code units.
    #[reflect(hide)]
    pub col16: u32,
}
impl Add<Length> for Location {
    type Output = Self;

    /// Advance this location by the given span length.
    fn add(self, rhs: Length) -> Self::Output {
        // If the span contains newlines, `line_chars16` is already measured
        // from the start of the span's final line, so the current column is
        // discarded; otherwise the column advances within the current line.
        let column_base = if rhs.newlines == 0 { self.col16 } else { 0 };
        Self {
            utf8: self.utf8 + rhs.utf8,
            utf16: self.utf16 + rhs.utf16,
            line: self.line + rhs.newlines,
            col16: column_base + rhs.line_chars16,
        }
    }
}
/// A code representation.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Reflect, Deserialize, Deref)]
#[allow(missing_docs)]
pub struct Code<'s> {
/// The borrowed string data.
#[serde(serialize_with = "crate::serialization::serialize_cow")]
#[serde(deserialize_with = "crate::serialization::deserialize_cow")]
#[reflect(as = "crate::serialization::Code", flatten, hide)]
#[deref]
pub repr: StrRef<'s>,
#[reflect(hide)]
offset_utf16: u32,
#[reflect(hide)]
utf16: u32,
pub repr: StrRef<'s>,
#[reflect(flatten)]
start: Location,
/// The length of the source code.
#[reflect(flatten)]
pub len: Length,
}
impl<'s> Code<'s> {
/// Return a code reference from the given source and offset within the document.
#[inline(always)]
pub fn from_str_at_offset(repr: &'s str, offset_utf16: u32) -> Self {
let utf16 = repr.chars().map(|c| c.len_utf16() as u32).sum();
pub fn from_str_at_location(repr: &'s str, location: Location) -> Self {
let len = Length::of(repr);
let repr = StrRef(repr);
Self { repr, offset_utf16, utf16 }
Self { repr, start: location, len }
}
/// Return a code reference at the beginning of the document. This can be used in testing, when
/// accurate code references are not needed.
#[inline(always)]
pub fn from_str_without_offset(repr: &'s str) -> Self {
Self::from_str_at_offset(repr, 0)
pub fn from_str_without_location(repr: &'s str) -> Self {
Self::from_str_at_location(repr, default())
}
/// Return a copy of this value, and set this value to a 0-length value following the returned
/// value.
#[inline(always)]
pub fn take_as_prefix(&mut self) -> Self {
let end = self.offset_utf16 + self.utf16;
let end = self.start + self.len;
Self {
repr: mem::take(&mut self.repr),
offset_utf16: mem::replace(&mut self.offset_utf16, end),
utf16: mem::take(&mut self.utf16),
repr: mem::take(&mut self.repr),
start: mem::replace(&mut self.start, end),
len: mem::take(&mut self.len),
}
}
/// Return a 0-length `Code` located immediately before the start of this `Code`.
pub fn position_before(&self) -> Self {
Self { repr: default(), offset_utf16: self.offset_utf16, utf16: default() }
Self { repr: default(), start: self.start, len: default() }
}
/// Return a 0-length `Code` located immediately after the end of this `Code`.
pub fn position_after(&self) -> Self {
Self {
repr: default(),
offset_utf16: self.offset_utf16 + self.utf16,
utf16: default(),
}
}
/// Return the length in UTF-16 code units.
pub fn len_utf16(&self) -> u32 {
self.utf16
Self { repr: default(), start: self.start + self.len, len: default() }
}
/// Return the start and end of the UTF-16 source code for this element.
pub fn range_utf16(&self) -> Range<u32> {
self.offset_utf16..(self.offset_utf16 + self.utf16)
pub fn range(&self) -> Range<Location> {
self.start..(self.start + self.len)
}
/// Split the code at the given location.
pub fn split_at(&self, split: Length) -> (Self, Self) {
let (left, right) = self.repr.split_at(split.utf8);
(
Self {
repr: StrRef(left),
offset_utf16: self.offset_utf16,
utf16: split.utf16,
},
Self {
repr: StrRef(right),
offset_utf16: self.offset_utf16 + split.utf16,
utf16: self.utf16 - split.utf16,
},
)
let (left, right) = self.repr.split_at(usize::try_from(split.utf8).unwrap());
let right_len = Length {
utf8: self.len.utf8 - split.utf8,
utf16: self.len.utf16 - split.utf16,
newlines: self.len.newlines - split.newlines,
line_chars16: self.len.line_chars16
- if split.newlines == 0 { split.line_chars16 } else { 0 },
};
(Self { repr: StrRef(left), start: self.start, len: split }, Self {
repr: StrRef(right),
start: self.start + split,
len: right_len,
})
}
/// Return a reference to an empty string, not associated with any location in the document.
pub fn empty_without_offset() -> Self {
Self { repr: StrRef(""), offset_utf16: 0, utf16: 0 }
pub fn empty_without_location() -> Self {
Self { repr: StrRef(""), start: default(), len: default() }
}
/// Return a reference to an empty string.
pub fn empty(offset: u32) -> Self {
Self { repr: StrRef(""), offset_utf16: offset, utf16: 0 }
pub fn empty(location: Location) -> Self {
Self { repr: StrRef(""), start: location, len: default() }
}
/// Length of the code in bytes.
@ -116,7 +151,7 @@ impl<'s> Code<'s> {
/// Length of the code.
#[inline(always)]
pub fn length(&self) -> Length {
Length { utf8: self.repr.len(), utf16: self.utf16 }
self.len
}
/// True if the code is the empty string.
@ -127,8 +162,8 @@ impl<'s> Code<'s> {
/// Return this value with its start position removed (set to 0). This can be used to compare
/// values ignoring offsets.
pub fn without_offset(&self) -> Self {
Self { repr: self.repr.clone(), offset_utf16: default(), utf16: self.utf16 }
pub fn without_location(&self) -> Self {
Self { repr: self.repr.clone(), start: default(), len: self.len }
}
}
@ -172,10 +207,10 @@ impl<'s> AddAssign<&Code<'s>> for Code<'s> {
match (self.is_empty(), other.is_empty()) {
(false, true) => (),
(true, true) => {
// The span builder works by starting with `Span::empty_without_offset()`, and
// appending to the right side. In order to ensure every span has an offset: When
// The span builder works by starting with `Span::empty_without_location()`, and
// appending to the right side. In order to ensure every span has a location: When
// the LHS is empty, take the location from the RHS even if the RHS is also empty.
self.offset_utf16 = other.offset_utf16;
self.start = other.start;
}
(true, false) => {
*self = other.clone();
@ -193,7 +228,7 @@ impl<'s> AddAssign<&Code<'s>> for Code<'s> {
// Concatenating two UTF-8 strings always yields a valid UTF-8 string.
self.repr = StrRef(std::str::from_utf8_unchecked(joined));
}
self.utf16 += other.utf16;
self.len += other.len;
}
}
}
@ -205,17 +240,41 @@ impl<'s> AddAssign<&Code<'s>> for Code<'s> {
/// The length of a [`Code`] object.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Serialize, Reflect, Deserialize)]
pub struct Length {
/// An offset, in UTF-8 code units (bytes).
#[reflect(skip)]
#[serde(skip)]
utf8: usize,
utf16: u32,
pub utf8: u32,
/// An offset, in UTF-16 code units (two-byte words).
pub utf16: u32,
/// A difference in line numbers.
#[reflect(hide)]
pub newlines: u32,
/// If `newlines` is 0, this is the difference in UTF-16 code-unit positions within a line; if
/// `newlines` is nonzero, this is the position within the line ending the range.
pub line_chars16: u32,
}
impl Length {
/// Returns the length of the given input.
#[inline(always)]
pub fn of(s: &str) -> Self {
Self { utf8: s.len(), utf16: s.encode_utf16().count() as u32 }
let mut utf16 = 0;
let mut newlines = 0;
let mut line_chars16 = 0;
let mut prev = None;
for c in s.chars() {
let char_len16 = c.len_utf16() as u32;
utf16 += char_len16;
line_chars16 += char_len16;
if c == '\r' || c == '\n' {
line_chars16 = 0;
}
if c == '\r' || (c == '\n' && prev != Some('\r')) {
newlines += 1;
}
prev = Some(c);
}
Self { utf8: u32::try_from(s.len()).unwrap(), utf16, newlines, line_chars16 }
}
/// Returns true if the code is empty.
@ -226,7 +285,7 @@ impl Length {
/// Return the length in UTF-8 code units (bytes).
#[inline(always)]
pub fn utf8_bytes(&self) -> usize {
pub fn utf8_bytes(&self) -> u32 {
self.utf8
}
@ -242,8 +301,13 @@ impl Add for Length {
#[inline(always)]
fn add(self, rhs: Self) -> Self::Output {
let Self { utf8, utf16 } = self;
Self { utf8: utf8 + rhs.utf8, utf16: utf16 + rhs.utf16 }
let Self { utf8, utf16, newlines, line_chars16 } = self;
Self {
utf8: utf8 + rhs.utf8,
utf16: utf16 + rhs.utf16,
newlines: newlines + rhs.newlines,
line_chars16: if rhs.newlines == 0 { line_chars16 } else { 0 } + rhs.line_chars16,
}
}
}
@ -259,3 +323,67 @@ impl Display for Length {
write!(f, "{}", self.utf8)
}
}
// ====================
// === Test support ===
// ====================
/// Testing/debugging helpers.
pub mod debug {
    use super::*;
    use std::collections::BTreeMap;

    /// Checks consistency of observed `Location`s. Compares `line:col` values against values found
    /// in an independent scan of the input source code.
    ///
    /// Locations are keyed by their UTF-8 offset; `check` walks the input once
    /// and verifies every recorded location against the position computed by
    /// the scan.
    #[derive(Debug, Default)]
    pub struct LocationCheck {
        // Map from UTF-8 byte offset to the full location recorded there.
        locations: BTreeMap<u32, Location>,
    }

    impl LocationCheck {
        /// Create a new empty checker.
        pub fn new() -> Self {
            Self::default()
        }

        /// Add the location to the collection waiting to be checked.
        pub fn add(&mut self, location: Location) {
            self.locations.insert(location.utf8, location);
        }

        /// Add multiple locations to the collection waiting to be checked.
        pub fn extend(&mut self, locations: &[Location]) {
            self.locations.extend(locations.iter().map(|loc| (loc.utf8, *loc)));
        }

        /// Check all previously-added locations for consistency with the input.
        ///
        /// Panics (via `assert_eq!`) if any recorded location disagrees with
        /// the scan, or if any recorded offset never coincides with a char
        /// boundary of `input` (or its end).
        pub fn check(mut self, input: &str) {
            let mut pos = Location::default();
            let mut prev = None;
            for (i, c) in input.char_indices() {
                // `pos` describes the position *before* consuming `c`: the
                // UTF-8 offset is set before the comparison, while the other
                // fields are advanced only after it.
                pos.utf8 = i as u32;
                if let Some(loc) = self.locations.remove(&(i as u32)) {
                    assert_eq!(loc, pos);
                }
                let char_len = c.len_utf16() as u32;
                pos.utf16 += char_len;
                pos.col16 += char_len;
                // Any line terminator resets the column.
                if c == '\r' || c == '\n' {
                    pos.col16 = 0;
                }
                // CRLF counts as a single terminator: the LF of a CRLF pair
                // must not increment the line number a second time.
                if c == '\r' || (c == '\n' && prev != Some('\r')) {
                    pos.line += 1;
                }
                prev = Some(c);
            }
            // A location exactly at the end of the input is also valid.
            if let Some(loc) = self.locations.remove(&(input.len() as u32)) {
                pos.utf8 = input.len() as u32;
                assert_eq!(loc, pos);
            }
            // Anything left over was recorded at an offset that never matched
            // a char boundary (or end of input) — report all of them at once.
            let non_char_boundary_locations: Vec<_> = self.locations.values().cloned().collect();
            assert_eq!(&non_char_boundary_locations, &[]);
        }
    }
}

View File

@ -6,7 +6,7 @@ use crate::source::*;
use crate::syntax::*;
use crate::lexer;
use crate::source::code::Location;
/// Common traits.
@ -107,7 +107,7 @@ impl<'s> Offset<'s> {
/// Return this value with its start position removed (set to 0). This can be used to compare
/// spans ignoring offsets.
pub fn without_offset(&self) -> Self {
Self { visible: self.visible, code: self.code.without_offset() }
Self { visible: self.visible, code: self.code.without_location() }
}
}
@ -161,7 +161,7 @@ pub struct Span<'s> {
impl<'s> Span<'s> {
/// Constructor.
pub fn empty_without_offset() -> Self {
Self { left_offset: Code::empty_without_offset().into(), code_length: default() }
Self { left_offset: Code::empty_without_location().into(), code_length: default() }
}
/// Check whether the span is empty.
@ -185,10 +185,10 @@ impl<'s> Span<'s> {
Builder::add_to_span(elem, self)
}
/// Return the start and end of the UTF-16 source code for this element.
pub fn range_utf16(&self) -> Range<u32> {
let start = self.left_offset.position_after().code.range_utf16().start;
let end = start + self.code_length.utf16_len();
/// Return the start and end of the source code for this element.
pub fn range(&self) -> Range<Location> {
let start = self.left_offset.position_after().code.range().start;
let end = start + self.code_length;
start..end
}
@ -217,11 +217,11 @@ where
self.code_length = other.code_length;
} else {
debug_assert_eq!(
self.left_offset.code.position_after().range_utf16().end
+ self.code_length.utf16_len(),
other.left_offset.code.position_before().range_utf16().start
self.left_offset.code.position_after().range().end + self.code_length,
other.left_offset.code.position_before().range().start
);
self.code_length += other.left_offset.code.length() + other.code_length;
self.code_length += other.left_offset.code.length();
self.code_length += other.code_length;
}
}
}
@ -399,7 +399,7 @@ where T: Builder<'s>
{
#[inline(always)]
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
self.iter_mut().fold(span, |sum, new_span| Builder::add_to_span(new_span, sum))
self.as_mut_slice().add_to_span(span)
}
}

View File

@ -137,8 +137,7 @@ impl<'s, T> Token<'s, T> {
#[inline(always)]
pub fn split_at(self, split: code::Length) -> (Token<'s, ()>, Token<'s, ()>) {
let left_lexeme_offset = self.left_offset;
let right_lexeme_offset =
Code::empty(self.code.position_before().range_utf16().end + split.utf16_len());
let right_lexeme_offset = Code::empty(self.code.position_before().range().end + split);
let (left_code, right_code) = self.code.split_at(split);
let left = Token(left_lexeme_offset, left_code, ());
let right = Token(right_lexeme_offset, right_code, ());
@ -170,7 +169,7 @@ impl<'s, V: Clone> Token<'s, V> {
pub fn without_offsets(&self) -> Self {
Self {
left_offset: self.left_offset.without_offset(),
code: self.code.without_offset(),
code: self.code.without_location(),
variant: self.variant.clone(),
}
}

View File

@ -774,7 +774,8 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
func
}
(_, Variant::ArgumentBlockApplication(block)) if block.lhs.is_none() => {
arg.span.code_length += arg.span.left_offset.code.length() + func.span.code_length;
let code = func.span.code_length + arg.span.left_offset.code.length() + arg.span.code_length;
arg.span.code_length = code;
let func_left_offset = func.span.left_offset.take_as_prefix();
let arg_left_offset = mem::replace(&mut arg.span.left_offset, func_left_offset);
if let Some(first) = block.arguments.first_mut() {
@ -784,7 +785,8 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
arg
}
(_, Variant::OperatorBlockApplication(block)) if block.lhs.is_none() => {
arg.span.code_length += arg.span.left_offset.code.length() + func.span.code_length;
let code = func.span.code_length + arg.span.left_offset.code.length() + arg.span.code_length;
arg.span.code_length = code;
let func_left_offset = func.span.left_offset.take_as_prefix();
let arg_left_offset = mem::replace(&mut arg.span.left_offset, func_left_offset);
if let Some(first) = block.expressions.first_mut() {