From 31a4eb2bfd73c7e8a88adc3e88135ec03517aa6b Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Mon, 30 Jan 2023 20:21:50 -0800 Subject: [PATCH] Fix parsing of tuple accessors after an identifier - e.g. myIdent.2 --- crates/compiler/parse/src/expr.rs | 17 ++++- crates/compiler/parse/src/ident.rs | 47 +++++++++++-- crates/compiler/parse/src/pattern.rs | 67 +++++++++++-------- .../tuple_access_after_ident.expr.result-ast | 50 ++++++++++++++ .../pass/tuple_access_after_ident.expr.roc | 2 + .../test_syntax/tests/test_snapshots.rs | 3 +- crates/docs/src/lib.rs | 4 +- crates/reporting/tests/test_reporting.rs | 19 ------ 8 files changed, 150 insertions(+), 59 deletions(-) create mode 100644 crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.result-ast create mode 100644 crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.roc diff --git a/crates/compiler/parse/src/expr.rs b/crates/compiler/parse/src/expr.rs index 19df864ced..c86cb688b3 100644 --- a/crates/compiler/parse/src/expr.rs +++ b/crates/compiler/parse/src/expr.rs @@ -2429,7 +2429,13 @@ fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { // The first value in the iterator is the variable name, // e.g. `foo` in `foo.bar.baz` let mut answer = match iter.next() { - Some(ident) => Expr::Var { module_name, ident }, + Some(Accessor::RecordField(ident)) => Expr::Var { module_name, ident }, + Some(Accessor::TupleIndex(_)) => { + // TODO: make this state impossible to represent in Ident::Access, + // by splitting out parts[0] into a separate field with a type of `&'a str`, + // rather than a `&'a [Accessor<'a>]`. + panic!("Parsed an Ident::Access with a first part of a tuple index"); + } None => { panic!("Parsed an Ident::Access with no parts"); } @@ -2441,7 +2447,14 @@ fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { // Wrap the previous answer in the new one, so we end up // with a nested Expr. That way, `foo.bar.baz` gets represented // in the AST as if it had been written (foo.bar).baz all along. - answer = Expr::RecordAccess(arena.alloc(answer), field); + match field { + Accessor::RecordField(field) => { + answer = Expr::RecordAccess(arena.alloc(answer), field); + } + Accessor::TupleIndex(index) => { + answer = Expr::TupleAccess(arena.alloc(answer), index); + } + } } answer diff --git a/crates/compiler/parse/src/ident.rs b/crates/compiler/parse/src/ident.rs index 283d5c2d52..0bb2596590 100644 --- a/crates/compiler/parse/src/ident.rs +++ b/crates/compiler/parse/src/ident.rs @@ -41,7 +41,7 @@ pub enum Ident<'a> { /// foo or foo.bar or Foo.Bar.baz.qux Access { module_name: &'a str, - parts: &'a [&'a str], + parts: &'a [Accessor<'a>], }, /// .foo { foo: 42 } RecordAccessorFunction(&'a str), @@ -197,7 +197,7 @@ pub fn parse_ident<'a>( if module_name.is_empty() { if let Some(first) = parts.first() { for keyword in crate::keyword::KEYWORDS.iter() { - if first == keyword { + if first == &Accessor::RecordField(keyword) { return Err((NoProgress, EExpr::Start(initial.pos()))); } } @@ -339,11 +339,32 @@ where } } +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Accessor<'a> { RecordField(&'a str), TupleIndex(&'a str), } +impl<'a> Accessor<'a> { + pub fn len(&self) -> usize { + match self { + Accessor::RecordField(name) => name.len(), + Accessor::TupleIndex(name) => name.len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() > 0 + } + + pub fn as_inner(&self) -> &'a str { + match self { + Accessor::RecordField(name) => name, + Accessor::TupleIndex(name) => name, + } + } +} + /// a `.foo` or `.1` accessor function fn chomp_accessor(buffer: &[u8], pos: Position) -> Result { // assumes the leading `.` has been chomped already @@ -474,7 +495,7 @@ fn chomp_identifier_chain<'a>( if !first_is_uppercase { let first_part = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) }; - parts.push(first_part); + parts.push(Accessor::RecordField(first_part)); } match chomp_access_chain(&buffer[chomped..], &mut parts) { @@ -518,7 +539,7 @@ fn chomp_identifier_chain<'a>( let value = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) }; let ident = Ident::Access { module_name: "", - parts: arena.alloc([value]), + parts: arena.alloc([Accessor::RecordField(value)]), }; Ok((chomped as u32, ident)) } @@ -591,7 +612,7 @@ fn chomp_concrete_type(buffer: &[u8]) -> Result<(&str, &str, usize), Progress> { } } -fn chomp_access_chain<'a>(buffer: &'a [u8], parts: &mut Vec<'a, &'a str>) -> Result { +fn chomp_access_chain<'a>(buffer: &'a [u8], parts: &mut Vec<'a, Accessor<'a>>) -> Result { let mut chomped = 0; while let Some(b'.') = buffer.get(chomped) { @@ -603,11 +624,23 @@ fn chomp_access_chain<'a>(buffer: &'a [u8], parts: &mut Vec<'a, &'a str>) -> Res &buffer[chomped + 1..chomped + 1 + name.len()], ) }; - parts.push(value); + parts.push(Accessor::RecordField(value)); chomped += name.len() + 1; } - Err(_) => return Err(chomped as u32 + 1), + Err(_) => match chomp_integer_part(slice) { + Ok(name) => { + let value = unsafe { + std::str::from_utf8_unchecked( + &buffer[chomped + 1..chomped + 1 + name.len()], + ) + }; + parts.push(Accessor::TupleIndex(value)); + + chomped += name.len() + 1; + } + Err(_) => return Err(chomped as u32 + 1), + }, }, None => return Err(chomped as u32 + 1), } diff --git a/crates/compiler/parse/src/pattern.rs b/crates/compiler/parse/src/pattern.rs index 197194dfcf..723631cc48 100644 --- a/crates/compiler/parse/src/pattern.rs +++ b/crates/compiler/parse/src/pattern.rs @@ -1,6 +1,6 @@ use crate::ast::{Has, Pattern, PatternAs, Spaceable}; use crate::blankspace::{space0_e, spaces, spaces_before}; -use crate::ident::{lowercase_ident, parse_ident, Ident}; +use crate::ident::{lowercase_ident, parse_ident, Accessor, Ident}; use crate::keyword; use crate::parser::Progress::{self, *}; use crate::parser::{ @@ -377,34 +377,45 @@ fn loc_ident_pattern_help<'a>( Ident::Access { module_name, parts } => { // Plain identifiers (e.g. `foo`) are allowed in patterns, but // more complex ones (e.g. `Foo.bar` or `foo.bar.baz`) are not. - if crate::keyword::KEYWORDS.contains(&parts[0]) { - Err((NoProgress, EPattern::End(original_state.pos()))) - } else if module_name.is_empty() && parts.len() == 1 { - Ok(( - MadeProgress, - Loc { - region: loc_ident.region, - value: Pattern::Identifier(parts[0]), - }, - state, - )) - } else { - let malformed_str = if module_name.is_empty() { - parts.join(".") - } else { - format!("{}.{}", module_name, parts.join(".")) - }; - Ok(( - MadeProgress, - Loc { - region: loc_ident.region, - value: Pattern::Malformed( - String::from_str_in(&malformed_str, arena).into_bump_str(), - ), - }, - state, - )) + + for keyword in crate::keyword::KEYWORDS.iter() { + if parts[0] == Accessor::RecordField(keyword) { + return Err((NoProgress, EPattern::End(original_state.pos()))); + } } + + if module_name.is_empty() && parts.len() == 1 { + if let Accessor::RecordField(var) = &parts[0] { + return Ok(( + MadeProgress, + Loc { + region: loc_ident.region, + value: Pattern::Identifier(var), + }, + state, + )); + } + } + let mut malformed_str = String::new_in(arena); + + if !module_name.is_empty() { + malformed_str.push_str(module_name); + }; + for part in parts { + if !malformed_str.is_empty() { + malformed_str.push('.'); + } + malformed_str.push_str(part.as_inner()); + } + + Ok(( + MadeProgress, + Loc { + region: loc_ident.region, + value: Pattern::Malformed(malformed_str.into_bump_str()), + }, + state, + )) } Ident::RecordAccessorFunction(_string) => Err(( MadeProgress, diff --git a/crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.result-ast b/crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.result-ast new file mode 100644 index 0000000000..c659930aaa --- /dev/null +++ b/crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.result-ast @@ -0,0 +1,50 @@ +Defs( + Defs { + tags: [ + Index(2147483648), + ], + regions: [ + @0-15, + ], + space_before: [ + Slice(start = 0, length = 0), + ], + space_after: [ + Slice(start = 0, length = 0), + ], + spaces: [], + type_defs: [], + value_defs: [ + Body( + @0-3 Identifier( + "abc", + ), + @6-15 Tuple( + [ + @7-8 Num( + "1", + ), + @10-11 Num( + "2", + ), + @13-14 Num( + "3", + ), + ], + ), + ), + ], + }, + @16-21 SpaceBefore( + TupleAccess( + Var { + module_name: "", + ident: "abc", + }, + "0", + ), + [ + Newline, + ], + ), +) diff --git a/crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.roc b/crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.roc new file mode 100644 index 0000000000..ca3a64639a --- /dev/null +++ b/crates/compiler/test_syntax/tests/snapshots/pass/tuple_access_after_ident.expr.roc @@ -0,0 +1,2 @@ +abc = (1, 2, 3) +abc.0 \ No newline at end of file diff --git a/crates/compiler/test_syntax/tests/test_snapshots.rs b/crates/compiler/test_syntax/tests/test_snapshots.rs index 765860b564..3dbe08fbca 100644 --- a/crates/compiler/test_syntax/tests/test_snapshots.rs +++ b/crates/compiler/test_syntax/tests/test_snapshots.rs @@ -251,6 +251,7 @@ mod test_snapshots { malformed/malformed_ident_due_to_underscore.expr, malformed/malformed_pattern_field_access.expr, // See https://github.com/roc-lang/roc/issues/399 malformed/malformed_pattern_module_name.expr, // See https://github.com/roc-lang/roc/issues/399 + malformed/qualified_tag.expr, malformed/underscore_expr_in_def.expr, pass/ability_demand_signature_is_multiline.expr, pass/ability_multi_line.expr, @@ -405,7 +406,6 @@ mod test_snapshots { pass/positive_int.expr, pass/provides_type.header, pass/qualified_field.expr, - malformed/qualified_tag.expr, pass/qualified_var.expr, pass/record_access_after_tuple.expr, pass/record_destructure_def.expr, @@ -426,6 +426,7 @@ mod test_snapshots { pass/tag_pattern.expr, pass/ten_times_eleven.expr, pass/three_arg_closure.expr, + pass/tuple_access_after_ident.expr, pass/tuple_access_after_record.expr, pass/tuple_accessor_function.expr, pass/tuple_type.expr, diff --git a/crates/docs/src/lib.rs b/crates/docs/src/lib.rs index 08b28bada8..bafc84bb7d 100644 --- a/crates/docs/src/lib.rs +++ b/crates/docs/src/lib.rs @@ -15,7 +15,7 @@ use roc_load::docs::{ModuleDocumentation, RecordField}; use roc_load::{ExecutionMode, LoadConfig, LoadedModule, LoadingProblem, Threading}; use roc_module::symbol::{Interns, Symbol}; use roc_packaging::cache::{self, RocCacheDir}; -use roc_parse::ident::{parse_ident, Ident}; +use roc_parse::ident::{parse_ident, Accessor, Ident}; use roc_parse::state::State; use roc_region::all::Region; use std::fs; @@ -816,7 +816,7 @@ fn markdown_to_html( let mut iter = parts.iter(); match iter.next() { - Some(symbol_name) if iter.next().is_none() => { + Some(Accessor::RecordField(symbol_name)) if iter.next().is_none() => { let DocUrl { url, title } = doc_url( all_exposed_symbols, scope, diff --git a/crates/reporting/tests/test_reporting.rs b/crates/reporting/tests/test_reporting.rs index 59222d3a97..d149d3c013 100644 --- a/crates/reporting/tests/test_reporting.rs +++ b/crates/reporting/tests/test_reporting.rs @@ -5593,25 +5593,6 @@ All branches in an `if` must have the same type! "### ); - test_report!( - part_starts_with_number, - indoc!( - r#" - foo.100 - "# - ), - @r###" - ── SYNTAX PROBLEM ──────────────────────────────────────── /code/proj/Main.roc ─ - - I trying to parse a record field access here: - - 4│ foo.100 - ^ - - So I expect to see a lowercase letter next, like .name or .height. - "### - ); - test_report!( closure_underscore_ident, indoc!(