Improve parsing of scalar literals

* Unify parsing of string literals and scalar literals, to (e.g.) ensure escapes are handled uniformly. Notably, this makes unicode escapes valid in scalar literals.
* Add a variety of custom error messages about specific failure cases of parsing string/scalar literals. For example, if we're expecting a string (e.g. a package name in the header) and the user tried using single quotes, give a clear message about that.
* Fix formatting of unicode escapes (they previously used {}, now correctly use () to match roc strings)
This commit is contained in:
Joshua Warner 2023-01-07 14:41:08 -08:00
parent 6fc593142d
commit 94070e8ba6
No known key found for this signature in database
GPG Key ID: 89AD497003F93FDD
22 changed files with 411 additions and 173 deletions

View File

@ -3,7 +3,7 @@
#![allow(unused_imports)]
use bumpalo::collections::Vec as BumpVec;
use roc_can::expr::{unescape_char, IntValue};
use roc_can::expr::IntValue;
use roc_can::num::{
finish_parsing_base, finish_parsing_float, finish_parsing_num, ParsedNumResult,
};
@ -620,7 +620,7 @@ pub(crate) fn flatten_str_lines(pool: &mut Pool, lines: &[&[StrSegment<'_>]]) ->
Interpolated(loc_expr) => {
return Pattern2::UnsupportedPattern(loc_expr.region);
}
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
EscapedChar(escaped) => buf.push(escaped.unescape()),
}
}
}

View File

@ -117,7 +117,7 @@ fn flatten_str_lines<'a>(
todo!()
}
}
EscapedChar(escaped) => buf.push(roc_can::expr::unescape_char(escaped)),
EscapedChar(escaped) => buf.push(escaped.unescape()),
}
}
}

View File

@ -18,7 +18,7 @@ use roc_module::called_via::CalledVia;
use roc_module::ident::{ForeignSymbol, Lowercase, TagName};
use roc_module::low_level::LowLevel;
use roc_module::symbol::Symbol;
use roc_parse::ast::{self, Defs, EscapedChar, StrLiteral};
use roc_parse::ast::{self, Defs, StrLiteral};
use roc_parse::pattern::PatternType::*;
use roc_problem::can::{PrecedenceProblem, Problem, RuntimeError};
use roc_region::all::{Loc, Region};
@ -2297,7 +2297,7 @@ fn flatten_str_lines<'a>(
);
}
}
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
EscapedChar(escaped) => buf.push(escaped.unescape()),
}
}
}
@ -2355,19 +2355,6 @@ fn desugar_str_segments(var_store: &mut VarStore, segments: Vec<StrSegment>) ->
loc_expr.value
}
/// Returns the char that would have been originally parsed to
pub fn unescape_char(escaped: &EscapedChar) -> char {
use EscapedChar::*;
match escaped {
Backslash => '\\',
Quote => '"',
CarriageReturn => '\r',
Tab => '\t',
Newline => '\n',
}
}
#[derive(Clone, Debug)]
pub struct Declarations {
pub declarations: Vec<DeclarationTag>,

View File

@ -1,6 +1,6 @@
use crate::annotation::freshen_opaque_def;
use crate::env::Env;
use crate::expr::{canonicalize_expr, unescape_char, Expr, IntValue, Output};
use crate::expr::{canonicalize_expr, Expr, IntValue, Output};
use crate::num::{
finish_parsing_base, finish_parsing_float, finish_parsing_num, FloatBound, IntBound, NumBound,
ParsedNumResult,
@ -935,7 +935,7 @@ fn flatten_str_lines(lines: &[&[StrSegment<'_>]]) -> Pattern {
Interpolated(loc_expr) => {
return Pattern::UnsupportedPattern(loc_expr.region);
}
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
EscapedChar(escaped) => buf.push(escaped.unescape()),
}
}
}

View File

@ -431,14 +431,30 @@ impl<'a> Formattable for Expr<'a> {
}
}
/// Reports whether `ch` lies in one of the two code point ranges
/// U+0000..=U+001F or U+007F..=U+009F.
fn needs_unicode_escape(ch: char) -> bool {
    let code = u32::from(ch);
    code <= 0x1f || (0x7f..=0x9f).contains(&code)
}
pub(crate) fn format_sq_literal(buf: &mut Buf, s: &str) {
buf.push('\'');
for c in s.chars() {
if c == '"' {
buf.push_char_literal('"')
} else {
for escaped in c.escape_default() {
buf.push_char_literal(escaped);
match c {
'"' => buf.push_str("\""),
'\'' => buf.push_str("\\\'"),
'\t' => buf.push_str("\\t"),
'\r' => buf.push_str("\\r"),
'\n' => buf.push_str("\\n"),
'\\' => buf.push_str("\\\\"),
_ => {
if needs_unicode_escape(c) {
buf.push_str(&format!("\\u({:x})", c as u32))
} else {
buf.push_char_literal(c)
}
}
}
}
}

View File

@ -2,6 +2,7 @@ use std::fmt::Debug;
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PackageHeader, PlatformHeader};
use crate::ident::Ident;
use crate::parser::ESingleQuote;
use bumpalo::collections::{String, Vec};
use bumpalo::Bump;
use roc_collections::soa::{EitherIndex, Index, Slice};
@ -116,11 +117,20 @@ pub enum StrSegment<'a> {
Interpolated(Loc<&'a Expr<'a>>), // e.g. (name) in "Hi, \(name)!"
}
/// One piece of a single-quoted (scalar) literal.
///
/// Has a counterpart for every `StrSegment` variant except `Interpolated`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SingleQuoteSegment<'a> {
// Literal text taken as-is.
Plaintext(&'a str), // e.g. 'f'
// The hex digits of a unicode escape, together with their source region.
Unicode(Loc<&'a str>), // e.g. '00A0' in '\u(00A0)'
// A recognized backslash escape.
EscapedChar(EscapedChar), // e.g. '\n'
// No interpolated expressions in single-quoted strings
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EscapedChar {
Newline, // \n
Tab, // \t
Quote, // \"
DoubleQuote, // \"
SingleQuote, // \'
Backslash, // \\
CarriageReturn, // \r
}
@ -132,12 +142,71 @@ impl EscapedChar {
match self {
Backslash => '\\',
Quote => '"',
SingleQuote => '\'',
DoubleQuote => '"',
CarriageReturn => 'r',
Tab => 't',
Newline => 'n',
}
}
pub fn unescape(self) -> char {
use EscapedChar::*;
match self {
Backslash => '\\',
SingleQuote => '\'',
DoubleQuote => '"',
CarriageReturn => '\r',
Tab => '\t',
Newline => '\n',
}
}
}
/// A parsed single-quoted (scalar) literal.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SingleQuoteLiteral<'a> {
/// The most common case: a plain character with no escapes
PlainLine(&'a str),
/// A literal made of one or more segments (plain text, unicode escapes, escaped chars).
Line(&'a [SingleQuoteSegment<'a>]),
}
impl<'a> SingleQuoteLiteral<'a> {
    /// Produces the literal's text with every escape resolved.
    ///
    /// A `PlainLine` is returned unchanged. A `Line` is assembled segment by
    /// segment into a string allocated in `arena`.
    ///
    /// # Panics
    ///
    /// Panics if a `Unicode` segment cannot be parsed as a hexadecimal `u32`,
    /// or if the parsed value is not a valid `char`.
    pub fn to_str_in(&self, arena: &'a Bump) -> &'a str {
        let segments = match self {
            SingleQuoteLiteral::PlainLine(plain) => return plain,
            SingleQuoteLiteral::Line(segments) => *segments,
        };

        let mut out = String::new_in(arena);
        for segment in segments {
            match segment {
                SingleQuoteSegment::Plaintext(text) => out.push_str(text),
                SingleQuoteSegment::EscapedChar(escaped) => out.push(escaped.unescape()),
                SingleQuoteSegment::Unicode(loc) => {
                    let code_point =
                        u32::from_str_radix(loc.value, 16).expect("Invalid unicode escape");
                    let ch = char::from_u32(code_point).expect("Invalid unicode codepoint");
                    out.push(ch);
                }
            }
        }
        out.into_bump_str()
    }
}
impl<'a> TryFrom<StrSegment<'a>> for SingleQuoteSegment<'a> {
    type Error = ESingleQuote;

    /// Converts a string segment into its single-quote counterpart.
    ///
    /// # Errors
    ///
    /// Returns `ESingleQuote::InterpolationNotAllowed` for
    /// `StrSegment::Interpolated`, which has no single-quote equivalent.
    fn try_from(value: StrSegment<'a>) -> Result<Self, Self::Error> {
        let converted = match value {
            StrSegment::Interpolated(_) => return Err(ESingleQuote::InterpolationNotAllowed),
            StrSegment::Plaintext(text) => Self::Plaintext(text),
            StrSegment::Unicode(code_point) => Self::Unicode(code_point),
            StrSegment::EscapedChar(escaped) => Self::EscapedChar(escaped),
        };
        Ok(converted)
    }
}
#[derive(Clone, Copy, Debug, PartialEq)]

View File

@ -16,6 +16,7 @@ use crate::parser::{
};
use crate::pattern::{closure_param, loc_has_parser};
use crate::state::State;
use crate::string_literal::StrLikeLiteral;
use crate::type_annotation;
use bumpalo::collections::Vec;
use bumpalo::Bump;
@ -161,8 +162,7 @@ fn loc_term_or_underscore_or_conditional<'a>(
loc_expr_in_parens_etc_help(),
loc!(specialize(EExpr::If, if_expr_help(options))),
loc!(specialize(EExpr::When, when::expr_help(options))),
loc!(specialize(EExpr::Str, string_literal_help())),
loc!(specialize(EExpr::SingleQuote, single_quote_literal_help())),
loc!(specialize(EExpr::Str, string_like_literal_help())),
loc!(specialize(EExpr::Number, positive_number_literal_help())),
loc!(specialize(EExpr::Closure, closure_help(options))),
loc!(crash_kw()),
@ -183,8 +183,7 @@ fn loc_term_or_underscore<'a>(
) -> impl Parser<'a, Loc<Expr<'a>>, EExpr<'a>> {
one_of!(
loc_expr_in_parens_etc_help(),
loc!(specialize(EExpr::Str, string_literal_help())),
loc!(specialize(EExpr::SingleQuote, single_quote_literal_help())),
loc!(specialize(EExpr::Str, string_like_literal_help())),
loc!(specialize(EExpr::Number, positive_number_literal_help())),
loc!(specialize(EExpr::Closure, closure_help(options))),
loc!(underscore_expression()),
@ -200,8 +199,7 @@ fn loc_term_or_underscore<'a>(
fn loc_term<'a>(options: ExprParseOptions) -> impl Parser<'a, Loc<Expr<'a>>, EExpr<'a>> {
one_of!(
loc_expr_in_parens_etc_help(),
loc!(specialize(EExpr::Str, string_literal_help())),
loc!(specialize(EExpr::SingleQuote, single_quote_literal_help())),
loc!(specialize(EExpr::Str, string_like_literal_help())),
loc!(specialize(EExpr::Number, positive_number_literal_help())),
loc!(specialize(EExpr::Closure, closure_help(options))),
loc!(record_literal_help()),
@ -2584,14 +2582,16 @@ fn apply_expr_access_chain<'a>(
})
}
/// Parses a string literal with `crate::string_literal::parse()` and wraps the result in `Expr::Str`.
fn string_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
map!(crate::string_literal::parse(), Expr::Str)
}
fn single_quote_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
map!(
crate::string_literal::parse_single_quote(),
Expr::SingleQuote
/// Parses a string-like literal and turns it into an `Expr`.
///
/// A `StrLikeLiteral::Str` becomes `Expr::Str` unchanged. A
/// `StrLikeLiteral::SingleQuote` is first resolved to its text via `to_str_in`
/// (allocating in the parser arena) and then wrapped in `Expr::SingleQuote`.
fn string_like_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
map_with_arena!(
crate::string_literal::parse_str_like_literal(),
|arena, lit| match lit {
StrLikeLiteral::Str(s) => Expr::Str(s),
StrLikeLiteral::SingleQuote(s) => {
// TODO: preserve the original escaping
Expr::SingleQuote(s.to_str_in(arena))
}
}
)
}

View File

@ -331,7 +331,10 @@ pub fn package_entry<'a>() -> impl Parser<'a, Spaced<'a, PackageEntry<'a>>, EPac
pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName<'a>> {
then(
loc!(specialize(EPackageName::BadPath, string_literal::parse())),
loc!(specialize(
EPackageName::BadPath,
string_literal::parse_str_literal()
)),
move |_arena, state, progress, text| match text.value {
StrLiteral::PlainLine(text) => Ok((progress, PackageName(text), state)),
StrLiteral::Line(_) => Err((progress, EPackageName::Escapes(text.region.start()))),

View File

@ -181,7 +181,7 @@ fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>, EHeader<'a>> {
before_name: space0_e(EHeader::IndentStart),
name: loc!(crate::parser::specialize(
EHeader::AppName,
string_literal::parse()
string_literal::parse_str_literal()
)),
packages: optional(specialize(EHeader::Packages, packages())),
imports: optional(specialize(EHeader::Imports, imports())),

View File

@ -355,8 +355,10 @@ pub enum EExpr<'a> {
InParens(EInParens<'a>, Position),
Record(ERecord<'a>, Position),
// SingleQuote errors are folded into the EString
Str(EString<'a>, Position),
SingleQuote(EString<'a>, Position),
Number(ENumber, Position),
List(EList<'a>, Position),
@ -376,13 +378,24 @@ pub enum EString<'a> {
CodePtOpen(Position),
CodePtEnd(Position),
InvalidSingleQuote(ESingleQuote, Position),
Space(BadInputError, Position),
EndlessSingle(Position),
EndlessMulti(Position),
EndlessSingleLine(Position),
EndlessMultiLine(Position),
EndlessSingleQuote(Position),
UnknownEscape(Position),
Format(&'a EExpr<'a>, Position),
FormatEnd(Position),
MultilineInsufficientIndent(Position),
ExpectedDoubleQuoteGotSingleQuote(Position),
}
/// Reasons a single-quoted (scalar) literal can be rejected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ESingleQuote {
// The literal's resolved text is empty.
Empty,
// The literal's resolved text is longer than the parser accepts.
TooLong,
// The literal contains a string interpolation segment.
InterpolationNotAllowed,
}
#[derive(Debug, Clone, PartialEq, Eq)]

View File

@ -8,6 +8,7 @@ use crate::parser::{
word3, EPattern, PInParens, PList, PRecord, Parser,
};
use crate::state::State;
use crate::string_literal::StrLikeLiteral;
use bumpalo::collections::string::String;
use bumpalo::collections::Vec;
use bumpalo::Bump;
@ -83,8 +84,7 @@ fn loc_pattern_help_help<'a>() -> impl Parser<'a, Loc<Pattern<'a>>, EPattern<'a>
)),
loc!(specialize(EPattern::List, list_pattern_help())),
loc!(number_pattern_help()),
loc!(string_pattern_help()),
loc!(single_quote_pattern_help()),
loc!(string_like_pattern_help()),
)
}
@ -177,8 +177,7 @@ fn loc_parse_tag_pattern_arg<'a>() -> impl Parser<'a, Loc<Pattern<'a>>, EPattern
EPattern::Record,
crate::pattern::record_pattern_help()
)),
loc!(string_pattern_help()),
loc!(single_quote_pattern_help()),
loc!(string_like_pattern_help()),
loc!(number_pattern_help())
)
}
@ -238,19 +237,18 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
)
}
fn string_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
fn string_like_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
specialize(
|_, pos| EPattern::Start(pos),
map!(crate::string_literal::parse(), Pattern::StrLiteral),
)
}
fn single_quote_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
specialize(
|_, pos| EPattern::Start(pos),
map!(
crate::string_literal::parse_single_quote(),
Pattern::SingleQuote
map_with_arena!(
crate::string_literal::parse_str_like_literal(),
|arena, lit| match lit {
StrLikeLiteral::Str(s) => Pattern::StrLiteral(s),
StrLikeLiteral::SingleQuote(s) => {
// TODO: preserve the original escaping
Pattern::SingleQuote(s.to_str_in(arena))
}
}
),
)
}

View File

@ -1,8 +1,9 @@
use crate::ast::{EscapedChar, StrLiteral, StrSegment};
use crate::ast::{EscapedChar, SingleQuoteLiteral, StrLiteral, StrSegment};
use crate::expr;
use crate::parser::Progress::{self, *};
use crate::parser::{
allocated, loc, reset_min_indent, specialize_ref, word1, BadInputError, EString, Parser,
allocated, loc, reset_min_indent, specialize_ref, then, word1, BadInputError, ESingleQuote,
EString, Parser,
};
use crate::state::State;
use bumpalo::collections::vec::Vec;
@ -31,97 +32,6 @@ fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
}
}
/// Parses a single-quoted literal such as 'a' or '\n' and yields its text.
///
/// Two shapes are accepted:
/// - a backslash escape (`\n`, `\r`, `\t`, `\'`, `\\`) immediately followed by
///   the closing quote, returned as a one-character string allocated in the arena;
/// - a run of at most `size_of::<u32>()` raw bytes up to the closing quote,
///   returned as a slice of the input after UTF-8 validation.
///
/// Every failure path in this function reports `NoProgress`.
pub fn parse_single_quote<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
move |arena: &'a Bump, mut state: State<'a>, _min_indent: u32| {
if state.consume_mut("\'") {
// we will be parsing a single-quote-string
} else {
return Err((NoProgress, EString::Open(state.pos())));
}
// Handle backslashes in a byte literal
// - starts with a backslash and used as an escape character. ex: '\n', '\t'
// - a floating single quote (unclosed single quote) should be an error
match state.bytes().first() {
Some(b'\\') => {
state.advance_mut(1);
match state.bytes().first() {
Some(&ch) => {
state.advance_mut(1);
if (ch == b'n' || ch == b'r' || ch == b't' || ch == b'\'' || ch == b'\\')
&& (state.bytes().first() == Some(&b'\''))
{
state.advance_mut(1);
let test = match ch {
b'n' => '\n',
b't' => '\t',
b'r' => '\r',
// SAFETY: the enclosing `if` restricts `ch` to one of n, r, t, ' or \,
// and n, t, r are matched above, so `ch` is an ASCII byte here and
// therefore a valid scalar value for 'from_u32_unchecked'.
_ => unsafe { char::from_u32_unchecked(ch as u32) },
};
return Ok((MadeProgress, &*arena.alloc_str(&test.to_string()), state));
}
// error: the backslash escapes something we do not recognize,
// or the escape is not directly followed by the closing quote
return Err((NoProgress, EString::CodePtEnd(state.pos())));
}
None => {
// no close quote found
return Err((NoProgress, EString::CodePtEnd(state.pos())));
}
}
}
Some(_) => {
// do nothing for other characters, handled below
}
None => return Err((NoProgress, EString::CodePtEnd(state.pos()))),
}
let mut bytes = state.bytes().iter();
// Starts at 1 so that, once the closing quote is found, `end_index` counts
// the content bytes plus the closing quote itself.
let mut end_index = 1;
// Copy paste problem in mono
loop {
match bytes.next() {
Some(b'\'') => {
break;
}
Some(_) => end_index += 1,
None => {
return Err((NoProgress, EString::Open(state.pos())));
}
}
}
if end_index == 1 {
// no progress was made
// this case is a double single quote, ex: ''
// not supporting empty single quotes
return Err((NoProgress, EString::Open(state.pos())));
}
if end_index > (std::mem::size_of::<u32>() + 1) {
// bad case: too big to fit into u32
return Err((NoProgress, EString::Open(state.pos())));
}
// happy case -> we have some bytes that will fit into a u32
// ending up w/ a slice of bytes that we want to convert into an integer
let raw_bytes = &state.bytes()[0..end_index - 1];
state.advance_mut(end_index);
match std::str::from_utf8(raw_bytes) {
Ok(string) => Ok((MadeProgress, string, state)),
Err(_) => {
// invalid UTF-8
return Err((NoProgress, EString::CodePtEnd(state.pos())));
}
}
}
}
fn consume_indent(mut state: State, mut indent: u32) -> Result<State, (Progress, EString)> {
while indent > 0 {
match state.bytes().first() {
@ -156,11 +66,28 @@ fn utf8<'a>(state: State<'a>, string_bytes: &'a [u8]) -> Result<&'a str, (Progre
})
}
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
use StrLiteral::*;
/// The result of parsing a string-like literal: either a single-quoted
/// literal or a string literal.
pub enum StrLikeLiteral<'a> {
// A single-quoted literal.
SingleQuote(SingleQuoteLiteral<'a>),
// A string literal.
Str(StrLiteral<'a>),
}
/// Parses a string literal, rejecting single-quoted literals.
///
/// Delegates to `parse_str_like_literal`. A `StrLikeLiteral::Str` result is
/// passed through; a `StrLikeLiteral::SingleQuote` result is turned into
/// `EString::ExpectedDoubleQuoteGotSingleQuote` positioned at the start of the
/// literal's region, keeping the progress reported by the inner parser.
pub fn parse_str_literal<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
    then(
        loc!(parse_str_like_literal()),
        |_arena, state, progress, located| {
            let literal_start = located.region.start();
            match located.value {
                StrLikeLiteral::Str(literal) => Ok((progress, literal, state)),
                StrLikeLiteral::SingleQuote(_) => Err((
                    progress,
                    EString::ExpectedDoubleQuoteGotSingleQuote(literal_start),
                )),
            }
        },
    )
}
pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EString<'a>> {
move |arena: &'a Bump, mut state: State<'a>, min_indent: u32| {
let is_multiline;
let is_single_quote;
let indent = state.column();
@ -171,6 +98,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// we will be parsing a multi-line string
is_multiline = true;
is_single_quote = false;
if state.consume_mut("\n") {
state = consume_indent(state, indent)?;
@ -180,6 +108,12 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// we will be parsing a single-line string
is_multiline = false;
is_single_quote = false;
} else if state.consume_mut("'") {
start_state = state.clone();
is_multiline = false;
is_single_quote = true;
} else {
return Err((NoProgress, EString::Open(state.pos())));
}
@ -244,12 +178,16 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
segment_parsed_bytes += 1;
match byte {
b'"' => {
b'"' if !is_single_quote => {
if segment_parsed_bytes == 1 && segments.is_empty() {
// special case of the empty string
if is_multiline {
if bytes.as_slice().starts_with(b"\"\"") {
return Ok((MadeProgress, Block(&[]), state.advance(3)));
return Ok((
MadeProgress,
StrLikeLiteral::Str(StrLiteral::Block(&[])),
state.advance(3),
));
} else {
// this quote is in a block string
continue;
@ -257,7 +195,11 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
} else {
// This is the end of the string!
// Advance 1 for the close quote
return Ok((MadeProgress, PlainLine(""), state.advance(1)));
return Ok((
MadeProgress,
StrLikeLiteral::Str(StrLiteral::PlainLine("")),
state.advance(1),
));
}
} else {
// the string is non-empty, which means we need to convert any previous segments
@ -276,10 +218,14 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
other => StrLiteral::Line(arena.alloc([other])),
}
} else {
Block(arena.alloc([segments.into_bump_slice()]))
StrLiteral::Block(arena.alloc([segments.into_bump_slice()]))
};
return Ok((MadeProgress, expr, state.advance(3)));
return Ok((
MadeProgress,
StrLikeLiteral::Str(expr),
state.advance(3),
));
} else {
// this quote is in a block string
continue;
@ -295,14 +241,80 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
other => StrLiteral::Line(arena.alloc([other])),
}
} else {
Line(segments.into_bump_slice())
StrLiteral::Line(segments.into_bump_slice())
};
// Advance the state 1 to account for the closing `"`
return Ok((MadeProgress, expr, state.advance(1)));
return Ok((MadeProgress, StrLikeLiteral::Str(expr), state.advance(1)));
}
};
}
b'\'' if is_single_quote => {
end_segment!(StrSegment::Plaintext);
let expr = if segments.len() == 1 {
// We had exactly one segment, so this is a candidate
// to be SingleQuoteLiteral::Plaintext
match segments.pop().unwrap() {
StrSegment::Plaintext(string) => SingleQuoteLiteral::PlainLine(string),
other => {
let o = other.try_into().map_err(|e| {
(
MadeProgress,
EString::InvalidSingleQuote(e, start_state.pos()),
)
})?;
SingleQuoteLiteral::Line(arena.alloc([o]))
}
}
} else {
let mut new_segments = Vec::with_capacity_in(segments.len(), arena);
for segment in segments {
let segment = segment.try_into().map_err(|e| {
(
MadeProgress,
EString::InvalidSingleQuote(e, start_state.pos()),
)
})?;
new_segments.push(segment);
}
SingleQuoteLiteral::Line(new_segments.into_bump_slice())
};
// Validate that the string is a valid char literal.
// Note that currently, we accept anything that:
// * Is between 1 and 5 bytes long
// -> utf-8 encoding is trivial to extend to 5 bytes, even tho 4 is the technical max
// -> TODO: do we want to change this?
// * Decodes as valid UTF-8
// -> Might be a single code point, or multiple code points
// -> TODO: do we want to change this?
// Simply by decoding this, it's guaranteed to be valid utf-8
let text = expr.to_str_in(arena);
if text.len() > 5 {
return Err((
MadeProgress,
EString::InvalidSingleQuote(ESingleQuote::TooLong, start_state.pos()),
));
}
if text.is_empty() {
return Err((
MadeProgress,
EString::InvalidSingleQuote(ESingleQuote::Empty, start_state.pos()),
));
}
// Advance the state 1 to account for the closing `'`
return Ok((
MadeProgress,
StrLikeLiteral::SingleQuote(expr),
state.advance(1),
));
}
b'\n' => {
if is_multiline {
let without_newline = &state.bytes()[0..(segment_parsed_bytes - 1)];
@ -330,7 +342,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// all remaining chars. This will mask all other errors, but
// it should make it easiest to debug; the file will be a giant
// error starting from where the open quote appeared.
return Err((MadeProgress, EString::EndlessSingle(start_state.pos())));
return Err((MadeProgress, EString::EndlessSingleLine(start_state.pos())));
}
}
b'\\' => {
@ -407,7 +419,10 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
escaped_char!(EscapedChar::Backslash);
}
Some(b'"') => {
escaped_char!(EscapedChar::Quote);
escaped_char!(EscapedChar::DoubleQuote);
}
Some(b'\'') => {
escaped_char!(EscapedChar::SingleQuote);
}
Some(b'r') => {
escaped_char!(EscapedChar::CarriageReturn);
@ -435,10 +450,12 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// We ran out of characters before finding a closed quote
Err((
MadeProgress,
if is_multiline {
EString::EndlessMulti(start_state.pos())
if is_single_quote {
EString::EndlessSingleQuote(start_state.pos())
} else if is_multiline {
EString::EndlessMultiLine(start_state.pos())
} else {
EString::EndlessSingle(start_state.pos())
EString::EndlessSingleLine(start_state.pos())
},
))
}

View File

@ -66,7 +66,7 @@ mod test_parse {
("\\n", EscapedChar::Newline),
("\\r", EscapedChar::CarriageReturn),
("\\t", EscapedChar::Tab),
("\\\"", EscapedChar::Quote),
("\\\"", EscapedChar::DoubleQuote),
] {
let actual = parse_expr_with(&arena, arena.alloc(to_input(string)));
let expected_slice = to_expected(*escaped, &arena);

View File

@ -448,7 +448,6 @@ dependencies = [
"roc_module",
"roc_parse",
"roc_region",
"roc_test_utils",
]
[[package]]

View File

@ -1 +1 @@
Expr(Str(EndlessMulti(@3), @0), @0)
Expr(Str(EndlessMultiLine(@3), @0), @0)

View File

@ -1 +1 @@
Expr(Str(EndlessSingle(@1), @0), @0)
Expr(Str(EndlessSingleLine(@1), @0), @0)

View File

@ -0,0 +1,3 @@
SingleQuote(
"\u{7}",
)

View File

@ -258,6 +258,7 @@ mod test_snapshots {
pass/comment_before_op.expr,
pass/comment_inside_empty_list.expr,
pass/comment_with_non_ascii.expr,
pass/control_characters_in_scalar.expr,
pass/crash.expr,
pass/dbg.expr,
pass/def_without_newline.expr,
@ -567,7 +568,7 @@ mod test_snapshots {
("\\n", EscapedChar::Newline),
("\\r", EscapedChar::CarriageReturn),
("\\t", EscapedChar::Tab),
("\\\"", EscapedChar::Quote),
("\\\"", EscapedChar::DoubleQuote),
] {
let actual = parse_expr_with(&arena, arena.alloc(to_input(string)));
let expected_slice = to_expected(*escaped, &arena);

View File

@ -1,4 +1,4 @@
use roc_parse::parser::{ENumber, FileError, PList, SyntaxError};
use roc_parse::parser::{ENumber, ESingleQuote, FileError, PList, SyntaxError};
use roc_problem::Severity;
use roc_region::all::{LineColumn, LineColumnRegion, LineInfo, Position, Region};
use std::path::PathBuf;
@ -967,7 +967,96 @@ fn to_str_report<'a>(
severity: Severity::RuntimeError,
}
}
EString::EndlessSingle(pos) => {
EString::EndlessSingleQuote(pos) => {
    // Report for a single-quoted literal whose closing quote was never found.
    let surroundings = Region::new(start, pos);
    let region = LineColumnRegion::from_pos(lines.convert_pos(pos));

    let doc = alloc.stack([
        alloc.reflow(r"I cannot find the end of this scalar literal (character literal):"),
        alloc.region_with_subregion(lines.convert_region(surroundings), region),
        alloc.concat([
            alloc.reflow(r"You could change it to something like "),
            alloc.parser_suggestion("'a'"),
            alloc.reflow(" or "),
            // The backslash is escaped so the suggestion shows the two
            // characters `\n` (as the INVALID SCALAR reports do) instead of
            // embedding a raw line feed in the message.
            alloc.parser_suggestion("'\\n'"),
            alloc.reflow("."),
        ]),
    ]);

    Report {
        filename,
        doc,
        title: "ENDLESS SCALAR".to_string(),
        severity: Severity::RuntimeError,
    }
}
EString::InvalidSingleQuote(e, pos) => {
let surroundings = Region::new(start, pos);
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
let doc = match e {
ESingleQuote::Empty => {
alloc.stack([
alloc.concat([
alloc.reflow(r"I am part way through parsing this scalar literal (character literal), "),
alloc.reflow(r"but it appears to be empty - which is not a valid scalar."),
]),
alloc.region_with_subregion(lines.convert_region(surroundings), region),
alloc.concat([
alloc.reflow(r"You could change it to something like "),
alloc.parser_suggestion("'a'"),
alloc.reflow(" or "),
alloc.parser_suggestion("'\\n'"),
alloc.reflow(". "),
alloc.reflow("Note, roc strings use double quotes, like \"hello\".")
]),
])
}
ESingleQuote::TooLong => {
alloc.stack([
alloc.concat([
alloc.reflow(r"I am part way through parsing this scalar literal (character literal), "),
alloc.reflow(r"but it's too long to fit in a U32 so it's not a valid scalar."),
]),
alloc.region_with_subregion(lines.convert_region(surroundings), region),
alloc.concat([
alloc.reflow(r"You could change it to something like "),
alloc.parser_suggestion("'a'"),
alloc.reflow(" or "),
alloc.parser_suggestion("'\\n'"),
alloc.reflow(". "),
alloc.reflow("Note, roc strings use double quotes, like \"hello\".")
]),
])
}
ESingleQuote::InterpolationNotAllowed => {
alloc.stack([
alloc.concat([
alloc.reflow("I am part way through parsing this scalar literal (character literal), "),
alloc.reflow("but I encountered a string interpolation like \"\\(this)\", which is not "),
alloc.reflow("allowed in scalar literals."),
]),
alloc.region_with_subregion(lines.convert_region(surroundings), region),
alloc.concat([
alloc.reflow(r"You could change it to something like "),
alloc.parser_suggestion("'a'"),
alloc.reflow(" or "),
alloc.parser_suggestion("'\\n'"),
alloc.reflow(". "),
alloc.reflow("Note, roc strings use double quotes, like \"hello\".")
]),
])
}
};
Report {
filename,
doc,
title: "INVALID SCALAR".to_string(),
severity: Severity::RuntimeError,
}
}
EString::EndlessSingleLine(pos) => {
let surroundings = Region::new(start, pos);
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
@ -990,7 +1079,31 @@ fn to_str_report<'a>(
severity: Severity::RuntimeError,
}
}
EString::EndlessMulti(pos) => {
EString::ExpectedDoubleQuoteGotSingleQuote(pos) => {
let surroundings = Region::new(start, pos);
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
let doc = alloc.stack([
alloc.reflow(r"I was expecting to see a string here, but I got a scalar literal."),
alloc.region_with_subregion(lines.convert_region(surroundings), region),
alloc.concat([
alloc.reflow(r"You could change it to something like "),
alloc.parser_suggestion("\"to be or not to be\""),
alloc.reflow(" or even just "),
alloc.parser_suggestion("\"\""),
alloc.reflow(". "),
alloc.reflow("Note, roc strings use double quotes."),
]),
]);
Report {
filename,
doc,
title: "EXPECTED STRING".to_string(),
severity: Severity::RuntimeError,
}
}
EString::EndlessMultiLine(pos) => {
let surroundings = Region::new(start, pos);
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));

View File

@ -5303,6 +5303,23 @@ Tab characters are not allowed."###,
"###
);
test_report!(
single_quote_too_long,
r#"'abcdef'"#,
@r###"
INVALID SCALAR tmp/single_quote_too_long/Test.roc
I am part way through parsing this scalar literal (character literal),
but it's too long to fit in a U32 so it's not a valid scalar.
4 'abcdef'
^
You could change it to something like 'a' or '\n'. Note, roc strings
use double quotes, like "hello".
"###
);
test_report!(
single_no_end,
r#""there is no end"#,