mirror of
https://github.com/roc-lang/roc.git
synced 2024-11-10 10:02:38 +03:00
Improve parsing of scalar literals
* Unify parsing of string literals and scalar literals, to (e.g.) ensure escapes are handled uniformly. Notably, this makes unicode escapes valid in scalar literals.
* Add a variety of custom error messages about specific failure cases of parsing string/scalar literals. For example, if we're expecting a string (e.g. a package name in the header) and the user tried using single quotes, give a clear message about that.
* Fix formatting of unicode escapes (they previously used `{}`; they now correctly use `()` to match Roc strings).
This commit is contained in:
parent
6fc593142d
commit
94070e8ba6
@ -3,7 +3,7 @@
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use bumpalo::collections::Vec as BumpVec;
|
||||
use roc_can::expr::{unescape_char, IntValue};
|
||||
use roc_can::expr::IntValue;
|
||||
use roc_can::num::{
|
||||
finish_parsing_base, finish_parsing_float, finish_parsing_num, ParsedNumResult,
|
||||
};
|
||||
@ -620,7 +620,7 @@ pub(crate) fn flatten_str_lines(pool: &mut Pool, lines: &[&[StrSegment<'_>]]) ->
|
||||
Interpolated(loc_expr) => {
|
||||
return Pattern2::UnsupportedPattern(loc_expr.region);
|
||||
}
|
||||
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
|
||||
EscapedChar(escaped) => buf.push(escaped.unescape()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -117,7 +117,7 @@ fn flatten_str_lines<'a>(
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
EscapedChar(escaped) => buf.push(roc_can::expr::unescape_char(escaped)),
|
||||
EscapedChar(escaped) => buf.push(escaped.unescape()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ use roc_module::called_via::CalledVia;
|
||||
use roc_module::ident::{ForeignSymbol, Lowercase, TagName};
|
||||
use roc_module::low_level::LowLevel;
|
||||
use roc_module::symbol::Symbol;
|
||||
use roc_parse::ast::{self, Defs, EscapedChar, StrLiteral};
|
||||
use roc_parse::ast::{self, Defs, StrLiteral};
|
||||
use roc_parse::pattern::PatternType::*;
|
||||
use roc_problem::can::{PrecedenceProblem, Problem, RuntimeError};
|
||||
use roc_region::all::{Loc, Region};
|
||||
@ -2297,7 +2297,7 @@ fn flatten_str_lines<'a>(
|
||||
);
|
||||
}
|
||||
}
|
||||
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
|
||||
EscapedChar(escaped) => buf.push(escaped.unescape()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2355,19 +2355,6 @@ fn desugar_str_segments(var_store: &mut VarStore, segments: Vec<StrSegment>) ->
|
||||
loc_expr.value
|
||||
}
|
||||
|
||||
/// Returns the char that would have been originally parsed to
|
||||
pub fn unescape_char(escaped: &EscapedChar) -> char {
|
||||
use EscapedChar::*;
|
||||
|
||||
match escaped {
|
||||
Backslash => '\\',
|
||||
Quote => '"',
|
||||
CarriageReturn => '\r',
|
||||
Tab => '\t',
|
||||
Newline => '\n',
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Declarations {
|
||||
pub declarations: Vec<DeclarationTag>,
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::annotation::freshen_opaque_def;
|
||||
use crate::env::Env;
|
||||
use crate::expr::{canonicalize_expr, unescape_char, Expr, IntValue, Output};
|
||||
use crate::expr::{canonicalize_expr, Expr, IntValue, Output};
|
||||
use crate::num::{
|
||||
finish_parsing_base, finish_parsing_float, finish_parsing_num, FloatBound, IntBound, NumBound,
|
||||
ParsedNumResult,
|
||||
@ -935,7 +935,7 @@ fn flatten_str_lines(lines: &[&[StrSegment<'_>]]) -> Pattern {
|
||||
Interpolated(loc_expr) => {
|
||||
return Pattern::UnsupportedPattern(loc_expr.region);
|
||||
}
|
||||
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
|
||||
EscapedChar(escaped) => buf.push(escaped.unescape()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -431,14 +431,30 @@ impl<'a> Formattable for Expr<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `ch` is a control character, i.e. lies in
/// `U+0000..=U+001F` or `U+007F..=U+009F`.
///
/// Those two ranges are exactly the Unicode general category `Cc`, which is
/// what `char::is_control` tests, so the standard-library predicate is used
/// instead of spelling the ranges out by hand.
fn needs_unicode_escape(ch: char) -> bool {
    ch.is_control()
}
|
||||
|
||||
pub(crate) fn format_sq_literal(buf: &mut Buf, s: &str) {
|
||||
buf.push('\'');
|
||||
for c in s.chars() {
|
||||
if c == '"' {
|
||||
buf.push_char_literal('"')
|
||||
} else {
|
||||
for escaped in c.escape_default() {
|
||||
buf.push_char_literal(escaped);
|
||||
match c {
|
||||
'"' => buf.push_str("\""),
|
||||
'\'' => buf.push_str("\\\'"),
|
||||
'\t' => buf.push_str("\\t"),
|
||||
'\r' => buf.push_str("\\r"),
|
||||
'\n' => buf.push_str("\\n"),
|
||||
'\\' => buf.push_str("\\\\"),
|
||||
_ => {
|
||||
if needs_unicode_escape(c) {
|
||||
buf.push_str(&format!("\\u({:x})", c as u32))
|
||||
} else {
|
||||
buf.push_char_literal(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ use std::fmt::Debug;
|
||||
|
||||
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PackageHeader, PlatformHeader};
|
||||
use crate::ident::Ident;
|
||||
use crate::parser::ESingleQuote;
|
||||
use bumpalo::collections::{String, Vec};
|
||||
use bumpalo::Bump;
|
||||
use roc_collections::soa::{EitherIndex, Index, Slice};
|
||||
@ -116,11 +117,20 @@ pub enum StrSegment<'a> {
|
||||
Interpolated(Loc<&'a Expr<'a>>), // e.g. (name) in "Hi, \(name)!"
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum SingleQuoteSegment<'a> {
|
||||
Plaintext(&'a str), // e.g. 'f'
|
||||
Unicode(Loc<&'a str>), // e.g. '00A0' in '\u(00A0)'
|
||||
EscapedChar(EscapedChar), // e.g. '\n'
|
||||
// No interpolated expressions in single-quoted strings
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum EscapedChar {
|
||||
Newline, // \n
|
||||
Tab, // \t
|
||||
Quote, // \"
|
||||
DoubleQuote, // \"
|
||||
SingleQuote, // \'
|
||||
Backslash, // \\
|
||||
CarriageReturn, // \r
|
||||
}
|
||||
@ -132,12 +142,71 @@ impl EscapedChar {
|
||||
|
||||
match self {
|
||||
Backslash => '\\',
|
||||
Quote => '"',
|
||||
SingleQuote => '\'',
|
||||
DoubleQuote => '"',
|
||||
CarriageReturn => 'r',
|
||||
Tab => 't',
|
||||
Newline => 'n',
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unescape(self) -> char {
|
||||
use EscapedChar::*;
|
||||
|
||||
match self {
|
||||
Backslash => '\\',
|
||||
SingleQuote => '\'',
|
||||
DoubleQuote => '"',
|
||||
CarriageReturn => '\r',
|
||||
Tab => '\t',
|
||||
Newline => '\n',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum SingleQuoteLiteral<'a> {
|
||||
/// The most common case: a plain character with no escapes
|
||||
PlainLine(&'a str),
|
||||
Line(&'a [SingleQuoteSegment<'a>]),
|
||||
}
|
||||
|
||||
impl<'a> SingleQuoteLiteral<'a> {
|
||||
pub fn to_str_in(&self, arena: &'a Bump) -> &'a str {
|
||||
match self {
|
||||
SingleQuoteLiteral::PlainLine(s) => s,
|
||||
SingleQuoteLiteral::Line(segments) => {
|
||||
let mut s = String::new_in(arena);
|
||||
for segment in *segments {
|
||||
match segment {
|
||||
SingleQuoteSegment::Plaintext(s2) => s.push_str(s2),
|
||||
SingleQuoteSegment::Unicode(loc) => {
|
||||
let s2 = loc.value;
|
||||
let c = u32::from_str_radix(s2, 16).expect("Invalid unicode escape");
|
||||
s.push(char::from_u32(c).expect("Invalid unicode codepoint"));
|
||||
}
|
||||
SingleQuoteSegment::EscapedChar(c) => {
|
||||
s.push(c.unescape());
|
||||
}
|
||||
}
|
||||
}
|
||||
s.into_bump_str()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<StrSegment<'a>> for SingleQuoteSegment<'a> {
|
||||
type Error = ESingleQuote;
|
||||
|
||||
fn try_from(value: StrSegment<'a>) -> Result<Self, Self::Error> {
|
||||
match value {
|
||||
StrSegment::Plaintext(s) => Ok(SingleQuoteSegment::Plaintext(s)),
|
||||
StrSegment::Unicode(s) => Ok(SingleQuoteSegment::Unicode(s)),
|
||||
StrSegment::EscapedChar(s) => Ok(SingleQuoteSegment::EscapedChar(s)),
|
||||
StrSegment::Interpolated(_) => Err(ESingleQuote::InterpolationNotAllowed),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
|
@ -16,6 +16,7 @@ use crate::parser::{
|
||||
};
|
||||
use crate::pattern::{closure_param, loc_has_parser};
|
||||
use crate::state::State;
|
||||
use crate::string_literal::StrLikeLiteral;
|
||||
use crate::type_annotation;
|
||||
use bumpalo::collections::Vec;
|
||||
use bumpalo::Bump;
|
||||
@ -161,8 +162,7 @@ fn loc_term_or_underscore_or_conditional<'a>(
|
||||
loc_expr_in_parens_etc_help(),
|
||||
loc!(specialize(EExpr::If, if_expr_help(options))),
|
||||
loc!(specialize(EExpr::When, when::expr_help(options))),
|
||||
loc!(specialize(EExpr::Str, string_literal_help())),
|
||||
loc!(specialize(EExpr::SingleQuote, single_quote_literal_help())),
|
||||
loc!(specialize(EExpr::Str, string_like_literal_help())),
|
||||
loc!(specialize(EExpr::Number, positive_number_literal_help())),
|
||||
loc!(specialize(EExpr::Closure, closure_help(options))),
|
||||
loc!(crash_kw()),
|
||||
@ -183,8 +183,7 @@ fn loc_term_or_underscore<'a>(
|
||||
) -> impl Parser<'a, Loc<Expr<'a>>, EExpr<'a>> {
|
||||
one_of!(
|
||||
loc_expr_in_parens_etc_help(),
|
||||
loc!(specialize(EExpr::Str, string_literal_help())),
|
||||
loc!(specialize(EExpr::SingleQuote, single_quote_literal_help())),
|
||||
loc!(specialize(EExpr::Str, string_like_literal_help())),
|
||||
loc!(specialize(EExpr::Number, positive_number_literal_help())),
|
||||
loc!(specialize(EExpr::Closure, closure_help(options))),
|
||||
loc!(underscore_expression()),
|
||||
@ -200,8 +199,7 @@ fn loc_term_or_underscore<'a>(
|
||||
fn loc_term<'a>(options: ExprParseOptions) -> impl Parser<'a, Loc<Expr<'a>>, EExpr<'a>> {
|
||||
one_of!(
|
||||
loc_expr_in_parens_etc_help(),
|
||||
loc!(specialize(EExpr::Str, string_literal_help())),
|
||||
loc!(specialize(EExpr::SingleQuote, single_quote_literal_help())),
|
||||
loc!(specialize(EExpr::Str, string_like_literal_help())),
|
||||
loc!(specialize(EExpr::Number, positive_number_literal_help())),
|
||||
loc!(specialize(EExpr::Closure, closure_help(options))),
|
||||
loc!(record_literal_help()),
|
||||
@ -2584,14 +2582,16 @@ fn apply_expr_access_chain<'a>(
|
||||
})
|
||||
}
|
||||
|
||||
fn string_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
|
||||
map!(crate::string_literal::parse(), Expr::Str)
|
||||
}
|
||||
|
||||
fn single_quote_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
|
||||
map!(
|
||||
crate::string_literal::parse_single_quote(),
|
||||
Expr::SingleQuote
|
||||
fn string_like_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
|
||||
map_with_arena!(
|
||||
crate::string_literal::parse_str_like_literal(),
|
||||
|arena, lit| match lit {
|
||||
StrLikeLiteral::Str(s) => Expr::Str(s),
|
||||
StrLikeLiteral::SingleQuote(s) => {
|
||||
// TODO: preserve the original escaping
|
||||
Expr::SingleQuote(s.to_str_in(arena))
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -331,7 +331,10 @@ pub fn package_entry<'a>() -> impl Parser<'a, Spaced<'a, PackageEntry<'a>>, EPac
|
||||
|
||||
pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName<'a>> {
|
||||
then(
|
||||
loc!(specialize(EPackageName::BadPath, string_literal::parse())),
|
||||
loc!(specialize(
|
||||
EPackageName::BadPath,
|
||||
string_literal::parse_str_literal()
|
||||
)),
|
||||
move |_arena, state, progress, text| match text.value {
|
||||
StrLiteral::PlainLine(text) => Ok((progress, PackageName(text), state)),
|
||||
StrLiteral::Line(_) => Err((progress, EPackageName::Escapes(text.region.start()))),
|
||||
|
@ -181,7 +181,7 @@ fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>, EHeader<'a>> {
|
||||
before_name: space0_e(EHeader::IndentStart),
|
||||
name: loc!(crate::parser::specialize(
|
||||
EHeader::AppName,
|
||||
string_literal::parse()
|
||||
string_literal::parse_str_literal()
|
||||
)),
|
||||
packages: optional(specialize(EHeader::Packages, packages())),
|
||||
imports: optional(specialize(EHeader::Imports, imports())),
|
||||
|
@ -355,8 +355,10 @@ pub enum EExpr<'a> {
|
||||
|
||||
InParens(EInParens<'a>, Position),
|
||||
Record(ERecord<'a>, Position),
|
||||
|
||||
// SingleQuote errors are folded into the EString
|
||||
Str(EString<'a>, Position),
|
||||
SingleQuote(EString<'a>, Position),
|
||||
|
||||
Number(ENumber, Position),
|
||||
List(EList<'a>, Position),
|
||||
|
||||
@ -376,13 +378,24 @@ pub enum EString<'a> {
|
||||
CodePtOpen(Position),
|
||||
CodePtEnd(Position),
|
||||
|
||||
InvalidSingleQuote(ESingleQuote, Position),
|
||||
|
||||
Space(BadInputError, Position),
|
||||
EndlessSingle(Position),
|
||||
EndlessMulti(Position),
|
||||
EndlessSingleLine(Position),
|
||||
EndlessMultiLine(Position),
|
||||
EndlessSingleQuote(Position),
|
||||
UnknownEscape(Position),
|
||||
Format(&'a EExpr<'a>, Position),
|
||||
FormatEnd(Position),
|
||||
MultilineInsufficientIndent(Position),
|
||||
ExpectedDoubleQuoteGotSingleQuote(Position),
|
||||
}
|
||||
|
||||
/// The ways a single-quoted (scalar) literal can be invalid once its closing
/// quote has been found.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ESingleQuote {
    /// The literal has nothing between its quotes.
    Empty,
    /// The literal's contents are too long to be a scalar.
    TooLong,
    /// The literal contains a string interpolation, which scalars do not allow.
    InterpolationNotAllowed,
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
@ -8,6 +8,7 @@ use crate::parser::{
|
||||
word3, EPattern, PInParens, PList, PRecord, Parser,
|
||||
};
|
||||
use crate::state::State;
|
||||
use crate::string_literal::StrLikeLiteral;
|
||||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::Vec;
|
||||
use bumpalo::Bump;
|
||||
@ -83,8 +84,7 @@ fn loc_pattern_help_help<'a>() -> impl Parser<'a, Loc<Pattern<'a>>, EPattern<'a>
|
||||
)),
|
||||
loc!(specialize(EPattern::List, list_pattern_help())),
|
||||
loc!(number_pattern_help()),
|
||||
loc!(string_pattern_help()),
|
||||
loc!(single_quote_pattern_help()),
|
||||
loc!(string_like_pattern_help()),
|
||||
)
|
||||
}
|
||||
|
||||
@ -177,8 +177,7 @@ fn loc_parse_tag_pattern_arg<'a>() -> impl Parser<'a, Loc<Pattern<'a>>, EPattern
|
||||
EPattern::Record,
|
||||
crate::pattern::record_pattern_help()
|
||||
)),
|
||||
loc!(string_pattern_help()),
|
||||
loc!(single_quote_pattern_help()),
|
||||
loc!(string_like_pattern_help()),
|
||||
loc!(number_pattern_help())
|
||||
)
|
||||
}
|
||||
@ -238,19 +237,18 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
||||
)
|
||||
}
|
||||
|
||||
fn string_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
||||
fn string_like_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
||||
specialize(
|
||||
|_, pos| EPattern::Start(pos),
|
||||
map!(crate::string_literal::parse(), Pattern::StrLiteral),
|
||||
)
|
||||
}
|
||||
|
||||
fn single_quote_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
||||
specialize(
|
||||
|_, pos| EPattern::Start(pos),
|
||||
map!(
|
||||
crate::string_literal::parse_single_quote(),
|
||||
Pattern::SingleQuote
|
||||
map_with_arena!(
|
||||
crate::string_literal::parse_str_like_literal(),
|
||||
|arena, lit| match lit {
|
||||
StrLikeLiteral::Str(s) => Pattern::StrLiteral(s),
|
||||
StrLikeLiteral::SingleQuote(s) => {
|
||||
// TODO: preserve the original escaping
|
||||
Pattern::SingleQuote(s.to_str_in(arena))
|
||||
}
|
||||
}
|
||||
),
|
||||
)
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
use crate::ast::{EscapedChar, StrLiteral, StrSegment};
|
||||
use crate::ast::{EscapedChar, SingleQuoteLiteral, StrLiteral, StrSegment};
|
||||
use crate::expr;
|
||||
use crate::parser::Progress::{self, *};
|
||||
use crate::parser::{
|
||||
allocated, loc, reset_min_indent, specialize_ref, word1, BadInputError, EString, Parser,
|
||||
allocated, loc, reset_min_indent, specialize_ref, then, word1, BadInputError, ESingleQuote,
|
||||
EString, Parser,
|
||||
};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
@ -31,97 +32,6 @@ fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_single_quote<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
|
||||
move |arena: &'a Bump, mut state: State<'a>, _min_indent: u32| {
|
||||
if state.consume_mut("\'") {
|
||||
// we will be parsing a single-quote-string
|
||||
} else {
|
||||
return Err((NoProgress, EString::Open(state.pos())));
|
||||
}
|
||||
|
||||
// Handle backslashes in byte literal
|
||||
// - starts with a backslash and used as an escape character. ex: '\n', '\t'
|
||||
// - single quote floating (un closed single quote) should be an error
|
||||
match state.bytes().first() {
|
||||
Some(b'\\') => {
|
||||
state.advance_mut(1);
|
||||
match state.bytes().first() {
|
||||
Some(&ch) => {
|
||||
state.advance_mut(1);
|
||||
if (ch == b'n' || ch == b'r' || ch == b't' || ch == b'\'' || ch == b'\\')
|
||||
&& (state.bytes().first() == Some(&b'\''))
|
||||
{
|
||||
state.advance_mut(1);
|
||||
let test = match ch {
|
||||
b'n' => '\n',
|
||||
b't' => '\t',
|
||||
b'r' => '\r',
|
||||
// since we checked the current char between the single quotes we
|
||||
// know they are valid UTF-8, allowing us to use 'from_u32_unchecked'
|
||||
_ => unsafe { char::from_u32_unchecked(ch as u32) },
|
||||
};
|
||||
|
||||
return Ok((MadeProgress, &*arena.alloc_str(&test.to_string()), state));
|
||||
}
|
||||
// invalid: the backslash is escaping something we do not recognize
|
||||
return Err((NoProgress, EString::CodePtEnd(state.pos())));
|
||||
}
|
||||
None => {
|
||||
// no close quote found
|
||||
return Err((NoProgress, EString::CodePtEnd(state.pos())));
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(_) => {
|
||||
// do nothing for other characters, handled below
|
||||
}
|
||||
None => return Err((NoProgress, EString::CodePtEnd(state.pos()))),
|
||||
}
|
||||
|
||||
let mut bytes = state.bytes().iter();
|
||||
let mut end_index = 1;
|
||||
|
||||
// Copy paste problem in mono
|
||||
|
||||
loop {
|
||||
match bytes.next() {
|
||||
Some(b'\'') => {
|
||||
break;
|
||||
}
|
||||
Some(_) => end_index += 1,
|
||||
None => {
|
||||
return Err((NoProgress, EString::Open(state.pos())));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if end_index == 1 {
|
||||
// no progress was made
|
||||
// this case is a double single quote, ex: ''
|
||||
// not supporting empty single quotes
|
||||
return Err((NoProgress, EString::Open(state.pos())));
|
||||
}
|
||||
|
||||
if end_index > (std::mem::size_of::<u32>() + 1) {
|
||||
// bad case: too big to fit into u32
|
||||
return Err((NoProgress, EString::Open(state.pos())));
|
||||
}
|
||||
|
||||
// happy case -> we have some bytes that will fit into a u32
|
||||
// ending up w/ a slice of bytes that we want to convert into an integer
|
||||
let raw_bytes = &state.bytes()[0..end_index - 1];
|
||||
|
||||
state.advance_mut(end_index);
|
||||
match std::str::from_utf8(raw_bytes) {
|
||||
Ok(string) => Ok((MadeProgress, string, state)),
|
||||
Err(_) => {
|
||||
// invalid UTF-8
|
||||
return Err((NoProgress, EString::CodePtEnd(state.pos())));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume_indent(mut state: State, mut indent: u32) -> Result<State, (Progress, EString)> {
|
||||
while indent > 0 {
|
||||
match state.bytes().first() {
|
||||
@ -156,11 +66,28 @@ fn utf8<'a>(state: State<'a>, string_bytes: &'a [u8]) -> Result<&'a str, (Progre
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
use StrLiteral::*;
|
||||
pub enum StrLikeLiteral<'a> {
|
||||
SingleQuote(SingleQuoteLiteral<'a>),
|
||||
Str(StrLiteral<'a>),
|
||||
}
|
||||
|
||||
pub fn parse_str_literal<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
then(
|
||||
loc!(parse_str_like_literal()),
|
||||
|_arena, state, progress, str_like| match str_like.value {
|
||||
StrLikeLiteral::SingleQuote(_) => Err((
|
||||
progress,
|
||||
EString::ExpectedDoubleQuoteGotSingleQuote(str_like.region.start()),
|
||||
)),
|
||||
StrLikeLiteral::Str(str_literal) => Ok((progress, str_literal, state)),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EString<'a>> {
|
||||
move |arena: &'a Bump, mut state: State<'a>, min_indent: u32| {
|
||||
let is_multiline;
|
||||
let is_single_quote;
|
||||
|
||||
let indent = state.column();
|
||||
|
||||
@ -171,6 +98,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
|
||||
// we will be parsing a multi-line string
|
||||
is_multiline = true;
|
||||
is_single_quote = false;
|
||||
|
||||
if state.consume_mut("\n") {
|
||||
state = consume_indent(state, indent)?;
|
||||
@ -180,6 +108,12 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
|
||||
// we will be parsing a single-line string
|
||||
is_multiline = false;
|
||||
is_single_quote = false;
|
||||
} else if state.consume_mut("'") {
|
||||
start_state = state.clone();
|
||||
|
||||
is_multiline = false;
|
||||
is_single_quote = true;
|
||||
} else {
|
||||
return Err((NoProgress, EString::Open(state.pos())));
|
||||
}
|
||||
@ -244,12 +178,16 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
segment_parsed_bytes += 1;
|
||||
|
||||
match byte {
|
||||
b'"' => {
|
||||
b'"' if !is_single_quote => {
|
||||
if segment_parsed_bytes == 1 && segments.is_empty() {
|
||||
// special case of the empty string
|
||||
if is_multiline {
|
||||
if bytes.as_slice().starts_with(b"\"\"") {
|
||||
return Ok((MadeProgress, Block(&[]), state.advance(3)));
|
||||
return Ok((
|
||||
MadeProgress,
|
||||
StrLikeLiteral::Str(StrLiteral::Block(&[])),
|
||||
state.advance(3),
|
||||
));
|
||||
} else {
|
||||
// this quote is in a block string
|
||||
continue;
|
||||
@ -257,7 +195,11 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
} else {
|
||||
// This is the end of the string!
|
||||
// Advance 1 for the close quote
|
||||
return Ok((MadeProgress, PlainLine(""), state.advance(1)));
|
||||
return Ok((
|
||||
MadeProgress,
|
||||
StrLikeLiteral::Str(StrLiteral::PlainLine("")),
|
||||
state.advance(1),
|
||||
));
|
||||
}
|
||||
} else {
|
||||
// the string is non-empty, which means we need to convert any previous segments
|
||||
@ -276,10 +218,14 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
other => StrLiteral::Line(arena.alloc([other])),
|
||||
}
|
||||
} else {
|
||||
Block(arena.alloc([segments.into_bump_slice()]))
|
||||
StrLiteral::Block(arena.alloc([segments.into_bump_slice()]))
|
||||
};
|
||||
|
||||
return Ok((MadeProgress, expr, state.advance(3)));
|
||||
return Ok((
|
||||
MadeProgress,
|
||||
StrLikeLiteral::Str(expr),
|
||||
state.advance(3),
|
||||
));
|
||||
} else {
|
||||
// this quote is in a block string
|
||||
continue;
|
||||
@ -295,14 +241,80 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
other => StrLiteral::Line(arena.alloc([other])),
|
||||
}
|
||||
} else {
|
||||
Line(segments.into_bump_slice())
|
||||
StrLiteral::Line(segments.into_bump_slice())
|
||||
};
|
||||
|
||||
// Advance the state 1 to account for the closing `"`
|
||||
return Ok((MadeProgress, expr, state.advance(1)));
|
||||
return Ok((MadeProgress, StrLikeLiteral::Str(expr), state.advance(1)));
|
||||
}
|
||||
};
|
||||
}
|
||||
b'\'' if is_single_quote => {
|
||||
end_segment!(StrSegment::Plaintext);
|
||||
|
||||
let expr = if segments.len() == 1 {
|
||||
// We had exactly one segment, so this is a candidate
|
||||
// to be SingleQuoteLiteral::Plaintext
|
||||
match segments.pop().unwrap() {
|
||||
StrSegment::Plaintext(string) => SingleQuoteLiteral::PlainLine(string),
|
||||
other => {
|
||||
let o = other.try_into().map_err(|e| {
|
||||
(
|
||||
MadeProgress,
|
||||
EString::InvalidSingleQuote(e, start_state.pos()),
|
||||
)
|
||||
})?;
|
||||
SingleQuoteLiteral::Line(arena.alloc([o]))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let mut new_segments = Vec::with_capacity_in(segments.len(), arena);
|
||||
for segment in segments {
|
||||
let segment = segment.try_into().map_err(|e| {
|
||||
(
|
||||
MadeProgress,
|
||||
EString::InvalidSingleQuote(e, start_state.pos()),
|
||||
)
|
||||
})?;
|
||||
new_segments.push(segment);
|
||||
}
|
||||
|
||||
SingleQuoteLiteral::Line(new_segments.into_bump_slice())
|
||||
};
|
||||
|
||||
// Validate that the string is a valid char literal.
|
||||
// Note that currently, we accept anything that:
|
||||
// * Is between 1 and 5 bytes long
|
||||
// -> utf-8 encoding is trivial to extend to 5 bytes, even tho 4 is the technical max
|
||||
// -> TODO: do we want to change this?
|
||||
// * Decodes as valid UTF-8
|
||||
// -> Might be a single code point, or multiple code points
|
||||
// -> TODO: do we want to change this?
|
||||
|
||||
// Simply by decoding this, it's guaranteed to be valid utf-8
|
||||
let text = expr.to_str_in(arena);
|
||||
|
||||
if text.len() > 5 {
|
||||
return Err((
|
||||
MadeProgress,
|
||||
EString::InvalidSingleQuote(ESingleQuote::TooLong, start_state.pos()),
|
||||
));
|
||||
}
|
||||
|
||||
if text.is_empty() {
|
||||
return Err((
|
||||
MadeProgress,
|
||||
EString::InvalidSingleQuote(ESingleQuote::Empty, start_state.pos()),
|
||||
));
|
||||
}
|
||||
|
||||
// Advance the state 1 to account for the closing `'`
|
||||
return Ok((
|
||||
MadeProgress,
|
||||
StrLikeLiteral::SingleQuote(expr),
|
||||
state.advance(1),
|
||||
));
|
||||
}
|
||||
b'\n' => {
|
||||
if is_multiline {
|
||||
let without_newline = &state.bytes()[0..(segment_parsed_bytes - 1)];
|
||||
@ -330,7 +342,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
// all remaining chars. This will mask all other errors, but
|
||||
// it should make it easiest to debug; the file will be a giant
|
||||
// error starting from where the open quote appeared.
|
||||
return Err((MadeProgress, EString::EndlessSingle(start_state.pos())));
|
||||
return Err((MadeProgress, EString::EndlessSingleLine(start_state.pos())));
|
||||
}
|
||||
}
|
||||
b'\\' => {
|
||||
@ -407,7 +419,10 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
escaped_char!(EscapedChar::Backslash);
|
||||
}
|
||||
Some(b'"') => {
|
||||
escaped_char!(EscapedChar::Quote);
|
||||
escaped_char!(EscapedChar::DoubleQuote);
|
||||
}
|
||||
Some(b'\'') => {
|
||||
escaped_char!(EscapedChar::SingleQuote);
|
||||
}
|
||||
Some(b'r') => {
|
||||
escaped_char!(EscapedChar::CarriageReturn);
|
||||
@ -435,10 +450,12 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
// We ran out of characters before finding a closed quote
|
||||
Err((
|
||||
MadeProgress,
|
||||
if is_multiline {
|
||||
EString::EndlessMulti(start_state.pos())
|
||||
if is_single_quote {
|
||||
EString::EndlessSingleQuote(start_state.pos())
|
||||
} else if is_multiline {
|
||||
EString::EndlessMultiLine(start_state.pos())
|
||||
} else {
|
||||
EString::EndlessSingle(start_state.pos())
|
||||
EString::EndlessSingleLine(start_state.pos())
|
||||
},
|
||||
))
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ mod test_parse {
|
||||
("\\n", EscapedChar::Newline),
|
||||
("\\r", EscapedChar::CarriageReturn),
|
||||
("\\t", EscapedChar::Tab),
|
||||
("\\\"", EscapedChar::Quote),
|
||||
("\\\"", EscapedChar::DoubleQuote),
|
||||
] {
|
||||
let actual = parse_expr_with(&arena, arena.alloc(to_input(string)));
|
||||
let expected_slice = to_expected(*escaped, &arena);
|
||||
|
1
crates/compiler/test_syntax/fuzz/Cargo.lock
generated
1
crates/compiler/test_syntax/fuzz/Cargo.lock
generated
@ -448,7 +448,6 @@ dependencies = [
|
||||
"roc_module",
|
||||
"roc_parse",
|
||||
"roc_region",
|
||||
"roc_test_utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1 +1 @@
|
||||
Expr(Str(EndlessMulti(@3), @0), @0)
|
||||
Expr(Str(EndlessMultiLine(@3), @0), @0)
|
@ -1 +1 @@
|
||||
Expr(Str(EndlessSingle(@1), @0), @0)
|
||||
Expr(Str(EndlessSingleLine(@1), @0), @0)
|
@ -0,0 +1 @@
|
||||
'\u(7)'
|
@ -0,0 +1,3 @@
|
||||
SingleQuote(
|
||||
"\u{7}",
|
||||
)
|
@ -0,0 +1 @@
|
||||
''
|
@ -258,6 +258,7 @@ mod test_snapshots {
|
||||
pass/comment_before_op.expr,
|
||||
pass/comment_inside_empty_list.expr,
|
||||
pass/comment_with_non_ascii.expr,
|
||||
pass/control_characters_in_scalar.expr,
|
||||
pass/crash.expr,
|
||||
pass/dbg.expr,
|
||||
pass/def_without_newline.expr,
|
||||
@ -567,7 +568,7 @@ mod test_snapshots {
|
||||
("\\n", EscapedChar::Newline),
|
||||
("\\r", EscapedChar::CarriageReturn),
|
||||
("\\t", EscapedChar::Tab),
|
||||
("\\\"", EscapedChar::Quote),
|
||||
("\\\"", EscapedChar::DoubleQuote),
|
||||
] {
|
||||
let actual = parse_expr_with(&arena, arena.alloc(to_input(string)));
|
||||
let expected_slice = to_expected(*escaped, &arena);
|
||||
|
@ -1,4 +1,4 @@
|
||||
use roc_parse::parser::{ENumber, FileError, PList, SyntaxError};
|
||||
use roc_parse::parser::{ENumber, ESingleQuote, FileError, PList, SyntaxError};
|
||||
use roc_problem::Severity;
|
||||
use roc_region::all::{LineColumn, LineColumnRegion, LineInfo, Position, Region};
|
||||
use std::path::PathBuf;
|
||||
@ -967,7 +967,96 @@ fn to_str_report<'a>(
|
||||
severity: Severity::RuntimeError,
|
||||
}
|
||||
}
|
||||
EString::EndlessSingle(pos) => {
|
||||
EString::EndlessSingleQuote(pos) => {
|
||||
let surroundings = Region::new(start, pos);
|
||||
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
|
||||
|
||||
let doc = alloc.stack([
|
||||
alloc.reflow(r"I cannot find the end of this scalar literal (character literal):"),
|
||||
alloc.region_with_subregion(lines.convert_region(surroundings), region),
|
||||
alloc.concat([
|
||||
alloc.reflow(r"You could change it to something like "),
|
||||
alloc.parser_suggestion("'a'"),
|
||||
alloc.reflow(" or "),
|
||||
alloc.parser_suggestion("'\n'"),
|
||||
alloc.reflow("."),
|
||||
]),
|
||||
]);
|
||||
|
||||
Report {
|
||||
filename,
|
||||
doc,
|
||||
title: "ENDLESS SCALAR".to_string(),
|
||||
severity: Severity::RuntimeError,
|
||||
}
|
||||
}
|
||||
EString::InvalidSingleQuote(e, pos) => {
|
||||
let surroundings = Region::new(start, pos);
|
||||
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
|
||||
|
||||
let doc = match e {
|
||||
ESingleQuote::Empty => {
|
||||
alloc.stack([
|
||||
alloc.concat([
|
||||
alloc.reflow(r"I am part way through parsing this scalar literal (character literal), "),
|
||||
alloc.reflow(r"but it appears to be empty - which is not a valid scalar."),
|
||||
]),
|
||||
alloc.region_with_subregion(lines.convert_region(surroundings), region),
|
||||
alloc.concat([
|
||||
alloc.reflow(r"You could change it to something like "),
|
||||
alloc.parser_suggestion("'a'"),
|
||||
alloc.reflow(" or "),
|
||||
alloc.parser_suggestion("'\\n'"),
|
||||
alloc.reflow(". "),
|
||||
alloc.reflow("Note, roc strings use double quotes, like \"hello\".")
|
||||
]),
|
||||
])
|
||||
}
|
||||
ESingleQuote::TooLong => {
|
||||
alloc.stack([
|
||||
alloc.concat([
|
||||
alloc.reflow(r"I am part way through parsing this scalar literal (character literal), "),
|
||||
alloc.reflow(r"but it's too long to fit in a U32 so it's not a valid scalar."),
|
||||
]),
|
||||
alloc.region_with_subregion(lines.convert_region(surroundings), region),
|
||||
alloc.concat([
|
||||
alloc.reflow(r"You could change it to something like "),
|
||||
alloc.parser_suggestion("'a'"),
|
||||
alloc.reflow(" or "),
|
||||
alloc.parser_suggestion("'\\n'"),
|
||||
alloc.reflow(". "),
|
||||
alloc.reflow("Note, roc strings use double quotes, like \"hello\".")
|
||||
]),
|
||||
])
|
||||
}
|
||||
ESingleQuote::InterpolationNotAllowed => {
|
||||
alloc.stack([
|
||||
alloc.concat([
|
||||
alloc.reflow("I am part way through parsing this scalar literal (character literal), "),
|
||||
alloc.reflow("but I encountered a string interpolation like \"\\(this)\", which is not "),
|
||||
alloc.reflow("allowed in scalar literals."),
|
||||
]),
|
||||
alloc.region_with_subregion(lines.convert_region(surroundings), region),
|
||||
alloc.concat([
|
||||
alloc.reflow(r"You could change it to something like "),
|
||||
alloc.parser_suggestion("'a'"),
|
||||
alloc.reflow(" or "),
|
||||
alloc.parser_suggestion("'\\n'"),
|
||||
alloc.reflow(". "),
|
||||
alloc.reflow("Note, roc strings use double quotes, like \"hello\".")
|
||||
]),
|
||||
])
|
||||
}
|
||||
};
|
||||
|
||||
Report {
|
||||
filename,
|
||||
doc,
|
||||
title: "INVALID SCALAR".to_string(),
|
||||
severity: Severity::RuntimeError,
|
||||
}
|
||||
}
|
||||
EString::EndlessSingleLine(pos) => {
|
||||
let surroundings = Region::new(start, pos);
|
||||
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
|
||||
|
||||
@ -990,7 +1079,31 @@ fn to_str_report<'a>(
|
||||
severity: Severity::RuntimeError,
|
||||
}
|
||||
}
|
||||
EString::EndlessMulti(pos) => {
|
||||
EString::ExpectedDoubleQuoteGotSingleQuote(pos) => {
|
||||
let surroundings = Region::new(start, pos);
|
||||
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
|
||||
|
||||
let doc = alloc.stack([
|
||||
alloc.reflow(r"I was expecting to see a string here, but I got a scalar literal."),
|
||||
alloc.region_with_subregion(lines.convert_region(surroundings), region),
|
||||
alloc.concat([
|
||||
alloc.reflow(r"You could change it to something like "),
|
||||
alloc.parser_suggestion("\"to be or not to be\""),
|
||||
alloc.reflow(" or even just "),
|
||||
alloc.parser_suggestion("\"\""),
|
||||
alloc.reflow(". "),
|
||||
alloc.reflow("Note, roc strings use double quotes."),
|
||||
]),
|
||||
]);
|
||||
|
||||
Report {
|
||||
filename,
|
||||
doc,
|
||||
title: "EXPECTED STRING".to_string(),
|
||||
severity: Severity::RuntimeError,
|
||||
}
|
||||
}
|
||||
EString::EndlessMultiLine(pos) => {
|
||||
let surroundings = Region::new(start, pos);
|
||||
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
|
||||
|
||||
|
@ -5303,6 +5303,23 @@ Tab characters are not allowed."###,
|
||||
"###
|
||||
);
|
||||
|
||||
test_report!(
|
||||
single_quote_too_long,
|
||||
r#"'abcdef'"#,
|
||||
@r###"
|
||||
── INVALID SCALAR ───────────────────────── tmp/single_quote_too_long/Test.roc ─
|
||||
|
||||
I am part way through parsing this scalar literal (character literal),
|
||||
but it's too long to fit in a U32 so it's not a valid scalar.
|
||||
|
||||
4│ 'abcdef'
|
||||
^
|
||||
|
||||
You could change it to something like 'a' or '\n'. Note, roc strings
|
||||
use double quotes, like "hello".
|
||||
"###
|
||||
);
|
||||
|
||||
test_report!(
|
||||
single_no_end,
|
||||
r#""there is no end"#,
|
||||
|
Loading…
Reference in New Issue
Block a user