1
1
mirror of https://github.com/tweag/nickel.git synced 2024-11-10 10:46:49 +03:00

Merge pull request #227 from tweag/syntax/hash-interpolation-sequence

Use variable length `#{` for interpolation
This commit is contained in:
Yann Hamdaoui 2020-11-30 14:11:40 +00:00 committed by GitHub
commit 77a3188e50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 89 additions and 68 deletions

View File

@ -218,19 +218,17 @@ ChunkLiteral : String =
})
};
ChunkExpr: StrChunk<RichTerm> = DollarBrace <t: SpTerm<RichTerm>> "}" => StrChunk::Expr(t, 0);
ChunkExpr: StrChunk<RichTerm> = HashBrace <t: SpTerm<RichTerm>> "}" => StrChunk::Expr(t, 0);
DollarBrace = { "${", "multstr ${" };
HashBrace = { "#{", "multstr #{" };
Str: String = "\"" <s: ChunkLiteral> "\"" => s;
ChunkLiteralPart: Either<&'input str, char> = {
"str literal" => Either::Left(<>),
"str $" => Either::Left(<>),
"str #" => Either::Left(<>),
"multstr literal" => Either::Left(<>),
"multstr $" => Either::Left(<>),
"multstr \\" => Either::Left(<>),
"multstr \\${" => Either::Left(<>),
"false interpolation" => Either::Left(<>),
"false end" => Either::Left(<>),
"str esc char" => Either::Right(<>),
};
@ -463,13 +461,11 @@ extern {
enum Token<'input> {
"identifier" => Token::Normal(NormalToken::Identifier(<&'input str>)),
"str literal" => Token::Str(StringToken::Literal(<&'input str>)),
"str $" => Token::Str(StringToken::Dollar(<&'input str>)),
"str #" => Token::Str(StringToken::Hash(<&'input str>)),
"str esc char" => Token::Str(StringToken::EscapedChar(<char>)),
"multstr literal" => Token::MultiStr(MultiStringToken::Literal(<&'input str>)),
"multstr $" => Token::MultiStr(MultiStringToken::Dollar(<&'input str>)),
"multstr \\${" => Token::MultiStr(MultiStringToken::BackslashDollarBrace(<&'input str>)),
"multstr \\" => Token::MultiStr(MultiStringToken::Backslash(<&'input str>)),
"false end" => Token::MultiStr(MultiStringToken::FalseEnd(<&'input str>)),
"false interpolation" => Token::MultiStr(MultiStringToken::FalseInterpolation(<&'input str>)),
"num literal" => Token::Normal(NormalToken::NumLiteral(<f64>)),
"if" => Token::Normal(NormalToken::If),
@ -492,11 +488,8 @@ extern {
"." => Token::Normal(NormalToken::Dot),
".$" => Token::Normal(NormalToken::DotDollar),
"$[" => Token::Normal(NormalToken::DollarBracket),
"${" => Token::Str(StringToken::DollarBrace),
// `${` and `multstr ${` are morally the same token used in the same places,
// but they correspond to two different modes, so we need to have two
// distinct token
"multstr ${" => Token::MultiStr(MultiStringToken::DollarBrace),
"#{" => Token::Str(StringToken::HashBrace),
"multstr #{" => Token::MultiStr(MultiStringToken::Interpolation),
"-$" => Token::Normal(NormalToken::MinusDollar),
"+" => Token::Normal(NormalToken::Plus),

View File

@ -94,8 +94,6 @@ pub enum NormalToken<'input> {
DollarBracket,
#[token("$=")]
DollarEquals,
#[token("${")]
DollarBrace,
#[token("\"")]
DoubleQuote,
#[token("-$")]
@ -244,16 +242,16 @@ pub enum StringToken<'input> {
#[error]
Error,
#[regex("[^\"$\\\\]+")]
#[regex("[^\"#\\\\]+")]
Literal(&'input str),
#[token("\"")]
DoubleQuote,
// Has lower matching priority than `DollarBrace` according to Logos' rules.
#[token("$")]
Dollar(&'input str),
#[token("${")]
DollarBrace,
// Has lower matching priority than `HashBrace` according to Logos' rules.
#[token("#")]
Hash(&'input str),
#[token("#{")]
HashBrace,
#[regex("\\\\.", |lex| lex.slice().chars().nth(1))]
EscapedChar(char),
}
@ -264,32 +262,30 @@ pub enum MultiStringToken<'input> {
#[error]
Error,
#[regex("[^\"$\\\\]+")]
#[regex("[^\"#]+")]
Literal(&'input str),
// A token that starts as a multiline end delimiter, but is not one. To avoid hacking
// look-aheads in the lexer (which Logos doesn't support for performance reasons), we just use a
// separate token. This has lowest matching priority according to Logo's rules, so it is
// separate token. This one has lowest matching priority according to Logos' rules, so it is
// matched only if `CandidateEnd` cannot be
#[regex("\"(#+|(#+[^m]))?")]
#[regex("\"#*")]
FalseEnd(&'input str),
// A candidate end. A multiline string starting delimiter `MultiStringStart` can have a variable
// number of `#` character, so the lexer matchs candidate end delimiter, compare the number of
// characters, and either emit the `End` token above, or turn the `CandidateEnd` to a
// A candidate end. A multiline string starting delimiter `MultiStringStart` can have a
// variable number of `#` characters, so the lexer matches candidate end delimiters, compares the
// number of characters, and either emits the `End` token above, or turns the `CandidateEnd` into a
// `FalseEnd` otherwise
#[regex("\"#+m")]
CandidateEnd(&'input str),
// Same as previous: `Dollar` and `Backslash` have lower matching priority than `DollarBrace`
// and `BackslashDollarBrace`.
#[token("$")]
Dollar(&'input str),
#[token("${")]
DollarBrace,
#[token("\\")]
Backslash(&'input str),
#[token("\\${")]
BackslashDollarBrace(&'input str),
// Same as `FalseEnd` and `CandidateEnd` but for an interpolation sequence.
#[token("#+")]
FalseInterpolation(&'input str),
#[regex("#+\\{")]
CandidateInterpolation(&'input str),
// Token emitted by the modal lexer for the parser once it has decided that a `CandidateEnd` is
// an actual end token.
End,
Interpolation,
}
/// The tokens of the modal lexer.
@ -507,9 +503,20 @@ impl<'input> Iterator for Lexer<'input> {
// `DoubleQuote`, namely the normal one.
token = Some(Normal(NormalToken::DoubleQuote));
}
Some(Str(StringToken::DollarBrace)) | Some(MultiStr(MultiStringToken::DollarBrace)) => {
self.enter_normal()
// If we encounter a `CandidateInterpolation` token with the right number of characters, this is
// an interpolation sequence.
Some(MultiStr(MultiStringToken::CandidateInterpolation(s)))
if s.len() == (self.count - 1) =>
{
token = Some(MultiStr(MultiStringToken::Interpolation));
self.enter_normal();
}
// Otherwise, it is just part of the string, so we transform the token into a
// `FalseInterpolation` one
Some(MultiStr(MultiStringToken::CandidateInterpolation(s))) => {
token = Some(MultiStr(MultiStringToken::FalseInterpolation(s)))
}
Some(Str(StringToken::HashBrace)) => self.enter_normal(),
// Convert escape sequences to the corresponding character.
Some(Str(StringToken::EscapedChar(c))) => {
if let Some(esc) = escape_char(*c) {
@ -547,7 +554,7 @@ fn escape_char(chr: char) -> Option<char> {
'\'' => Some('\''),
'"' => Some('"'),
'\\' => Some('\\'),
'$' => Some('$'),
'#' => Some('#'),
'n' => Some('\n'),
'r' => Some('\r'),
't' => Some('\t'),

View File

@ -235,11 +235,11 @@ fn string_lexing() {
);
assert_eq!(
lex_without_pos("\"1 + ${ 1 } + 2\""),
lex_without_pos("\"1 + #{ 1 } + 2\""),
Ok(vec![
Token::Normal(NormalToken::DoubleQuote),
Token::Str(StringToken::Literal("1 + ")),
Token::Str(StringToken::DollarBrace),
Token::Str(StringToken::HashBrace),
Token::Normal(NormalToken::NumLiteral(1.0)),
Token::Normal(NormalToken::RBrace),
Token::Str(StringToken::Literal(" + 2")),
@ -248,13 +248,13 @@ fn string_lexing() {
);
assert_eq!(
lex_without_pos("\"1 + ${ \"${ 1 }\" } + 2\""),
lex_without_pos("\"1 + #{ \"#{ 1 }\" } + 2\""),
Ok(vec![
Token::Normal(NormalToken::DoubleQuote),
Token::Str(StringToken::Literal("1 + ")),
Token::Str(StringToken::DollarBrace),
Token::Str(StringToken::HashBrace),
Token::Normal(NormalToken::DoubleQuote),
Token::Str(StringToken::DollarBrace),
Token::Str(StringToken::HashBrace),
Token::Normal(NormalToken::NumLiteral(1.0)),
Token::Normal(NormalToken::RBrace),
Token::Normal(NormalToken::DoubleQuote),
@ -273,11 +273,11 @@ fn str_escape() {
mk_single_chunk("str\twith\nescapes"),
);
assert_eq!(
parse_without_pos(r#""\$\${ }\$""#),
mk_single_chunk("$${ }$"),
parse_without_pos("\"\\#\\#{ }\\#\""),
mk_single_chunk("##{ }#"),
);
assert_eq!(
parse_without_pos(r#""$a$b$c\${d\$""#),
mk_single_chunk("$a$b$c${d$"),
parse_without_pos("\"#a#b#c\\#{d#\""),
mk_single_chunk("#a#b#c#{d#"),
);
}

View File

@ -1160,28 +1160,28 @@ Assume(#alwaysTrue -> #alwaysFalse, not ) true
}
assert_eval_str(
r#""simple ${"interp" ++ "olation"} here""#,
r#""simple #{"interp" ++ "olation"} here""#,
"simple interpolation here",
);
assert_eval_str(r#""${"alone"}""#, "alone");
assert_eval_str(r##""#{"alone"}""##, "alone");
assert_eval_str(
r#""nested ${ "${(fun x => "${x}") "expression"}" }""#,
r##""nested #{ "#{(fun x => "#{x}") "expression"}" }""##,
"nested expression",
);
assert_eval_str(
r#""${"some"}${" " ++ "se" ++ "qu"}${"${"ence"}"}""#,
r##""#{"some"}#{" " ++ "se" ++ "qu"}#{"#{"ence"}"}""##,
"some sequence",
);
assert_eval_str(
r#""nested ${ {str = {a = "braces"}.a}.str } !""#,
r##""nested #{ {str = {a = "braces"}.a}.str } !""##,
"nested braces !",
);
assert_eval_str(
r#"let x = "world" in "Hello, ${x}! Welcome in ${let y = "universe" in "the ${x}-${y}"}""#,
r##"let x = "world" in "Hello, #{x}! Welcome in #{let y = "universe" in "the #{x}-#{y}"}""##,
"Hello, world! Welcome in the world-universe",
);
match eval_string(r#""bad type ${1 + 1}""#) {
match eval_string(r##""bad type #{1 + 1}""##) {
Err(Error::EvalError(EvalError::TypeError(_, _, _, _))) => (),
_ => assert!(false),
};
@ -1234,7 +1234,7 @@ Assume(#alwaysTrue -> #alwaysFalse, not ) true
fn poly_eq() {
assert_peq!("0", "0 + 0 + 0");
assert_peq!("true", "if true then true else false");
assert_peq!("\"a\" ++ \"b\" ++ \"c\"", "\"${\"a\" ++ \"b\"}\" ++ \"c\"");
assert_peq!("\"a\" ++ \"b\" ++ \"c\"", "\"#{\"a\" ++ \"b\"}\" ++ \"c\"");
assert_npeq!("1 + 1", "0");
assert_npeq!("true", "if true then false else true");
@ -1536,13 +1536,34 @@ Assume(#alwaysTrue -> #alwaysFalse, not ) true
#[test]
fn multiline_interpolation() {
assert_peq!(
r##"m#"Simple #{"interpolated"} string"#m"##,
"\"Simple interpolated string\""
);
assert_peq!(
r###"m##"Double ##{"interpolated"} string"##m"###,
"\"Double interpolated string\""
);
assert_peq!(
r###"m##"Not #{"interpolated"}"##m"###,
"\"Not \\#{\\\"interpolated\\\"}\""
);
assert_peq!(
r####"m###"###{"Triple"} ##{not} #{interpolated}"###m"####,
"\"Triple #\\#{not} \\#{interpolated}\""
);
assert_peq!(
r###"m#"#{m##"##{"Not"} #{interpolated}"##m} ##{"string"}"#m"###,
"\"Not \\#{interpolated} #\\#{\\\"string\\\"}\""
);
assert_peq!(
r###"
m#"
${m#"thi"#m ++ "s"}
${"is" ++ " an"}
#{m#"thi"#m ++ "s"}
#{"is" ++ " an"}
indented
${"${m##"te"##m}xt"}
#{"#{m##"te"##m}xt"}
"#m
"###,
"\"this\n is an\n indented\ntext\""
@ -1553,8 +1574,8 @@ Assume(#alwaysTrue -> #alwaysFalse, not ) true
let x = "I\n need\n indent!" in
m#"
base
${x}
${x}
#{x}
#{x}
"#m
"##,
r#""base
@ -1572,8 +1593,8 @@ I
let y = "me\ntoo" in
m#"
strip
${x} ${y}
${"not\nme"}
#{x} #{y}
#{"not\nme"}
"#m
"##,
r#""strip

View File

@ -118,7 +118,7 @@ mod tests {
fn basic() {
assert_json_eq!("1 + 1", 2.0);
assert_json_eq!("if true then false else true", false);
assert_json_eq!(r#""Hello, ${"world"}!""#, "Hello, world!");
assert_json_eq!(r##""Hello, #{"world"}!""##, "Hello, world!");
assert_json_eq!("`foo", "foo");
}
@ -127,7 +127,7 @@ mod tests {
assert_json_eq!("[]", json!([]));
assert_json_eq!("[(1+1), (2+2), (3+3)]", json!([2.0, 4.0, 6.0]));
assert_json_eq!(
r#"[`a, ("b" ++ "c"), "d${"e"}f", "g"]"#,
r##"[`a, ("b" ++ "c"), "d#{"e"}f", "g"]"##,
json!(["a", "bc", "def", "g"])
);
assert_json_eq!(