1
1
mirror of https://github.com/tweag/nickel.git synced 2024-09-19 23:47:54 +03:00

Fix interpolation in multiline strings

Fix issue #596. The main problem was a `token` annotation instead of a
`regex` in the lexer. Since this bug was preventing the corresponding
code path in the lexer from being triggered, this patch also fixes a few
other small issues that appeared along the way. Bump the `logos`
dependency and add a regression test as well.
This commit is contained in:
Yann Hamdaoui 2022-02-04 22:23:31 +01:00
parent ac9b9a1545
commit 98bc241ea6
5 changed files with 38 additions and 26 deletions

12
Cargo.lock generated
View File

@ -128,9 +128,9 @@ dependencies = [
[[package]]
name = "beef"
version = "0.4.4"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "474a626a67200bd107d44179bb3d4fc61891172d11696609264589be6a0e6a43"
checksum = "bed554bd50246729a1ec158d08aa3235d1b69d94ad120ebe187e28894787e736"
[[package]]
name = "bit-set"
@ -788,18 +788,18 @@ dependencies = [
[[package]]
name = "logos"
version = "0.11.4"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91c49573597a5d6c094f9031617bb1fed15c0db68c81e6546d313414ce107e4"
checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-derive"
version = "0.11.5"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "797b1f8a0571b331c1b47e7db245af3dc634838da7a92b3bef4e30376ae1c347"
checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
dependencies = [
"beef",
"fnv",

View File

@ -30,7 +30,7 @@ regex = "0.2.1"
simple-counter = "0.1.0"
codespan = "0.11"
codespan-reporting = "0.11"
logos = "0.11.4"
logos = "0.12.0"
serde = { version = "1.0.117", features = ["derive"] }
serde_json = "1.0.59"
serde_yaml = "0.8.15"

View File

@ -333,7 +333,7 @@ pub enum MultiStringToken<'input> {
/// The other rules should be sufficient to match this as a double quote followed by a
/// `CandidateInterpolation`, but if we omit this token, the lexer can fail unexpectedly on
/// valid inputs because of #200.
#[token("\"#+\\{")]
#[regex("\"#+\\{")]
QuotesCandidateInterpolation(&'input str),
/// Token emitted by the modal lexer for the parser once it has decided that a `CandidateEnd` is
/// an actual end token.
@ -437,7 +437,7 @@ impl<'input> Lexer<'input> {
// `Normal`
Some(ModalLexer::Normal(lexer)) => {
self.stack.push(ModeElt::Normal(self.count));
self.lexer.replace(morph(lexer));
self.lexer = Some(morph(lexer));
}
_ => panic!("lexer::enter_strlike"),
}
@ -457,11 +457,11 @@ impl<'input> Lexer<'input> {
match self.lexer.take() {
//count must be zero, and we do not push it on the stack
Some(ModalLexer::Str(lexer)) => {
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
self.stack.push(ModeElt::Str);
}
Some(ModalLexer::MultiStr(lexer)) => {
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
self.stack.push(ModeElt::MultiStr(self.count));
}
_ => panic!("lexer::enter_normal"),
@ -479,7 +479,7 @@ impl<'input> Lexer<'input> {
mode => panic!("lexer::leave_str (popped mode {:?})", mode),
};
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
}
_ => panic!("lexer::leave_str"),
}
@ -494,7 +494,7 @@ impl<'input> Lexer<'input> {
mode => panic!("lexer::leave_str (popped mode {:?})", mode),
};
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
}
_ => panic!("lexer::leave_str"),
}
@ -505,10 +505,10 @@ impl<'input> Lexer<'input> {
Some(ModalLexer::Normal(lexer)) => {
// count must be 0
match self.stack.pop() {
Some(ModeElt::Str) => self.lexer.replace(ModalLexer::Str(lexer.morph())),
Some(ModeElt::Str) => self.lexer = Some(ModalLexer::Str(lexer.morph())),
Some(ModeElt::MultiStr(count)) => {
self.count = count;
self.lexer.replace(ModalLexer::MultiStr(lexer.morph()))
self.lexer = Some(ModalLexer::MultiStr(lexer.morph()))
}
mode => panic!("lexer::leave_normal (popped mode {:?})", mode),
};
@ -566,28 +566,36 @@ impl<'input> Iterator for Lexer<'input> {
token = Some(MultiStr(MultiStringToken::Interpolation));
self.enter_normal();
}
// We never lex something as a `MultiStringToken::Interpolation` directly, but rather
// generate it in this very function from other tokens. However, such a token could
// have still been buffered in the previous iteration, and can thus be matched here,
// which is why we need the case below.
Some(MultiStr(MultiStringToken::Interpolation)) => self.enter_normal(),
// If we encounter a `QuotesCandidateInterpolation` token with the right number of
// characters, we need to split it into two tokens:
// - a simple `"` literal
// - a interpolation token
// - a literal starting with a `"` followed by between 0 and k hashes `#`
// - an interpolation token
// The interpolation token is put in the buffer such that it will be returned next
// time.
//
// For example, in `m##""###{exp}"##m`, the `"###{` is a `QuotesCandidateInterpolation`
// which is split as a `"#` literal followed by an interpolation token.
Some(MultiStr(MultiStringToken::QuotesCandidateInterpolation(s)))
if s.len() == self.count =>
if s.len() >= self.count =>
{
let split_at = s.len() - self.count + 1;
let next_token = MultiStr(MultiStringToken::Interpolation);
let next_span = Range {
start: span.start + 1,
start: span.start + split_at,
end: span.end,
};
self.buffer.replace((next_token, next_span));
self.buffer = Some((next_token, next_span));
token = Some(MultiStr(MultiStringToken::Literal(&s[0..1])));
token = Some(MultiStr(MultiStringToken::Literal(&s[0..split_at])));
span = Range {
start: span.start,
end: span.start + 1,
end: span.start + split_at,
};
self.enter_normal();
}
// Otherwise, it is just part of the string, so we transform the token into a
// `FalseInterpolation` one
@ -637,7 +645,7 @@ impl<'input> Iterator for Lexer<'input> {
// Ignore comment
Some(Normal(NormalToken::LineComment)) => return self.next(),
_ => (),
}
};
token.map(|t| Ok((span.start, t, span.end)))
}

View File

@ -279,8 +279,7 @@ fn string_lexing() {
lex_without_pos(r##"m#""#"#m"##),
Ok(vec![
Token::Normal(NormalToken::MultiStringStart(3)),
Token::MultiStr(MultiStringToken::Literal("\"")),
Token::MultiStr(MultiStringToken::Literal("#")),
Token::MultiStr(MultiStringToken::Literal("\"#")),
Token::MultiStr(MultiStringToken::End),
])
);

View File

@ -15,5 +15,10 @@ let Assert = fun l x => x || %blame% l in
m#""#{"foo"}""#m == "\"foo\"",
m#"""#m == "\"",
m#""#"#"#"#m == "\"#\"#\"#",
// regression test for issue #596 (https://github.com/tweag/nickel/issues/596)
let s = "Hello" in m##""##{s}" World"##m == "\"Hello\" World",
let s = "Hello" in m##""###{s}" World"##m == "\"#Hello\" World",
m##""##s"##m == "\"##s",
]
|> lists.foldl (fun x y => (x | #Assert) && y) true