mirror of
https://github.com/tweag/nickel.git
synced 2024-09-19 23:47:54 +03:00
Fix interpolation in multiline strings
Fix issue #596. The main problem was a `token` annotation instead of a `regex` in the lexer. Since this bug was preventing the corresponding code path in the lexer from being triggered, this patch also fixes a few other small issues that appeared along the way. Bump the `logos` dependency and add a regression test as well.
This commit is contained in:
parent
ac9b9a1545
commit
98bc241ea6
12
Cargo.lock
generated
12
Cargo.lock
generated
@ -128,9 +128,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.4.4"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "474a626a67200bd107d44179bb3d4fc61891172d11696609264589be6a0e6a43"
|
||||
checksum = "bed554bd50246729a1ec158d08aa3235d1b69d94ad120ebe187e28894787e736"
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
@ -788,18 +788,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.11.4"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b91c49573597a5d6c094f9031617bb1fed15c0db68c81e6546d313414ce107e4"
|
||||
checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.11.5"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "797b1f8a0571b331c1b47e7db245af3dc634838da7a92b3bef4e30376ae1c347"
|
||||
checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
|
@ -30,7 +30,7 @@ regex = "0.2.1"
|
||||
simple-counter = "0.1.0"
|
||||
codespan = "0.11"
|
||||
codespan-reporting = "0.11"
|
||||
logos = "0.11.4"
|
||||
logos = "0.12.0"
|
||||
serde = { version = "1.0.117", features = ["derive"] }
|
||||
serde_json = "1.0.59"
|
||||
serde_yaml = "0.8.15"
|
||||
|
@ -333,7 +333,7 @@ pub enum MultiStringToken<'input> {
|
||||
/// The other rules should be sufficient to match this as a double quote followed by a
|
||||
/// `CandidateInterpolation`, but if we omit this token, the lexer can fail unexpectedly on
|
||||
/// valid inputs because of #200.
|
||||
#[token("\"#+\\{")]
|
||||
#[regex("\"#+\\{")]
|
||||
QuotesCandidateInterpolation(&'input str),
|
||||
/// Token emitted by the modal lexer for the parser once it has decided that a `CandidateEnd` is
|
||||
/// an actual end token.
|
||||
@ -437,7 +437,7 @@ impl<'input> Lexer<'input> {
|
||||
// `Normal`
|
||||
Some(ModalLexer::Normal(lexer)) => {
|
||||
self.stack.push(ModeElt::Normal(self.count));
|
||||
self.lexer.replace(morph(lexer));
|
||||
self.lexer = Some(morph(lexer));
|
||||
}
|
||||
_ => panic!("lexer::enter_strlike"),
|
||||
}
|
||||
@ -457,11 +457,11 @@ impl<'input> Lexer<'input> {
|
||||
match self.lexer.take() {
|
||||
//count must be zero, and we do not push it on the stack
|
||||
Some(ModalLexer::Str(lexer)) => {
|
||||
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
|
||||
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
|
||||
self.stack.push(ModeElt::Str);
|
||||
}
|
||||
Some(ModalLexer::MultiStr(lexer)) => {
|
||||
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
|
||||
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
|
||||
self.stack.push(ModeElt::MultiStr(self.count));
|
||||
}
|
||||
_ => panic!("lexer::enter_normal"),
|
||||
@ -479,7 +479,7 @@ impl<'input> Lexer<'input> {
|
||||
mode => panic!("lexer::leave_str (popped mode {:?})", mode),
|
||||
};
|
||||
|
||||
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
|
||||
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
|
||||
}
|
||||
_ => panic!("lexer::leave_str"),
|
||||
}
|
||||
@ -494,7 +494,7 @@ impl<'input> Lexer<'input> {
|
||||
mode => panic!("lexer::leave_str (popped mode {:?})", mode),
|
||||
};
|
||||
|
||||
self.lexer.replace(ModalLexer::Normal(lexer.morph()));
|
||||
self.lexer = Some(ModalLexer::Normal(lexer.morph()));
|
||||
}
|
||||
_ => panic!("lexer::leave_str"),
|
||||
}
|
||||
@ -505,10 +505,10 @@ impl<'input> Lexer<'input> {
|
||||
Some(ModalLexer::Normal(lexer)) => {
|
||||
// count must be 0
|
||||
match self.stack.pop() {
|
||||
Some(ModeElt::Str) => self.lexer.replace(ModalLexer::Str(lexer.morph())),
|
||||
Some(ModeElt::Str) => self.lexer = Some(ModalLexer::Str(lexer.morph())),
|
||||
Some(ModeElt::MultiStr(count)) => {
|
||||
self.count = count;
|
||||
self.lexer.replace(ModalLexer::MultiStr(lexer.morph()))
|
||||
self.lexer = Some(ModalLexer::MultiStr(lexer.morph()))
|
||||
}
|
||||
mode => panic!("lexer::leave_normal (popped mode {:?})", mode),
|
||||
};
|
||||
@ -566,28 +566,36 @@ impl<'input> Iterator for Lexer<'input> {
|
||||
token = Some(MultiStr(MultiStringToken::Interpolation));
|
||||
self.enter_normal();
|
||||
}
|
||||
// We never lex something as a `MultiStringToken::Interpolation` directly, but rather
|
||||
// generate it in this very function from other tokens. However, such a token could
|
||||
// have still been buffered in the previous iteration, and can thus be matched here,
|
||||
// which is why we need the case below.
|
||||
Some(MultiStr(MultiStringToken::Interpolation)) => self.enter_normal(),
|
||||
// If we encounter a `QuotesCandidateInterpolation` token with the right number of
|
||||
// characters, we need to split it into two tokens:
|
||||
// - a simple `"` literal
|
||||
// - a interpolation token
|
||||
// - a literal starting by a `"` followed by between 0 and k hashes `#`
|
||||
// - an interpolation token
|
||||
// The interpolation token is put in the buffer such that it will be returned next
|
||||
// time.
|
||||
//
|
||||
// For example, in `m##""###{exp}"##m`, the `"###{` is a `QuotesCandidateInterpolation`
|
||||
// which is split as a `"#` literal followed by an interpolation token.
|
||||
Some(MultiStr(MultiStringToken::QuotesCandidateInterpolation(s)))
|
||||
if s.len() == self.count =>
|
||||
if s.len() >= self.count =>
|
||||
{
|
||||
let split_at = s.len() - self.count + 1;
|
||||
let next_token = MultiStr(MultiStringToken::Interpolation);
|
||||
let next_span = Range {
|
||||
start: span.start + 1,
|
||||
start: span.start + split_at,
|
||||
end: span.end,
|
||||
};
|
||||
self.buffer.replace((next_token, next_span));
|
||||
self.buffer = Some((next_token, next_span));
|
||||
|
||||
token = Some(MultiStr(MultiStringToken::Literal(&s[0..1])));
|
||||
token = Some(MultiStr(MultiStringToken::Literal(&s[0..split_at])));
|
||||
span = Range {
|
||||
start: span.start,
|
||||
end: span.start + 1,
|
||||
end: span.start + split_at,
|
||||
};
|
||||
self.enter_normal();
|
||||
}
|
||||
// Otherwise, it is just part of the string, so we transform the token into a
|
||||
// `FalseInterpolation` one
|
||||
@ -637,7 +645,7 @@ impl<'input> Iterator for Lexer<'input> {
|
||||
// Ignore comment
|
||||
Some(Normal(NormalToken::LineComment)) => return self.next(),
|
||||
_ => (),
|
||||
}
|
||||
};
|
||||
|
||||
token.map(|t| Ok((span.start, t, span.end)))
|
||||
}
|
||||
|
@ -279,8 +279,7 @@ fn string_lexing() {
|
||||
lex_without_pos(r##"m#""#"#m"##),
|
||||
Ok(vec![
|
||||
Token::Normal(NormalToken::MultiStringStart(3)),
|
||||
Token::MultiStr(MultiStringToken::Literal("\"")),
|
||||
Token::MultiStr(MultiStringToken::Literal("#")),
|
||||
Token::MultiStr(MultiStringToken::Literal("\"#")),
|
||||
Token::MultiStr(MultiStringToken::End),
|
||||
])
|
||||
);
|
||||
|
@ -15,5 +15,10 @@ let Assert = fun l x => x || %blame% l in
|
||||
m#""#{"foo"}""#m == "\"foo\"",
|
||||
m#"""#m == "\"",
|
||||
m#""#"#"#"#m == "\"#\"#\"#",
|
||||
|
||||
// regression test for issue #596 (https://github.com/tweag/nickel/issues/596)
|
||||
let s = "Hello" in m##""##{s}" World"##m == "\"Hello\" World",
|
||||
let s = "Hello" in m##""###{s}" World"##m == "\"#Hello\" World",
|
||||
m##""##s"##m == "\"##s",
|
||||
]
|
||||
|> lists.foldl (fun x y => (x | #Assert) && y) true
|
||||
|
Loading…
Reference in New Issue
Block a user