mirror of
https://github.com/ProvableHQ/leo.git
synced 2024-12-23 10:12:21 +03:00
Merge pull request #1751 from AleoHQ/abnf-bidi-overrides
[ABNF] disallow bidi override codepoints
This commit is contained in:
commit
23fca6af47
@ -41,6 +41,14 @@ fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<St
|
|||||||
Some(ident)
|
Some(ident)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///
/// Returns `true` if `c` is a Unicode bidirectional formatting code point.
///
/// Two contiguous ranges are matched:
/// - U+202A..=U+202E: the explicit embeddings, overrides and their terminator
///   (LRE, RLE, PDF, LRO, RLO);
/// - U+2066..=U+2069: the explicit isolates and their terminator
///   (LRI, RLI, FSI, PDI).
///
/// These characters are invisible but can change the order in which the
/// surrounding text is displayed, so the lexer uses this predicate to reject
/// them inside comments, string literals and character literals.
///
fn is_bidi_override(c: char) -> bool {
    // Matching on `char` ranges avoids the integer cast and the manual
    // `lo <= x && x <= hi` comparisons.
    matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}')
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
// Eats the parts of the unicode character after \u.
|
// Eats the parts of the unicode character after \u.
|
||||||
fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
@ -198,6 +206,9 @@ impl Token {
|
|||||||
|
|
||||||
let mut len = 0;
|
let mut len = 0;
|
||||||
while let Some(c) = input.peek() {
|
while let Some(c) = input.peek() {
|
||||||
|
if is_bidi_override(*c) {
|
||||||
|
return Err(ParserError::lexer_bidi_override().into());
|
||||||
|
}
|
||||||
if c == &'"' {
|
if c == &'"' {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -215,8 +226,14 @@ impl Token {
|
|||||||
Some('\'') => {
|
Some('\'') => {
|
||||||
input.next();
|
input.next();
|
||||||
|
|
||||||
let (len, character) = Self::eat_char(&mut input)?;
|
match input.peek() {
|
||||||
|
Some(c) if is_bidi_override(*c) => {
|
||||||
|
return Err(ParserError::lexer_bidi_override().into());
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
let (len, character) = Self::eat_char(&mut input)?;
|
||||||
if input.next_if_eq(&'\'').is_some() {
|
if input.next_if_eq(&'\'').is_some() {
|
||||||
input.next();
|
input.next();
|
||||||
return Ok((len + 2, Token::CharLit(character)));
|
return Ok((len + 2, Token::CharLit(character)));
|
||||||
@ -294,6 +311,9 @@ impl Token {
|
|||||||
let mut comment = String::from("//");
|
let mut comment = String::from("//");
|
||||||
|
|
||||||
while let Some(c) = input.next_if(|c| c != &'\n') {
|
while let Some(c) = input.next_if(|c| c != &'\n') {
|
||||||
|
if is_bidi_override(c) {
|
||||||
|
return Err(ParserError::lexer_bidi_override().into());
|
||||||
|
}
|
||||||
comment.push(c);
|
comment.push(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -312,6 +332,9 @@ impl Token {
|
|||||||
|
|
||||||
let mut ended = false;
|
let mut ended = false;
|
||||||
while let Some(c) = input.next() {
|
while let Some(c) = input.next() {
|
||||||
|
if is_bidi_override(c) {
|
||||||
|
return Err(ParserError::lexer_bidi_override().into());
|
||||||
|
}
|
||||||
comment.push(c);
|
comment.push(c);
|
||||||
if c == '*' && input.next_if_eq(&'/').is_some() {
|
if c == '*' && input.next_if_eq(&'/').is_some() {
|
||||||
comment.push('/');
|
comment.push('/');
|
||||||
|
@ -21,11 +21,26 @@ along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
|
|||||||
Lexical Grammar
|
Lexical Grammar
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
<a name="ascii"></a>
|
||||||
|
```abnf
|
||||||
|
ascii = %x0-7F
|
||||||
|
```
|
||||||
|
|
||||||
|
<a name="safe-nonascii"></a>
|
||||||
|
```abnf
|
||||||
|
safe-nonascii = %x80-2029 / %x202F-2065 / %x206A-D7FF / %xE000-10FFFF
|
||||||
|
; excludes bidi overrides and high/low surrogates
|
||||||
|
```
|
||||||
|
|
||||||
<a name="character"></a>
|
<a name="character"></a>
|
||||||
```abnf
|
```abnf
|
||||||
character = %x0-D7FF / %xE000-10FFFF ; Unicode code points decoded from UTF-8
|
character = ascii / safe-nonascii
|
||||||
|
; Unicode code points decoded from UTF-8
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Go to: _[ascii](#user-content-ascii), [safe-nonascii](#user-content-safe-nonascii)_;
|
||||||
|
|
||||||
|
|
||||||
<a name="horizontal-tab"></a>
|
<a name="horizontal-tab"></a>
|
||||||
```abnf
|
```abnf
|
||||||
horizontal-tab = %x9 ; <HT>
|
horizontal-tab = %x9 ; <HT>
|
||||||
@ -58,33 +73,48 @@ single-quote = %x27 ; '
|
|||||||
|
|
||||||
<a name="not-star"></a>
|
<a name="not-star"></a>
|
||||||
```abnf
|
```abnf
|
||||||
not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but *
|
not-star = %x0-29 / %x2B-7F / safe-nonascii ; anything but *
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Go to: _[safe-nonascii](#user-content-safe-nonascii)_;
|
||||||
|
|
||||||
|
|
||||||
<a name="not-star-or-slash"></a>
|
<a name="not-star-or-slash"></a>
|
||||||
```abnf
|
```abnf
|
||||||
not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF
|
not-star-or-slash = %x0-29 / %x2B-2E / %x30-7F / safe-nonascii
|
||||||
; anything but * or /
|
; anything but * or /
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Go to: _[safe-nonascii](#user-content-safe-nonascii)_;
|
||||||
|
|
||||||
|
|
||||||
<a name="not-line-feed-or-carriage-return"></a>
|
<a name="not-line-feed-or-carriage-return"></a>
|
||||||
```abnf
|
```abnf
|
||||||
not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF
|
not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-7F / safe-nonascii
|
||||||
; anything but <LF> or <CR>
|
; anything but <LF> or <CR>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Go to: _[safe-nonascii](#user-content-safe-nonascii)_;
|
||||||
|
|
||||||
|
|
||||||
<a name="not-double-quote-or-backslash"></a>
|
<a name="not-double-quote-or-backslash"></a>
|
||||||
```abnf
|
```abnf
|
||||||
not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF
|
not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-7F / safe-nonascii
|
||||||
; anything but " or \
|
; anything but " or \
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Go to: _[safe-nonascii](#user-content-safe-nonascii)_;
|
||||||
|
|
||||||
|
|
||||||
<a name="not-single-quote-or-backslash"></a>
|
<a name="not-single-quote-or-backslash"></a>
|
||||||
```abnf
|
```abnf
|
||||||
not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF
|
not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-7F / safe-nonascii
|
||||||
; anything but ' or \
|
; anything but ' or \
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Go to: _[safe-nonascii](#user-content-safe-nonascii)_;
|
||||||
|
|
||||||
|
|
||||||
<a name="line-terminator"></a>
|
<a name="line-terminator"></a>
|
||||||
```abnf
|
```abnf
|
||||||
line-terminator = line-feed / carriage-return / carriage-return line-feed
|
line-terminator = line-feed / carriage-return / carriage-return line-feed
|
||||||
|
@ -19,7 +19,13 @@
|
|||||||
; Lexical Grammar
|
; Lexical Grammar
|
||||||
; ---------------
|
; ---------------
|
||||||
|
|
||||||
character = %x0-D7FF / %xE000-10FFFF ; Unicode code points decoded from UTF-8
|
ascii = %x0-7F
|
||||||
|
|
||||||
|
safe-nonascii = %x80-2029 / %x202F-2065 / %x206A-D7FF / %xE000-10FFFF
|
||||||
|
; excludes bidi overrides and high/low surrogates
|
||||||
|
|
||||||
|
character = ascii / safe-nonascii
|
||||||
|
; Unicode code points decoded from UTF-8
|
||||||
|
|
||||||
horizontal-tab = %x9 ; <HT>
|
horizontal-tab = %x9 ; <HT>
|
||||||
|
|
||||||
@ -33,18 +39,18 @@ double-quote = %x22 ; "
|
|||||||
|
|
||||||
single-quote = %x27 ; '
|
single-quote = %x27 ; '
|
||||||
|
|
||||||
not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but *
|
not-star = %x0-29 / %x2B-7F / safe-nonascii ; anything but *
|
||||||
|
|
||||||
not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF
|
not-star-or-slash = %x0-29 / %x2B-2E / %x30-7F / safe-nonascii
|
||||||
; anything but * or /
|
; anything but * or /
|
||||||
|
|
||||||
not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF
|
not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-7F / safe-nonascii
|
||||||
; anything but <LF> or <CR>
|
; anything but <LF> or <CR>
|
||||||
|
|
||||||
not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF
|
not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-7F / safe-nonascii
|
||||||
; anything but " or \
|
; anything but " or \
|
||||||
|
|
||||||
not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF
|
not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-7F / safe-nonascii
|
||||||
; anything but ' or \
|
; anything but ' or \
|
||||||
|
|
||||||
line-terminator = line-feed / carriage-return / carriage-return line-feed
|
line-terminator = line-feed / carriage-return / carriage-return line-feed
|
||||||
|
@ -374,4 +374,13 @@ create_errors!(
|
|||||||
msg: "`constant` is preferred over `const` for function parameters to indicate a R1CS constant.",
|
msg: "`constant` is preferred over `const` for function parameters to indicate a R1CS constant.",
|
||||||
help: None,
|
help: None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// For when the lexer encountered a bidi override character
|
||||||
|
@backtraced
|
||||||
|
lexer_bidi_override {
|
||||||
|
args: (),
|
||||||
|
msg: "Unicode bidi override code point encountered.",
|
||||||
|
help: None,
|
||||||
|
}
|
||||||
|
|
||||||
);
|
);
|
||||||
|
@ -48,3 +48,4 @@ outputs:
|
|||||||
- "Error [EPAR0370033]: The escaped unicode char `1234567890` is not within valid length of [1, 6]."
|
- "Error [EPAR0370033]: The escaped unicode char `1234567890` is not within valid length of [1, 6]."
|
||||||
- "Error [EPAR0370026]: Expected a closed char but found ``."
|
- "Error [EPAR0370026]: Expected a closed char but found ``."
|
||||||
- "Error [EPAR0370026]: Expected a closed char but found `😭`."
|
- "Error [EPAR0370026]: Expected a closed char but found `😭`."
|
||||||
|
- "Error [EPAR0370043]: Unicode bidi override code point encountered."
|
||||||
|
@ -15,3 +15,4 @@ outputs:
|
|||||||
- "Error [EPAR0370032]: There was no closing `}` after a escaped unicode `af🦀\"`."
|
- "Error [EPAR0370032]: There was no closing `}` after a escaped unicode `af🦀\"`."
|
||||||
- "Error [EPAR0370025]: Expected a closed string but found `\"`."
|
- "Error [EPAR0370025]: Expected a closed string but found `\"`."
|
||||||
- "Error [EPAR0370025]: Expected a closed string but found `⭇😍;`."
|
- "Error [EPAR0370025]: Expected a closed string but found `⭇😍;`."
|
||||||
|
- "Error [EPAR0370043]: Unicode bidi override code point encountered."
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
namespace: Parse
|
||||||
|
expectation: Fail
|
||||||
|
outputs:
|
||||||
|
- "Error [EPAR0370043]: Unicode bidi override code point encountered."
|
@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
namespace: Parse
|
||||||
|
expectation: Fail
|
||||||
|
outputs:
|
||||||
|
- "Error [EPAR0370043]: Unicode bidi override code point encountered."
|
@ -2,5 +2,6 @@
|
|||||||
namespace: ParseStatement
|
namespace: ParseStatement
|
||||||
expectation: Fail
|
expectation: Fail
|
||||||
outputs:
|
outputs:
|
||||||
|
- "Error [EPAR0370043]: Unicode bidi override code point encountered."
|
||||||
- "Error [EPAR0370009]: unexpected string: expected 'formatted string', got '1'\n --> test:1:13\n |\n 1 | console.log(1);\n | ^"
|
- "Error [EPAR0370009]: unexpected string: expected 'formatted string', got '1'\n --> test:1:13\n |\n 1 | console.log(1);\n | ^"
|
||||||
- "Error [EPAR0370007]: unexpected identifier: expected 'assert', 'error', 'log' -- got 'test'\n --> test:1:9\n |\n 1 | console.test();\n | ^^^^"
|
- "Error [EPAR0370007]: unexpected identifier: expected 'assert', 'error', 'log' -- got 'test'\n --> test:1:9\n |\n 1 | console.test();\n | ^^^^"
|
||||||
|
@ -56,3 +56,4 @@ expectation: Fail
|
|||||||
'\u{bbbbb}\u{aaaa}'
|
'\u{bbbbb}\u{aaaa}'
|
||||||
|
|
||||||
'😭😂😘'
|
'😭😂😘'
|
||||||
|
''
|
||||||
|
@ -28,3 +28,5 @@ expectation: Fail
|
|||||||
"\"
|
"\"
|
||||||
|
|
||||||
"⭇😍;
|
"⭇😍;
|
||||||
|
|
||||||
|
"2066:"
|
||||||
|
9
tests/parser/program/bidi_comment.leo
Normal file
9
tests/parser/program/bidi_comment.leo
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
namespace: Parse
|
||||||
|
expectation: Fail
|
||||||
|
*/
|
||||||
|
|
||||||
|
function main() {
|
||||||
|
// 202E<-here
|
||||||
|
let x = 1u8;
|
||||||
|
}
|
9
tests/parser/program/bidi_comment_2
Normal file
9
tests/parser/program/bidi_comment_2
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
namespace: Parse
|
||||||
|
expectation: Fail
|
||||||
|
*/
|
||||||
|
|
||||||
|
function main() {
|
||||||
|
/* next line starts with 2069
|
||||||
|
*/
|
||||||
|
}
|
9
tests/parser/program/bidi_comment_2.leo
Normal file
9
tests/parser/program/bidi_comment_2.leo
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
namespace: Parse
|
||||||
|
expectation: Fail
|
||||||
|
*/
|
||||||
|
|
||||||
|
function main() {
|
||||||
|
/* next line starts with 2069
|
||||||
|
*/
|
||||||
|
}
|
@ -3,6 +3,8 @@ namespace: ParseStatement
|
|||||||
expectation: Fail
|
expectation: Fail
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
console.error(""); // bidi override
|
||||||
|
|
||||||
console.log(1);
|
console.log(1);
|
||||||
|
|
||||||
console.test();
|
console.test();
|
Loading…
Reference in New Issue
Block a user