From 447e50f8b4d4f3b8b605f1a8a7009718aec99c34 Mon Sep 17 00:00:00 2001 From: Eric McCarthy Date: Thu, 14 Apr 2022 02:04:20 -0700 Subject: [PATCH] [ABNF] disallow bidi override codepoints --- docs/grammar/README.md | 63 ++++++++++++++++++++++++++--------- docs/grammar/abnf-grammar.txt | 24 ++++++++----- 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/docs/grammar/README.md b/docs/grammar/README.md index 4522f663a9..5a9d1a6850 100644 --- a/docs/grammar/README.md +++ b/docs/grammar/README.md @@ -21,11 +21,26 @@ along with the Leo library. If not, see <https://www.gnu.org/licenses/>. Lexical Grammar --------------- + +```abnf +ascii = %x0-7F +``` + + +```abnf +safe-nonascii = %x80-2029 / %x202F-2065 / %x2070-D7FF / %xE000-10FFFF + ; excludes bidi overrides and high/low surrogates +``` + ```abnf -character = %x0-D7FF / %xE000-10FFFF ; Unicode code points decoded from UTF-8 +character = ascii / safe-nonascii + ; Unicode code points decoded from UTF-8 ``` +Go to: _[ascii](#user-content-ascii), [safe-nonascii](#user-content-safe-nonascii)_; + + ```abnf horizontal-tab = %x9 ; <HT> @@ -58,33 +73,48 @@ single-quote = %x27 ; ' ```abnf -not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but * +not-star = %x0-29 / %x2B-7F / safe-nonascii ; anything but * ``` +Go to: _[safe-nonascii](#user-content-safe-nonascii)_; + + ```abnf -not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF +not-star-or-slash = %x0-29 / %x2B-2E / %x30-7F / safe-nonascii ; anything but * or / ``` +Go to: _[safe-nonascii](#user-content-safe-nonascii)_; + + ```abnf -not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF +not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-7F / safe-nonascii ; anything but <LF> or <CR> ``` +Go to: _[safe-nonascii](#user-content-safe-nonascii)_; + + ```abnf -not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF +not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-7F / safe-nonascii ; anything but " or \ ``` +Go to: 
_[safe-nonascii](#user-content-safe-nonascii)_; + + ```abnf -not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF +not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-7F / safe-nonascii ; anything but ' or \ ``` +Go to: _[safe-nonascii](#user-content-safe-nonascii)_; + + ```abnf line-terminator = line-feed / carriage-return / carriage-return line-feed @@ -148,6 +178,7 @@ keyword = %s"address" / %s"char" / %s"console" / %s"const" + / %s"constant" / %s"else" / %s"field" / %s"for" @@ -236,6 +267,15 @@ signed-literal = numeral ( %s"i8" / %s"i16" / %s"i32" / %s"i64" / %s"i128" ) Go to: _[numeral](#user-content-numeral)_; + +```abnf +integer-literal = unsigned-literal + / signed-literal +``` + +Go to: _[signed-literal](#user-content-signed-literal), [unsigned-literal](#user-content-unsigned-literal)_; + + ```abnf field-literal = numeral %s"field" @@ -368,15 +408,6 @@ string-literal-element = not-double-quote-or-backslash Go to: _[ascii-character-escape](#user-content-ascii-character-escape), [not-double-quote-or-backslash](#user-content-not-double-quote-or-backslash), [simple-character-escape](#user-content-simple-character-escape), [unicode-character-escape](#user-content-unicode-character-escape)_; - -```abnf -integer-literal = unsigned-literal - / signed-literal -``` - -Go to: _[signed-literal](#user-content-signed-literal), [unsigned-literal](#user-content-unsigned-literal)_; - - ```abnf atomic-literal = integer-literal @@ -805,7 +836,7 @@ Go to: _[function-parameter](#user-content-function-parameter)_; ```abnf -function-parameter = [ %s"public" / %s"const" ] identifier ":" type +function-parameter = [ %s"public" / %s"constant" / %s"const" ] identifier ":" type ``` Go to: _[identifier](#user-content-identifier), [type](#user-content-type)_; diff --git a/docs/grammar/abnf-grammar.txt b/docs/grammar/abnf-grammar.txt index 5b91993063..f783304d26 100644 --- a/docs/grammar/abnf-grammar.txt +++ b/docs/grammar/abnf-grammar.txt @@ -19,7 +19,13 @@ 
; Lexical Grammar ; --------------- -character = %x0-D7FF / %xE000-10FFFF ; Unicode code points decoded from UTF-8 +ascii = %x0-7F + +safe-nonascii = %x80-2029 / %x202F-2065 / %x2070-D7FF / %xE000-10FFFF + ; excludes bidi overrides and high/low surrogates + +character = ascii / safe-nonascii + ; Unicode code points decoded from UTF-8 horizontal-tab = %x9 ; <HT> @@ -33,18 +39,18 @@ double-quote = %x22 ; " single-quote = %x27 ; ' -not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but * +not-star = %x0-29 / %x2B-7F / safe-nonascii ; anything but * -not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF +not-star-or-slash = %x0-29 / %x2B-2E / %x30-7F / safe-nonascii ; anything but * or / -not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF +not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-7F / safe-nonascii ; anything but <LF> or <CR> -not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF +not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-7F / safe-nonascii ; anything but " or \ -not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF +not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-7F / safe-nonascii ; anything but ' or \ line-terminator = line-feed / carriage-return / carriage-return line-feed @@ -112,6 +118,9 @@ unsigned-literal = numeral ( %s"u8" / %s"u16" / %s"u32" / %s"u64" / %s"u128" ) signed-literal = numeral ( %s"i8" / %s"i16" / %s"i32" / %s"i64" / %s"i128" ) +integer-literal = unsigned-literal + / signed-literal + field-literal = numeral %s"field" product-group-literal = numeral %s"group" @@ -160,9 +169,6 @@ string-literal-element = not-double-quote-or-backslash / ascii-character-escape / unicode-character-escape -integer-literal = unsigned-literal - / signed-literal - atomic-literal = integer-literal / field-literal / product-group-literal