From 3840e04f007908ac57d7c57d6c9429e322097c0f Mon Sep 17 00:00:00 2001 From: Eric McCarthy Date: Thu, 24 Mar 2022 11:55:35 -0700 Subject: [PATCH 1/4] [ABNF] remove Unicode codepoints that can not occur --- docs/grammar/README.md | 20 ++++++++++---------- docs/grammar/abnf-grammar.txt | 17 ++++++++++------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/grammar/README.md b/docs/grammar/README.md index 1e10756188..97b9dd8e56 100644 --- a/docs/grammar/README.md +++ b/docs/grammar/README.md @@ -21,10 +21,9 @@ along with the Leo library. If not, see <https://www.gnu.org/licenses/>. Lexical Grammar --------------- - -```abnf -character = %x0-10FFFF ; any Unicode code point -``` +This rule states the basic elements that form Leo code: +the Unicode code points that can be decoded from UTF-8. +character = %x0-D7FF / %xE000-10FFFF ```abnf @@ -58,29 +57,30 @@ single-quote = %x27 ; ' ```abnf -not-star = %x0-29 / %x2B-10FFFF ; anything but * +not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but * ``` ```abnf -not-star-or-slash = %x0-29 / %x2B-2E / %x30-10FFFF ; anything but * or / +not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF + ; anything but * or / ``` ```abnf -not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-10FFFF +not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF ; anything but <LF> or <CR> ``` ```abnf -not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-10FFFF +not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF ; anything but " or \ ``` ```abnf -not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-10FFFF +not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF ; anything but ' or \ ``` @@ -823,7 +823,7 @@ Format String Grammar ```abnf -not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } +not-brace = %x0-7A / %x7C / %x7E-D7FF / %xE000-10FFFF ; anything but { or } ``` diff --git a/docs/grammar/abnf-grammar.txt b/docs/grammar/abnf-grammar.txt index 21830a6f50..aafe2a6c90
100644 --- a/docs/grammar/abnf-grammar.txt +++ b/docs/grammar/abnf-grammar.txt @@ -19,7 +19,9 @@ ; Lexical Grammar ; --------------- -character = %x0-10FFFF ; any Unicode code point +; This rule states the basic elements that form Leo code: +; the Unicode code points that can be decoded from UTF-8. +character = %x0-D7FF / %xE000-10FFFF horizontal-tab = %x9 ; <HT> @@ -33,17 +35,18 @@ double-quote = %x22 ; " single-quote = %x27 ; ' -not-star = %x0-29 / %x2B-10FFFF ; anything but * +not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but * -not-star-or-slash = %x0-29 / %x2B-2E / %x30-10FFFF ; anything but * or / +not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF + ; anything but * or / -not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-10FFFF +not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF ; anything but <LF> or <CR> -not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-10FFFF +not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF ; anything but " or \ -not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-10FFFF +not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF ; anything but ' or \ line-terminator = line-feed / carriage-return / carriage-return line-feed @@ -326,7 +329,7 @@ file = *declaration ; Format String Grammar ; --------------------- -not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } +not-brace = %x0-7A / %x7C / %x7E-D7FF / %xE000-10FFFF ; anything but { or } format-string-container = "{}" From e6752d67a013d9014551ee629c92cb748c9154e7 Mon Sep 17 00:00:00 2001 From: Eric McCarthy Date: Thu, 24 Mar 2022 17:13:58 -0700 Subject: [PATCH 2/4] [ABNF] fix the code point range in the format string grammar, along with a note on what the code points mean --- docs/grammar/README.md | 6 +++++- docs/grammar/abnf-grammar.txt | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/grammar/README.md b/docs/grammar/README.md index
97b9dd8e56..e8e3b233a2 100644 --- a/docs/grammar/README.md +++ b/docs/grammar/README.md @@ -821,9 +821,13 @@ file = *declaration Format String Grammar --------------------- +The code points mentioned in this grammar represent +the elements of a Leo format string already parsed, +after escapes have been processed. + ```abnf -not-brace = %x0-7A / %x7C / %x7E-D7FF / %xE000-10FFFF ; anything but { or } +not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } ``` diff --git a/docs/grammar/abnf-grammar.txt b/docs/grammar/abnf-grammar.txt index aafe2a6c90..c67ec53237 100644 --- a/docs/grammar/abnf-grammar.txt +++ b/docs/grammar/abnf-grammar.txt @@ -329,7 +329,11 @@ file = *declaration ; Format String Grammar ; --------------------- -not-brace = %x0-7A / %x7C / %x7E-D7FF / %xE000-10FFFF ; anything but { or } +; The code points mentioned in this grammar represent +; the elements of a Leo format string already parsed, +; after escapes have been processed. + +not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } format-string-container = "{}" From a96e44c49b06842de061c234d67e7b893b39cd2d Mon Sep 17 00:00:00 2001 From: Eric McCarthy Date: Thu, 24 Mar 2022 17:26:10 -0700 Subject: [PATCH 3/4] [ABNF] trim down comments, which will move to the Leo Reference --- docs/grammar/README.md | 12 +++++------- docs/grammar/abnf-grammar.txt | 9 ++------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/docs/grammar/README.md b/docs/grammar/README.md index e8e3b233a2..67d8a8af7d 100644 --- a/docs/grammar/README.md +++ b/docs/grammar/README.md @@ -21,9 +21,10 @@ along with the Leo library. If not, see . Lexical Grammar --------------- -This rule states the basic elements that form Leo code: -the Unicode code points that can be decoded from UTF-8. 
-character = %x0-D7FF / %xE000-10FFFF + +```abnf +character = %x0-D7FF / %xE000-10FFFF ; Unicode code points decoded from UTF-8 +``` ```abnf @@ -821,13 +822,10 @@ file = *declaration Format String Grammar --------------------- -The code points mentioned in this grammar represent -the elements of a Leo format string already parsed, -after escapes have been processed. - ```abnf not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } + ; codes permitted in string after escapes processed, except braces ``` diff --git a/docs/grammar/abnf-grammar.txt b/docs/grammar/abnf-grammar.txt index c67ec53237..3fec230784 100644 --- a/docs/grammar/abnf-grammar.txt +++ b/docs/grammar/abnf-grammar.txt @@ -19,9 +19,7 @@ ; Lexical Grammar ; --------------- -; This rule states the basic elements that form Leo code: -; the Unicode code points that can be decoded from UTF-8. -character = %x0-D7FF / %xE000-10FFFF +character = %x0-D7FF / %xE000-10FFFF ; Unicode code points decoded from UTF-8 horizontal-tab = %x9 ; <HT> @@ -329,11 +327,8 @@ file = *declaration ; Format String Grammar ; --------------------- -; The code points mentioned in this grammar represent -; the elements of a Leo format string already parsed, -; after escapes have been processed. 
- not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } + ; codes permitted in string after escapes processed, except braces format-string-container = "{}" From 45c731f74ddf155f5a3a6645ec974bf1f39916f9 Mon Sep 17 00:00:00 2001 From: Eric McCarthy Date: Thu, 24 Mar 2022 17:29:06 -0700 Subject: [PATCH 4/4] [ABNF] remove extra comment on not-brace --- docs/grammar/README.md | 4 ++-- docs/grammar/abnf-grammar.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/grammar/README.md b/docs/grammar/README.md index 67d8a8af7d..b22408b78d 100644 --- a/docs/grammar/README.md +++ b/docs/grammar/README.md @@ -824,8 +824,8 @@ Format String Grammar ```abnf -not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } - ; codes permitted in string after escapes processed, except braces +not-brace = %x0-7A / %x7C / %x7E-10FFFF + ; codes permitted in string after escapes processed, except { or } ``` diff --git a/docs/grammar/abnf-grammar.txt b/docs/grammar/abnf-grammar.txt index 3fec230784..b41bfdfb9c 100644 --- a/docs/grammar/abnf-grammar.txt +++ b/docs/grammar/abnf-grammar.txt @@ -327,8 +327,8 @@ file = *declaration ; Format String Grammar ; --------------------- -not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } - ; codes permitted in string after escapes processed, except braces +not-brace = %x0-7A / %x7C / %x7E-10FFFF + ; codes permitted in string after escapes processed, except { or } format-string-container = "{}"