[ABNF] Remove Unicode code points that cannot occur

This commit is contained in:
Eric McCarthy 2022-03-24 11:55:35 -07:00
parent 88c1bfd8a3
commit 3840e04f00
2 changed files with 20 additions and 17 deletions

View File

@ -21,10 +21,9 @@ along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
Lexical Grammar Lexical Grammar
--------------- ---------------
<a name="character"></a> This rule states the basic elements that form Leo code:
```abnf the Unicode code points that can be decoded from UTF-8 (all code points except the surrogates U+D800-U+DFFF).
character = %x0-10FFFF ; any Unicode code point character = %x0-D7FF / %xE000-10FFFF
```
<a name="horizontal-tab"></a> <a name="horizontal-tab"></a>
```abnf ```abnf
@ -58,29 +57,30 @@ single-quote = %x27 ; '
<a name="not-star"></a> <a name="not-star"></a>
```abnf ```abnf
not-star = %x0-29 / %x2B-10FFFF ; anything but * not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but *
``` ```
<a name="not-star-or-slash"></a> <a name="not-star-or-slash"></a>
```abnf ```abnf
not-star-or-slash = %x0-29 / %x2B-2E / %x30-10FFFF ; anything but * or / not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF
; anything but * or /
``` ```
<a name="not-line-feed-or-carriage-return"></a> <a name="not-line-feed-or-carriage-return"></a>
```abnf ```abnf
not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-10FFFF not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF
; anything but <LF> or <CR> ; anything but <LF> or <CR>
``` ```
<a name="not-double-quote-or-backslash"></a> <a name="not-double-quote-or-backslash"></a>
```abnf ```abnf
not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-10FFFF not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF
; anything but " or \ ; anything but " or \
``` ```
<a name="not-single-quote-or-backslash"></a> <a name="not-single-quote-or-backslash"></a>
```abnf ```abnf
not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-10FFFF not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF
; anything but ' or \ ; anything but ' or \
``` ```
@ -823,7 +823,7 @@ Format String Grammar
<a name="not-brace"></a> <a name="not-brace"></a>
```abnf ```abnf
not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } not-brace = %x0-7A / %x7C / %x7E-D7FF / %xE000-10FFFF ; anything but { or }
``` ```
<a name="format-string-container"></a> <a name="format-string-container"></a>

View File

@ -19,7 +19,9 @@
; Lexical Grammar ; Lexical Grammar
; --------------- ; ---------------
character = %x0-10FFFF ; any Unicode code point ; This rule states the basic elements that form Leo code:
; the Unicode code points that can be decoded from UTF-8 (all code points except the surrogates U+D800-U+DFFF).
character = %x0-D7FF / %xE000-10FFFF
horizontal-tab = %x9 ; <HT> horizontal-tab = %x9 ; <HT>
@ -33,17 +35,18 @@ double-quote = %x22 ; "
single-quote = %x27 ; ' single-quote = %x27 ; '
not-star = %x0-29 / %x2B-10FFFF ; anything but * not-star = %x0-29 / %x2B-D7FF / %xE000-10FFFF ; anything but *
not-star-or-slash = %x0-29 / %x2B-2E / %x30-10FFFF ; anything but * or / not-star-or-slash = %x0-29 / %x2B-2E / %x30-D7FF / %xE000-10FFFF
; anything but * or /
not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-10FFFF not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-D7FF / %xE000-10FFFF
; anything but <LF> or <CR> ; anything but <LF> or <CR>
not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-10FFFF not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-D7FF / %xE000-10FFFF
; anything but " or \ ; anything but " or \
not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-10FFFF not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-D7FF / %xE000-10FFFF
; anything but ' or \ ; anything but ' or \
line-terminator = line-feed / carriage-return / carriage-return line-feed line-terminator = line-feed / carriage-return / carriage-return line-feed
@ -326,7 +329,7 @@ file = *declaration
; Format String Grammar ; Format String Grammar
; --------------------- ; ---------------------
not-brace = %x0-7A / %x7C / %x7E-10FFFF ; anything but { or } not-brace = %x0-7A / %x7C / %x7E-D7FF / %xE000-10FFFF ; anything but { or }
format-string-container = "{}" format-string-container = "{}"