leo/docs/grammar/README.md
Collin Chin 23fca6af47
Merge pull request #1751 from AleoHQ/abnf-bidi-overrides
[ABNF] disallow bidi override codepoints
2022-04-20 09:34:00 -07:00

24 KiB

Copyright (C) 2019-2022 Aleo Systems Inc. This file is part of the Leo library.

The Leo library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

The Leo library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with the Leo library. If not, see https://www.gnu.org/licenses/.


Lexical Grammar

ascii = %x0-7F

safe-nonascii = %x80-2029 / %x202F-2065 / %x2070-D7FF / %xE000-10FFFF
                ; excludes U+202A-202E and U+2066-206F (which cover the bidi
                ; embedding, override, and isolate controls) and the
                ; high/low surrogates U+D800-DFFF

character = ascii / safe-nonascii
            ; Unicode code points decoded from UTF-8

Go to: ascii, safe-nonascii;

horizontal-tab = %x9   ; <HT>

line-feed = %xA   ; <LF>

carriage-return = %xD   ; <CR>

space = %x20   ; <SP>

double-quote = %x22   ; "

single-quote = %x27   ; '

not-star = %x0-29 / %x2B-7F / safe-nonascii  ; anything but *

Go to: safe-nonascii;

not-star-or-slash = %x0-29 / %x2B-2E / %x30-7F / safe-nonascii
                    ; anything but * or /

Go to: safe-nonascii;

not-line-feed-or-carriage-return = %x0-9 / %xB-C / %xE-7F / safe-nonascii
                                   ; anything but <LF> or <CR>

Go to: safe-nonascii;

not-double-quote-or-backslash = %x0-21 / %x23-5B / %x5D-7F / safe-nonascii
                                ; anything but " or \

Go to: safe-nonascii;

not-single-quote-or-backslash = %x0-26 / %x28-5B / %x5D-7F / safe-nonascii
                                ; anything but ' or \

Go to: safe-nonascii;

line-terminator = line-feed / carriage-return / carriage-return line-feed

Go to: carriage-return, line-feed;

whitespace = space / horizontal-tab / line-terminator

Go to: horizontal-tab, line-terminator, space;

comment = block-comment / end-of-line-comment

Go to: block-comment, end-of-line-comment;

block-comment = "/*" rest-of-block-comment

Go to: rest-of-block-comment;

rest-of-block-comment = "*" rest-of-block-comment-after-star
                      / not-star rest-of-block-comment

Go to: not-star, rest-of-block-comment-after-star, rest-of-block-comment;

rest-of-block-comment-after-star = "/"
                                 / "*" rest-of-block-comment-after-star
                                 / not-star-or-slash rest-of-block-comment

Go to: not-star-or-slash, rest-of-block-comment-after-star, rest-of-block-comment;

end-of-line-comment = "//" *not-line-feed-or-carriage-return

keyword = %s"address"
        / %s"bool"
        / %s"char"
        / %s"console"
        / %s"const"
        / %s"constant"
        / %s"else"
        / %s"field"
        / %s"for"
        / %s"function"
        / %s"group"
        / %s"i8"
        / %s"i16"
        / %s"i32"
        / %s"i64"
        / %s"i128"
        / %s"if"
        / %s"in"
        / %s"let"
        / %s"public"
        / %s"return"
        / %s"u8"
        / %s"u16"
        / %s"u32"
        / %s"u64"
        / %s"u128"

uppercase-letter = %x41-5A   ; A-Z

lowercase-letter = %x61-7A   ; a-z

letter = uppercase-letter / lowercase-letter

Go to: lowercase-letter, uppercase-letter;

decimal-digit = %x30-39   ; 0-9

octal-digit = %x30-37   ; 0-7

hexadecimal-digit = decimal-digit / "a" / "b" / "c" / "d" / "e" / "f"

Go to: decimal-digit;

identifier = letter *( letter / decimal-digit / "_" )
             ; but not a keyword, a boolean literal, or aleo1... (see address-literal)

Go to: decimal-digit, letter;

numeral = 1*decimal-digit

unsigned-literal = numeral ( %s"u8" / %s"u16" / %s"u32" / %s"u64" / %s"u128" )

Go to: numeral;

signed-literal = numeral ( %s"i8" / %s"i16" / %s"i32" / %s"i64" / %s"i128" )

Go to: numeral;

field-literal = numeral %s"field"

Go to: numeral;

product-group-literal = numeral %s"group"

Go to: numeral;

boolean-literal = %s"true" / %s"false"

address-literal = %s"aleo1" 58( lowercase-letter / decimal-digit )

character-literal = single-quote character-literal-element single-quote

Go to: character-literal-element, single-quote;

character-literal-element = not-single-quote-or-backslash
                          / simple-character-escape
                          / ascii-character-escape
                          / unicode-character-escape

Go to: ascii-character-escape, not-single-quote-or-backslash, simple-character-escape, unicode-character-escape;

single-quote-escape = "\" single-quote   ; \'

Go to: single-quote;

double-quote-escape = "\" double-quote   ; \"

Go to: double-quote;

backslash-escape = "\\"

line-feed-escape = %s"\n"

carriage-return-escape = %s"\r"

horizontal-tab-escape = %s"\t"

null-character-escape = "\0"

simple-character-escape = single-quote-escape
                        / double-quote-escape
                        / backslash-escape
                        / line-feed-escape
                        / carriage-return-escape
                        / horizontal-tab-escape
                        / null-character-escape

Go to: backslash-escape, carriage-return-escape, double-quote-escape, horizontal-tab-escape, line-feed-escape, null-character-escape, single-quote-escape;

ascii-character-escape = %s"\x" octal-digit hexadecimal-digit

Go to: hexadecimal-digit, octal-digit;

unicode-character-escape = %s"\u{" 1*6hexadecimal-digit "}"

string-literal = double-quote *string-literal-element double-quote

Go to: double-quote, string-literal-element;

string-literal-element = not-double-quote-or-backslash
                       / simple-character-escape
                       / ascii-character-escape
                       / unicode-character-escape

Go to: ascii-character-escape, not-double-quote-or-backslash, simple-character-escape, unicode-character-escape;

integer-literal = unsigned-literal
                / signed-literal

Go to: signed-literal, unsigned-literal;

numeric-literal = integer-literal
                / field-literal
                / product-group-literal

Go to: field-literal, integer-literal, product-group-literal;

atomic-literal = numeric-literal
               / boolean-literal
               / address-literal
               / character-literal
               / string-literal

Go to: address-literal, boolean-literal, character-literal, numeric-literal, string-literal;

symbol = "!" / "&&" / "||"
       / "==" / "!="
       / "<" / "<=" / ">" / ">="
       / "+" / "-" / "*" / "/" / "**"
       / "="
       / "(" / ")"
       / "[" / "]"
       / "{" / "}"
       / "," / "." / ".." / ";" / ":" / "?"
       / "->" / "_"
       / %s")group"

token = keyword
      / identifier
      / atomic-literal
      / numeral
      / symbol

Go to: atomic-literal, identifier, keyword, numeral, symbol;

lexeme = token / comment / whitespace

Go to: comment, token, whitespace;


Syntactic Grammar

unsigned-type = %s"u8" / %s"u16" / %s"u32" / %s"u64" / %s"u128"

signed-type = %s"i8" / %s"i16" / %s"i32" / %s"i64" / %s"i128"

integer-type = unsigned-type / signed-type

Go to: signed-type, unsigned-type;

field-type = %s"field"

group-type = %s"group"

arithmetic-type = integer-type / field-type / group-type

Go to: field-type, group-type, integer-type;

boolean-type = %s"bool"

address-type = %s"address"

character-type = %s"char"

scalar-type = boolean-type / arithmetic-type / address-type / character-type

Go to: address-type, arithmetic-type, boolean-type, character-type;

type = scalar-type

Go to: scalar-type;

group-coordinate = ( [ "-" ] numeral ) / "+" / "-" / "_"

Go to: numeral;

affine-group-literal = "(" group-coordinate "," group-coordinate %s")group"

Go to: group-coordinate;

literal = atomic-literal / affine-group-literal

Go to: affine-group-literal, atomic-literal;

group-literal = product-group-literal / affine-group-literal

Go to: affine-group-literal, product-group-literal;

primary-expression = identifier
                   / literal
                   / "(" expression ")"
                   / function-call

Go to: expression, function-call, identifier, literal;

function-call = identifier function-arguments

Go to: function-arguments, identifier;

function-arguments = "(" [ expression *( "," expression ) [ "," ] ] ")"

Go to: expression;

unary-expression = primary-expression
                 / "!" unary-expression
                 / "-" unary-expression

Go to: primary-expression, unary-expression;

exponential-expression = unary-expression
                       / unary-expression "**" exponential-expression

Go to: exponential-expression, unary-expression;

multiplicative-expression = exponential-expression
                          / multiplicative-expression "*" exponential-expression
                          / multiplicative-expression "/" exponential-expression

Go to: exponential-expression, multiplicative-expression;

additive-expression = multiplicative-expression
                    / additive-expression "+" multiplicative-expression
                    / additive-expression "-" multiplicative-expression

Go to: additive-expression, multiplicative-expression;

ordering-expression = additive-expression
                    / additive-expression "<" additive-expression
                    / additive-expression ">" additive-expression
                    / additive-expression "<=" additive-expression
                    / additive-expression ">=" additive-expression

Go to: additive-expression;

equality-expression = ordering-expression
                    / ordering-expression "==" ordering-expression
                    / ordering-expression "!=" ordering-expression

Go to: ordering-expression;

conjunctive-expression = equality-expression
                       / conjunctive-expression "&&" equality-expression

Go to: conjunctive-expression, equality-expression;

disjunctive-expression = conjunctive-expression
                       / disjunctive-expression "||" conjunctive-expression

Go to: conjunctive-expression, disjunctive-expression;

binary-expression = disjunctive-expression

Go to: disjunctive-expression;

conditional-expression = binary-expression
                       / binary-expression "?" expression ":" expression

Go to: binary-expression, expression;

expression = conditional-expression

Go to: conditional-expression;

statement = return-statement
          / variable-declaration
          / constant-declaration
          / conditional-statement
          / loop-statement
          / assignment-statement
          / console-statement
          / block

Go to: assignment-statement, block, conditional-statement, console-statement, constant-declaration, loop-statement, return-statement, variable-declaration;

block = "{" *statement "}"

return-statement = %s"return" expression ";"

Go to: expression;

variable-declaration = %s"let" identifier ":" type "=" expression ";"

Go to: expression, identifier, type;

constant-declaration = %s"const" identifier ":" type "=" expression ";"

Go to: expression, identifier, type;

branch = %s"if" expression block

Go to: block, expression;

conditional-statement = branch
                      / branch %s"else" block
                      / branch %s"else" conditional-statement

Go to: block, branch, conditional-statement;

loop-statement = %s"for" identifier ":" type
                 %s"in" expression ".." expression
                 block

Go to: block, expression, identifier, type;

assignment-operator = "="

assignment-statement = expression assignment-operator expression ";"

Go to: assignment-operator, expression;

console-statement = %s"console" "." console-call ";"

Go to: console-call;

console-call = assert-call
             / print-call

Go to: assert-call, print-call;

assert-call = %s"assert" "(" expression ")"

Go to: expression;

print-function = %s"error" / %s"log"

print-arguments = "(" string-literal *( "," expression ) [ "," ] ")"

Go to: expression, string-literal;

print-call = print-function print-arguments

Go to: print-arguments, print-function;

function-declaration = %s"function" identifier
                       "(" [ function-parameters ] ")" "->" type
                       block

Go to: block, function-parameters, identifier, type;

function-parameters = function-parameter *( "," function-parameter ) [ "," ]

Go to: function-parameter;

function-parameter = [ %s"public" / %s"constant" / %s"const" ] identifier ":" type

Go to: identifier, type;

declaration = function-declaration

Go to: function-declaration;

file = *declaration

Format String Grammar

not-brace = %x0-7A / %x7C / %x7E-10FFFF
            ; code points permitted in a string after escapes are processed,
            ; except { or }

format-string-container = "{}"

format-string-open-brace = "{{"

format-string-close-brace = "}}"

format-string-element = not-brace
                      / format-string-container
                      / format-string-open-brace
                      / format-string-close-brace

Go to: format-string-close-brace, format-string-container, format-string-open-brace, not-brace;

format-string = *format-string-element

Input Grammar

input-type = type

Go to: type;

input-expression = literal

Go to: literal;

input-item = identifier ":" input-type "=" input-expression ";"

Go to: identifier, input-expression, input-type;

input-title = "[" identifier "]"

Go to: identifier;

input-section = input-title *input-item

Go to: input-item, input-title;

input-file = *input-section