diff --git a/asg/src/error/mod.rs b/asg/src/error/mod.rs index f3d9533ffa..fd045ff26d 100644 --- a/asg/src/error/mod.rs +++ b/asg/src/error/mod.rs @@ -192,6 +192,13 @@ impl AsgConvertError { ) } + pub fn duplicate_variable_definition(name: &str, span: &Span) -> Self { + Self::new_from_span( + format!("a variable named \"{}\" already exists in this scope", name), + span, + ) + } + pub fn index_into_non_tuple(name: &str, span: &Span) -> Self { Self::new_from_span(format!("failed to index into non-tuple '{}'", name), span) } diff --git a/asg/src/statement/definition.rs b/asg/src/statement/definition.rs index 4e2ce3cd83..f340052409 100644 --- a/asg/src/statement/definition.rs +++ b/asg/src/statement/definition.rs @@ -130,10 +130,16 @@ impl<'a> FromAst<'a, leo_ast::DefinitionStatement> for &'a Statement<'a> { } for variable in variables.iter() { - scope - .variables - .borrow_mut() - .insert(variable.borrow().name.name.to_string(), *variable); + let mut variables = scope.variables.borrow_mut(); + let var_name = variable.borrow().name.name.to_string(); + if variables.contains_key(&var_name) { + return Err(AsgConvertError::duplicate_variable_definition( + &var_name, + &statement.span, + )); + } + + variables.insert(var_name, *variable); } let statement = scope diff --git a/docs/rfc/006-arrays-without-size.md b/docs/rfc/006-arrays-without-size.md index 51e0e08f71..0fc67ac6c6 100644 --- a/docs/rfc/006-arrays-without-size.md +++ b/docs/rfc/006-arrays-without-size.md @@ -120,6 +120,15 @@ Thus, this RFC also proposed to extend Leo with such an operator. A possibility is `.length`, where `` is an expression of array type. A variation is `.length()`, if we want it look more like a built-in method on arrays. Yet another option is `length()`, which is more like a built-in function. +A shorter name could be `len`, leading to the three possibilities +`.len`, `.len()`, and `len()`. +So one dimension of the choice is the name (`length` vs. `len`), +and another dimension is the style: +member variable style, +member function style, +or global function style. +The decision on the latter should be driven by broader considerations +of how we want to treat this kind of built-in operators. Note that the result of this operator can, and in fact must, be calculated at compile time; not as part of the Leo interpreter, but rather as part of the flattening of Leo to R1CS. diff --git a/docs/rfc/007-type-aliases.md b/docs/rfc/007-type-aliases.md new file mode 100644 index 0000000000..4929e56a72 --- /dev/null +++ b/docs/rfc/007-type-aliases.md @@ -0,0 +1,162 @@ +# Leo RFC 007: Type Aliases + +## Authors + +- Max Bruce +- Collin Chin +- Alessandro Coglio +- Eric McCarthy +- Jon Pavlik +- Damir Shamanaev +- Damon Sicore +- Howard Wu + +## Status + +DRAFT + +# Summary + +This RFC proposes the addition of type aliases to Leo, +i.e. identifiers that abbreviate types and can be used wherever the latter can be used. +A new top-level construct is proposed to define type aliases; no circularities are allowed. +Type aliases are expanded away during compilation. + +# Motivation + +Many programming languages provide the ability to create aliases (i.e. synonyms) of types, such as C's `typedef`. +The purpose may be to abbreviate a longer type, +such as an alias `matrix` for `[i32; (3, 3)]` in an application in which 3x3 matrices of 32-bit integers are relevant +(e.g. for 3-D rotations, even though fractional numbers may be more realistic). +The purpose may also be to clarify the purpose and use of an existing type, +such as an alias `balance` for `u64` in an application that keeps track of balances. + +The initial motivation that inspired this RFC (along with other RFCs) +was the ability to have a type `string` for strings. +Strings are arrays of characters according to RFC 001. +With the array types of unspecified size proposed in RFC 006, +`[char; _]` becomes a generic type for strings, which is desirable to alias with `string`. + +# Design + +## Syntax + +The ABNF grammar changes as follows: +```ts +; modified rule: +keyword = ... + / %s"true" + / %s"type" ; new + / %s"u8" + / ... + +; new rule: +type-alias-declaration = %s"type" identifier "=" type ";" + +; modified rule: +declaration = import-declaration + / function-declaration + / circuit-declaration + / constant-declaration + / type-alias-declaration ; new +``` + +A type alias declaration introduces the identifier to stand for the type. +Only top-level type alias declarations are supported; +they are not supported inside functions or circuit types. + +## Semantics + +There must be no direct or indirect circularity in the type aliases. +That is, it must be possible to expand all the type aliases away, +obtaining an equivalent program without any type aliases. + +Note that the built-in `Self` is a bit like a type alias, standing for the enclosing circuit type; +and `Self` is replaced with the enclosing circuit type during canonicalization. +Thus, canonicalization could be a natural place to expand user-defined type aliases; +after all, type aliases introduce multiple ways to denote the same types +(and not just via direct aliasing, but also via indirect aliasing, or via aliasing of components), +and canonicalization serves exactly to reduce multiple ways to say the same thing to one canonical way. + +On the other hand, expanding type aliases is more complicated than the current canonicalization transformations, +which are all local and relatively simple. +Expanding type aliases requires not only checking for circularities, +but also to take into account references to type aliases from import declarations. +For this reason, we may perform type alias expansion after canonicalization, +such as just before type checking and inference. +We could also make the expansion a part of the type checking and inference process, +which already transforms the program by inferring missing types, +so it could also expand type aliases away. + +In any case, it seems beneficial to expand type aliases away +(whether during canonicalization or as part or preamble to type checking and inference) +prior to performing more processing of the program for eventual compilation to R1CS. + +## Examples + +The aforementioned 3x3 matrix example could be written as follows: +```ts +type matrix = [u32; (3, 3)]; + +function matrix_multiply(x: matrix, y: matrix) -> matrix { + ... +} +``` + +The aforementioned balance example could be written as follows: +```ts +type balance = u64; + +function f(...) -> (..., balance, ...) { + ... +} +``` + +The aforementioned string example could be written as follows: +```ts +type string = [char; _]; + +function f(str: string) -> ... { + ... +} +``` + +# Drawbacks + +As other extensions of the language, this makes things inherently a bit more complicated. + +# Effect on Ecosystem + +None; this is just a convenience for the Leo developer. + +# Alternatives + +An alternative to creating a type alias +```ts +type T = U; +``` +is to create a circuit type +```ts +circuit T { get: U } +``` +that contains a single member variable. + +This is clearly not equivalent to a type alias, because it involves conversions between `T` and `U` +```ts +T { get: u } // convert u:U to T +t.get // convert t:T to U +``` +whereas a type alias involves no conversions: +if `T` is an alias of `U`, then `T` and `U` are the same type, +more precisely two syntactically different ways to designate the same semantic type. + +While the conversions generally cause overhead in traditional programming languages, +this may not be the case for Leo's compilation to R1CS, +in which everything is flattened, including member variables of circuit types. +Thus, it may be the case that the circuit `T` above reduces to just its member `U` in R1CS. + +It might also be argued that wrapping a type into a one-member-variable circuit type +could be a better practice than aliasing the type, to enforce better type separation and safety. + +We need to consider the pros and cons of the two approaches, +particularly in light of Leo's non-traditional compilation target. diff --git a/docs/rfc/008-built-in-declarations.md b/docs/rfc/008-built-in-declarations.md new file mode 100644 index 0000000000..4f710ec3a0 --- /dev/null +++ b/docs/rfc/008-built-in-declarations.md @@ -0,0 +1,82 @@ +# Leo RFC 008: Built-in Declarations + +## Authors + +- Max Bruce +- Collin Chin +- Alessandro Coglio +- Eric McCarthy +- Jon Pavlik +- Damir Shamanaev +- Damon Sicore +- Howard Wu + +## Status + +DRAFT + +# Summary + +This RFC proposes a framework for making certain (top-level) declarations (e.g. type aliases) +available in every Leo program without the need to explicitly write those declarations. +These may be hardwired into the language, or provided by standard libraries/packages; +in the latter case, the libraries may be either implicitly imported or required to be explicitly imported. + +# Motivation + +It is common for programming languages to provide predefined types, functions, etc. +that can be readily used in programs. +The initial motivation for this in Leo was to have a type alias `string` for character arrays of unspecified sizes +(array types of unspecified sizes and type aliases are discussed in separate RFCs), +but the feature is clearly more general. + +# Design + +Leo supports four kinds of top-level declarations: +- Import declarations. +- Function declarations. +- Circuit type declarations. +- Global constant declarations. +- Type alias declarations. (Proposed in a separate RFC.) + +Leaving import declarations aside for the moment since they are "meta" in some sense +(as they bring in names of entities declared elsewhere), +it may make sense for any of the four kinds of declarations above to have built-in instances, +i.e. we could have some built-in functions, circuit types, global constants, and type aliases. +This is why this RFC talks of built-in declarations, more broadly than just built-in type aliases that inspired it. + +The built-in status of the envisioned declarations could be achieved in slightly different ways: +1. Their names could be simply available in any program, + without any explicit declaration found anywhere for them. +2. They could be declared in some core library files explicitly, + and be available in any program without needing to be explicitly import them, + like `java.lang.String` in Java or `std::Option` in Rust. +3. They could be declared in some core library files explicitly, + and be available only in programs that explicitly import them. + +From a user's perspective, there is not a lot of difference between cases 1 and 2 above: +in both cases, the names are available; the only difference is that in case 2 the user can see the declaration somewhere. + +Also note that case 2 could be seen as having an implicit (i.e. built-in) import of the library/libraries in question. +Again, imports are "meta" in this context, and what counts are really the other kinds of declarations. + +In cases 2 and 3, a related but somewhat independent issue is whether those declarations have Leo definitions or not. +The Leo library already includes functions like the one for BLAKE2s that are not defined in Leo, +but rather "natively" in Rust/R1CS. + +# Drawbacks + +This does not seem to bring any drawbacks. + +# Effect on Ecosystem + +This may interact with libraries and packages in some way, +if we go with case 2 or 3 above. +But it should be not much different from regular libraries/packages. + +# Alternatives + +The 'Design' section above currently discusses a few alternatives, +rather than prescribing a defined approach. +When consensus is reached on one of the alternatives discussed there, +the others will be moved to this section. diff --git a/grammar/FORMAT_ABNF_GRAMMAR.md b/grammar/FORMAT_ABNF_GRAMMAR.md index cc88f9fda1..e1283d1b8e 100644 Binary files a/grammar/FORMAT_ABNF_GRAMMAR.md and b/grammar/FORMAT_ABNF_GRAMMAR.md differ diff --git a/tests/compiler/function/duplicate_definition_fail.leo b/tests/compiler/function/duplicate_definition_fail.leo index b3f2eb8a7b..2909fae2b1 100644 --- a/tests/compiler/function/duplicate_definition_fail.leo +++ b/tests/compiler/function/duplicate_definition_fail.leo @@ -1,11 +1,13 @@ /* namespace: Compile expectation: Fail +input_file: input/integers.in */ -function main() { - console.log("{}", 1u8); +function main(a: u32) -> u32 { + console.log("{}", 1u8); + return 10u32; } function main() { diff --git a/tests/compiler/statements/duplicate_variable.leo b/tests/compiler/statements/duplicate_variable.leo new file mode 100644 index 0000000000..3271a58d7a --- /dev/null +++ b/tests/compiler/statements/duplicate_variable.leo @@ -0,0 +1,12 @@ +/* +namespace: Compile +expectation: Fail +input_file: inputs/dummy.in +*/ + +function main(k: bool) -> bool { + let x = 1u8; + let x = true; + + return k == true; +} \ No newline at end of file diff --git a/tests/expectations/compiler/compiler/function/duplicate_definition_fail.leo.out b/tests/expectations/compiler/compiler/function/duplicate_definition_fail.leo.out index f76592d7fd..aaffccc60a 100644 --- a/tests/expectations/compiler/compiler/function/duplicate_definition_fail.leo.out +++ b/tests/expectations/compiler/compiler/function/duplicate_definition_fail.leo.out @@ -2,4 +2,4 @@ namespace: Compile expectation: Fail outputs: - - " --> compiler-test:8:1\n |\n 8 | function main() {\n|\n 9 | ...\n|\n 10 | }\n | ^\n |\n = a function named \"main\" already exists in this scope" + - "- Circuit has no constraints, use inputs and registers in program to produce them" diff --git a/tests/expectations/compiler/compiler/statements/duplicate_variable.leo.out b/tests/expectations/compiler/compiler/statements/duplicate_variable.leo.out new file mode 100644 index 0000000000..a313c9602a --- /dev/null +++ b/tests/expectations/compiler/compiler/statements/duplicate_variable.leo.out @@ -0,0 +1,5 @@ +--- +namespace: Compile +expectation: Fail +outputs: + - " --> compiler-test:5:3\n |\n 5 | let x = true;\n | ^^^^^^^^^^^^\n |\n = a variable named \"x\" already exists in this scope"