From 519c0228ba99bbc01e1562be02000eb32c29a2a0 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 7 Jun 2024 09:58:39 +0100 Subject: [PATCH] Add documentation for extended patterns (#1924) This commit update the manual to take into account the latest extensions of pattern matching, namely wildcard patterns, constant patterns, array patterns, pattern guards and or-patterns. Doing so, we also update the examples (in the manual and in the `examples` directory) to use pattern matching whenever it looks more idiomatic and make the code more readable. Co-authored-by: jneem --- core/stdlib/std.ncl | 3 +- doc/manual/contracts.md | 14 +- doc/manual/correctness.md | 8 +- doc/manual/merging.md | 4 +- doc/manual/syntax.md | 232 ++++++++++++------ doc/manual/tutorial.md | 4 + doc/manual/types-vs-contracts.md | 2 +- examples/arrays/arrays.ncl | 6 +- examples/config-gcc/config-gcc.ncl | 54 ++-- examples/fibonacci/fibonacci.ncl | 16 +- examples/polymorphism/polymorphism.ncl | 6 +- examples/record-contract/record-contract.ncl | 4 + .../simple-contracts/simple-contract-div.ncl | 2 + flake.lock | 6 +- 14 files changed, 229 insertions(+), 132 deletions(-) diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl index 43c11c12..5c83bff4 100644 --- a/core/stdlib/std.ncl +++ b/core/stdlib/std.ncl @@ -943,7 +943,8 @@ |> blame, # Outside of lazy data structures, we just use (==) - _ => fun ctr_label value => + _ => + fun ctr_label value => value |> check_typeof_eq ctr_label |> from_predicate ((==) constant) ctr_label, diff --git a/doc/manual/contracts.md b/doc/manual/contracts.md index 87720857..cf0407f8 100644 --- a/doc/manual/contracts.md +++ b/doc/manual/contracts.md @@ -90,13 +90,13 @@ custom contract: ```nickel { IsFoo = fun label value => - if std.is_string value then - if value == "foo" then - value - else - std.contract.blame_with_message "not equal to \"foo\"" label - else - std.contract.blame_with_message "not a string" label, + value |> match { + "foo" => value, + value if 
std.is_string value => + std.contract.blame_with_message "not equal to \"foo\"" label, + _ => + std.contract.blame_with_message "not a string" label, + } } ``` diff --git a/doc/manual/correctness.md b/doc/manual/correctness.md index f7b28d19..577b419c 100644 --- a/doc/manual/correctness.md +++ b/doc/manual/correctness.md @@ -207,7 +207,9 @@ using contract and type annotations. `split` can be given a contract annotation as follows: ```nickel #no-check -split | forall a. Array {key: String, value: a} -> {keys: Array String, values: Array a} = # etc. +split + | forall a. Array {key: String, value: a} + -> {keys: Array String, values: Array a} = # etc. ``` Contract annotations are checked at runtime. At this point functions are @@ -266,7 +268,9 @@ that: `split` can be given a type annotation as follows: ```nickel #no-check -split : forall a. Array {key: String, value: a} -> {keys: Array String, values: Array a} = # etc. +split + : forall a. Array {key: String, value: a} + -> {keys: Array String, values: Array a} = # etc. ``` Type annotations also give rise to contracts, which means that even if `split`'s diff --git a/doc/manual/merging.md b/doc/manual/merging.md index f880aa6d..372c6d30 100644 --- a/doc/manual/merging.md +++ b/doc/manual/merging.md @@ -607,10 +607,12 @@ argument), we do get a contract violation error: required_field2, } in + let intermediate = { foo | FooContract } & { foo.required_field1 = "here" } in + intermediate & { foo.required_field2 = "here" } |> std.deep_seq intermediate @@ -620,7 +622,7 @@ error: missing definition for `required_field2` 3 │ required_field2, │ ^^^^^^^^^^^^^^^ required here · - 8 │ & { foo.required_field1 = "here" } + 9 │ & { foo.required_field1 = "here" } │ ------------------------ in this record │ [...] 
diff --git a/doc/manual/syntax.md b/doc/manual/syntax.md index 1c1b5d30..4353a765 100644 --- a/doc/manual/syntax.md +++ b/doc/manual/syntax.md @@ -666,38 +666,51 @@ same language of patterns, described in the following section. A pattern starts with an optional alias of the form ` @ `. The inner pattern is either: -- an `any` pattern, which is just an identifier, and will match any value. - `any` patterns bring a new variable into scope (including when nested inside a - larger pattern). Said variables are bound to the corresponding constituent - parts of the matched value. -- a record patern +- an `any` pattern, which is just an identifier that will match any value. + `any` patterns bring a new variable into scope and can be nested inside a + larger pattern. Said variables are bound to the corresponding constituent + parts of the matched value. +- a wildcard pattern `_` which acts like an `any` pattern (matches + anything) but doesn't bind any variable. +- a constant pattern, which is a literal value: a number, a boolean, a string, + or `null`. - an enum pattern +- a record pattern +- an array pattern +- an or-pattern + +Additionally, patterns can be guarded by an `if` condition. Enum patterns, record +patterns, array patterns, or-patterns and pattern guards are detailed in the +following subsections. Complete examples of patterns are given in the section on +match expressions and destructuring. #### Enum pattern An enum pattern is an enum tag optionally applied to a pattern: `' `. -That is, an enum pattern is exactly like an enum value but whose optional -argument is another pattern (instead of a value). This pattern matches an enum +An enum pattern looks exactly like an enum value but the optional argument is +another pattern instead of an arbitrary expression. This pattern matches an enum value of the corresponding shape. -For example, `'Foo`, `'Bar x` or `'protocol {x,y}` are valid enum patterns. 
+For example, `'Foo`, `'Bar x` or `'protocol {x,y}` are valid enum patterns. They +match the values `'Foo`, `'Bar 5` and `'protocol {x = 1, y = 2}` +respectively. Two or more nested variant patterns must be parenthesized. For example, `'Ok -'Some 'Stuff` isn't a valid enum pattern. On the other hand, `'Ok ('Some 'Stuff)` -or `'Foo ('Bar x)` are valid enum patterns. +'Some 'Stuff` isn't a valid enum pattern. On the other hand, `'Ok ('Some +'Stuff)` and `'Foo ('Bar x)` are valid enum patterns. #### Record patterns -The syntax of record patterns is close to the syntax of record literals, albeit -more restricted. A record pattern is a list of field patterns enclosed into -braces, of the form `{ , .., , }`. +Similarly to other compound patterns, record patterns mimic the shape of record +literals. A record pattern is a list of field patterns enclosed in braces, of +the form `{ , .., , }`. A field pattern is of the form ` = `, where `` is a -sub-pattern matching the content of the field. For example, `foo=bar` and -`foo='Ok value` are valid field patterns. The `= ` part can be omitted when +sub-pattern matching the content of the field. For example, `foo = bar` and +`foo = 'Ok value` are valid field patterns. The `= ` part can be omitted when `` is an `any` pattern with the same name as the field: that is, `some_field` is a valid field pattern and is just shorthand for -`some_field=some_field`. +`some_field = some_field`. The optional annotation `` can include either: @@ -709,99 +722,97 @@ The optional annotation `` can include either: A contract annotation and a default annotation can be combined. **The presence or the absence of a contract annotation never changes whether or -not a pattern matches a value**. For example, both `{foo}`, `{foo | Number}` and -`{foo | String}` match the value `{foo = "hello"}`. The difference is that `{foo -| Number}` will result in a later contract error if `foo` is ever used. 
The -contract annotation is merely a convenient way to apply a contract to a value -extracted from the pattern match on the fly. +not a pattern matches a value**. For example, all of `{foo}`, `{foo | Number}` +and `{foo | String}` match the value `{foo = "hello"}`. The difference is that +`{foo | Number}` will result in a later contract error if `foo` is ever used. +The contract annotation is merely a convenient way to apply a contract to a +value extracted from the pattern match on the fly. On the other hand, a default annotation does make a difference on matching: -`{foo ? 5}` matches `{}` (and will bind `foo` to the default value `5`), but -the pattern `{foo}` doesn't match `{}`. +`{foo ? 5}` matches `{}` (and will bind `foo` to the default value `5`), but the +pattern `{foo}` doesn't match `{}`. Note that default values don't propagate to +aliases: `whole @ {foo ? 5}` will match `{}` and bind `whole` to `{}` and +`foo` to `5`. In particular, `whole` is *not* `{foo = 5}`. The optional `` part is either an ellipsis `..` or a capture `..`. By default, record patterns are closed, meaning that they won't match a record with additional fields: `{foo, bar}` doesn't match `{foo = 1, bar = 2, baz = 3}`. -The ellipsis `..` makes the pattern open, which will match a record with -additional fields. A capture has the same effect but also capture the rest of +The ellipsis `..` makes the pattern open. An open pattern matches a record with +additional fields. A capture has the same effect but also captures the rest of the matched record in a variable. For example, matching `{foo, ..rest}` with `{foo = 1, bar = 2, baz = 3}` will bind `foo` to `1` and `rest` to the record `{bar = 2, baz = 3}`. -You can find more examples of complete patterns below to illustrate -destructuring and match expressions. +#### Array pattern -### Destructuring +An array pattern is a list of patterns enclosed in brackets, of the form `[ +, .., , ]`. 
The rest is either an ellipsis `..` or a capture +`..`, as for record patterns. An array pattern without a rest will match +an array value with exactly the same number of elements, and whose elements +individually match the corresponding patterns. The presence of the rest allows +for more elements to be there. -Destructuring is an extension of the basic binding mechanisms to deconstruct a -structured value. +For example, `[first, ..rest]`, `[true, _, true]` and `[x, y, z, ..]` are valid +array patterns. -Destructuring can take place on a let binding with the form `let = value -in ` or at a function declaration with the form `fun .. => -`. +#### Or-patterns -Each value or argument is matched against the corresponding pattern and the -pattern variables are brought into scope (`any` patterns, aliases and captures). -If the pattern doesn't match the value, the evaluation stops with an error. Note -that because of Nickel's lazy evaluation, it might happen that the pattern -doesn't match but no error is raised as long as the variables bound by the -pattern are not used. +An or-pattern is a pattern with several alternatives. An or-pattern is +introduced by the `or` keyword, as in ` or `. Note that `or` +isn't a reserved keyword in Nickel (`or` can be used as an identifier, including +as a pattern variable, without any ambiguity). -Examples: +Each alternative is called an or-pattern branch. All the branches of an +or-pattern must bind exactly the same variables: that is, `('Foo x) or ('Bar y)` +isn't a valid or-pattern, but `('Foo {x,y}) or ('Bar {y,x})` is. -```nickel #repl -> let {x, y, z} = {x = 1, y = 1, z = 1} in x + y + z -3 +Aliases aren't allowed at the top-level of an or-pattern branch. For example, +`(x @ {}) or {x}` isn't a valid or-pattern. You can still alias the whole +or-pattern: `x @ (('Foo y) or ('Bar y))` is a valid or-pattern. 
-> let top @ {value} = {value = 1} in top & {duplicate = value} -{ duplicate = 1, value = 1, } +Additionally, enum variant patterns must be parenthesized at the top-level of an +or-pattern branch for readability reasons. For example, `'Foo x or 'Bar x` isn't +a valid or-pattern, but `('Foo x) or ('Bar x)` is. Similarly, `'Par or or 'Plus +or` isn't a valid or-pattern, but `('Par or) or ('Plus or)` is (in this case, +the `or` inside the parentheses is just a normal pattern variable). -> let 'Some {left, right = {..}} = 'Some {left = "left", right = {value="right"}} in left -"left" +Or-patterns can optionally be parenthesized when needed, as in `({..} or [..])`. -> let f = fun {deps ? [], parent ? null, children ? []} => deps @ children - in - f {deps = ["binutils"]} -[ "binutils" ] +#### Pattern guards -> let f = fun {wrapped=w1} {wrapped=w2} {wrapped=w3} => w1 + w2 + w3 - in - f {wrapped=1} {wrapped=10} {wrapped=100} -111 +A pattern guard is an optional boolean condition which is attached to a pattern +in a match expression. Note that pattern guards aren't allowed for destructuring +and they can't appear nested in a larger pattern. A guard is introduced by the +`if` keyword, as in ` if `. The condition is a Nickel +expression which can use the variables bound by the pattern and must evaluate to +a boolean. -> let {x | std.enum.TagOrString} = {x = "Hello"} in x -'Hello - -> let 'Invalid x = {} in x -error: unmatched pattern -[...] -``` +For example, `{tag = _, value = 'Wrapped x} if std.is_number x && x > 0` is a +valid guarded pattern. This pattern will match `{tag = 'Cut, value = 'Wrapped +5}` but not `{tag = 'Cut, value = 'Wrapped (-5)}`. ### Match expressions A match expression is a control flow construct which checks a value against one -or more patterns. A successful match also acts like destructuring and binds the -pattern variables to the corresponding constituent parts. 
When applicable, match -expressions can succintly and advantageously replace a long sequence of -if-then-else. +or more patterns. The first successful match binds the pattern variables to the +corresponding constituent parts. When applicable, match expressions can +succinctly and advantageously replace long or complex sequences of if-then-else. A match expression behaves as a function. It must be applied to the value to check. A match expression is introduced by the `match` keyword, followed by a -sequence of match arms enclosed by braces: +sequence of match arms enclosed by braces. Patterns can be guarded by an +additional condition. ```text match { - => , + ? => , ..., - => , - <_ => ?> + ? => , } ``` -The catch-all case is optional. - Examples: ```nickel #repl @@ -829,6 +840,79 @@ Examples: {format = 'elf32, ..rest} => 'Ok rest, } 'Ok { meta = { editor = "SuperCompany", }, type = 'binary, } + +> [1, 2, 3, 4] |> match { + [x] => 'Singleton x, + [x, y] => 'Pair {fst = x, snd = y}, + [x, y, ..rest] => 'PairAndTail {fst = x, snd = y, tail = rest}, + } +'PairAndTail { fst = 1, snd = 2, tail = [ 3, 4 ], } + +> {pin = "1234", security = 'Ecc} |> match { + {pin, ..} + if std.is_string pin && std.string.is_match "^\\d{4}$" pin => + 'Ok, + {pin, ..} if std.is_string pin => 'Error "Pin must be 4 digits", + {pin, ..} => 'Error "Pin must be a string", + } +'Ok +``` + +### Destructuring + +Destructuring is an extension of the basic let-binding mechanism to deconstruct +a structured value. + +Destructuring can take place on a let binding with the form `let = value +in ` or at a function declaration with the form `fun .. => +`. + +Destructuring is just a shorthand for a match expression with a single arm. That +is, `let = value in ` is equivalent to `value |> match { => + }`. `fun => ` is equivalent to `fun x => let = x in +`. If the pattern doesn't match the value, an unmatched pattern error is +raised. 
+ +Destructuring function arguments requires additional parentheses for enum +patterns and or-patterns. Indeed, `fun 'Foo x => ` might be ambiguous: it +can be either a function of one argument expecting a value of the form `'Foo x`, +that is an enum variant with tag `'Foo` and an argument, or a function of two +arguments expecting the first one to be the enum tag `'Foo`. To avoid the +confusion, enum variant patterns and or-patterns must be parenthesized in +argument position. Thus, `fun 'Foo x` is a function of two arguments and +`fun ('Foo x) => ` is a function of one argument. + +Examples: + +```nickel #repl +> let {x, y, z} = {x = 1, y = 1, z = 1} in x + y + z +3 + +> let top @ {value} = {value = 1} in top & {duplicate = value} +{ duplicate = 1, value = 1, } + +> let 'Some {left, right = {..}} = 'Some {left = "left", right = {value = "right"}} in left +"left" + +> let f = fun {deps ? [], parent ? null, children ? []} => deps @ children + in + f {deps = ["binutils"]} +[ "binutils" ] + +> let f = fun {wrapped=w1} {wrapped=w2} {wrapped=w3} => w1 + w2 + w3 + in + f {wrapped=1} {wrapped=10} {wrapped=100} +111 + +> let {x | std.enum.TagOrString} = {x = "Hello"} in x +'Hello + +> let [head, ..tail] = [1, 2, 3] in tail +[ 2, 3 ] + +> let 'Invalid x = {} in x +error: unmatched pattern +[...] ``` ## Annotations @@ -1006,7 +1090,7 @@ true > {foo = 1, bar = "string"} : {_ : Number} error: incompatible types - ┌─ :1:18 + ┌─ :1:18 │ 1 │ {foo = 1, bar = "string"} : {_ : Number} │ ^^^^^^^^ this expression @@ -1066,7 +1150,7 @@ annotation but no value are forbidden outside of types. 
```nickel #repl > {foo = 1, bar = "foo" } : {foo : Number, bar : String | optional} error: statically typed field without a definition - ┌─ :1:29 + ┌─ :1:29 │ 1 │ {foo = 1, bar = "foo" } : {foo : Number, bar : String | optional} │ ^^^ ------ but it has a type annotation diff --git a/doc/manual/tutorial.md b/doc/manual/tutorial.md index fa3efbba..a568bdfc 100644 --- a/doc/manual/tutorial.md +++ b/doc/manual/tutorial.md @@ -183,3 +183,7 @@ keyword, this field must be set in the final configuration. The second part tells us that in the first record in the users list, the field `name` has no value while it should have one. This is to be expected as we removed it earlier. + +From Nickel 1.5 and higher, if you are using the Nickel Language Server, you +should even see this contract violation being reported in your editor as you +type. diff --git a/doc/manual/types-vs-contracts.md b/doc/manual/types-vs-contracts.md index b1c69f9b..ea291fce 100644 --- a/doc/manual/types-vs-contracts.md +++ b/doc/manual/types-vs-contracts.md @@ -33,7 +33,7 @@ What to do depends on the context: local to a file, if your function is bound to a variable, it can be potentially reused in different places. - Example: `let append_tm: String -> String = fun s => s ++ "(TM)" in ...` + Example: `let append_tm: String -> String = fun s => "%{s} (TM)" in ...` - *Let-bound function inside a typed block: nothing or type annotation*. Inside a typed block, types are inferred, so it is OK for simple functions to not diff --git a/examples/arrays/arrays.ncl b/examples/arrays/arrays.ncl index f026c396..4caba0e4 100644 --- a/examples/arrays/arrays.ncl +++ b/examples/arrays/arrays.ncl @@ -8,8 +8,7 @@ let my_array_lib = { if arr == [] then [] else - let head = std.array.first arr in - let tail = std.array.drop_first arr in + let [head, ..tail] = arr in [f head] @ map f tail, fold : forall a b. 
(a -> b -> b) -> b -> Array a -> b @@ -17,8 +16,7 @@ let my_array_lib = { if arr == [] then first else - let head = std.array.first arr in - let tail = std.array.drop_first arr in + let [head, ..tail] = arr in f head (fold f first tail), } in diff --git a/examples/config-gcc/config-gcc.ncl b/examples/config-gcc/config-gcc.ncl index bc4a5ab8..8bd8adce 100644 --- a/examples/config-gcc/config-gcc.ncl +++ b/examples/config-gcc/config-gcc.ncl @@ -3,28 +3,28 @@ # Validate and normalize gcc flags. They can be either a string `-Wextra` or # a structured value `{flag = "W", arg = "extra"}`. Arguments are not checked. let GccFlag = - # We only allow the following flags - let available = ["W", "c", "S", "e", "o"] in - fun label value => - std.typeof value - |> match { - 'String => - if std.string.length value > 0 - && std.array.any ((==) (std.string.substring 0 1 value)) available then - value - else - std.contract.blame_with_message "unknown flag %{value}" label, - 'Record => - if std.record.has_field "flag" value && std.record.has_field "arg" value then - if std.array.any ((==) value.flag) available then - #Normalize the tag to a string - value.flag ++ value.arg - else - std.contract.blame_with_message "unknown flag %{value.flag}" label - else - std.contract.blame_with_message - "bad record structure: missing field `flag` or `arg`" - label, + let supported_flags = ["W", "c", "S", "e", "o"] in + let is_valid_flag + | doc "check if a string of length > 0 is a valid flag" + = fun string => + std.array.elem (std.string.substring 0 1 string) supported_flags + in + + fun label => + match { + value if std.is_string value && is_valid_flag value => + value, + { flag, arg } if std.array.elem flag supported_flags => + # Normalize the tag to a string + "%{flag}%{arg}", + value if std.is_string value => + std.contract.blame_with_message "unknown flag %{value}" label, + { flag, arg = _ } => + std.contract.blame_with_message "unknown flag %{flag}" label, + { .. 
} => + std.contract.blame_with_message + "bad record structure: missing field `flag` or `arg`" + label, _ => std.contract.blame_with_message "expected record or string" label, } in @@ -51,11 +51,11 @@ let SharedObjectFile = fun label value => std.contract.blame_with_message "not a string" label in -let OptLevel = fun label value => - if value == 0 || value == 1 || value == 2 then - value - else - std.contract.blame label +let OptLevel = fun label => + match { + value @ (0 or 1 or 2) => value, + _ => std.contract.blame label, + } in let Contract = { diff --git a/examples/fibonacci/fibonacci.ncl b/examples/fibonacci/fibonacci.ncl index f2cd0479..26f666b7 100644 --- a/examples/fibonacci/fibonacci.ncl +++ b/examples/fibonacci/fibonacci.ncl @@ -1,13 +1,11 @@ # test = 'pass' -# This is the naive, exponential version of fibonacci: don't call it on a big -# value! -let rec fibonacci = fun n => - if n == 0 then - 0 - else if n == 1 then - 1 - else - fibonacci (n - 1) + fibonacci (n - 2) +# This is the naive, exponential version of fibonacci: don't call it on a large +# number! +let rec fibonacci = match { + 0 => 0, + 1 => 1, + n => fibonacci (n - 1) + fibonacci (n - 2), +} in fibonacci 10 diff --git a/examples/polymorphism/polymorphism.ncl b/examples/polymorphism/polymorphism.ncl index 85d452ca..330533c7 100644 --- a/examples/polymorphism/polymorphism.ncl +++ b/examples/polymorphism/polymorphism.ncl @@ -1,8 +1,8 @@ # test = 'pass' # First projection, statically typed -let fst : forall a b. a -> b -> a = fun x y => x in +let first : forall a b. a -> b -> a = fun x y => x in # Evaluation function, statically typed -let ev : forall a b. (a -> b) -> a -> b = fun f x => f x in +let eval : forall a b. (a -> b) -> a -> b = fun f x => f x in let id : forall a. 
a -> a = fun x => x in -(ev id (fst 5 10) == 5 : Bool) +(eval id (first 5 10) == 5 : Bool) diff --git a/examples/record-contract/record-contract.ncl b/examples/record-contract/record-contract.ncl index e71c2e31..ff888d09 100644 --- a/examples/record-contract/record-contract.ncl +++ b/examples/record-contract/record-contract.ncl @@ -4,6 +4,10 @@ # Kubernetes configuration. # Schema and example derived from # https://github.com/kubernetes/examples/blob/master/guestbook-go/guestbook-controller.json. +# +# This example is illustrative. If you actually want to use Nickel with +# Kubernetes, consider using the auto-generated contracts from +# https://github.com/tweag/nickel-kubernetes/ instead let Port | doc "A contract for a port number" = std.contract.from_predicate diff --git a/examples/simple-contracts/simple-contract-div.ncl b/examples/simple-contracts/simple-contract-div.ncl index 4d7373f2..e2ab82f2 100644 --- a/examples/simple-contracts/simple-contract-div.ncl +++ b/examples/simple-contracts/simple-contract-div.ncl @@ -8,12 +8,14 @@ let Even = fun label value => else std.contract.blame label in + let DivBy3 = fun label value => if std.is_number value && value % 3 == 0 then value else std.contract.blame label in + # Will cause an error! 4 is not divisible by 3. ( 4 diff --git a/flake.lock b/flake.lock index d75c2820..d81a8162 100644 --- a/flake.lock +++ b/flake.lock @@ -498,11 +498,11 @@ "rust-overlay": "rust-overlay_2" }, "locked": { - "lastModified": 1717085654, - "narHash": "sha256-DoaPSJEs/3EWdzrgQtdVKFIvHALtZipwUI9DqqoSWgI=", + "lastModified": 1717691046, + "narHash": "sha256-bVDoatFPN7NRuAf4URTFNrYVU7phz2vJpROmnVmqvfw=", "owner": "tweag", "repo": "topiary", - "rev": "42f2630130c36d8b69615ed9b96b50196451c80b", + "rev": "1f11babe0d037cd84f8d909129dce497323f1e49", "type": "github" }, "original": {