diff --git a/create-dependency/src/DependencyCreator.elm b/create-dependency/src/DependencyCreator.elm index 7fb2c7f6..6ba5391f 100644 --- a/create-dependency/src/DependencyCreator.elm +++ b/create-dependency/src/DependencyCreator.elm @@ -58,12 +58,794 @@ parseThings flags = error +formatAlias alias_ = + "{ args = " ++ listOfThings stringify alias_.args ++ """ + , comment = """ ++ stringify alias_.comment ++ """ + , name = """ ++ stringify alias_.name ++ """ + , tipe = Elm.Type.Record [ ( "row", Elm.Type.Type "Basics.Int" [] ), ( "col", Elm.Type.Type "Basics.Int" [] ), ( "problem", Elm.Type.Type "Parser.Problem" [] ) ] Nothing + }""" + + +stringify : String -> String +stringify s = + "\"" ++ s ++ "\"" + + +listOfThings : (a -> String) -> List a -> String +listOfThings mapper list = + "[ " ++ String.join "\n" (List.map mapper list) ++ " ]" + + +formatModule mod = + """{ aliases = [ { args = [], comment = + |. symbol ")" + +All the parsers in this pipeline will chomp characters and produce values. So +`symbol "("` will chomp one paren and produce a `()` value. Similarly, `float` +will chomp some digits and produce a `Float` value. The `(|.)` and `(|=)` +operators just decide whether we give the values to the `Point` function. + +So in this case, we skip the `()` from `symbol "("`, we skip the `()` from +`spaces`, we keep the `Float` from `float`, etc. +", name = "|=", precedence = 5, tipe = Lambda (Type "Parser.Parser" [ Lambda (Var "a") (Var "b") ]) (Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Var "b" ])) } ] + , comment = " + +# Parsers +@docs Parser, run + +# Building Blocks +@docs int, float, number, symbol, keyword, variable, end + +# Pipelines +@docs succeed, (|=), (|.), lazy, andThen, problem + +# Branches +@docs oneOf, map, backtrackable, commit, token + +# Loops +@docs sequence, Trailing, loop, Step + +# Whitespace +@docs spaces, lineComment, multiComment, Nestable + +# Chompers +@docs getChompedString, chompIf, chompWhile, chompUntil, chompUntilEndOr, mapChompedString + +# Errors +@docs DeadEnd, Problem, deadEndsToString + +# Indentation +@docs withIndent, getIndent + +# Positions +@docs getPosition, getRow, getCol, getOffset, getSource +" + , name = """ ++ stringify mod.name ++ """ + , unions = [ { args = [], comment = " Not all languages handle multi-line comments the same. Multi-line comments +in C-style syntax are `NotNestable`, meaning they can be implemented like this: + + js : Parser () + js = + symbol "/*" + |. chompUntil "*/" + +In fact, `multiComment "/*" "*/" NotNestable` *is* implemented like that! It is +very simple, but it does not allow you to nest comments like this: + +```javascript +/* +line1 +/* line2 */ +line3 +*/ +``` + +It would stop on the first `*/`, eventually throwing a syntax error on the +second `*/`. This can be pretty annoying in long files. + +Languages like Elm allow you to nest multi-line comments, but your parser needs +to be a bit fancier to handle this. After you start a comment, you have to +detect if there is another one inside it! And then you have to make sure all +the `{-` and `-}` match up properly! Saying `multiComment "{-" "-}" Nestable` +does all that for you. +", name = "Nestable", tags = [ ( "NotNestable", [] ), ( "Nestable", [] ) ] }, { args = [], comment = " When you run into a `DeadEnd`, I record some information about why you +got stuck. This data is useful for producing helpful error messages. This is +how [`deadEndsToString`](#deadEndsToString) works! + +**Note:** If you feel limited by this type (i.e. having to represent custom +problems as strings) I highly recommend switching to `Parser.Advanced`. It +lets you define your own `Problem` type. It can also track "context" which +can improve error messages a ton! This is how the Elm compiler produces +relatively nice parse errors, and I am excited to see those techniques applied +elsewhere! +", name = "Problem", tags = [ ( "Expecting", [ Type "String.String" [] ] ), ( "ExpectingInt", [] ), ( "ExpectingHex", [] ), ( "ExpectingOctal", [] ), ( "ExpectingBinary", [] ), ( "ExpectingFloat", [] ), ( "ExpectingNumber", [] ), ( "ExpectingVariable", [] ), ( "ExpectingSymbol", [ Type "String.String" [] ] ), ( "ExpectingKeyword", [ Type "String.String" [] ] ), ( "ExpectingEnd", [] ), ( "UnexpectedChar", [] ), ( "Problem", [ Type "String.String" [] ] ), ( "BadRepeat", [] ) ] }, { args = [ "state", "a" ], comment = " Decide what steps to take next in your [`loop`](#loop). + +If you are `Done`, you give the result of the whole `loop`. If you decide to +`Loop` around again, you give a new state to work from. Maybe you need to add +an item to a list? Or maybe you need to track some information about what you +just saw? + +**Note:** It may be helpful to learn about [finite-state machines][fsm] to get +a broader intuition about using `state`. I.e. You may want to create a `type` +that describes four possible states, and then use `Loop` to transition between +them as you consume characters. + +[fsm]: https://en.wikipedia.org/wiki/Finite-state_machine +", name = "Step", tags = [ ( "Loop", [ Var "state" ] ), ( "Done", [ Var "a" ] ) ] }, { args = [], comment = " What’s the deal with trailing commas? Are they `Forbidden`? +Are they `Optional`? Are they `Mandatory`? Welcome to [shapes +club](https://poorlydrawnlines.com/comic/shapes-club/)! +", name = "Trailing", tags = [ ( "Forbidden", [] ), ( "Optional", [] ), ( "Mandatory", [] ) ] } ] + , values = [ { comment = " Parse one thing `andThen` parse another thing. This is useful when you want +to check on what you just parsed. For example, maybe you want U.S. zip codes +and `int` is not suitable because it does not allow leading zeros. You could +say: + + zipCode : Parser String + zipCode = + getChompedString (chompWhile Char.isDigit) + |> andThen checkZipCode + + checkZipCode : String -> Parser String + checkZipCode code = + if String.length code == 5 then + succeed code + else + problem "a U.S. zip code has exactly 5 digits" + +First we chomp digits `andThen` we check if it is a valid U.S. zip code. We +`succeed` if it has exactly five digits and report a `problem` if not. + +Check out [`examples/DoubleQuoteString.elm`](https://github.com/elm/parser/blob/master/examples/DoubleQuoteString.elm) +for another example, this time using `andThen` to verify unicode code points. + +**Note:** If you are using `andThen` recursively and blowing the stack, check +out the [`loop`](#loop) function to limit stack usage. +", name = "andThen", tipe = Lambda (Lambda (Var "a") (Type "Parser.Parser" [ Var "b" ])) (Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Var "b" ])) }, { comment = " It is quite tricky to use `backtrackable` well! It can be very useful, but +also can degrade performance and error message quality. + +Read [this document](https://github.com/elm/parser/blob/master/semantics.md) +to learn how `oneOf`, `backtrackable`, and `commit` work and interact with +each other. It is subtle and important! +", name = "backtrackable", tipe = Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Var "a" ]) }, { comment = " Chomp one character if it passes the test. + + chompUpper : Parser () + chompUpper = + chompIf Char.isUpper + +So this can chomp a character like `T` and produces a `()` value. +", name = "chompIf", tipe = Lambda (Lambda (Type "Char.Char" []) (Type "Basics.Bool" [])) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " Chomp until you see a certain string. You could define C-style multi-line +comments like this: + + comment : Parser () + comment = + symbol "/*" + |. chompUntil "*/" + +I recommend using [`multiComment`](#multiComment) for this particular scenario +though. It can be trickier than it looks! +", name = "chompUntil", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " Chomp until you see a certain string or until you run out of characters to +chomp! You could define single-line comments like this: + + elm : Parser () + elm = + symbol "--" + |. chompUntilEndOr "\\n" + +A file may end with a single-line comment, so the file can end before you see +a newline. Tricky! + +I recommend just using [`lineComment`](#lineComment) for this particular +scenario. +", name = "chompUntilEndOr", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " Chomp zero or more characters if they pass the test. This is commonly +useful for chomping whitespace or variable names: + + whitespace : Parser () + whitespace = + chompWhile (\\c -> c == ' ' || c == '\\t' || c == '\\n' || c == '\\r') + + elmVar : Parser String + elmVar = + getChompedString <| + succeed () + |. chompIf Char.isLower + |. chompWhile (\\c -> Char.isAlphaNum c || c == '_') + +**Note:** a `chompWhile` parser always succeeds! This can lead to tricky +situations, especially if you define your whitespace with it. In that case, +you could accidentally interpret `letx` as the keyword `let` followed by +"spaces" followed by the variable `x`. This is why the `keyword` and `number` +parsers peek ahead, making sure they are not followed by anything unexpected. +", name = "chompWhile", tipe = Lambda (Lambda (Type "Char.Char" []) (Type "Basics.Bool" [])) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " `commit` is almost always paired with `backtrackable` in some way, and it +is tricky to use well. + +Read [this document](https://github.com/elm/parser/blob/master/semantics.md) +to learn how `oneOf`, `backtrackable`, and `commit` work and interact with +each other. It is subtle and important! +", name = "commit", tipe = Lambda (Var "a") (Type "Parser.Parser" [ Var "a" ]) }, { comment = " Turn all the `DeadEnd` data into a string that is easier for people to +read. + +**Note:** This is just a baseline of quality. It cannot do anything with colors. +It is not interactivite. It just turns the raw data into strings. I really hope +folks will check out the source code for some inspiration on how to turn errors +into `Html` with nice colors and interaction! The `Parser.Advanced` module lets +you work with context as well, which really unlocks another level of quality! +The "context" technique is how the Elm compiler can say "I think I am parsing a +list, so I was expecting a closing `]` here." Telling users what the parser +_thinks_ is happening can be really helpful! +", name = "deadEndsToString", tipe = Lambda (Type "List.List" [ Type "Parser.DeadEnd" [] ]) (Type "String.String" []) }, { comment = " Check if you have reached the end of the string you are parsing. + + justAnInt : Parser Int + justAnInt = + succeed identity + |= int + |. end + + -- run justAnInt "90210" == Ok 90210 + -- run justAnInt "1 + 2" == Err ... + -- run int "1 + 2" == Ok 1 + +Parsers can succeed without parsing the whole string. Ending your parser +with `end` guarantees that you have successfully parsed the whole string. +", name = "end", tipe = Type "Parser.Parser" [ Tuple [] ] }, { comment = " Parse floats. + + run float "123" == Ok 123 + run float "3.1415" == Ok 3.1415 + run float "0.1234" == Ok 0.1234 + run float ".1234" == Ok 0.1234 + run float "1e-42" == Ok 1e-42 + run float "6.022e23" == Ok 6.022e23 + run float "6.022E23" == Ok 6.022e23 + run float "6.022e+23" == Ok 6.022e23 + +If you want to disable literals like `.123` (like in Elm) you could write +something like this: + + elmFloat : Parser Float + elmFloat = + oneOf + [ symbol "." + |. problem "floating point numbers must start with a digit, like 0.25" + , float + ] + +**Note:** If you want a parser for both `Int` and `Float` literals, check out +[`number`](#number) below. It will be faster than using `oneOf` to combining +`int` and `float` yourself. +", name = "float", tipe = Type "Parser.Parser" [ Type "Basics.Float" [] ] }, { comment = " Sometimes parsers like `int` or `variable` cannot do exactly what you +need. The "chomping" family of functions is meant for that case! Maybe you +need to parse [valid PHP variables][php] like `$x` and `$txt`: + + php : Parser String + php = + getChompedString <| + succeed () + |. chompIf (\\c -> c == '$') + |. chompIf (\\c -> Char.isAlpha c || c == '_') + |. chompWhile (\\c -> Char.isAlphaNum c || c == '_') + +The idea is that you create a bunch of chompers that validate the underlying +characters. Then `getChompedString` extracts the underlying `String` efficiently. + +**Note:** Maybe it is helpful to see how you can use [`getOffset`](#getOffset) +and [`getSource`](#getSource) to implement this function: + + getChompedString : Parser a -> Parser String + getChompedString parser = + succeed String.slice + |= getOffset + |. parser + |= getOffset + |= getSource + +[php]: https://www.w3schools.com/php/php_variables.asp +", name = "getChompedString", tipe = Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Type "String.String" [] ]) }, { comment = " This is a more efficient version of `map Tuple.second getPosition`. This +can be useful in combination with [`withIndent`](#withIndent) and +[`getIndent`](#getIndent), like this: + + checkIndent : Parser () + checkIndent = + succeed (\\indent column -> indent <= column) + |= getIndent + |= getCol + |> andThen checkIndentHelp + + checkIndentHelp : Bool -> Parser () + checkIndentHelp isIndented = + if isIndented then + succeed () + else + problem "expecting more spaces" + +So the `checkIndent` parser only succeeds when you are "deeper" than the +current indent level. You could use this to parse Elm-style `let` expressions. +", name = "getCol", tipe = Type "Parser.Parser" [ Type "Basics.Int" [] ] }, { comment = " When someone said `withIndent` earlier, what number did they put in there? + +- `getIndent` results in `0`, the default value +- `withIndent 4 getIndent` results in `4` + +So you are just asking about things you said earlier. These numbers do not leak +out of `withIndent`, so say we have: + + succeed Tuple.pair + |= withIndent 4 getIndent + |= getIndent + +Assuming there are no `withIndent` above this, you would get `(4,0)` from this. +", name = "getIndent", tipe = Type "Parser.Parser" [ Type "Basics.Int" [] ] }, { comment = " Editors think of code as a grid, but behind the scenes it is just a flat +array of UTF-16 characters. `getOffset` tells you your index in that flat +array. So if you chomp `"\\n\\n\\n\\n"` you are on row 5, column 1, and offset 4. + +**Note:** JavaScript uses a somewhat odd version of UTF-16 strings, so a single +character may take two slots. So in JavaScript, `'abc'.length === 3` but +`'πŸ™ˆπŸ™‰πŸ™Š'.length === 6`. Try it out! And since Elm runs in JavaScript, the offset +moves by those rules. +", name = "getOffset", tipe = Type "Parser.Parser" [ Type "Basics.Int" [] ] }, { comment = " Code editors treat code like a grid, with rows and columns. The start is +`row=1` and `col=1`. As you chomp characters, the `col` increments. When you +run into a `\\n` character, the `row` increments and `col` goes back to `1`. + +In the Elm compiler, I track the start and end position of every expression +like this: + + type alias Located a = + { start : (Int, Int) + , value : a + , end : (Int, Int) + } + + located : Parser a -> Parser (Located a) + located parser = + succeed Located + |= getPosition + |= parser + |= getPosition + +So if there is a problem during type inference, I use this saved position +information to underline the exact problem! + +**Note:** Tabs count as one character, so if you are parsing something like +Python, I recommend sorting that out *after* parsing. So if I wanted the `^^^^` +underline like in Elm, I would find the `row` in the source code and do +something like this: + + makeUnderline : String -> Int -> Int -> String + makeUnderline row minCol maxCol = + String.toList row + |> List.indexedMap (toUnderlineChar minCol maxCol) + |> String.fromList + + toUnderlineChar : Int -> Int -> Int -> Char -> Char + toUnderlineChar minCol maxCol col char = + if minCol <= col && col <= maxCol then + '^' + else if char == '\\t' then + '\\t' + else + ' ' + +So it would preserve any tabs from the source line. There are tons of other +ways to do this though. The point is just that you handle the tabs after +parsing but before anyone looks at the numbers in a context where tabs may +equal 2, 4, or 8. +", name = "getPosition", tipe = Type "Parser.Parser" [ Tuple [ Type "Basics.Int" [], Type "Basics.Int" [] ] ] }, { comment = " This is a more efficient version of `map Tuple.first getPosition`. Maybe +you just want to track the line number for some reason? This lets you do that. + +See [`getPosition`](#getPosition) for an explanation of rows and columns. +", name = "getRow", tipe = Type "Parser.Parser" [ Type "Basics.Int" [] ] }, { comment = " Get the full string that is being parsed. You could use this to define +`getChompedString` or `mapChompedString` if you wanted: + + getChompedString : Parser a -> Parser String + getChompedString parser = + succeed String.slice + |= getOffset + |. parser + |= getOffset + |= getSource +", name = "getSource", tipe = Type "Parser.Parser" [ Type "String.String" [] ] }, { comment = " Parse integers. + + run int "1" == Ok 1 + run int "1234" == Ok 1234 + + run int "-789" == Err ... + run int "0123" == Err ... + run int "1.34" == Err ... + run int "1e31" == Err ... + run int "123a" == Err ... + run int "0x1A" == Err ... + +If you want to handle a leading `+` or `-` you should do it with a custom +parser like this: + + myInt : Parser Int + myInt = + oneOf + [ succeed negate + |. symbol "-" + |= int + , int + ] + +**Note:** If you want a parser for both `Int` and `Float` literals, check out +[`number`](#number) below. It will be faster than using `oneOf` to combining +`int` and `float` yourself. +", name = "int", tipe = Type "Parser.Parser" [ Type "Basics.Int" [] ] }, { comment = " Parse keywords like `let`, `case`, and `type`. + + run (keyword "let") "let" == Ok () + run (keyword "let") "var" == Err ... (ExpectingKeyword "let") ... + run (keyword "let") "letters" == Err ... (ExpectingKeyword "let") ... + +**Note:** Notice the third case there! `keyword` actually looks ahead one +character to make sure it is not a letter, number, or underscore. The goal is +to help with parsers like this: + + succeed identity + |. keyword "let" + |. spaces + |= elmVar + |. spaces + |. symbol "=" + +The trouble is that `spaces` may chomp zero characters (to handle expressions +like `[1,2]` and `[ 1 , 2 ]`) and in this case, it would mean `letters` could +be parsed as `let ters` and then wonder where the equals sign is! Check out the +[`token`](#token) docs if you need to customize this! +", name = "keyword", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " Helper to define recursive parsers. Say we want a parser for simple +boolean expressions: + + true + false + (true || false) + (true || (true || false)) + +Notice that a boolean expression might contain *other* boolean expressions. +That means we will want to define our parser in terms of itself: + + type Boolean + = MyTrue + | MyFalse + | MyOr Boolean Boolean + + boolean : Parser Boolean + boolean = + oneOf + [ succeed MyTrue + |. keyword "true" + , succeed MyFalse + |. keyword "false" + , succeed MyOr + |. symbol "(" + |. spaces + |= lazy (\\_ -> boolean) + |. spaces + |. symbol "||" + |. spaces + |= lazy (\\_ -> boolean) + |. spaces + |. symbol ")" + ] + +**Notice that `boolean` uses `boolean` in its definition!** In Elm, you can +only define a value in terms of itself it is behind a function call. So +`lazy` helps us define these self-referential parsers. (`andThen` can be used +for this as well!) +", name = "lazy", tipe = Lambda (Lambda (Tuple []) (Type "Parser.Parser" [ Var "a" ])) (Type "Parser.Parser" [ Var "a" ]) }, { comment = " Parse single-line comments: + + elm : Parser () + elm = + lineComment "--" + + js : Parser () + js = + lineComment "//" + + python : Parser () + python = + lineComment "#" + +This parser is defined like this: + + lineComment : String -> Parser () + lineComment str = + symbol str + |. chompUntilEndOr "\\n" + +So it will consume the remainder of the line. If the file ends before you see +a newline, that is fine too. +", name = "lineComment", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " A parser that can loop indefinitely. This can be helpful when parsing +repeated structures, like a bunch of statements: + + statements : Parser (List Stmt) + statements = + loop [] statementsHelp + + statementsHelp : List Stmt -> Parser (Step (List Stmt) (List Stmt)) + statementsHelp revStmts = + oneOf + [ succeed (\\stmt -> Loop (stmt :: revStmts)) + |= statement + |. spaces + |. symbol ";" + |. spaces + , succeed () + |> map (\\_ -> Done (List.reverse revStmts)) + ] + + -- statement : Parser Stmt + +Notice that the statements are tracked in reverse as we `Loop`, and we reorder +them only once we are `Done`. This is a very common pattern with `loop`! + +Check out [`examples/DoubleQuoteString.elm`](https://github.com/elm/parser/blob/master/examples/DoubleQuoteString.elm) +for another example. + +**IMPORTANT NOTE:** Parsers like `succeed ()` and `chompWhile Char.isAlpha` can +succeed without consuming any characters. So in some cases you may want to use +[`getOffset`](#getOffset) to ensure that each step actually consumed characters. +Otherwise you could end up in an infinite loop! + +**Note:** Anything you can write with `loop`, you can also write as a parser +that chomps some characters `andThen` calls itself with new arguments. The +problem with calling `andThen` recursively is that it grows the stack, so you +cannot do it indefinitely. So `loop` is important because enables tail-call +elimination, allowing you to parse however many repeats you want. +", name = "loop", tipe = Lambda (Var "state") (Lambda (Lambda (Var "state") (Type "Parser.Parser" [ Type "Parser.Step" [ Var "state", Var "a" ] ])) (Type "Parser.Parser" [ Var "a" ])) }, { comment = " Transform the result of a parser. Maybe you have a value that is +an integer or `null`: + + nullOrInt : Parser (Maybe Int) + nullOrInt = + oneOf + [ map Just int + , map (\\_ -> Nothing) (keyword "null") + ] + + -- run nullOrInt "0" == Ok (Just 0) + -- run nullOrInt "13" == Ok (Just 13) + -- run nullOrInt "null" == Ok Nothing + -- run nullOrInt "zero" == Err ... +", name = "map", tipe = Lambda (Lambda (Var "a") (Var "b")) (Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Var "b" ])) }, { comment = " This works just like [`getChompedString`](#getChompedString) but gives +a bit more flexibility. For example, maybe you want to parse Elm doc comments +and get (1) the full comment and (2) all of the names listed in the docs. + +You could implement `mapChompedString` like this: + + mapChompedString : (String -> a -> b) -> Parser a -> Parser String + mapChompedString func parser = + succeed (\\start value end src -> func (String.slice start end src) value) + |= getOffset + |= parser + |= getOffset + |= getSource + +", name = "mapChompedString", tipe = Lambda (Lambda (Type "String.String" []) (Lambda (Var "a") (Var "b"))) (Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Var "b" ])) }, { comment = " Parse multi-line comments. So if you wanted to parse Elm whitespace or +JS whitespace, you could say: + + elm : Parser () + elm = + loop 0 <| ifProgress <| + oneOf + [ lineComment "--" + , multiComment "{-" "-}" Nestable + , spaces + ] + + js : Parser () + js = + loop 0 <| ifProgress <| + oneOf + [ lineComment "//" + , multiComment "/*" "*/" NotNestable + , chompWhile (\\c -> c == ' ' || c == '\\n' || c == '\\r' || c == '\\t') + ] + + ifProgress : Parser a -> Int -> Parser (Step Int ()) + ifProgress parser offset = + succeed identity + |. parser + |= getOffset + |> map (\\newOffset -> if offset == newOffset then Done () else Loop newOffset) + +**Note:** The fact that `spaces` comes last in the definition of `elm` is very +important! It can succeed without consuming any characters, so if it were the +first option, it would always succeed and bypass the others! (Same is true of +`chompWhile` in `js`.) This possibility of success without consumption is also +why wee need the `ifProgress` helper. It detects if there is no more whitespace +to consume. +", name = "multiComment", tipe = Lambda (Type "String.String" []) (Lambda (Type "String.String" []) (Lambda (Type "Parser.Nestable" []) (Type "Parser.Parser" [ Tuple [] ]))) }, { comment = " Parse a bunch of different kinds of numbers without backtracking. A parser +for Elm would need to handle integers, floats, and hexadecimal like this: + + type Expr + = Variable String + | Int Int + | Float Float + | Apply Expr Expr + + elmNumber : Parser Expr + elmNumber = + number + { int = Just Int + , hex = Just Int -- 0x001A is allowed + , octal = Nothing -- 0o0731 is not + , binary = Nothing -- 0b1101 is not + , float = Just Float + } + +If you wanted to implement the [`float`](#float) parser, it would be like this: + + float : Parser Float + float = + number + { int = Just toFloat + , hex = Nothing + , octal = Nothing + , binary = Nothing + , float = Just identity + } + +Notice that it actually is processing `int` results! This is because `123` +looks like an integer to me, but maybe it looks like a float to you. If you had +`int = Nothing`, floats would need a decimal like `1.0` in every case. If you +like explicitness, that may actually be preferable! + +**Note:** This function does not check for weird trailing characters in the +current implementation, so parsing `123abc` can succeed up to `123` and then +move on. This is helpful for people who want to parse things like `40px` or +`3m`, but it requires a bit of extra code to rule out trailing characters in +other cases. +", name = "number", tipe = Lambda (Record [ ( "int", Type "Maybe.Maybe" [ Lambda (Type "Basics.Int" []) (Var "a") ] ), ( "hex", Type "Maybe.Maybe" [ Lambda (Type "Basics.Int" []) (Var "a") ] ), ( "octal", Type "Maybe.Maybe" [ Lambda (Type "Basics.Int" []) (Var "a") ] ), ( "binary", Type "Maybe.Maybe" [ Lambda (Type "Basics.Int" []) (Var "a") ] ), ( "float", Type "Maybe.Maybe" [ Lambda (Type "Basics.Float" []) (Var "a") ] ) ] Nothing) (Type "Parser.Parser" [ Var "a" ]) }, { comment = " If you are parsing JSON, the values can be strings, floats, booleans, +arrays, objects, or null. You need a way to pick `oneOf` them! Here is a +sample of what that code might look like: + + type Json + = Number Float + | Boolean Bool + | Null + + json : Parser Json + json = + oneOf + [ map Number float + , map (\\_ -> Boolean True) (keyword "true") + , map (\\_ -> Boolean False) (keyword "false") + , map (\\_ -> Null) keyword "null" + ] + +This parser will keep trying parsers until `oneOf` them starts chomping +characters. Once a path is chosen, it does not come back and try the others. + +**Note:** I highly recommend reading [this document][semantics] to learn how +`oneOf` and `backtrackable` interact. It is subtle and important! + +[semantics]: https://github.com/elm/parser/blob/master/semantics.md +", name = "oneOf", tipe = Lambda (Type "List.List" [ Type "Parser.Parser" [ Var "a" ] ]) (Type "Parser.Parser" [ Var "a" ]) }, { comment = " Indicate that a parser has reached a dead end. "Everything was going fine +until I ran into this problem." Check out the [`andThen`](#andThen) docs to see +an example usage. +", name = "problem", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Var "a" ]) }, { comment = " Try a parser. Here are some examples using the [`keyword`](#keyword) +parser: + + run (keyword "true") "true" == Ok () + run (keyword "true") "True" == Err ... + run (keyword "true") "false" == Err ... + run (keyword "true") "true!" == Ok () + +Notice the last case! A `Parser` will chomp as much as possible and not worry +about the rest. Use the [`end`](#end) parser to ensure you made it to the end +of the string! +", name = "run", tipe = Lambda (Type "Parser.Parser" [ Var "a" ]) (Lambda (Type "String.String" []) (Type "Result.Result" [ Type "List.List" [ Type "Parser.DeadEnd" [] ], Var "a" ])) }, { comment = " Handle things like lists and records, but you can customize the details +however you need. Say you want to parse C-style code blocks: + + import Parser exposing (Parser, Trailing(..)) + + block : Parser (List Stmt) + block = + Parser.sequence + { start = "{" + , separator = ";" + , end = "}" + , spaces = spaces + , item = statement + , trailing = Mandatory -- demand a trailing semi-colon + } + + -- statement : Parser Stmt + +**Note:** If you need something more custom, do not be afraid to check +out the implementation and customize it for your case. It is better to +get nice error messages with a lower-level implementation than to try +to hack high-level parsers to do things they are not made for. +", name = "sequence", tipe = Lambda (Record [ ( "start", Type "String.String" [] ), ( "separator", Type "String.String" [] ), ( "end", Type "String.String" [] ), ( "spaces", Type "Parser.Parser" [ Tuple [] ] ), ( "item", Type "Parser.Parser" [ Var "a" ] ), ( "trailing", Type "Parser.Trailing" [] ) ] Nothing) (Type "Parser.Parser" [ Type "List.List" [ Var "a" ] ]) }, { comment = " Parse zero or more `' '`, `'\\n'`, and `'\\r'` characters. + +The implementation is pretty simple: + + spaces : Parser () + spaces = + chompWhile (\\c -> c == ' ' || c == '\\n' || c == '\\r') + +So if you need something different (like tabs) just define an alternative with +the necessary tweaks! Check out [`lineComment`](#lineComment) and +[`multiComment`](#multiComment) for more complex situations. +", name = "spaces", tipe = Type "Parser.Parser" [ Tuple [] ] }, { comment = " A parser that succeeds without chomping any characters. + + run (succeed 90210 ) "mississippi" == Ok 90210 + run (succeed 3.141 ) "mississippi" == Ok 3.141 + run (succeed () ) "mississippi" == Ok () + run (succeed Nothing) "mississippi" == Ok Nothing + +Seems weird on its own, but it is very useful in combination with other +functions. The docs for [`(|=)`](#|=) and [`andThen`](#andThen) have some neat +examples. +", name = "succeed", tipe = Lambda (Var "a") (Type "Parser.Parser" [ Var "a" ]) }, { comment = " Parse symbols like `(` and `,`. + + run (symbol "[") "[" == Ok () + run (symbol "[") "4" == Err ... (ExpectingSymbol "[") ... + +**Note:** This is good for stuff like brackets and semicolons, but it probably +should not be used for binary operators like `+` and `-` because you can find +yourself in weird situations. For example, is `3--4` a typo? Or is it `3 - -4`? +I have had better luck with `chompWhile isSymbol` and sorting out which +operator it is afterwards. +", name = "symbol", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " Parse exactly the given string, without any regard to what comes next. + +A potential pitfall when parsing keywords is getting tricked by variables that +start with a keyword, like `let` in `letters` or `import` in `important`. This +is especially likely if you have a whitespace parser that can consume zero +charcters. So the [`keyword`](#keyword) parser is defined with `token` and a +trick to peek ahead a bit: + + keyword : String -> Parser () + keyword kwd = + succeed identity + |. backtrackable (token kwd) + |= oneOf + [ map (\\_ -> True) (backtrackable (chompIf isVarChar)) + , succeed False + ] + |> andThen (checkEnding kwd) + + checkEnding : String -> Bool -> Parser () + checkEnding kwd isBadEnding = + if isBadEnding then + problem ("expecting the `" ++ kwd ++ "` keyword") + else + commit () + + isVarChar : Char -> Bool + isVarChar char = + Char.isAlphaNum char || char == '_' + +This definition is specially designed so that (1) if you really see `let` you +commit to that path and (2) if you see `letters` instead you can backtrack and +try other options. If I had just put a `backtrackable` around the whole thing +you would not get (1) anymore. +", name = "token", tipe = Lambda (Type "String.String" []) (Type "Parser.Parser" [ Tuple [] ]) }, { comment = " Create a parser for variables. If we wanted to parse type variables in Elm, +we could try something like this: + + import Char + import Parser exposing (..) + import Set + + typeVar : Parser String + typeVar = + variable + { start = Char.isLower + , inner = \\c -> Char.isAlphaNum c || c == '_' + , reserved = Set.fromList [ "let", "in", "case", "of" ] + } + +This is saying it _must_ start with a lower-case character. After that, +characters can be letters, numbers, or underscores. It is also saying that if +you run into any of these reserved names, it is definitely not a variable. +", name = "variable", tipe = Lambda (Record [ ( "start", Lambda (Type "Char.Char" []) (Type "Basics.Bool" []) ), ( "inner", Lambda (Type "Char.Char" []) (Type "Basics.Bool" []) ), ( "reserved", Type "Set.Set" [ Type "String.String" [] ] ) ] Nothing) (Type "Parser.Parser" [ Type "String.String" [] ]) }, { comment = " Some languages are indentation sensitive. Python cares about tabs. Elm +cares about spaces sometimes. `withIndent` and `getIndent` allow you to manage +"indentation state" yourself, however is necessary in your scenario. +", name = "withIndent", tipe = Lambda (Type "Basics.Int" []) (Lambda (Type "Parser.Parser" [ Var "a" ]) (Type "Parser.Parser" [ Var "a" ])) } ] + }""" + + formatFile : Elm.Project.PackageInfo -> List Elm.Docs.Module -> String formatFile elmJson docsJson = let listOfModuleNames list = list - |> List.map (\name -> "unsafeModuleName \"" ++ Elm.Module.toString name ++ "\"") + |> List.map (\name -> "unsafeModuleName " ++ stringify (Elm.Module.toString name)) |> String.join ", " exposed = @@ -78,7 +860,7 @@ formatFile elmJson docsJson = "Hello" dependencyModules = - "[]" + "[" ++ String.join "\n , " (List.map formatModule docsJson) ++ "\n ]" in "module " ++ moduleName ++ """ exposing (dependency) @@ -90,12 +872,11 @@ import Elm.Package import Elm.Project import Elm.Type import Elm.Version -import Json.Decode as Decode import Review.Project.Dependency as Dependency exposing (Dependency) dependency : Dependency dependency = - Dependency.create \"""" ++ Elm.Package.toString elmJson.name ++ """" + Dependency.create """ ++ stringify (Elm.Package.toString elmJson.name) ++ """ elmJson dependencyModules @@ -103,22 +884,17 @@ dependency = elmJson : Elm.Project.Project elmJson = Elm.Project.Package - { deps = [ """ ++ String.join ", " (List.map (\( name, constraint ) -> "( unsafePackageName \"" ++ Elm.Package.toString name ++ "\", unsafeConstraint \"" ++ Elm.Constraint.toString constraint ++ "\" )") elmJson.deps) ++ """ ] - , elm = unsafeConstraint \"""" ++ Elm.Constraint.toString elmJson.elm ++ """" + { elm = unsafeConstraint """ ++ stringify (Elm.Constraint.toString elmJson.elm) ++ """ , exposed = """ ++ exposed ++ """ - , license = Elm.License.fromString \"""" ++ Elm.License.toString elmJson.license ++ """" |> Maybe.withDefault Elm.License.bsd3 - , name = unsafePackageName \"""" ++ Elm.Package.toString elmJson.name ++ """" - , summary = \"""" ++ elmJson.summary ++ """" - , testDeps = [] - , version = Elm.Version.fromString \"""" ++ Elm.Version.toString elmJson.version ++ """" |> Maybe.withDefault Elm.Version.one + , license = Elm.License.fromString """ ++ stringify (Elm.License.toString elmJson.license) ++ """" |> Maybe.withDefault Elm.License.bsd3 + , name = unsafePackageName """ ++ stringify (Elm.Package.toString elmJson.name) ++ """ + , summary = """ ++ stringify elmJson.summary ++ """ + , deps = [ """ ++ String.join ", " (List.map (\( name, constraint ) -> "( unsafePackageName " ++ stringify (Elm.Package.toString name) ++ ", unsafeConstraint " ++ stringify (Elm.Constraint.toString constraint) ++ ")") elmJson.deps) ++ """ ] + , testDeps = [ """ ++ String.join ", " (List.map (\( name, constraint ) -> "( unsafePackageName " ++ stringify (Elm.Package.toString name) ++ ", unsafeConstraint " ++ stringify (Elm.Constraint.toString constraint) ++ ")") elmJson.testDeps) ++ """ ] + , version = Elm.Version.fromString """ ++ stringify (Elm.Version.toString elmJson.version) ++ """ |> Maybe.withDefault Elm.Version.one } -dependencyModules : List Elm.Docs.Module -dependencyModules = - """ ++ dependencyModules ++ """ - - unsafePackageName : String -> Elm.Package.Name unsafePackageName packageName = case Elm.Package.fromString packageName of @@ -161,7 +937,7 @@ unsafeConstraint constraint = formatPackageName : Elm.Package.Name -> String formatPackageName packageName = - "Elm.Package.fromString \"" ++ Elm.Package.toString packageName ++ "\" |> Maybe.withDefault Elm.Package.one" + "Elm.Package.fromString " ++ stringify (Elm.Package.toString packageName) ++ " |> Maybe.withDefault Elm.Package.one" port sendToJs : String -> Cmd msg