Merge branch 'main' of github.com:roc-lang/roc into simplify_examples

This commit is contained in:
Anton-4 2022-09-30 19:27:30 +02:00
commit 0aa590dfe7
No known key found for this signature in database
GPG Key ID: A13F4A6E21141925
15 changed files with 1082 additions and 22 deletions

View File

@ -235,7 +235,13 @@ mod cli_run {
);
}
assert!(out.status.success());
if !out.status.success() {
// We don't need stdout, Cargo prints it for us.
panic!(
"Example program exited with status {:?}\nstderr was:\n{:#?}",
out.status, out.stderr
);
}
}
}
@ -597,6 +603,17 @@ mod cli_run {
)
}
#[test]
fn parse_movies_csv() {
test_roc_app_slim(
"examples/parser",
"parse-movies-csv.roc",
"parse-movies-csv",
"Parse success!\n",
false,
)
}
// TODO not sure if this cfg should still be here: #[cfg(not(debug_assertions))]
// this is for testing the benchmarks; to perform proper benchmarks, see crates/cli/benches/README.md
mod test_benchmarks {

View File

@ -595,9 +595,10 @@ pub fn listDropAt(
if (list.isUnique()) {
var i = drop_index;
while (i < size) : (i += 1) {
while (i < size - 1) : (i += 1) {
const copy_target = source_ptr + i * element_width;
const copy_source = copy_target + element_width;
@memcpy(copy_target, copy_source, element_width);
}

View File

@ -800,6 +800,12 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
while (delimiter_index < delimiter_len) {
var delimiterChar = delimiter_bytes_ptrs[delimiter_index];
if (str_index + delimiter_index >= str_len) {
matches_delimiter = false;
break;
}
var strChar = str_bytes[str_index + delimiter_index];
if (delimiterChar != strChar) {

View File

@ -359,6 +359,9 @@ splitFirst = \haystack, needle ->
# splitFirst when needle isn't in haystack
expect splitFirst "foo" "z" == Err NotFound
# splitFirst when needle isn't in haystack, and haystack is empty
expect splitFirst "" "z" == Err NotFound
# splitFirst when haystack ends with needle repeated
expect splitFirst "foo" "o" == Ok { before: "f", after: "o" }
@ -444,17 +447,36 @@ matchesAt = \haystack, haystackIndex, needle ->
needleLength = Str.countUtf8Bytes needle
endIndex = min (haystackIndex + needleLength) haystackLength
matchesAtHelp haystack haystackIndex needle 0 endIndex
matchesAtHelp {
haystack,
haystackIndex,
needle,
needleIndex: 0,
needleLength,
endIndex,
}
matchesAtHelp : Str, Nat, Str, Nat, Nat -> Bool
matchesAtHelp = \haystack, haystackIndex, needle, needleIndex, endIndex ->
if haystackIndex < endIndex then
if Str.getUnsafe haystack haystackIndex == Str.getUnsafe needle needleIndex then
matchesAtHelp haystack (haystackIndex + 1) needle (needleIndex + 1) endIndex
else
Bool.false
matchesAtHelp = \state ->
{ haystack, haystackIndex, needle, needleIndex, needleLength, endIndex } = state
isAtEndOfHaystack = haystackIndex >= endIndex
if isAtEndOfHaystack then
didWalkEntireNeedle = needleIndex == needleLength
didWalkEntireNeedle
else
Bool.true
doesThisMatch =
Str.getUnsafe haystack haystackIndex
==
Str.getUnsafe needle needleIndex
doesRestMatch =
matchesAtHelp
{ state &
haystackIndex: haystackIndex + 1,
needleIndex: needleIndex + 1,
}
doesThisMatch && doesRestMatch
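A minimal sketch of the case the new didWalkEntireNeedle check guards against, written as a hypothetical expect in the style of the ones above:
# Hypothetical expect, for illustration only: a needle that only partially matches
# at the very end of the haystack must not be reported as found.
expect splitFirst "foo" "oz" == Err NotFound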
## Walks over the string's UTF-8 bytes, calling a function which updates a state using each
## UTF-8 `U8` byte as well as the index of that byte within the string.

View File

@ -83,6 +83,8 @@ fn collect_roc_definitions<'a>(object: &object::File<'a, &'a [u8]>) -> MutMap<St
vaddresses.insert("memcpy".to_string(), address);
} else if name == "roc_memset" {
vaddresses.insert("memset".to_string(), address);
} else if name == "roc_memmove" {
vaddresses.insert("memmove".to_string(), address);
}
vaddresses.insert(name.to_string(), address);
@ -289,9 +291,28 @@ pub(crate) fn preprocess_elf(
if verbose {
println!(
"Found roc symbol definitions: {:+x?}",
md.roc_symbol_vaddresses
"Found {} roc symbol definitions:",
md.roc_symbol_vaddresses.len()
);
let (mut builtins, mut other): (Vec<_>, Vec<_>) = md
.roc_symbol_vaddresses
.iter()
.partition(|(n, _)| n.starts_with("roc_builtins"));
// sort by address
builtins.sort_by_key(|t| t.1);
other.sort_by_key(|t| t.1);
for (name, vaddr) in other.iter() {
println!("\t{:#08x}: {}", vaddr, name);
}
println!("Of which {} are builtins", builtins.len(),);
for (name, vaddr) in builtins.iter() {
println!("\t{:#08x}: {}", vaddr, name);
}
}
let exec_parsing_duration = exec_parsing_start.elapsed();

View File

@ -529,6 +529,25 @@ fn to_expr_report<'a>(
EExpr::Ability(err, pos) => to_ability_def_report(alloc, lines, filename, err, *pos),
EExpr::IndentEnd(pos) => {
let surroundings = Region::new(start, *pos);
let region = LineColumnRegion::from_pos(lines.convert_pos(*pos));
let doc = alloc.stack(vec![
alloc.reflow(r"I am partway through parsing an expression, but I got stuck here:"),
alloc.region_with_subregion(lines.convert_region(surroundings), region),
alloc.concat(vec![
alloc.reflow("Looks like the indentation ends prematurely here. "),
alloc.reflow("Did you mean to have another expression after this line?"),
]),
]);
Report {
filename,
doc,
title: "INDENT ENDS AFTER EXPRESSION".to_string(),
severity: Severity::RuntimeError,
}
}
_ => todo!("unhandled parse error: {:?}", parse_problem),
}
}

View File

@ -4586,6 +4586,29 @@ mod test_reporting {
"###
);
test_report!(
expression_indentation_end,
indoc!(
r#"
f <- Foo.foo
"#
),
@r###"
INDENT ENDS AFTER EXPRESSION tmp/expression_indentation_end/Test.roc
I am partway through parsing an expression, but I got stuck here:
1 app "test" provides [main] to "./platform"
2
3 main =
4 f <- Foo.foo
^
Looks like the indentation ends prematurely here. Did you mean to have
another expression after this line?
"###
);
test_report!(
type_inline_alias,
indoc!(

View File

@ -31,7 +31,7 @@ NamedParser a := {
## needs, consider transforming it into a [NamedParser].
Parser a := [
Succeed a,
Arg Config (List Str -> Result a [NotFound, WrongType]),
Arg Config (List Str -> Result a [NotFound Str, WrongType { arg : Str, expected : Type }]),
# TODO: hiding the record behind an alias currently causes a panic
SubCommand
(List {
@ -281,11 +281,11 @@ parseHelp : Parser a, List Str -> Result a (ParseError *)
parseHelp = \@Parser parser, args ->
when parser is
Succeed val -> Ok val
Arg { long, type } run ->
Arg _ run ->
when run args is
Ok val -> Ok val
Err NotFound -> Err (MissingRequiredArg long)
Err WrongType -> Err (WrongType { arg: long, expected: type })
Err (NotFound long) -> Err (MissingRequiredArg long)
Err (WrongType { arg, expected }) -> Err (WrongType { arg, expected })
SubCommand cmds ->
when List.get args 0 is
@ -320,10 +320,10 @@ bool : _ -> Parser Bool # TODO: panics if parameter annotation given
bool = \{ long, short ? "", help ? "" } ->
fn = \args ->
when findOneArg long short args is
Err NotFound -> Err NotFound
Err NotFound -> Err (NotFound long)
Ok "true" -> Ok Bool.true
Ok "false" -> Ok Bool.false
Ok _ -> Err WrongType
Ok _ -> Err (WrongType { arg: long, expected: Bool })
@Parser (Arg { long, short, help, type: Bool } fn)
@ -332,7 +332,7 @@ str : _ -> Parser Str # TODO: panics if parameter annotation given
str = \{ long, short ? "", help ? "" } ->
fn = \args ->
when findOneArg long short args is
Err NotFound -> Err NotFound
Err NotFound -> Err (NotFound long)
Ok foundArg -> Ok foundArg
@Parser (Arg { long, short, help, type: Str } fn)
@ -342,10 +342,10 @@ i64 : _ -> Parser I64 # TODO: panics if parameter annotation given
i64 = \{ long, short ? "", help ? "" } ->
fn = \args ->
when findOneArg long short args is
Err NotFound -> Err NotFound
Err NotFound -> Err (NotFound long)
Ok foundArg ->
Str.toI64 foundArg
|> Result.mapErr \_ -> WrongType
|> Result.mapErr \_ -> WrongType { arg: long, expected: I64 }
@Parser (Arg { long, short, help, type: I64 } fn)
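A hedged sketch of the new error payloads in use; these hypothetical expects assume findOneArg resolves "--x" to its value and reports a missing "--name" as NotFound:
# Hypothetical expects, for illustration only:
expect parseHelp (bool { long: "x" }) ["--x", "maybe"] == Err (WrongType { arg: "x", expected: Bool })
expect parseHelp (str { long: "name" }) [] == Err (MissingRequiredArg "name")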
@ -624,6 +624,20 @@ expect
List.all cases \args -> parseHelp parser args == Ok "foo: true bar: baz"
# one argument is missing out of multiple
expect
parser =
succeed (\foo -> \bar -> "foo: \(foo) bar: \(bar)")
|> withParser (str { long: "foo" })
|> withParser (str { long: "bar" })
List.all
[
parseHelp parser ["--foo", "zaz"] == Err (MissingRequiredArg "bar"),
parseHelp parser ["--bar", "zaz"] == Err (MissingRequiredArg "foo"),
]
(\b -> b)
# string and bool parsers build help
expect
parser =

examples/parser/.gitignore (vendored, new file, 1 line)
View File

@ -0,0 +1 @@
parse-movies-csv

View File

@ -0,0 +1,196 @@
interface Parser.CSV
exposes [
CSV,
CSVRecord,
file,
record,
parseStr,
parseCSV,
parseStrToCSVRecord,
field,
string,
nat,
f64,
]
imports [
Parser.Core.{ Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, maybe, oneOrMore, sepBy1, between, ignore, flatten, sepBy },
Parser.Str.{ RawStr, parseStrPartial, oneOf, codeunit, codeunitSatisfies, scalar, digits, strFromRaw },
]
## This is a CSV parser which follows RFC4180
##
## For simplicity's sake, the following things are not yet supported:
## - CSV files with headings
##
## The following, however, *is* supported:
## - A simple LF ("\n") instead of CRLF ("\r\n") to separate records.
CSV : List CSVRecord
CSVRecord : List CSVField
CSVField : RawStr
## Attempts to parse an `a` from a `Str` that is encoded in CSV format.
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
parseStr = \csvParser, input ->
when parseStrToCSV input is
Err (ParsingIncomplete rest) ->
restStr = Parser.Str.strFromRaw rest
Err (SyntaxError restStr)
Err (ParsingFailure str) ->
Err (ParsingFailure str)
Ok csvData ->
when parseCSV csvParser csvData is
Err (ParsingFailure str) ->
Err (ParsingFailure str)
Err (ParsingIncomplete problem) ->
Err (ParsingIncomplete problem)
Ok vals ->
Ok vals
## Attempts to parse an `a` from a `CSV` data structure (a list of lists of byte-string fields).
parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, ParsingIncomplete CSVRecord]
parseCSV = \csvParser, csvData ->
csvData
|> List.mapWithIndex (\recordFieldsList, index -> { record: recordFieldsList, index: index })
|> List.walkUntil (Ok []) \state, { record: recordFieldsList, index: index } ->
when parseCSVRecord csvParser recordFieldsList is
Err (ParsingFailure problem) ->
indexStr = Num.toStr (index + 1)
recordStr = recordFieldsList |> List.map strFromRaw |> List.map (\val -> "\"\(val)\"") |> Str.joinWith ", "
problemStr = "\(problem)\nWhile parsing record no. \(indexStr): `\(recordStr)`"
Break (Err (ParsingFailure problemStr))
Err (ParsingIncomplete problem) ->
Break (Err (ParsingIncomplete problem))
Ok val ->
state
|> Result.map (\vals -> List.append vals val)
|> Continue
## Attempts to parse an `a` from a `CSVRecord` data structure (a list of byte-string fields)
##
## This parser succeeds when all fields of the CSVRecord are consumed by the parser.
parseCSVRecord : Parser CSVRecord a, CSVRecord -> Result a [ParsingFailure Str, ParsingIncomplete CSVRecord]
parseCSVRecord = \csvParser, recordFieldsList ->
parse csvParser recordFieldsList (\leftover -> leftover == [])
## Wrapper function to combine a set of fields into your desired `a`
##
## ## Usage example
##
## >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
## >>> |> field string
## >>> |> field string
## >>> |> field nat
##
record : a -> Parser CSVRecord a
record = Parser.Core.const
## Turns a parser for a `List U8` into a parser that parses part of a `CSVRecord`.
field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser ->
buildPrimitiveParser \fieldsList ->
when List.get fieldsList 0 is
Err OutOfBounds ->
Err (ParsingFailure "expected another CSV field but there are no more fields in this record")
Ok rawStr ->
when Parser.Str.parseRawStr fieldParser rawStr is
Ok val ->
Ok { val: val, input: List.dropFirst fieldsList }
Err (ParsingFailure reason) ->
fieldStr = rawStr |> strFromRaw
Err (ParsingFailure "Field `\(fieldStr)` could not be parsed. \(reason)")
Err (ParsingIncomplete reason) ->
reasonStr = strFromRaw reason
fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "
Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr))")
## Parser for a field containing a UTF8-encoded string
string : Parser CSVField Str
string = Parser.Str.anyString
## Parse a natural number from a CSV field
nat : Parser CSVField Nat
nat =
string
|> map
(\val ->
when Str.toNat val is
Ok num ->
Ok num
Err _ ->
Err "\(val) is not a Nat."
)
|> flatten
## Parse a 64-bit float from a CSV field
f64 : Parser CSVField F64
f64 =
string
|> map
(\val ->
when Str.toF64 val is
Ok num ->
Ok num
Err _ ->
Err "\(val) is not a F64."
)
|> flatten
## Attempts to parse a Str into the internal `CSV` data structure (a list of lists of byte-string fields).
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSV = \input ->
parse file (Str.toUtf8 input) (\leftover -> leftover == [])
## Attempts to parse a Str into the internal `CSVRecord` data structure (a list of byte-string fields).
parseStrToCSVRecord : Str -> Result CSVRecord [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSVRecord = \input ->
parse csvRecord (Str.toUtf8 input) (\leftover -> leftover == [])
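As a hedged sketch of what these return on a tiny input (hypothetical expects; the nested lists are the UTF-8 bytes of each field):
# Hypothetical expects, for illustration only: "a" is the field [97], "b" is [98].
expect parseStrToCSVRecord "a,b" == Ok [[97], [98]]
expect parseStrToCSV "a,b\r\nc,d" == Ok [[[97], [98]], [[99], [100]]]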
# The following are parsers to turn strings into CSV structures
file : Parser RawStr CSV
file = sepBy csvRecord endOfLine
csvRecord : Parser RawStr CSVRecord
csvRecord = sepBy1 csvField comma
csvField : Parser RawStr CSVField
csvField = alt escapedCsvField nonescapedCsvField
escapedCsvField : Parser RawStr CSVField
escapedCsvField = between escapedContents dquote dquote
escapedContents = many
(
oneOf [
twodquotes |> map (\_ -> 34), # An escaped double quote
comma,
cr,
lf,
textdata,
]
)
twodquotes = Parser.Str.string "\"\""
nonescapedCsvField : Parser RawStr CSVField
nonescapedCsvField = many textdata
comma = codeunit 44 # ','
dquote = codeunit 34 # '"'
endOfLine = alt (ignore crlf) (ignore lf)
cr = codeunit 13 # '\r'
lf = codeunit 10 # '\n'
crlf = Parser.Str.string "\r\n"
textdata = codeunitSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44)

View File

@ -0,0 +1,292 @@
interface Parser.Core
exposes [
Parser,
ParseResult,
parse,
parsePartial,
fail,
const,
alt,
apply,
oneOf,
map,
map2,
map3,
lazy,
maybe,
oneOrMore,
many,
between,
sepBy,
sepBy1,
ignore,
buildPrimitiveParser,
flatten,
]
imports []
## Opaque type for a parser that will try to parse an `a` from an `input`.
##
## As a simple example, you might consider a parser that tries to parse a `U32` from a `Str`.
## Such a process might succeed or fail, depending on the current value of `input`.
##
## As such, a parser can be considered a recipe
## for a function of the type `input -> Result {val: a, input: input} [ParsingFailure Str]`.
##
## How a parser is _actually_ implemented internally is not important
## and this might change between versions;
## for instance to improve efficiency or error messages on parsing failures.
Parser input a := input -> ParseResult input a
ParseResult input a : Result { val : a, input : input } [ParsingFailure Str]
buildPrimitiveParser : (input -> ParseResult input a) -> Parser input a
buildPrimitiveParser = \fun ->
@Parser fun
# -- Generic parsers:
## Most general way of running a parser.
##
## Can be thought of as turning the recipe of a parser into its actual parsing function
## and running this function on the given input.
##
## Many (but not all!) parsers consume part of `input` when they succeed.
## This allows you to string parsers together that run one after the other:
## the part of the input that the first parser did not consume is used by the next parser.
## This is why a parser returns on success both the resulting value and the leftover part of the input.
##
## Of course, this is mostly useful when creating your own internal parsing building blocks.
## `parse` or `Parser.Str.parseStr` etc. are more useful in daily usage.
parsePartial : Parser input a, input -> ParseResult input a
parsePartial = \@Parser parser, input ->
parser input
## Runs a parser on the given input, expecting it to fully consume the input
##
## The `input -> Bool` parameter is used to check whether parsing has 'completed'
## (in other words, whether all of the input has been consumed).
##
## For most (but not all!) input types, a parsing run that leaves some unparsed input behind
## should be considered an error.
parse : Parser input a, input, (input -> Bool) -> Result a [ParsingFailure Str, ParsingIncomplete input]
parse = \parser, input, isParsingCompleted ->
when parsePartial parser input is
Ok { val: val, input: leftover } ->
if isParsingCompleted leftover then
Ok val
else
Err (ParsingIncomplete leftover)
Err (ParsingFailure msg) ->
Err (ParsingFailure msg)
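A minimal usage sketch of `parse` with its completion check, built only from the `const` parser defined later in this module (a hypothetical expect):
# Hypothetical expect, for illustration only: `const 42` consumes nothing,
# so an empty input counts as fully consumed.
expect parse (const 42) (Str.toUtf8 "") List.isEmpty == Ok 42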
## Parser that can never succeed, regardless of the given input.
## It will always fail with the given error message.
##
## This is mostly useful as a 'base case' if all other parsers
## in a `oneOf` or `alt` have failed, to provide some more descriptive error message.
fail : Str -> Parser * *
fail = \msg ->
buildPrimitiveParser \_input -> Err (ParsingFailure msg)
## Parser that will always produce the given `val`, without looking at the actual input.
## This is useful as a basic building block, especially in combination with
## `map` and `apply`.
const : a -> Parser * a
const = \val ->
buildPrimitiveParser \input ->
Ok { val: val, input: input }
## Try the `first` parser and (only) if it fails, try the `second` parser as fallback.
alt : Parser input a, Parser input a -> Parser input a
alt = \first, second ->
buildPrimitiveParser \input ->
when parsePartial first input is
Ok { val: val, input: rest } -> Ok { val: val, input: rest }
Err (ParsingFailure firstErr) ->
when parsePartial second input is
Ok { val: val, input: rest } -> Ok { val: val, input: rest }
Err (ParsingFailure secondErr) ->
Err (ParsingFailure ("\(firstErr) or \(secondErr)"))
## Runs a parser building a function, then a parser building a value,
## and finally returns the result of calling the function with the value.
##
## This is useful if you are building up a structure that requires more parameters
## than there are variants of `map`, `map2`, `map3` etc. for.
##
## For instance, the following two are the same:
##
## >>> map3 Parser.Str.nat Parser.Str.nat Parser.Str.nat (\x, y, z -> Triple x y z)
##
## >>> const (\x -> \y -> \z -> Triple x y z)
## >>> |> apply Parser.Str.nat
## >>> |> apply Parser.Str.nat
## >>> |> apply Parser.Str.nat
##
## (And indeed, this is how `map`, `map2`, `map3` etc. are implemented under the hood.)
##
## # Currying
## Be aware that when using `apply`, you need to explicitly 'curry' the parameters to the construction function.
## This means that instead of writing `\x, y, z -> ...`
## you'll need to write `\x -> \y -> \z -> ...`.
## This is because the parameters to the function will be applied one-by-one as parsing continues.
apply : Parser input (a -> b), Parser input a -> Parser input b
apply = \funParser, valParser ->
combined = \input ->
{ val: funVal, input: rest } <- Result.try (parsePartial funParser input)
parsePartial valParser rest
|> Result.map \{ val: val, input: rest2 } ->
{ val: funVal val, input: rest2 }
buildPrimitiveParser combined
# Internal utility function. Not exposed to users, since usage is discouraged!
#
# Runs `firstParser` and (only) if it succeeds,
# runs the function `buildNextParser` on its result value.
# This function returns a new parser, which is finally run.
#
# `andThen` is usually more flexible than necessary, and less efficient
# than using `const` with `map` and/or `apply`.
# Consider using those functions first.
andThen : Parser input a, (a -> Parser input b) -> Parser input b
andThen = \firstParser, buildNextParser ->
fun = \input ->
{ val: firstVal, input: rest } <- Result.try (parsePartial firstParser input)
nextParser = buildNextParser firstVal
parsePartial nextParser rest
buildPrimitiveParser fun
## Try a list of parsers in turn, until one of them succeeds
oneOf : List (Parser input a) -> Parser input a
oneOf = \parsers ->
List.walkBackwards parsers (fail "oneOf: The list of parsers was empty") (\laterParser, earlierParser -> alt earlierParser laterParser)
## Transforms the result of parsing into something else,
## using the given transformation function.
map : Parser input a, (a -> b) -> Parser input b
map = \simpleParser, transform ->
const transform
|> apply simpleParser
## Transforms the result of parsing into something else,
## using the given two-parameter transformation function.
map2 : Parser input a, Parser input b, (a, b -> c) -> Parser input c
map2 = \parserA, parserB, transform ->
const (\a -> \b -> transform a b)
|> apply parserA
|> apply parserB
## Transforms the result of parsing into something else,
## using the given three-parameter transformation function.
##
## If you need transformations with more inputs,
## take a look at `apply`.
map3 : Parser input a, Parser input b, Parser input c, (a, b, c -> d) -> Parser input d
map3 = \parserA, parserB, parserC, transform ->
const (\a -> \b -> \c -> transform a b c)
|> apply parserA
|> apply parserB
|> apply parserC
# ^ And this could be repeated for as high as we want, of course.
# Removes a layer of 'result' from running the parser.
#
# This allows, for instance, mapping functions that return a result over the parser,
# where errors are turned into `ParsingFailure`s.
flatten : Parser input (Result a Str) -> Parser input a
flatten = \parser ->
buildPrimitiveParser \input ->
result = parsePartial parser input
when result is
Err problem ->
Err problem
Ok { val: Ok val, input: inputRest } ->
Ok { val: val, input: inputRest }
Ok { val: Err problem, input: _inputRest } ->
Err (ParsingFailure problem)
## Runs a parser lazily
##
## This is (only) useful when dealing with a recursive structure.
## For instance, consider a type `Comment : { message : Str, responses : List Comment }`.
## Without `lazy`, you would ask the compiler to build an infinitely deep parser,
## resulting in a compiler error.
##
lazy : ({} -> Parser input a) -> Parser input a
lazy = \thunk ->
const {}
|> andThen thunk
maybe : Parser input a -> Parser input (Result a [Nothing])
maybe = \parser ->
alt (parser |> map (\val -> Ok val)) (const (Err Nothing))
manyImpl : Parser input a, List a, input -> ParseResult input (List a)
manyImpl = \parser, vals, input ->
result = parsePartial parser input
when result is
Err _ ->
Ok { val: vals, input: input }
Ok { val: val, input: inputRest } ->
manyImpl parser (List.append vals val) inputRest
## A parser which runs the element parser *zero* or more times on the input,
## returning a list containing all the parsed elements.
##
## Also see `oneOrMore`.
many : Parser input a -> Parser input (List a)
many = \parser ->
buildPrimitiveParser \input ->
manyImpl parser [] input
## A parser which runs the element parser *one* or more times on the input,
## returning a list containing all the parsed elements.
##
## Also see `many`.
oneOrMore : Parser input a -> Parser input (List a)
oneOrMore = \parser ->
const (\val -> \vals -> List.prepend vals val)
|> apply parser
|> apply (many parser)
## Runs a parser for an 'opening' delimiter, then your main parser, then the 'closing' delimiter,
## and only returns the result of your main parser.
##
## Useful to recognize structures surrounded by delimiters (like braces, parentheses, quotes, etc.)
##
## >>> betweenBraces = \parser -> parser |> between (scalar '[') (scalar ']')
between : Parser input a, Parser input open, Parser input close -> Parser input a
between = \parser, open, close ->
const (\_ -> \val -> \_ -> val)
|> apply open
|> apply parser
|> apply close
sepBy1 : Parser input a, Parser input sep -> Parser input (List a)
sepBy1 = \parser, separator ->
parserFollowedBySep =
const (\_ -> \val -> val)
|> apply separator
|> apply parser
const (\val -> \vals -> List.prepend vals val)
|> apply parser
|> apply (many parserFollowedBySep)
sepBy : Parser input a, Parser input sep -> Parser input (List a)
sepBy = \parser, separator ->
alt (sepBy1 parser separator) (const [])
ignore : Parser input a -> Parser input {}
ignore = \parser ->
map parser (\_ -> {})

View File

@ -0,0 +1,216 @@
interface Parser.Str
exposes [
RawStr,
parseStr,
parseStrPartial,
parseRawStr,
parseRawStrPartial,
string,
stringRaw,
codeunit,
codeunitSatisfies,
anyString,
anyRawString,
anyCodeunit,
scalar,
oneOf,
digit,
digits,
strFromRaw,
]
imports [Parser.Core.{ Parser, ParseResult, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between }]
# Specific string-based parsers:
RawStr : List U8
strFromRaw : RawStr -> Str
strFromRaw = \rawStr ->
rawStr
|> Str.fromUtf8
|> Result.withDefault "Unexpected problem while turning a List U8 (that was originally a Str) back into a Str. This should never happen!"
strToRaw : Str -> RawStr
strToRaw = \str ->
str |> Str.toUtf8
strFromScalar : U32 -> Str
strFromScalar = \scalarVal ->
Str.appendScalar "" (Num.intCast scalarVal)
|> Result.withDefault "Unexpected problem while turning a U32 (that was probably originally a scalar constant) into a Str. This should never happen!"
strFromCodeunit : U8 -> Str
strFromCodeunit = \cu ->
strFromRaw [cu]
## Runs a parser against the start of a raw string (a list of UTF-8 code units), allowing the parser to consume it only partially.
parseRawStrPartial : Parser RawStr a, RawStr -> ParseResult RawStr a
parseRawStrPartial = \parser, input ->
parsePartial parser input
## Runs a parser against the start of a string, allowing the parser to consume it only partially.
##
## - If the parser succeeds, returns the resulting value as well as the leftover input.
## - If the parser fails, returns `Err (ParsingFailure msg)`
parseStrPartial : Parser RawStr a, Str -> ParseResult Str a
parseStrPartial = \parser, input ->
parser
|> parseRawStrPartial (strToRaw input)
|> Result.map \{ val: val, input: restRaw } ->
{ val: val, input: strFromRaw restRaw }
## Runs a parser against a string, requiring the parser to consume it fully.
##
## - If the parser succeeds, returns `Ok val`
## - If the parser fails, returns `Err (ParsingFailure msg)`
## - If the parser succeeds but does not consume the full string, returns `Err (ParsingIncomplete leftover)`
parseRawStr : Parser RawStr a, RawStr -> Result a [ParsingFailure Str, ParsingIncomplete RawStr]
parseRawStr = \parser, input ->
parse parser input (\leftover -> List.len leftover == 0)
parseStr : Parser RawStr a, Str -> Result a [ParsingFailure Str, ParsingIncomplete Str]
parseStr = \parser, input ->
parser
|> parseRawStr (strToRaw input)
|> Result.mapErr \problem ->
when problem is
ParsingFailure msg ->
ParsingFailure msg
ParsingIncomplete leftoverRaw ->
ParsingIncomplete (strFromRaw leftoverRaw)
codeunitSatisfies : (U8 -> Bool) -> Parser RawStr U8
codeunitSatisfies = \check ->
buildPrimitiveParser \input ->
{ before: start, others: inputRest } = List.split input 1
when List.get start 0 is
Err OutOfBounds ->
Err (ParsingFailure "expected a codeunit satisfying a condition, but input was empty.")
Ok startCodeunit ->
if check startCodeunit then
Ok { val: startCodeunit, input: inputRest }
else
otherChar = strFromCodeunit startCodeunit
inputStr = strFromRaw input
Err (ParsingFailure "expected a codeunit satisfying a condition but found `\(otherChar)`.\n While reading: `\(inputStr)`")
# Implemented manually instead of on top of codeunitSatisfies
# because of better error messages
codeunit : U8 -> Parser RawStr U8
codeunit = \expectedCodeUnit ->
buildPrimitiveParser \input ->
{ before: start, others: inputRest } = List.split input 1
when List.get start 0 is
Err OutOfBounds ->
errorChar = strFromCodeunit expectedCodeUnit
Err (ParsingFailure "expected char `\(errorChar)` but input was empty.")
Ok startCodeunit ->
if startCodeunit == expectedCodeUnit then
Ok { val: expectedCodeUnit, input: inputRest }
else
errorChar = strFromCodeunit expectedCodeUnit
otherChar = strFromRaw start
inputStr = strFromRaw input
Err (ParsingFailure "expected char `\(errorChar)` but found `\(otherChar)`.\n While reading: `\(inputStr)`")
# Implemented manually instead of as a sequence of codeunits
# because of efficiency and better error messages
stringRaw : List U8 -> Parser RawStr (List U8)
stringRaw = \expectedString ->
buildPrimitiveParser \input ->
{ before: start, others: inputRest } = List.split input (List.len expectedString)
if start == expectedString then
Ok { val: expectedString, input: inputRest }
else
errorString = strFromRaw expectedString
otherString = strFromRaw start
inputString = strFromRaw input
Err (ParsingFailure "expected string `\(errorString)` but found `\(otherString)`.\nWhile reading: \(inputString)")
string : Str -> Parser RawStr Str
string = \expectedString ->
strToRaw expectedString
|> stringRaw
|> map (\_val -> expectedString)
scalar : U32 -> Parser RawStr U32
scalar = \expectedScalar ->
expectedScalar
|> strFromScalar
|> string
|> map (\_ -> expectedScalar)
# Matches any codeunit
anyCodeunit : Parser RawStr U8
anyCodeunit = codeunitSatisfies (\_ -> Bool.true)
# Matches any bytestring
# and consumes all of it.
# Does not fail.
anyRawString : Parser RawStr RawStr
anyRawString = buildPrimitiveParser \rawStringValue ->
Ok { val: rawStringValue, input: [] }
# Matches any string
# as long as it is valid UTF8.
anyString : Parser RawStr Str
anyString = buildPrimitiveParser \fieldRawString ->
when Str.fromUtf8 fieldRawString is
Ok stringVal ->
Ok { val: stringVal, input: [] }
Err (BadUtf8 _ _) ->
Err (ParsingFailure "Expected a string field, but its contents cannot be parsed as UTF8.")
# betweenBraces : Parser RawStr a -> Parser RawStr a
# betweenBraces = \parser ->
# between parser (scalar '[') (scalar ']')
digit : Parser RawStr U8
digit =
digitParsers =
List.range 0 10
|> List.map \digitNum ->
digitNum
+ 48
|> codeunit
|> map (\_ -> digitNum)
oneOf digitParsers
# NOTE: Currently happily accepts leading zeroes
digits : Parser RawStr (Int *)
digits =
oneOrMore digit
|> map \digitsList ->
digitsList
|> List.map Num.intCast
|> List.walk 0 (\sum, digitVal -> 10 * sum + digitVal)
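A hedged sketch of `digits` in action via `parseStr` from this module (hypothetical expects):
# Hypothetical expects, for illustration only:
expect parseStr digits "123" == Ok 123
# Leading zeroes are accepted, as noted above:
expect parseStr digits "0123" == Ok 123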
## Try a bunch of different parsers.
##
## The first parser tried is the one at the front of the list;
## the rest are tried in order until one succeeds or the end of the list is reached.
##
## >>> boolParser : Parser RawStr Bool
## >>> boolParser = oneOf [string "true", string "false"] |> map (\x -> if x == "true" then Bool.true else Bool.false)
# NOTE: This implementation works, but is limited to parsing strings.
# Blocked until issue #3444 is fixed.
oneOf : List (Parser RawStr a) -> Parser RawStr a
oneOf = \parsers ->
buildPrimitiveParser \input ->
List.walkUntil parsers (Err (ParsingFailure "(no possibilities)")) \_, parser ->
when parseRawStrPartial parser input is
Ok val ->
Break (Ok val)
Err problem ->
Continue (Err problem)

View File

@ -0,0 +1,59 @@
app "parse-movies-csv"
packages { pf: "platform/main.roc" }
imports [Parser.Core.{ Parser, map, apply }, Parser.Str.{ RawStr }, Parser.CSV.{ CSV, record, field, string, nat }]
provides [main] to pf
input : Str
input = "Airplane!,1980,\"Robert Hays,Julie Hagerty\"\r\nCaddyshack,1980,\"Chevy Chase,Rodney Dangerfield,Ted Knight,Michael O'Keefe,Bill Murray\""
main : Str
main =
when Parser.CSV.parseStr movieInfoParser input is
Ok movies ->
moviesString =
movies
|> List.map movieInfoExplanation
|> Str.joinWith ("\n")
nMovies = List.len movies |> Num.toStr
"\(nMovies) movies were found:\n\n\(moviesString)\n\nParse success!\n"
Err problem ->
when problem is
ParsingFailure failure ->
"Parsing failure: \(failure)\n"
ParsingIncomplete leftover ->
leftoverStr = leftover |> List.map Parser.Str.strFromRaw |> List.map (\val -> "\"\(val)\"") |> Str.joinWith ", "
"Parsing incomplete. Following leftover fields while parsing a record: \(leftoverStr)\n"
SyntaxError error ->
"Parsing failure. Syntax error in the CSV: \(error)"
MovieInfo := { title : Str, releaseYear : Nat, actors : List Str }
movieInfoParser =
record (\title -> \releaseYear -> \actors -> @MovieInfo { title, releaseYear, actors })
|> apply (field string)
|> apply (field nat)
|> apply (field actorsParser)
actorsParser =
string
|> map (\val -> Str.split val ",")
movieInfoExplanation = \@MovieInfo { title, releaseYear, actors } ->
enumeratedActors = enumerate actors
releaseYearStr = Num.toStr releaseYear
"The movie '\(title)' was released in \(releaseYearStr) and stars \(enumeratedActors)"
enumerate : List Str -> Str
enumerate = \elements ->
{ before: inits, others: last } = List.split elements (List.len elements - 1)
last
|> List.prepend (inits |> Str.joinWith ", ")
|> Str.joinWith " and "

View File

@ -0,0 +1,164 @@
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
//#define ROC_PLATFORM_DEBUG
void alloc_panic(size_t size);
void *roc_alloc(size_t size, unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Allocating %llu (alignment %ud) ", (unsigned long long)size,
alignment);
#endif
void *result = malloc(size);
#ifdef ROC_PLATFORM_DEBUG
printf("at: %p\n", result);
#endif
if (result == NULL) {
if (size == 0) { // <- malloc is allowed to 'succeed' with NULL iff size == 0.
return NULL;
}
// Otherwise, it is an indication of failure.
alloc_panic(size);
}
return result;
}
void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Rellocating %p (%llu -> %llu) (alignment %ud) ", ptr,
(unsigned long long)old_size, (unsigned long long)new_size, alignment);
#endif
void *result = realloc(ptr, new_size);
#ifdef ROC_PLATFORM_DEBUG
printf("at: %p\n", result);
#endif
if (result == NULL) {
if (new_size ==
0) { // <- realloc is allowed to 'succeed' with NULL iff size == 0.
return NULL;
}
// Otherwise, it is an indication of failure.
alloc_panic(new_size);
}
return result;
}
void roc_dealloc(void *ptr, unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Deallocating %p (alignment %ud)\n", ptr, alignment);
#endif
free(ptr);
}
void roc_panic(void *ptr, unsigned int alignment) {
char *msg = (char *)ptr;
fprintf(stderr,
"Application crashed with message\n\n %s\n\nShutting down\n", msg);
exit(0);
}
void alloc_panic(size_t size) {
char msg[100];
sprintf(msg, "Memory allocation failed. Could not allocate %llu bytes",
(unsigned long long)size);
roc_panic(msg, 0);
}
void *roc_memcpy(void *dest, const void *src, size_t n) {
#ifdef ROC_PLATFORM_DEBUG
printf("memcpy %p -> %p (size: %llu)\n", src, dest, (unsigned long long)n);
#endif
return memcpy(dest, src, n);
}
void *roc_memmove(void *dest, const void *src, size_t n) {
return memmove(dest, src, n);
}
void *roc_memset(void *str, int c, size_t n) {
return memset(str, c, n);
}
struct RocStr {
char *bytes;
size_t len;
size_t capacity;
};
bool is_small_str(struct RocStr str) { return ((ssize_t)str.capacity) < 0; }
// Determine the length of the string, taking into
// account the small string optimization
size_t roc_str_len(struct RocStr str) {
char *bytes = (char *)&str;
char last_byte = bytes[sizeof(str) - 1];
char last_byte_xored = last_byte ^ 0b10000000;
size_t small_len = (size_t)(last_byte_xored);
size_t big_len = str.len;
// Avoid branch misprediction costs by always
// determining both small_len and big_len,
// so this compiles to a cmov instruction.
if (is_small_str(str)) {
return small_len;
} else {
return big_len;
}
}
extern void roc__mainForHost_1_exposed_generic(struct RocStr *string);
int main() {
struct RocStr str;
roc__mainForHost_1_exposed_generic(&str);
// Determine str_len and the str_bytes pointer,
// taking into account the small string optimization.
size_t str_len = roc_str_len(str);
char *str_bytes;
if (is_small_str(str)) {
str_bytes = (char *)&str;
} else {
str_bytes = str.bytes;
}
// Write to stdout
size_t written = fwrite(str_bytes, sizeof(char), str_len, stdout);
if (fflush(stdout) == 0 && written == str_len) {
// Writing succeeded!
// deallocate the roc string
if (!(is_small_str(str))) {
roc_dealloc(str_bytes - 8, 1);
}
return 0;
} else {
printf("Error writing to stdout: %s\n", strerror(errno));
// deallocate the roc string
if (!(is_small_str(str))) {
roc_dealloc(str_bytes - 8, 1);
}
return 1;
}
}

View File

@ -0,0 +1,9 @@
platform "hello-world"
requires {} { main : Str }
exposes []
packages {}
imports []
provides [mainForHost]
mainForHost : Str
mainForHost = main
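A hedged sketch of a minimal app targeting this platform; the app name and greeting are hypothetical, and the header mirrors parse-movies-csv.roc above:
# Hypothetical app, for illustration only:
app "hello"
packages { pf: "platform/main.roc" }
imports []
provides [main] to pf
main : Str
main = "Hello, world!\n"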