Add basic parser

This commit is contained in:
Richard Feldman 2019-03-13 22:32:08 -04:00
parent 05d75e3fcc
commit 956f305485
7 changed files with 437 additions and 92 deletions

53
Cargo.lock generated
View File

@ -6,11 +6,33 @@ dependencies = [
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ascii"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "combine"
version = "3.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ascii 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "difference"
version = "2.0.0"
@ -21,6 +43,11 @@ name = "dogged"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "either"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fixedbitset"
version = "0.1.9"
@ -39,6 +66,11 @@ name = "maplit"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ordermap"
version = "0.3.5"
@ -66,6 +98,7 @@ dependencies = [
name = "roc"
version = "0.1.0"
dependencies = [
"combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -73,6 +106,19 @@ dependencies = [
"pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unreachable"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.6"
@ -94,15 +140,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum ascii 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a5fc969a8ce2c9c0c4b0429bb8431544f6658283c8326ba5ff8c762b75369335"
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "da3da6baa321ec19e1cc41d31bf599f00c783d0517095cdaf0332e3fe8d20680"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2638df109789fe360f0d9998c5438dd19a36678aaf845e46f285b688b1a1657a"
"checksum either 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c67353c641dc847124ea1902d69bd753dee9bb3beff9aa3662ecf86c971d1fac"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a029430f0d744bc3d15dd474d591bed2402b645d024583082b9f63bb936dac6"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@ -7,6 +7,7 @@ authors = ["Richard Feldman <richard.t.feldman@gmail.com>"]
dogged = { version = "0.2.0", optional = true }
log = "0.4"
petgraph = { version = "0.4.5", optional = true }
combine = "3.8.1"
[dev-dependencies]
pretty_assertions = "0.5.1"

134
oldtst/test_solve.rs Normal file
View File

@ -0,0 +1,134 @@
#[macro_use] extern crate pretty_assertions;
extern crate roc;
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use roc::solve::solve_constraint;
use roc::typ::Type::*;
use roc::constrain::Constraint::*;
use roc::expr::Expr::*;
/// Solving the trivial `True` constraint is expected to succeed with an empty map.
#[test]
fn test_solve_true() {
    let solved = solve_constraint(True);

    assert_eq!(Ok(HashMap::new()), solved);
}
#[test]
fn test_solve_unify_basic() {
let expected = HashMap::new();
// TODO unify a function call.
// TODO to do this, will nee to introduce let-bindings to put stuff in the Name Map
// TODO since function calls are looked up by name.
let type_to_unify:Type = ...
let expected_type_to_unify:ExpectedType = ...
assert_eq!(Ok(expected), solve_constraint(Unify(type_to_unify, expected_type_to_unify));
}
// #[test]
// fn test_negate_number() {
// expect_type(Type::Number, CallBuiltin(Negate, WholeNumber(5)));
// }
// #[test]
// fn test_negate_float() {
// expect_type(Type::Float, CallBuiltin(Negate, FractinalNumber(3.1)));
// }
// #[test]
// fn test_negate_int_twice() {
// expect_type(Type::Int, negate_twice(HexOctalBinary(0x12)));
// }
// #[test]
// fn test_negate_number_twice() {
// expect_type(Type::Number, negate_twice(WholeNumber(5)));
// }
// #[test]
// fn test_negate_float_twice() {
// expect_type(Type::Float, negate_twice(FractinalNumber(3.1)));
// }
// #[test]
// fn test_int_literal() {
// expect_type(Type::Int, HexOctalBinary(0x12));
// }
// #[test]
// fn test_float_literal() {
// expect_type(Type::Float, FractionalNumber(3.1));
// }
// #[test]
// fn test_number_literal() {
// expect_type(Type::Number, WholeNumber(5));
// }
// #[test]
// fn add_ints_returns_int() {
// expect_type(Type::Int, CallOperator(Plus, int(), int()));
// }
// #[test]
// fn add_floats_returns_float() {
// expect_type(Type::Float, CallOperator(Plus, float(), float()));
// }
// #[test]
// fn add_nums_returns_num() {
// expect_type(Type::Number, CallOperator(Plus, num(), num()));
// }
// #[test]
// fn add_num_int_returns_int() {
// expect_type(Type::Int, CallOperator(Plus, num(), int()));
// expect_type(Type::Int, CallOperator(Plus, int(), num()));
// }
// #[test]
// fn add_num_float_returns_float() {
// expect_type(Type::Float, CallOperator(Plus, num(), float()));
// expect_type(Type::Float, CallOperator(Plus, float(), num()));
// }
// #[test]
// fn add_int_float_returns_mismatch() {
// expect_mismatch(CallOperator(Plus, int(), float()));
// }
// fn expect_type<'a>(expected_type: Type<'a>, expr: Expr<'a>) {
// assert_eq!(expected_type, infer_type(expr).unwrap());
// }
// fn expect_mismatch<'a>(expr: Expr<'a>) {
// assert_eq!(Err(Problem::Mismatch), infer_type(expr));
// }
// #[inline]
// fn negate_twice(expr) {
// CallBuiltin(Negate, CallBuiltin(Negate, expr))
// }
// fn int<'a>() -> Box<&'a Expr<'a>> { Box::new(&HexOctalBinary(0x12)) }
// fn float<'a>() -> Box<&'a Expr<'a>> { Box::new(&FractionalNumber(3.1)) }
// fn num<'a>() -> Box<&'a Expr<'a>> { Box::new(&WholeNumber(5)) }
// TODO test unions that ought to be equivalent, but only after
// a reduction of some sort, e.g.
//
// ((a|b)|c) vs (a|(b|c))
//
// ((a|z)|(b|z)) vs (a|b|z)
//
// ideally, we fix these when constructing unions
// e.g. if a user puts this in as an annotation, reduce it immediately
// and when we're inferring unions, always infer them flat.
// This way we can avoid checking recursively.
}

View File

@ -1,15 +1,16 @@
#![feature(box_patterns)]
#![feature(box_syntax, box_patterns)]
// pub mod unify;
// pub mod interpret;
// pub mod repl;
pub mod solve;
mod expr;
mod constrain;
mod canonical;
mod name;
mod typ;
pub mod expr;
pub mod constrain;
pub mod canonical;
pub mod name;
pub mod typ;
pub mod parse;
mod ena;
#[macro_use]
@ -18,3 +19,4 @@ extern crate log;
#[cfg(feature = "persistent")]
extern crate dogged;
#[macro_use] extern crate combine;

188
src/parse.rs Normal file
View File

@ -0,0 +1,188 @@
use expr::Operator;
use expr::Expr;
use self::Problem::*;
use std::char;
use std::string;
use combine::parser::char::{char, letter, spaces, digit};
use combine::{attempt, between, choice, many1, parser, sep_by, Parser, optional};
use combine::error::{ParseError, ParseResult};
use combine::stream::{Stream, Positioned};
use combine::stream::state::State;
/// Problems that can be reported for malformed source text.
///
/// The derives make the type usable in the places an error type normally ends
/// up: `Debug` so a `Result<_, Problem>` can be unwrapped or printed, and
/// `PartialEq`/`Eq` so results can be compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Problem {
    // Number problems
    /// A number contained more than one decimal point.
    DoubleDecimalPoint,
    /// A decimal point appeared before any digit of the number.
    NoDigitsBeforeDecimalPoint,
    /// A number started with two minus signs in a row.
    DoubleMinusSign,
}
/// Intended entry point for turning `text` into an `Expr`.
///
/// Not implemented yet: the body never reads `text` and never returns.
///
/// # Panics
///
/// Always panics with the message "TODO".
pub fn parse(text: &str) -> Result<Expr, Problem> {
panic!("TODO");
}
/// Returns the parser for an expression.
///
/// This is a thin wrapper: it builds nothing itself and simply returns the
/// parser produced by `expr_()`.
pub fn expr<I>() -> impl Parser<Input = I, Output = Expr>
where I: Stream<Item = char>,
I::Error: ParseError<I::Item, I::Range, I::Position>
{
// TODO change to expr() to reproduce rust compiler bug
expr_()
}
// The `parser!` macro is used here because it allows recursive parsers: the
// right-hand side of an operator is parsed by `expr()`, which leads back here.
parser! {
    #[inline(always)]
    fn expr_[I]()(I) -> Expr
        where [ I: Stream<Item = char> ]
    {
        // One operand (number literal or identifier), then any trailing whitespace.
        let operand = choice((
            number_literal(),
            ident(),
        )).skip(spaces());

        // Optionally: an operator, whitespace, and a complete expression after it.
        let operator_and_rest = optional(
            operator()
                .skip(spaces())
                .and(expr())
        );

        operand.and(operator_and_rest).map(|(lhs, tail)| {
            if let Some((op, rhs)) = tail {
                Expr::CallOperator(Box::new(lhs), op, Box::new(rhs))
            } else {
                lhs
            }
        })
    }
}
pub fn operator<I>() -> impl Parser<Input = I, Output = Operator>
where I: Stream<Item = char>,
I::Error: ParseError<I::Item, I::Range, I::Position>
{
choice((
char('+').map(|_| Operator::Plus),
char('-').map(|_| Operator::Minus),
char('*').map(|_| Operator::Star),
))
}
/// Parser used where an identifier is expected.
///
/// NOTE(review): this looks like a placeholder rather than a real identifier
/// parser — it matches a single `.` character and always yields `Expr::Int(1)`.
pub fn ident<I>() -> impl Parser<Input = I, Output = Expr>
where I: Stream<Item = char>,
I::Error: ParseError<I::Item, I::Range, I::Position>
{
char('.').map(|_| Expr::Int(1))
}
pub fn number_literal<I>() -> impl Parser<Input = I, Output = Expr>
where I: Stream<Item = char>,
I::Error: ParseError<I::Item, I::Range, I::Position>
{
let decimal_digits =
char('.').with(many1::<Vec<_>, _>(digit()));
optional(char('-'))
.and(many1::<Vec<_>, _>(digit()))
.and(optional(decimal_digits))
.map(|((maybe_minus, numerator_digits), decimals): ((Option<char>, Vec<char>), Option<Vec<char>>)| {
// TODO check length of nums and build it up into an i62 if possible
let str: String = numerator_digits.into_iter().collect();
let mut numerator = str.parse::<i32>().unwrap();
if maybe_minus != None {
numerator = -numerator;
}
match decimals {
Some(nums) => {
panic!("Can't handle decimals yet.");
}
None => Expr::Int(numerator as i64)
}
})
}
// pub fn parse_expr(state: &mut State) -> Result<Expr, Problem> {
// let digits = chomp_digits(state);
// if digits.is_empty() {
// Err(Problem::InvalidNumber)
// } else {
// // TODO store these in a bigint, and handle overflow.
// let num = digits.parse::<u32>().unwrap();
// if decimal_point
// Ok(Expr::Int(num))
// }
// }
// enum Parsed {
// Expr(Expr),
// Malformed(Problem),
// NotFound
// }
// #[inline]
// fn number_parser() -> {
// let has_minus_sign = false;
// let decimal_point_index: usize = 0;
// let len: usize = 0;
// for ch in state.text.chars() {
// if ch.is_ascii_digit() {
// len += 1;
// } else if ch == '-' {
// if has_minus_sign {
// if len == 1 {
// return Malformed(DoubleMinusSign);
// } else {
// // This second minus sign is a subtraction operator.
// // We've reached the end of the number!
// break;
// }
// } else {
// has_minus_sign = true;
// len += 1;
// }
// } else if ch == '.' {
// if len == 0 {
// return Malformed(NoDigitsBeforeDecimalPoint);
// } else if decimal_point_index != 0 {
// return Malformed(DoubleDecimalPoint);
// } else {
// // This might be a valid decimal number!
// decimal_point_index = len;
// len += 1;
// }
// }
// }
// state.col += len;
// if decimal_point_index == 0 {
// // This is an integer.
// Expr(Expr::Int(parse_int(&state.text[..len])))
// } else {
// // This is a decimal.
// let before_decimal_pt = &state.text[..decimal_point_index];
// let after_decimal_pt = &state.text[(decimal_point_index + 1)..];
// let numerator_str = before_decimal_pt.to_owned();
// numerator_str.push_str(after_decimal_pt);
// let numerator = parse_int(&numerator_str);
// let denominator = 10 * after_decimal_pt.len() as u64;
// Expr(Expr::Ratio(numerator, denominator))
// }
// }
// #[inline]
// fn parse_int(text: &str) -> i64 {
// // TODO parse as BigInt
// text.parse::<i64>().unwrap()
// }

53
tests/test_parse.rs Normal file
View File

@ -0,0 +1,53 @@
#![feature(box_syntax, box_patterns)]
#[macro_use] extern crate pretty_assertions;
#[macro_use] extern crate combine;
extern crate roc;
#[cfg(test)]
mod tests {
    // Parser tests. Each one runs a parser on a `&str` and, on success, expects
    // a pair of the parsed `Expr` and the remaining input (empty in every case).
    //
    // The `box` patterns further down rely on the `box_patterns` feature, which
    // is enabled once by the crate-level attribute at the top of this file;
    // feature attributes have no effect inside a module or a function body.
    use roc::expr::Expr::*;
    use roc::expr::Operator::*;
    use roc::parse;
    use combine::{Parser};

    #[test]
    fn test_parse_positive_int() {
        assert_eq!(Ok((Int(1234), "")), parse::number_literal().parse("1234"));
    }

    #[test]
    fn test_parse_negative_int() {
        assert_eq!(Ok((Int(-1234), "")), parse::number_literal().parse("-1234"));
    }

    #[test]
    fn test_parse_single_operator() {
        match parse::expr().parse("1234 + 567") {
            Ok((CallOperator(v1, op, v2), "")) => {
                assert_eq!(*v1, Int(1234));
                assert_eq!(op, Plus);
                assert_eq!(*v2, Int(567));
            },
            _ => panic!("Expression didn't parse"),
        }
    }

    #[test]
    fn test_parse_multiple_operators() {
        // The expected tree nests to the right: 1 + (2 * 3).
        match parse::expr().parse("1 + 2 * 3") {
            Ok((CallOperator(box v1, op1, box CallOperator(box v2, op2, box v3)), "")) => {
                assert_eq!(v1, Int(1));
                assert_eq!(op1, Plus);
                assert_eq!(v2, Int(2));
                assert_eq!(op2, Star);
                assert_eq!(v3, Int(3));
            },
            _ => panic!("Expression didn't parse"),
        }
    }
}

View File

@ -1,86 +0,0 @@
#[macro_use] extern crate pretty_assertions;
extern crate roc;
#[cfg(test)]
mod tests {
use roc::solve::Type;
use roc::solve::Type::*;
use roc::solve::Expr;
use roc::solve::Expr::*;
use roc::solve::Operator;
use roc::solve::Operator::*;
use roc::solve::Problem;
use roc::solve::infer_type;
#[test]
fn test_int_literal() {
expect_type(Type::Int, HexOctalBinary(0x12));
}
#[test]
fn test_float_literal() {
expect_type(Type::Float, FractionalNumber(3.1));
}
#[test]
fn test_number_literal() {
expect_type(Type::Number, WholeNumber(5));
}
#[test]
fn add_ints_returns_int() {
expect_type(Type::Int, CallOperator(Plus, int(), int()));
}
#[test]
fn add_floats_returns_float() {
expect_type(Type::Float, CallOperator(Plus, float(), float()));
}
#[test]
fn add_nums_returns_num() {
expect_type(Type::Number, CallOperator(Plus, num(), num()));
}
#[test]
fn add_num_int_returns_int() {
expect_type(Type::Int, CallOperator(Plus, num(), int()));
expect_type(Type::Int, CallOperator(Plus, int(), num()));
}
#[test]
fn add_num_float_returns_float() {
expect_type(Type::Float, CallOperator(Plus, num(), float()));
expect_type(Type::Float, CallOperator(Plus, float(), num()));
}
#[test]
fn add_int_float_returns_mismatch() {
expect_mismatch(CallOperator(Plus, int(), float()));
}
fn expect_type<'a>(expected_type: Type<'a>, expr: Expr<'a>) {
assert_eq!(expected_type, infer_type(expr).unwrap());
}
fn expect_mismatch<'a>(expr: Expr<'a>) {
assert_eq!(Err(Problem::Mismatch), infer_type(expr));
}
fn int<'a>() -> Box<&'a Expr<'a>> { Box::new(&HexOctalBinary(0x12)) }
fn float<'a>() -> Box<&'a Expr<'a>> { Box::new(&FractionalNumber(3.1)) }
fn num<'a>() -> Box<&'a Expr<'a>> { Box::new(&WholeNumber(5)) }
// TODO test unions that ought to be equivalent, but only after
// a reduction of some sort, e.g.
//
// ((a|b)|c) vs (a|(b|c))
//
// ((a|z)|(b|z)) vs (a|b|z)
//
// ideally, we fix these when constructing unions
// e.g. if a user puts this in as an annotation, reduce it immediately
// and when we're inferring unions, always infer them flat.
// This way we can avoid checking recursively.
}