Merge remote-tracking branch 'origin/trunk' into inline-builtins

This commit is contained in:
Richard Feldman 2020-07-03 23:38:15 -04:00
commit 6a38e8b2de
9 changed files with 241 additions and 125 deletions

View File

@ -0,0 +1,26 @@
# So you want to add a builtin?
Builtins are the functions and modules that are implicitly imported into every module. Some of them compile down to LLVM, others need to be constructed and defined. Making a new builtin means touching many files. Here is what it takes:
### module/src/symbol.rs
Towards the bottom of the file there is a `define_builtins!` macro being used that takes many modules and function names. The first level (`List`, `Int` ..) is the module name, and the second level is the function or value name (`reverse`, `mod` ..). If you wanted to add an `Int` function called `addTwo`, go to `2 Int: "Int" => {` and inside that case add to the bottom `38 INT_ADD_TWO: "addTwo"` (assuming there are 37 existing ones).
Some of these have `#` inside their name (`first#list`, `#lt` ..). This is a trick we are doing to hide implementation details from Roc programmers. To a Roc programmer, a name with `#` in it is invalid, because `#` means everything after it is parsed as a comment. We are constructing these functions manually, so we are circumventing the parsing step and don't have such restrictions. We get to make functions and values with `#` which as a consequence are not accessible to Roc programmers. Roc programmers simply cannot reference them.
But we can use these values and some of these are necessary for implementing builtins. For example, `List.get` returns tags, and it is not easy for us to create tags when composing LLVM. What is easier however, is:
- ..writing `List.#getUnsafe` that has the dangerous signature of `List elem, Int -> elem` in LLVM
- ..writing a function with the signature `List elem, Int -> Result elem [ OutOfBounds ]*` in a type-safe way that uses `getUnsafe` internally, only after it checks that the `elem` at the `Int` index exists.
## Bottom level LLVM values and functions
### gen/src/llvm/build.rs
This is where bottom-level functions that need to be written as LLVM are created. If the function leads to a tag, that's a good sign it should not be written here in `build.rs`. If it's simple fundamental stuff like `INT_ADD` then it certainly should be written here.
## More abstract values and functions that likely return tags.
### can/src/builtins.rs
If the function you are making is _not_ low level or returns something like a tag, then it should probably be written here by means of lower level functions written in `build.rs`.
## Letting the compiler know these functions exist
It's one thing to actually write these functions, it's _another_ thing to let the Roc compiler know they exist. You have to tell the compiler "Hey, this function exists, and it has this type signature". That happens in these modules:
### builtins/src/std.rs
### builtins/src/unique.rs

View File

@ -10,7 +10,7 @@ use crate::pattern::{bindings_from_patterns, canonicalize_pattern, Pattern};
use crate::procedure::References;
use crate::scope::Scope;
use roc_collections::all::{default_hasher, ImMap, ImSet, MutMap, MutSet, SendMap};
use roc_module::ident::{Ident, Lowercase};
use roc_module::ident::Lowercase;
use roc_module::symbol::Symbol;
use roc_parse::ast;
use roc_parse::pattern::PatternType;
@ -41,9 +41,7 @@ pub struct Annotation {
#[derive(Debug)]
pub struct CanDefs {
// TODO don't store the Ident in here (lots of cloning!) - instead,
// make refs_by_symbol be something like MutMap<Symbol, (Region, References)>
pub refs_by_symbol: MutMap<Symbol, (Located<Ident>, References)>,
pub refs_by_symbol: MutMap<Symbol, (Region, References)>,
pub can_defs_by_symbol: MutMap<Symbol, Def>,
pub aliases: SendMap<Symbol, Alias>,
}
@ -79,7 +77,10 @@ enum PendingDef<'a> {
ann: &'a Located<ast::TypeAnnotation<'a>>,
},
ShadowedAlias,
/// An invalid alias, that is ignored in the rest of the pipeline
/// e.g. a shadowed alias, or a definition like `MyAlias 1 : Int`
/// with an incorrect pattern
InvalidAlias,
}
#[derive(Clone, Debug, PartialEq)]
@ -87,10 +88,7 @@ enum PendingDef<'a> {
pub enum Declaration {
Declare(Def),
DeclareRec(Vec<Def>),
InvalidCycle(
Vec<Located<Ident>>,
Vec<(Region /* pattern */, Region /* expr */)>,
),
InvalidCycle(Vec<Symbol>, Vec<(Region /* pattern */, Region /* expr */)>),
}
impl Declaration {
@ -168,7 +166,15 @@ pub fn canonicalize_defs<'a>(
pattern_type,
)
} else {
panic!("TODO gracefully handle the case where a type annotation appears immediately before a body def, but the patterns are different. This should be an error; put a newline or comment between them!");
// the pattern of the annotation does not match the pattern of the body directly below it
env.problems.push(Problem::SignatureDefMismatch {
annotation_pattern: pattern.region,
def_pattern: body_pattern.region,
});
// both the annotation and definition are skipped!
iter.next();
continue;
}
}
_ => to_pending_def(env, var_store, &loc_def.value, &mut scope, pattern_type),
@ -549,17 +555,18 @@ pub fn sort_can_defs(
if is_invalid_cycle {
// We want to show the entire cycle in the error message, so expand it out.
let mut loc_idents_in_cycle: Vec<Located<Ident>> = Vec::new();
let mut loc_symbols = Vec::new();
for symbol in cycle {
let refs = refs_by_symbol.get(&symbol).unwrap_or_else(|| {
panic!(
"Symbol not found in refs_by_symbol: {:?} - refs_by_symbol was: {:?}",
symbol, refs_by_symbol
)
});
loc_idents_in_cycle.push(refs.0.clone());
match refs_by_symbol.get(&symbol) {
None => unreachable!(
r#"Symbol `{:?}` not found in refs_by_symbol! refs_by_symbol was: {:?}"#,
symbol, refs_by_symbol
),
Some((region, _)) => {
loc_symbols.push(Located::at(*region, symbol));
}
}
}
let mut regions = Vec::with_capacity(can_defs_by_symbol.len());
@ -567,16 +574,19 @@ pub fn sort_can_defs(
regions.push((def.loc_pattern.region, def.loc_expr.region));
}
// Sort them to make the report more helpful.
loc_idents_in_cycle.sort();
// Sort them by line number to make the report more helpful.
loc_symbols.sort();
regions.sort();
let symbols_in_cycle: Vec<Symbol> =
loc_symbols.into_iter().map(|s| s.value).collect();
problems.push(Problem::RuntimeError(RuntimeError::CircularDef(
loc_idents_in_cycle.clone(),
symbols_in_cycle.clone(),
regions.clone(),
)));
declarations.push(Declaration::InvalidCycle(loc_idents_in_cycle, regions));
declarations.push(Declaration::InvalidCycle(symbols_in_cycle, regions));
} else {
// slightly inefficient, because we know this becomes exactly one DeclareRec already
group_to_declaration(
@ -725,7 +735,7 @@ fn canonicalize_pending_def<'a>(
scope: &mut Scope,
can_defs_by_symbol: &mut MutMap<Symbol, Def>,
var_store: &mut VarStore,
refs_by_symbol: &mut MutMap<Symbol, (Located<Ident>, References)>,
refs_by_symbol: &mut MutMap<Symbol, (Region, References)>,
aliases: &mut SendMap<Symbol, Alias>,
) -> Output {
use PendingDef::*;
@ -891,9 +901,8 @@ fn canonicalize_pending_def<'a>(
.union(&can_ann.introduced_variables);
}
ShadowedAlias => {
// Since this alias was shadowed, it gets ignored and has no
// effect on the output.
InvalidAlias => {
// invalid aliases (shadowed, incorrect patterns) get ignored
}
TypedBody(loc_pattern, loc_can_pattern, loc_ann, loc_expr) => {
let ann =
@ -993,7 +1002,7 @@ fn canonicalize_pending_def<'a>(
// Store the referenced locals in the refs_by_symbol map, so we can later figure out
// which defined names reference each other.
for (ident, (symbol, region)) in scope.idents() {
for (_, (symbol, region)) in scope.idents() {
if !vars_by_symbol.contains_key(&symbol) {
continue;
}
@ -1008,16 +1017,7 @@ fn canonicalize_pending_def<'a>(
can_output.references.clone()
};
refs_by_symbol.insert(
*symbol,
(
Located {
value: ident.clone(),
region: *region,
},
refs,
),
);
refs_by_symbol.insert(*symbol, (*region, refs));
can_defs_by_symbol.insert(
*symbol,
@ -1138,23 +1138,7 @@ fn canonicalize_pending_def<'a>(
can_output.references.clone()
};
let ident = env
.ident_ids
.get_name(symbol.ident_id())
.unwrap_or_else(|| {
panic!("Could not find {:?} in env.ident_ids", symbol);
});
refs_by_symbol.insert(
symbol,
(
Located {
value: ident.clone().into(),
region,
},
refs,
),
);
refs_by_symbol.insert(symbol, (region, refs));
can_defs_by_symbol.insert(
symbol,
@ -1357,7 +1341,13 @@ fn to_pending_def<'a>(
});
}
_ => {
panic!("TODO gracefully handle an invalid pattern appearing where a type alias rigid var should be.");
// any other pattern in this position is a syntax error.
env.problems.push(Problem::InvalidAliasRigid {
alias_name: symbol,
region: loc_var.region,
});
return PendingDef::InvalidAlias;
}
}
}
@ -1378,7 +1368,7 @@ fn to_pending_def<'a>(
shadow: loc_shadowed_symbol,
});
PendingDef::ShadowedAlias
PendingDef::InvalidAlias
}
}
}

View File

@ -17,7 +17,7 @@ mod test_can {
use roc_can::expr::Expr::{self, *};
use roc_can::expr::Recursive;
use roc_problem::can::{Problem, RuntimeError};
use roc_region::all::{Located, Region};
use roc_region::all::Region;
use std::{f64, i64};
fn assert_can(input: &str, expected: Expr) {
@ -505,10 +505,14 @@ mod test_can {
"#
);
let home = test_home();
let arena = Bump::new();
let CanExprOut {
loc_expr, problems, ..
} = can_expr_with(&arena, test_home(), src);
loc_expr,
problems,
interns,
..
} = can_expr_with(&arena, home, src);
let is_circular_def = if let RuntimeError(RuntimeError::CircularDef(_, _)) = loc_expr.value
{
@ -518,7 +522,7 @@ mod test_can {
};
let problem = Problem::RuntimeError(RuntimeError::CircularDef(
vec![Located::at(Region::new(0, 0, 0, 1), "x".into())],
vec![interns.symbol(home, "x".into())],
vec![(Region::new(0, 0, 0, 1), Region::new(0, 0, 4, 5))],
));
@ -537,16 +541,20 @@ mod test_can {
x
"#
);
let home = test_home();
let arena = Bump::new();
let CanExprOut {
loc_expr, problems, ..
} = can_expr_with(&arena, test_home(), src);
loc_expr,
problems,
interns,
..
} = can_expr_with(&arena, home, src);
let problem = Problem::RuntimeError(RuntimeError::CircularDef(
vec![
Located::at(Region::new(0, 0, 0, 1), "x".into()),
Located::at(Region::new(1, 1, 0, 1), "y".into()),
Located::at(Region::new(2, 2, 0, 1), "z".into()),
interns.symbol(home, "x".into()),
interns.symbol(home, "y".into()),
interns.symbol(home, "z".into()),
],
vec![
(Region::new(0, 0, 0, 1), Region::new(0, 0, 4, 5)),

View File

@ -118,8 +118,8 @@ pub fn constrain_expr(
let record_type = Type::Record(
field_types,
// TODO can we avoid doing Box::new on every single one of these?
// For example, could we have a single lazy_static global Box they
// could all share?
// We can put `static EMPTY_REC: Type = Type::EmptyRec`, but that requires a
// lifetime parameter on `Type`
Box::new(Type::EmptyRec),
);
let record_con = Eq(record_type, expected.clone(), Category::Record, region);
@ -600,11 +600,7 @@ pub fn constrain_expr(
}
}
// TODO check for exhaustiveness. If this `case` is non-exaustive, then:
//
// 1. Record a Problem.
// 2. Add an extra _ branch at the end which throws a runtime error.
// exhaustiveness checking happens when converting to mono::Expr
exists(vec![cond_var, *expr_var], And(constraints))
}
Access {
@ -798,7 +794,6 @@ fn constrain_when_branch(
constraints: Vec::with_capacity(1),
};
// TODO ensure this is correct
// TODO investigate for error messages, is it better to unify all branches with a variable,
// then unify that variable with the expectation?
for loc_pattern in &when_branch.patterns {
@ -872,38 +867,35 @@ pub fn constrain_decls(
) -> Constraint {
let mut constraint = Constraint::SaveTheEnvironment;
let mut env = Env {
home,
rigids: ImMap::default(),
};
for decl in decls.iter().rev() {
// NOTE: rigids are empty because they are not shared between top-level definitions
// Clear the rigids from the previous iteration.
// rigids are not shared between top-level definitions
env.rigids.clear();
match decl {
Declaration::Declare(def) => {
constraint = exists_with_aliases(
aliases.clone(),
Vec::new(),
constrain_def(
&Env {
home,
rigids: ImMap::default(),
},
def,
constraint,
),
constrain_def(&env, def, constraint),
);
}
Declaration::DeclareRec(defs) => {
constraint = exists_with_aliases(
aliases.clone(),
Vec::new(),
constrain_recursive_defs(
&Env {
home,
rigids: ImMap::default(),
},
defs,
constraint,
),
constrain_recursive_defs(&env, defs, constraint),
);
}
Declaration::InvalidCycle(_, _) => panic!("TODO handle invalid cycle"),
Declaration::InvalidCycle(_, _) => {
// invalid cycles give a canonicalization error. we skip them here.
continue;
}
}
}
@ -970,8 +962,7 @@ fn constrain_def(env: &Env, def: &Def, body_con: Constraint) -> Constraint {
expr_type,
annotation_expected.clone(),
Category::Storage,
// TODO proper region
Region::zero(),
annotation.region,
));
constrain_expr(

View File

@ -78,24 +78,33 @@ pub fn constrain_decls(
sharing::annotate_usage(&def.loc_expr.value, &mut var_usage);
}
}
Declaration::InvalidCycle(_, _) => panic!("TODO handle invalid cycle"),
Declaration::InvalidCycle(_, _) => {
// any usage of a value defined in an invalid cycle will blow up
// so for the analysis usage by such values doesn't count
continue;
}
}
}
aliases_to_attr_type(var_store, &mut aliases);
let mut env = Env {
home,
rigids: ImMap::default(),
};
for decl in decls.iter().rev() {
// NOTE: rigids are empty because they are not shared between top-level definitions
// clear the set of rigids from the previous iteration.
// rigids are not shared between top-level definitions.
env.rigids.clear();
match decl {
Declaration::Declare(def) => {
constraint = exists_with_aliases(
aliases.clone(),
Vec::new(),
constrain_def(
&Env {
home,
rigids: ImMap::default(),
},
&env,
var_store,
&var_usage,
&mut ImSet::default(),
@ -109,10 +118,7 @@ pub fn constrain_decls(
aliases.clone(),
Vec::new(),
constrain_recursive_defs(
&Env {
home,
rigids: ImMap::default(),
},
&env,
var_store,
&var_usage,
&mut ImSet::default(),
@ -121,7 +127,10 @@ pub fn constrain_decls(
),
);
}
Declaration::InvalidCycle(_, _) => panic!("TODO handle invalid cycle"),
Declaration::InvalidCycle(_, _) => {
// invalid cycles give a canonicalization error. we skip them here.
continue;
}
}
}

View File

@ -46,6 +46,14 @@ pub enum Problem {
replaced_region: Region,
},
RuntimeError(RuntimeError),
SignatureDefMismatch {
annotation_pattern: Region,
def_pattern: Region,
},
InvalidAliasRigid {
alias_name: Symbol,
region: Region,
},
}
#[derive(Clone, Debug, PartialEq)]
@ -82,10 +90,7 @@ pub enum RuntimeError {
InvalidOctal(std::num::ParseIntError, Box<str>),
InvalidBinary(std::num::ParseIntError, Box<str>),
QualifiedPatternIdent(InlinableString),
CircularDef(
Vec<Located<Ident>>,
Vec<(Region /* pattern */, Region /* expr */)>,
),
CircularDef(Vec<Symbol>, Vec<(Region /* pattern */, Region /* expr */)>),
/// When the author specifies a type annotation but no implementation
NoImplementation,

View File

@ -1,6 +1,7 @@
use roc_collections::all::MutSet;
use roc_problem::can::PrecedenceProblem::BothNonAssociative;
use roc_problem::can::{Problem, RuntimeError};
use roc_region::all::Region;
use std::path::PathBuf;
use crate::report::{Annotation, Report, RocDocAllocator, RocDocBuilder};
@ -238,6 +239,29 @@ pub fn can_problem<'b>(
alloc.reflow(" definitions from this tag union type."),
]),
]),
Problem::SignatureDefMismatch {
ref annotation_pattern,
ref def_pattern,
} => alloc.stack(vec![
alloc.reflow("This annotation does not match the definition immediately following it:"),
alloc.region(Region::span_across(annotation_pattern, def_pattern)),
alloc.reflow("Is it a typo? If not, put either a newline or comment between them."),
]),
Problem::InvalidAliasRigid { alias_name, region } => alloc.stack(vec![
alloc.concat(vec![
alloc.reflow("This pattern in the definition of "),
alloc.symbol_unqualified(alias_name),
alloc.reflow(" is not what I expect:"),
]),
alloc.region(region),
alloc.concat(vec![
alloc.reflow("Only type variables like "),
alloc.type_variable("a".into()),
alloc.reflow(" or "),
alloc.type_variable("value".into()),
alloc.reflow(" can occur in this position."),
]),
]),
Problem::RuntimeError(runtime_error) => pretty_runtime_error(alloc, runtime_error),
};
@ -274,13 +298,13 @@ fn pretty_runtime_error<'b>(
RuntimeError::LookupNotInScope(loc_name, options) => {
not_found(alloc, loc_name.region, &loc_name.value, "value", options)
}
RuntimeError::CircularDef(mut idents, regions) => {
let first = idents.remove(0);
RuntimeError::CircularDef(mut symbols, regions) => {
let first = symbols.remove(0);
if idents.is_empty() {
if symbols.is_empty() {
alloc
.reflow("The ")
.append(alloc.ident(first.value))
.append(alloc.symbol_unqualified(first))
.append(alloc.reflow(
" value is defined directly in terms of itself, causing an infinite loop.",
))
@ -290,24 +314,24 @@ fn pretty_runtime_error<'b>(
alloc.stack(vec![
alloc
.reflow("The ")
.append(alloc.ident(first.value.clone()))
.append(alloc.symbol_unqualified(first))
.append(
alloc.reflow(" definition is causing a very tricky infinite loop:"),
),
alloc.region(regions[0].0),
alloc
.reflow("The ")
.append(alloc.ident(first.value.clone()))
.append(alloc.symbol_unqualified(first))
.append(alloc.reflow(
" value depends on itself through the following chain of definitions:",
)),
crate::report::cycle(
alloc,
4,
alloc.ident(first.value),
idents
alloc.symbol_unqualified(first),
symbols
.into_iter()
.map(|ident| alloc.ident(ident.value))
.map(|s| alloc.symbol_unqualified(s))
.collect::<Vec<_>>(),
),
// TODO hint?

View File

@ -1702,6 +1702,7 @@ mod test_reporting {
#[test]
fn circular_definition_self() {
// invalid recursion
report_problem_as(
indoc!(
r#"
@ -1723,6 +1724,7 @@ mod test_reporting {
#[test]
fn circular_definition() {
// invalid mutual recursion
report_problem_as(
indoc!(
r#"
@ -2413,7 +2415,7 @@ mod test_reporting {
report_problem_as(
indoc!(
r#"
Foo : { x: Bar }
Foo : { x : Bar }
Bar : { y : Foo }
f : Foo
@ -2684,6 +2686,69 @@ mod test_reporting {
)
}
#[test]
fn annotation_definition_mismatch() {
report_problem_as(
indoc!(
r#"
bar : Int
foo = \x -> x
# NOTE: neither bar or foo are defined at this point
4
"#
),
indoc!(
r#"
-- SYNTAX PROBLEM --------------------------------------------------------------
This annotation does not match the definition immediately following
it:
1 > bar : Int
2 > foo = \x -> x
Is it a typo? If not, put either a newline or comment between them.
"#
),
)
}
#[test]
fn invalid_alias_rigid_var_pattern() {
report_problem_as(
indoc!(
r#"
MyAlias 1 : Int
4
"#
),
indoc!(
r#"
-- SYNTAX PROBLEM --------------------------------------------------------------
This pattern in the definition of `MyAlias` is not what I expect:
1 MyAlias 1 : Int
^
Only type variables like `a` or `value` can occur in this position.
-- SYNTAX PROBLEM --------------------------------------------------------------
`MyAlias` is not used anywhere in your code.
1 MyAlias 1 : Int
^^^^^^^^^^^^^^^
If you didn't intend on using `MyAlias` then remove it so future readers
of your code don't wonder why it is there.
"#
),
)
}
#[test]
fn invalid_num() {
report_problem_as(

View File

@ -212,7 +212,7 @@ pub fn name_all_type_vars(variable: Variable, subs: &mut Subs) {
for root in roots {
// show the type variable number instead of `*`. useful for debugging
// set_root_name(root, &(format!("<{:?}>", root).into()), subs);
// set_root_name(root, (format!("<{:?}>", root).into()), subs);
if let Some(Appearances::Multiple) = appearances.get(&root) {
letters_used = name_root(letters_used, root, subs, &mut taken);
}
@ -227,21 +227,19 @@ fn name_root(
) -> u32 {
let (generated_name, new_letters_used) = name_type_var(letters_used, taken);
set_root_name(root, &generated_name, subs);
set_root_name(root, generated_name, subs);
new_letters_used
}
fn set_root_name(root: Variable, name: &Lowercase, subs: &mut Subs) {
fn set_root_name(root: Variable, name: Lowercase, subs: &mut Subs) {
use crate::subs::Content::*;
let mut descriptor = subs.get_without_compacting(root);
match descriptor.content {
FlexVar(None) => {
descriptor.content = FlexVar(Some(name.clone()));
// TODO is this necessary, or was mutating descriptor in place sufficient?
descriptor.content = FlexVar(Some(name));
subs.set(root, descriptor);
}
FlexVar(Some(_existing)) => {