more refactoring for separate ast crate

This commit is contained in:
Anton-4 2021-09-22 21:02:43 +02:00
parent 033b1380d6
commit 59831f2e85
43 changed files with 10010 additions and 24 deletions

19
Cargo.lock generated
View File

@ -3387,6 +3387,25 @@ dependencies = [
"libc",
]
[[package]]
name = "roc_ast"
version = "0.1.0"
dependencies = [
"arraystring",
"bumpalo",
"libc",
"page_size",
"roc_can",
"roc_collections",
"roc_module",
"roc_parse",
"roc_problem",
"roc_region",
"roc_types",
"snafu",
"ven_graph",
]
[[package]]
name = "roc_build"
version = "0.1.0"

View File

@ -29,6 +29,7 @@ members = [
"vendor/pathfinding",
"vendor/pretty",
"editor",
"ast",
"cli",
"cli/cli_utils",
"roc_std",

24
ast/Cargo.toml Normal file
View File

@ -0,0 +1,24 @@
[package]
name = "roc_ast"
version = "0.1.0"
authors = ["The Roc Contributors"]
license = "UPL-1.0"
edition = "2018"
description = "AST as used by the editor and docs. In contrast to the compiler, these types do not keep track of a location in a file."
[dependencies]
roc_can = { path = "../compiler/can" }
roc_collections = { path = "../compiler/collections" }
roc_region = { path = "../compiler/region" }
roc_module = { path = "../compiler/module" }
roc_parse = { path = "../compiler/parse" }
roc_problem = { path = "../compiler/problem" }
roc_types = { path = "../compiler/types" }
arraystring = "0.3.0"
bumpalo = { version = "3.6.1", features = ["collections"] }
libc = "0.2"
page_size = "0.4"
snafu = { version = "0.6", features = ["backtraces"] }
ven_graph = { path = "../vendor/pathfinding" }
[dev-dependencies]

30
ast/src/ast_error.rs Normal file
View File

@ -0,0 +1,30 @@
use snafu::{Backtrace, Snafu};
use crate::lang::core::ast::ASTNodeId;
/// Errors that can be raised while working with the AST of this crate.
///
/// The snafu context selectors are generated with `pub` visibility so other
/// modules can build these errors with `Selector { .. }.fail()`.
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum ASTError {
    /// An `ASTNodeId` did not hold the kind of id the caller asked for.
    #[snafu(display(
        "ASTNodeIdWithoutExprId: The expr_id_opt in ASTNode({:?}) was `None` but I was expecting `Some(ExprId)`.",
        ast_node_id
    ))]
    ASTNodeIdWithoutExprId {
        ast_node_id: ASTNodeId,
        backtrace: Backtrace,
    },
    /// A `Pattern2` of one variant was required, but another was found.
    /// Both variants are carried as strings for display purposes.
    #[snafu(display(
        "UnexpectedPattern2Variant: required a {} at this position, Pattern2 was a {}.",
        required_pattern2,
        encountered_pattern2,
    ))]
    UnexpectedPattern2Variant {
        required_pattern2: String,
        encountered_pattern2: String,
        backtrace: Backtrace,
    },
}
/// `Result` alias whose error type defaults to `ASTError`.
pub type ASTResult<T, E = ASTError> = std::result::Result<T, E>;

View File

@ -0,0 +1,289 @@
use roc_can::{env::Env, expr::Output, scope::Scope};
use roc_collections::all::MutMap;
use roc_problem::can::{Problem};
use roc_region::all::{Located, Region};
use roc_types::{subs::Variable};
use crate::{lang::core::{def::def::References, expr::expr2::{Expr2, ExprId, RecordField, WhenBranch}}, pool::{pool_str::PoolStr, pool_vec::PoolVec}};
/// Error type returned by `canonicalize_fields` when a record cannot be canonicalized.
// NOTE(review): nothing in the visible code constructs this yet; the only
// construction site is commented out inside `canonicalize_fields`.
enum CanonicalizeRecordProblem {
// A field used the optional-value syntax, which `canonicalize_field` rejects.
InvalidOptionalValue {
field_name: PoolStr,
field_region: Region,
record_region: Region,
},
}
/// Intermediate per-field data collected by `canonicalize_fields` before the
/// final `RecordField` nodes are written into the pool.
enum FieldVar {
// Field with a value: its type variable and the pool id of the value expression.
VarAndExprId(Variable, ExprId),
// Field consisting of a label only: just its type variable.
OnlyVar(Variable),
}
/// Canonicalizes every field of a parsed record.
///
/// Each field goes through `canonicalize_field`. The results are first gathered
/// in a map keyed by label and then written into a newly allocated `PoolVec`.
/// The returned `Output` accumulates the references of all field values.
///
/// Duplicate labels and optional-value fields are not supported yet: both
/// situations currently hit `todo!()`.
fn canonicalize_fields<'a>(
    env: &mut Env<'a>,
    scope: &mut Scope,
    fields: &'a [Located<roc_parse::ast::AssignedField<'a, roc_parse::ast::Expr<'a>>>],
) -> Result<(PoolVec<RecordField>, Output), CanonicalizeRecordProblem> {
    let mut fields_by_label: MutMap<&'a str, FieldVar> = MutMap::default();
    let mut output = Output::default();

    for loc_field in fields {
        match canonicalize_field(env, scope, &loc_field.value) {
            Ok(CanonicalField::LabelAndValue {
                label,
                value_expr,
                value_output,
                var,
            }) => {
                let expr_id = env.pool.add(value_expr);
                let previous =
                    fields_by_label.insert(label, FieldVar::VarAndExprId(var, expr_id));

                if previous.is_some() {
                    // TODO push a `Problem::DuplicateRecordFieldValue` (field name,
                    // field region, record region, replaced region) instead of panicking.
                    todo!()
                }

                output.references.union_mut(value_output.references);
            }
            Ok(CanonicalField::InvalidLabelOnly { label, var }) => {
                let previous = fields_by_label.insert(label, FieldVar::OnlyVar(var));

                if previous.is_some() {
                    todo!()
                }
            }
            Err(CanonicalizeFieldProblem::InvalidOptionalValue { .. }) => {
                // TODO report `Problem::InvalidOptionalValue` and return
                // `CanonicalizeRecordProblem::InvalidOptionalValue` (this needs the
                // region of the whole record).
                todo!()
            }
        }
    }

    let pool_vec = PoolVec::with_capacity(fields_by_label.len() as u32, env.pool);

    for (node_id, (label, field_var)) in pool_vec.iter_node_ids().zip(fields_by_label) {
        let name = PoolStr::new(label, env.pool);

        env.pool[node_id] = match field_var {
            FieldVar::VarAndExprId(var, expr_id) => RecordField::LabeledValue(name, var, expr_id),
            FieldVar::OnlyVar(var) => RecordField::InvalidLabelOnly(name, var),
            // TODO RecordField::LabelOnly
        };
    }

    Ok((pool_vec, output))
}
/// Error type returned by `canonicalize_field`.
enum CanonicalizeFieldProblem {
// Produced for `OptionalValue` fields; `field_region` spans from the label to the value expression.
InvalidOptionalValue {
field_name: PoolStr,
field_region: Region,
},
}
/// Successful result of `canonicalize_field` for a single record field.
enum CanonicalField<'a> {
// A label together with its canonicalized value, the `Output` produced while
// canonicalizing that value, and a fresh type variable for the field.
LabelAndValue {
label: &'a str,
value_expr: Expr2,
value_output: Output,
var: Variable,
},
// A label without a value; currently every label-only field ends up here.
InvalidLabelOnly {
label: &'a str,
var: Variable,
}, // TODO make ValidLabelOnly
}
/// Canonicalizes a single parsed record field.
///
/// * `RequiredValue` fields yield `CanonicalField::LabelAndValue`.
/// * `OptionalValue` fields are rejected with
///   `CanonicalizeFieldProblem::InvalidOptionalValue`.
/// * `LabelOnly` fields currently always yield `CanonicalField::InvalidLabelOnly`.
/// * Surrounding spaces are skipped by recursing into the wrapped field.
///
/// # Panics
///
/// Panics on `Malformed` fields, which are not handled yet.
fn canonicalize_field<'a>(
    env: &mut Env<'a>,
    scope: &mut Scope,
    field: &'a roc_parse::ast::AssignedField<'a, roc_parse::ast::Expr<'a>>,
) -> Result<CanonicalField<'a>, CanonicalizeFieldProblem> {
    use roc_parse::ast::AssignedField;

    match field {
        // Both a label and a value, e.g. `{ name: "blah" }`
        AssignedField::RequiredValue(label, _, loc_expr) => {
            // The field variable is created before the value is canonicalized.
            let var = env.var_store.fresh();
            let (value_expr, value_output) =
                to_expr2(env, scope, &loc_expr.value, loc_expr.region);

            Ok(CanonicalField::LabelAndValue {
                label: label.value,
                value_expr,
                value_output,
                var,
            })
        }

        AssignedField::OptionalValue(label, _, loc_expr) => {
            let field_name = PoolStr::new(label.value, env.pool);
            let field_region = Region::span_across(&label.region, &loc_expr.region);

            Err(CanonicalizeFieldProblem::InvalidOptionalValue {
                field_name,
                field_region,
            })
        }

        // A label with no value, e.g. `{ name }` (this is sugar for { name: name })
        AssignedField::LabelOnly(label) => {
            let var = env.var_store.fresh();

            // TODO return ValidLabel if label points to in scope variable
            Ok(CanonicalField::InvalidLabelOnly {
                label: label.value,
                var,
            })
        }

        AssignedField::SpaceBefore(inner_field, _) | AssignedField::SpaceAfter(inner_field, _) => {
            canonicalize_field(env, scope, inner_field)
        }

        AssignedField::Malformed(_) => {
            panic!("TODO canonicalize malformed record field");
        }
    }
}
/// Canonicalizes one branch of a `when` expression.
///
/// The branch gets its own shallow clone of `scope`, so identifiers bound by
/// its patterns are only visible to its body and guard. Pattern outputs are
/// merged into `output` immediately; the outputs of the body and the guard are
/// merged at the end. Symbols introduced by the branch that are never looked
/// up are reported as `Problem::UnusedDef`.
///
/// Returns the canonical branch together with a clone of the references
/// collected from its body and guard.
#[inline(always)]
fn canonicalize_when_branch<'a>(
    env: &mut Env<'a>,
    scope: &mut Scope,
    branch: &'a roc_parse::ast::WhenBranch<'a>,
    output: &mut Output,
) -> (WhenBranch, References) {
    let patterns = PoolVec::with_capacity(branch.patterns.len() as u32, env.pool);

    let outer_scope = scope;
    let mut branch_scope = outer_scope.shallow_clone();

    // TODO report symbols not bound in all patterns
    for (pattern_id, loc_pattern) in patterns.iter_node_ids().zip(branch.patterns.iter()) {
        let (pattern_output, pattern2) = to_pattern2(
            env,
            &mut branch_scope,
            roc_parse::pattern::PatternType::WhenBranch,
            &loc_pattern.value,
            loc_pattern.region,
        );

        output.union(pattern_output);
        env.set_region(pattern_id, loc_pattern.region);
        env.pool[pattern_id] = pattern2;
    }

    let (body, mut branch_output) = to_expr2(
        env,
        &mut branch_scope,
        &branch.value.value,
        branch.value.region,
    );
    let body_id = env.pool.add(body);
    env.set_region(body_id, branch.value.region);

    let guard = if let Some(loc_guard) = &branch.guard {
        let (guard_expr, guard_output) =
            to_expr2(env, &mut branch_scope, &loc_guard.value, loc_guard.region);
        let guard_id = env.pool.add(guard_expr);
        env.set_region(guard_id, loc_guard.region);
        branch_output.union(guard_output);

        Some(guard_id)
    } else {
        None
    };

    // With every reference of this branch collected, report each identifier
    // the branch introduced that was never looked up.
    for (symbol, region) in branch_scope.symbols() {
        let is_used_or_inherited = output.references.has_lookup(symbol)
            || branch_output.references.has_lookup(symbol)
            || outer_scope.contains_symbol(symbol);

        if !is_used_or_inherited {
            env.problem(Problem::UnusedDef(symbol, region));
        }
    }

    let references = branch_output.references.clone();
    output.union(branch_output);

    let when_branch = WhenBranch {
        patterns,
        body: body_id,
        guard,
    };

    (when_branch, references)
}
fn canonicalize_lookup(
env: &mut Env<'_>,
scope: &mut Scope,
module_name: &str,
ident: &str,
region: Region,
) -> (Expr2, Output) {
use Expr2::*;
let mut output = Output::default();
let can_expr = if module_name.is_empty() {
// Since module_name was empty, this is an unqualified var.
// Look it up in scope!
match scope.lookup(&(*ident).into(), region) {
Ok(symbol) => {
output.references.lookups.insert(symbol);
Var(symbol)
}
Err(problem) => {
env.problem(Problem::RuntimeError(problem.clone()));
RuntimeError()
}
}
} else {
// Since module_name was nonempty, this is a qualified var.
// Look it up in the env!
match env.qualified_lookup(module_name, ident, region) {
Ok(symbol) => {
output.references.lookups.insert(symbol);
Var(symbol)
}
Err(problem) => {
// Either the module wasn't imported, or
// it was imported but it doesn't expose this ident.
env.problem(Problem::RuntimeError(problem.clone()));
RuntimeError()
}
}
};
// If it's valid, this ident should be in scope already.
(can_expr, output)
}

View File

@ -0,0 +1,2 @@
pub mod canonicalize;
pub mod module;

View File

@ -0,0 +1,319 @@
#![allow(clippy::all)]
#![allow(dead_code)]
#![allow(unused_imports)]
#![allow(unused_variables)]
use bumpalo::Bump;
use roc_can::operator::desugar_def;
use roc_can::scope::Scope;
use roc_collections::all::{default_hasher, ImMap, ImSet, MutMap, MutSet, SendMap};
use roc_module::ident::Ident;
use roc_module::ident::Lowercase;
use roc_module::symbol::{IdentIds, ModuleId, ModuleIds, Symbol};
use roc_parse::ast;
use roc_parse::pattern::PatternType;
use roc_problem::can::{Problem, RuntimeError};
use roc_region::all::{Located, Region};
use roc_types::subs::{VarStore, Variable};
use crate::env::Env;
use crate::expr::output::Output;
use crate::lang::expr::output::Output;
use crate::pool::pool::NodeId;
use crate::pool::pool::Pool;
use crate::pool::pool_vec::PoolVec;
use crate::types::Alias;
/// Everything `canonicalize_module_defs` produces for one module.
pub struct ModuleOutput {
// Aliases taken from the output of sorting the canonicalized defs.
pub aliases: MutMap<Symbol, NodeId<Alias>>,
// Currently always empty; registering rigids is still a TODO in `canonicalize_module_defs`.
pub rigid_variables: MutMap<Variable, Lowercase>,
// Sorted declarations, plus a runtime-error declaration for every symbol that was exposed but not defined.
pub declarations: Vec<Declaration>,
// Imported value symbols mapped to the fresh type variable created for them.
pub exposed_imports: MutMap<Symbol, Variable>,
// One `(symbol, variable, region)` entry per successfully imported value.
pub lookups: Vec<(Symbol, Variable, Region)>,
// Currently always empty (see the TODO where this struct is built).
pub problems: Vec<Problem>,
pub ident_ids: IdentIds,
// All symbols referenced through lookups, calls, or qualified lookups.
pub references: MutSet<Symbol>,
}
/// Canonicalizes the top-level defs of a module into a `ModuleOutput`.
///
/// Steps, in order:
/// 1. registers the given `aliases` in a fresh `Scope`,
/// 2. desugars every def with `desugar_def`,
/// 3. imports the lowercase (value) entries of `exposed_imports` into the scope,
/// 4. runs `canonicalize_defs` and reports unused, non-exposed symbols,
/// 5. sorts the defs with `sort_can_defs`,
/// 6. reports exposed symbols that have no definition and adds a runtime-error
///    declaration for each of them,
/// 7. collects all referenced symbols.
///
/// Returns `Err` with the `RuntimeError` produced by `sort_can_defs` when sorting fails.
///
/// # Panics
///
/// Panics when an import shadows an existing symbol, when a sorted declaration
/// is an `InvalidCycle`, and when an entry of `exposed_imports` has an empty name.
// TODO trim these down
#[allow(clippy::too_many_arguments)]
pub fn canonicalize_module_defs<'a>(
arena: &Bump,
loc_defs: &'a [Located<ast::Def<'a>>],
home: ModuleId,
module_ids: &ModuleIds,
exposed_ident_ids: IdentIds,
dep_idents: MutMap<ModuleId, IdentIds>,
aliases: MutMap<Symbol, Alias>,
exposed_imports: MutMap<Ident, (Symbol, Region)>,
mut exposed_symbols: MutSet<Symbol>,
var_store: &mut VarStore,
) -> Result<ModuleOutput, RuntimeError> {
let mut pool = Pool::with_capacity(1 << 10);
let mut can_exposed_imports = MutMap::default();
let mut scope = Scope::new(home, &mut pool, var_store);
let num_deps = dep_idents.len();
// Copy each alias' type arguments into a new PoolVec and register the alias in scope.
for (name, alias) in aliases.into_iter() {
let vars = PoolVec::with_capacity(alias.targs.len() as u32, &mut pool);
for (node_id, targ_id) in vars.iter_node_ids().zip(alias.targs.iter_node_ids()) {
let (poolstr, var) = &pool[targ_id];
pool[node_id] = (poolstr.shallow_clone(), *var);
}
scope.add_alias(&mut pool, name, vars, alias.actual);
}
// Desugar operators (convert them to Apply calls, taking into account
// operator precedence and associativity rules), before doing other canonicalization.
//
// If we did this *during* canonicalization, then each time we
// visited a BinOp node we'd recursively try to apply this to each of its nested
// operators, and then again on *their* nested operators, ultimately applying the
// rules multiple times unnecessarily.
let mut desugared =
bumpalo::collections::Vec::with_capacity_in(loc_defs.len() + num_deps, arena);
for loc_def in loc_defs.iter() {
desugared.push(&*arena.alloc(Located {
value: desugar_def(arena, &loc_def.value),
region: loc_def.region,
}));
}
let mut env = Env::new(
home,
arena,
&mut pool,
var_store,
dep_idents,
module_ids,
exposed_ident_ids,
);
let mut lookups = Vec::with_capacity(num_deps);
let rigid_variables = MutMap::default();
// Exposed values are treated like defs that appear before any others, e.g.
//
// imports [ Foo.{ bar, baz } ]
//
// ...is basically the same as if we'd added these extra defs at the start of the module:
//
// bar = Foo.bar
// baz = Foo.baz
//
// Here we essentially add those "defs" to "the beginning of the module"
// by canonicalizing them right before we canonicalize the actual ast::Def nodes.
for (ident, (symbol, region)) in exposed_imports {
// The case of the first character decides between a value (lowercase) and a type alias.
let first_char = ident.as_inline_str().chars().next().unwrap();
if first_char.is_lowercase() {
// this is a value definition
let expr_var = env.var_store.fresh();
match scope.import(ident, symbol, region) {
Ok(()) => {
// Add an entry to exposed_imports using the current module's name
// as the key; e.g. if this is the Foo module and we have
// exposes [ Bar.{ baz } ] then insert Foo.baz as the key, so when
// anything references `baz` in this Foo module, it will resolve to Bar.baz.
can_exposed_imports.insert(symbol, expr_var);
// This will be used during constraint generation,
// to add the usual Lookup constraint as if this were a normal def.
lookups.push((symbol, expr_var, region));
}
Err((_shadowed_symbol, _region)) => {
panic!("TODO gracefully handle shadowing in imports.")
}
}
} else {
// This is a type alias
// these should already have been added to the scope when this module is canonicalized
debug_assert!(scope.contains_alias(symbol));
}
}
let (defs, _scope, output, symbols_introduced) = canonicalize_defs(
&mut env,
Output::default(),
&scope,
&desugared,
PatternType::TopLevelDef,
);
// See if any of the new idents we defined went unused.
// If any were unused and also not exposed, report it.
for (symbol, region) in symbols_introduced {
if !output.references.has_lookup(symbol) && !exposed_symbols.contains(&symbol) {
env.problem(Problem::UnusedDef(symbol, region));
}
}
// TODO register rigids
// for (var, lowercase) in output.introduced_variables.name_by_var.clone() {
// rigid_variables.insert(var, lowercase);
// }
let mut references = MutSet::default();
// Gather up all the symbols that were referenced across all the defs' lookups.
for symbol in output.references.lookups.iter() {
references.insert(*symbol);
}
// Gather up all the symbols that were referenced across all the defs' calls.
for symbol in output.references.calls.iter() {
references.insert(*symbol);
}
// Gather up all the symbols that were referenced from other modules.
for symbol in env.qualified_lookups.iter() {
references.insert(*symbol);
}
// NOTE previously we inserted builtin defs into the list of defs here
// this is now done later, in file.rs.
// From here on `output` refers to the output of sorting, shadowing the one from `canonicalize_defs`.
match sort_can_defs(&mut env, defs, Output::default()) {
(Ok(mut declarations), output) => {
use Declaration::*;
for decl in declarations.iter() {
match decl {
Declare(def) => {
for symbol in def.symbols(env.pool) {
if exposed_symbols.contains(&symbol) {
// Remove this from exposed_symbols,
// so that at the end of the process,
// we can see if there were any
// exposed symbols which did not have
// corresponding defs.
exposed_symbols.remove(&symbol);
}
}
}
DeclareRec(defs) => {
for def in defs {
for symbol in def.symbols(env.pool) {
if exposed_symbols.contains(&symbol) {
// Remove this from exposed_symbols,
// so that at the end of the process,
// we can see if there were any
// exposed symbols which did not have
// corresponding defs.
exposed_symbols.remove(&symbol);
}
}
}
}
InvalidCycle(identifiers, _) => {
panic!("TODO gracefully handle potentially attempting to expose invalid cyclic defs {:?}" , identifiers);
}
Builtin(def) => {
// Builtins cannot be exposed in module declarations.
// This should never happen!
debug_assert!(def
.symbols(env.pool)
.iter()
.all(|symbol| !exposed_symbols.contains(symbol)));
}
}
}
let mut aliases = MutMap::default();
for (symbol, alias) in output.aliases {
// Remove this from exposed_symbols,
// so that at the end of the process,
// we can see if there were any
// exposed symbols which did not have
// corresponding defs.
exposed_symbols.remove(&symbol);
aliases.insert(symbol, alias);
}
// By this point, all exposed symbols should have been removed from
// exposed_symbols and added to exposed_vars_by_symbol. If any were
// not, that means they were declared as exposed but there was
// no actual declaration with that name!
for symbol in exposed_symbols {
env.problem(Problem::ExposedButNotDefined(symbol));
// In case this exposed value is referenced by other modules,
// create a decl for it whose implementation is a runtime error.
// NOTE(review): `pattern_vars` and `runtime_error` are built here but never read in the visible code.
let mut pattern_vars = SendMap::default();
pattern_vars.insert(symbol, env.var_store.fresh());
let runtime_error = RuntimeError::ExposedButNotDefined(symbol);
let value_def = {
let pattern_id = env.pool.add(Pattern2::Identifier(symbol));
let expr_id = env.pool.add(Expr2::RuntimeError());
ValueDef::NoAnnotation {
pattern_id,
expr_id,
expr_var: env.var_store.fresh(),
}
};
let def = Def::Value(value_def);
declarations.push(Declaration::Declare(def));
}
// Incorporate any remaining output.lookups entries into references.
for symbol in output.references.lookups {
references.insert(symbol);
}
// Incorporate any remaining output.calls entries into references.
for symbol in output.references.calls {
references.insert(symbol);
}
// Gather up all the symbols that were referenced from other modules.
// NOTE(review): the same loop already ran before `sort_can_defs`; this pass
// only matters if sorting added qualified lookups — confirm.
for symbol in env.qualified_lookups.iter() {
references.insert(*symbol);
}
// TODO find captured variables
// for declaration in declarations.iter_mut() {
// match declaration {
// Declare(def) => fix_values_captured_in_closure_def(def, &mut MutSet::default()),
// DeclareRec(defs) => {
// fix_values_captured_in_closure_defs(defs, &mut MutSet::default())
// }
// InvalidCycle(_, _) | Builtin(_) => {}
// }
// }
// TODO this loops over all symbols in the module, we can speed it up by having an
// iterator over all builtin symbols
// TODO move over the builtins
// for symbol in references.iter() {
// if symbol.is_builtin() {
// // this can fail when the symbol is for builtin types, or has no implementation yet
// if let Some(def) = builtins::builtin_defs_map(*symbol, var_store) {
// declarations.push(Declaration::Builtin(def));
// }
// }
// }
Ok(ModuleOutput {
aliases,
rigid_variables,
declarations,
references,
exposed_imports: can_exposed_imports,
problems: vec![], // TODO env.problems,
lookups,
ident_ids: env.ident_ids,
})
}
(Err(runtime_error), _) => Err(runtime_error),
}
}

1746
ast/src/constrain.rs Normal file

File diff suppressed because it is too large Load Diff

31
ast/src/lang/core/ast.rs Normal file
View File

@ -0,0 +1,31 @@
use crate::ast_error::{ASTResult, ASTNodeIdWithoutExprId};
use super::{def::def2::DefId, expr::expr2::ExprId, header::AppHeader};
/// Root of the AST: an app header plus the ids of the definitions.
#[derive(Debug)]
pub struct AST {
pub header: AppHeader,
pub def_ids: Vec<DefId>,
}
/// Id of an AST node: either the id of a definition or the id of an expression.
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum ASTNodeId {
ADefId(DefId),
AExprId(ExprId),
}
impl ASTNodeId {
    /// Returns the wrapped `ExprId`, or an `ASTNodeIdWithoutExprId` error when
    /// this id refers to a definition instead.
    pub fn to_expr_id(&self) -> ASTResult<ExprId> {
        if let ASTNodeId::AExprId(expr_id) = self {
            Ok(*expr_id)
        } else {
            ASTNodeIdWithoutExprId { ast_node_id: *self }.fail()
        }
    }

    /// Returns the wrapped `DefId`, or an error when this id refers to an
    /// expression instead.
    ///
    /// NOTE(review): the failure case reuses the `ASTNodeIdWithoutExprId` error
    /// although a `DefId` was requested, so its message can be misleading.
    pub fn to_def_id(&self) -> ASTResult<DefId> {
        if let ASTNodeId::ADefId(def_id) = self {
            Ok(*def_id)
        } else {
            ASTNodeIdWithoutExprId { ast_node_id: *self }.fail()
        }
    }
}

1437
ast/src/lang/core/def/def.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,15 @@
use crate::{lang::core::{expr::expr2::Expr2, pattern::Pattern2}, pool::pool::NodeId};
/// A top level definition, not inside a function. For example: `main = "Hello, world!"`
#[derive(Debug)]
pub enum Def2 {
// ValueDef example: `main = "Hello, world!"`. identifier -> `main`, expr -> "Hello, world!"
// Both parts are stored as node ids: one for the `Pattern2`, one for the `Expr2`.
ValueDef {
identifier_id: NodeId<Pattern2>,
expr_id: NodeId<Expr2>,
},
// A definition without content.
// NOTE(review): presumably a placeholder for the editor, like `Expr2::Blank` — confirm.
Blank,
}
/// Node id of a `Def2`.
pub type DefId = NodeId<Def2>;

View File

@ -0,0 +1,75 @@
use bumpalo::Bump;
use bumpalo::collections::Vec as BumpVec;
use roc_parse::pattern::PatternType;
use roc_region::all::Region;
use crate::lang::{core::pattern::to_pattern2, env::Env, scope::Scope};
use super::def2::Def2;
/// Converts every parsed top-level def into a `Def2`, preserving order.
///
/// Note that the same `region` is passed along for every def; the region
/// stored on each located def is not used.
pub fn defs_to_defs2<'a>(
    arena: &'a Bump,
    env: &mut Env<'a>,
    scope: &mut Scope,
    parsed_defs: &'a BumpVec<roc_region::all::Loc<roc_parse::ast::Def<'a>>>,
    region: Region,
) -> Vec<Def2> {
    let mut defs2 = Vec::with_capacity(parsed_defs.len());

    for loc_def in parsed_defs.iter() {
        defs2.push(to_def2_from_def(arena, env, scope, &loc_def.value, region));
    }

    defs2
}
/// Converts one parsed def into a `Def2`.
///
/// Spaces around the def are skipped. A `Body` def whose pattern is a plain
/// identifier becomes a `Def2::ValueDef`; its expression is converted and
/// added to the pool before the pattern is inspected.
///
/// # Panics
///
/// Panics with `unimplemented!` for `Body` defs with any other pattern and for
/// every other kind of def.
pub fn to_def2_from_def<'a>(
    arena: &'a Bump,
    env: &mut Env<'a>,
    scope: &mut Scope,
    parsed_def: &'a roc_parse::ast::Def<'a>,
    region: Region,
) -> Def2 {
    use roc_parse::ast::{Def, Pattern};

    match parsed_def {
        Def::SpaceBefore(inner_def, _) | Def::SpaceAfter(inner_def, _) => {
            to_def2_from_def(arena, env, scope, inner_def, region)
        }
        Def::Body(&loc_pattern, &loc_expr) => {
            // TODO loc_pattern use identifier
            let (expr2, _) = loc_expr_to_expr2(arena, loc_expr, env, scope, region);
            let expr_id = env.pool.add(expr2);

            match loc_pattern.value {
                Pattern::Identifier(_) => {
                    let (_, pattern2) = to_pattern2(
                        env,
                        scope,
                        PatternType::TopLevelDef,
                        &loc_pattern.value,
                        region,
                    );
                    let identifier_id = env.pool.add(pattern2);

                    // TODO support with annotation
                    Def2::ValueDef {
                        identifier_id,
                        expr_id,
                    }
                }
                other => unimplemented!(
                    "I don't yet know how to convert the pattern {:?} into an expr2",
                    other
                ),
            }
        }
        other => unimplemented!(
            "I don't know how to make an expr2 from this def yet: {:?}",
            other
        ),
    }
}

View File

@ -0,0 +1,3 @@
mod def_to_def2;
pub mod def;
pub mod def2;

View File

@ -0,0 +1,235 @@
use arraystring::{typenum::U30, ArrayString};
use roc_types::subs::Variable;
use crate::{lang::core::{fun_def::FunctionDef, pattern::Pattern2, val_def::ValueDef}, pool::{pool::NodeId, pool_str::PoolStr, pool_vec::PoolVec}};
use roc_can::expr::Recursive;
use roc_module::symbol::Symbol;
use roc_module::low_level::LowLevel;
use roc_module::operator::CalledVia;
/// Inline string type used by `Expr2::SmallStr`.
pub type ArrString = ArrayString<U30>;
// TODO make the inner types private?
/// Node id of an `Expr2`.
pub type ExprId = NodeId<Expr2>;
/// An Expr that fits in 32B.
/// It has a 1B discriminant and variants which hold payloads of at most 31B.
///
/// The trailing `// nB` comments give the size of each field in bytes.
#[derive(Debug)]
pub enum Expr2 {
/// An integer literal without a dot whose value fits in an `IntVal`
SmallInt {
number: IntVal, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
// TODO(rvcas): rename this eventually
/// A large (over 64-bit) negative number literal without a dot.
/// This variant can't use IntVal because if IntVal stored 128-bit
/// integers, it would be 32B on its own because of alignment.
I128 {
number: i128, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
// TODO(rvcas): rename this eventually
/// A large (over 64-bit) nonnegative number literal without a dot
/// This variant can't use IntVal because if IntVal stored 128-bit
/// integers, it would be 32B on its own because of alignment.
U128 {
number: u128, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
/// A floating-point literal (with a dot)
Float {
number: FloatVal, // 16B
var: Variable, // 4B
text: PoolStr, // 8B
},
/// string literals of length up to 30B
SmallStr(ArrString), // 31B
/// string literals of length 31B or more
Str(PoolStr), // 8B
// Lookups
Var(Symbol), // 8B
InvalidLookup(PoolStr), // 8B
List {
elem_var: Variable, // 4B
elems: PoolVec<ExprId>, // 8B
},
If {
cond_var: Variable, // 4B
expr_var: Variable, // 4B
branches: PoolVec<(ExprId, ExprId)>, // 8B
final_else: ExprId, // 4B
},
When {
cond_var: Variable, // 4B
expr_var: Variable, // 4B
branches: PoolVec<WhenBranch>, // 8B
cond: ExprId, // 4B
},
LetRec {
defs: PoolVec<FunctionDef>, // 8B
body_var: Variable, // 4B
body_id: ExprId, // 4B
},
LetFunction {
def_id: NodeId<FunctionDef>, // 4B
body_var: Variable, // 4B
body_id: ExprId, // 4B
},
LetValue {
def_id: NodeId<ValueDef>, // 4B
body_id: ExprId, // 4B
body_var: Variable, // 4B
},
Call {
args: PoolVec<(Variable, ExprId)>, // 8B
expr: ExprId, // 4B
expr_var: Variable, // 4B
fn_var: Variable, // 4B
closure_var: Variable, // 4B
called_via: CalledVia, // 2B
},
RunLowLevel {
op: LowLevel, // 1B
args: PoolVec<(Variable, ExprId)>, // 8B
ret_var: Variable, // 4B
},
/// A closure; data that does not fit in this variant lives in the `ClosureExtra` node referenced by `extra`.
Closure {
args: PoolVec<(Variable, NodeId<Pattern2>)>, // 8B
name: Symbol, // 8B
body: ExprId, // 4B
function_type: Variable, // 4B
recursive: Recursive, // 1B
extra: NodeId<ClosureExtra>, // 4B
},
// Product Types
Record {
record_var: Variable, // 4B
fields: PoolVec<RecordField>, // 8B
},
/// Empty record constant
EmptyRecord,
/// Look up exactly one field on a record, e.g. (expr).foo.
Access {
field: PoolStr, // 8B
expr: ExprId, // 4B
record_var: Variable, // 4B
ext_var: Variable, // 4B
field_var: Variable, // 4B
},
/// field accessor as a function, e.g. (.foo) expr
Accessor {
function_var: Variable, // 4B
closure_var: Variable, // 4B
field: PoolStr, // 8B
record_var: Variable, // 4B
ext_var: Variable, // 4B
field_var: Variable, // 4B
},
Update {
symbol: Symbol, // 8B
updates: PoolVec<RecordField>, // 8B
record_var: Variable, // 4B
ext_var: Variable, // 4B
},
// Sum Types
GlobalTag {
name: PoolStr, // 8B
variant_var: Variable, // 4B
ext_var: Variable, // 4B
arguments: PoolVec<(Variable, ExprId)>, // 8B
},
PrivateTag {
name: Symbol, // 8B
variant_var: Variable, // 4B
ext_var: Variable, // 4B
arguments: PoolVec<(Variable, ExprId)>, // 8B
},
Blank, // Rendered as empty box in editor
// Compiles, but will crash if reached
RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */),
}
/// Error type of the `Res` alias.
// NOTE(review): presumably reported when no more node ids can be handed out;
// nothing in the visible code constructs it — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Problem {
RanOutOfNodeIds,
}
/// `Result` alias with this module's `Problem` as the error type.
pub type Res<T> = Result<T, Problem>;
/// The base in which an integer literal was written; mirrors `roc_parse::ast::Base` (see `IntStyle::from_base`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum IntStyle {
Decimal,
Octal,
Hex,
Binary,
}
impl IntStyle {
pub fn from_base(base: roc_parse::ast::Base) -> Self {
use roc_parse::ast::Base;
match base {
Base::Decimal => Self::Decimal,
Base::Octal => Self::Octal,
Base::Hex => Self::Hex,
Base::Binary => Self::Binary,
}
}
}
/// An integer value of at most 64 bits, tagged with its width and signedness.
/// The `size_of_intval` test pins the size of this type at 16 bytes.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum IntVal {
I64(i64),
U64(u64),
I32(i32),
U32(u32),
I16(i16),
U16(u16),
I8(i8),
U8(u8),
}
/// Guards the 16-byte size that the `Expr2` size annotations assume for `IntVal`.
#[test]
fn size_of_intval() {
    let intval_size = std::mem::size_of::<IntVal>();

    assert_eq!(intval_size, 16);
}
/// A floating-point value tagged with its width.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum FloatVal {
F64(f64),
F32(f32),
}
/// One field of a record expression; every variant starts with the field name and the field's type variable.
#[derive(Debug)]
pub enum RecordField {
// Label without a value where no symbol is recorded for the label.
InvalidLabelOnly(PoolStr, Variable),
// Label without a value, together with a symbol.
// NOTE(review): presumably the in-scope symbol the label refers to — confirm.
LabelOnly(PoolStr, Variable, Symbol),
// Label with an explicit value expression.
LabeledValue(PoolStr, Variable, ExprId),
}
/// One branch of a `when` expression: its patterns, its body and an optional guard.
#[derive(Debug)]
pub struct WhenBranch {
pub patterns: PoolVec<Pattern2>, // 8B
pub body: ExprId, // 4B
pub guard: Option<ExprId>, // 4B
}
/// This is overflow data from a Closure variant, which needs to store
/// more than 32B of total data.
/// It is referenced from `Expr2::Closure` through its `extra` node id.
#[derive(Debug)]
pub struct ClosureExtra {
pub return_type: Variable, // 4B
pub captured_symbols: PoolVec<(Symbol, Variable)>, // 8B
pub closure_type: Variable, // 4B
pub closure_ext_var: Variable, // 4B
}

View File

@ -0,0 +1,136 @@
use crate::{lang::core::{expr::expr2::RecordField, val_def::value_def_to_string}, pool::pool::Pool};
use roc_types::subs::Variable;
use super::expr2::{Expr2, ExprId};
/// Renders the expression stored at `node_id` as an indented, human-readable
/// string, starting at indentation level 0.
pub fn expr2_to_string(node_id: ExprId, pool: &Pool) -> String {
    let mut rendered = String::new();

    expr2_to_string_helper(pool.get(node_id), 0, pool, &mut rendered);

    rendered
}
/// Returns the indentation for `indent_level`: one `" "` unit per level, and
/// an empty string for level 0.
fn get_spacing(indent_level: usize) -> String {
    // `str::repeat` builds the string in one allocation instead of collecting
    // into an intermediate `Vec<&str>` and joining it.
    " ".repeat(indent_level)
}
/// Appends a textual rendering of `expr2` to `out_string`.
///
/// The rendering starts with the indentation for `indent_level` and ends with
/// a newline. Records and lists render their children recursively at deeper
/// indentation levels.
///
/// # Panics
///
/// Hits `todo!` for every `Expr2` variant that has no rendering yet.
fn expr2_to_string_helper(
    expr2: &Expr2,
    indent_level: usize,
    pool: &Pool,
    out_string: &mut String,
) {
    out_string.push_str(&get_spacing(indent_level));

    match expr2 {
        Expr2::SmallStr(arr_string) => {
            out_string.push_str(&format!("SmallStr(\"{}\")", arr_string.as_str()));
        }
        Expr2::Str(pool_str) => {
            out_string.push_str(&format!("Str(\"{}\")", pool_str.as_str(pool)));
        }
        Expr2::Blank => out_string.push_str("Blank"),
        Expr2::EmptyRecord => out_string.push_str("EmptyRecord"),
        Expr2::Record { record_var, fields } => {
            out_string.push_str("Record:\n");
            out_string.push_str(&var_to_string(record_var, indent_level + 1));
            out_string.push_str(&format!("{}fields: [\n", get_spacing(indent_level + 1)));

            // Every field except the first is preceded by a separator.
            let mut needs_separator = false;

            for field in fields.iter(pool) {
                if needs_separator {
                    out_string.push_str(", ");
                }
                needs_separator = true;

                match field {
                    RecordField::InvalidLabelOnly(label, var) => {
                        out_string.push_str(&format!(
                            "{}({}, Var({:?})",
                            get_spacing(indent_level + 2),
                            label.as_str(pool),
                            var,
                        ));
                    }
                    RecordField::LabelOnly(label, var, symbol) => {
                        out_string.push_str(&format!(
                            "{}({}, Var({:?}), Symbol({:?})",
                            get_spacing(indent_level + 2),
                            label.as_str(pool),
                            var,
                            symbol
                        ));
                    }
                    RecordField::LabeledValue(label, var, value_id) => {
                        out_string.push_str(&format!(
                            "{}({}, Var({:?}), Expr2(\n",
                            get_spacing(indent_level + 2),
                            label.as_str(pool),
                            var,
                        ));

                        expr2_to_string_helper(
                            pool.get(*value_id),
                            indent_level + 3,
                            pool,
                            out_string,
                        );

                        out_string.push_str(&format!("{})\n", get_spacing(indent_level + 2)));
                    }
                }
            }

            out_string.push_str(&format!("{}]\n", get_spacing(indent_level + 1)));
        }
        Expr2::List { elem_var, elems } => {
            out_string.push_str("List:\n");
            out_string.push_str(&var_to_string(elem_var, indent_level + 1));
            out_string.push_str(&format!("{}elems: [\n", get_spacing(indent_level + 1)));

            let mut needs_separator = false;

            for elem_id in elems.iter(pool) {
                if needs_separator {
                    out_string.push_str(", ");
                }
                needs_separator = true;

                expr2_to_string_helper(pool.get(*elem_id), indent_level + 2, pool, out_string);
            }

            out_string.push_str(&format!("{}]\n", get_spacing(indent_level + 1)));
        }
        Expr2::InvalidLookup(pool_str) => {
            out_string.push_str(&format!("InvalidLookup({})", pool_str.as_str(pool)));
        }
        Expr2::SmallInt { text, .. } => {
            out_string.push_str(&format!("SmallInt({})", text.as_str(pool)));
        }
        Expr2::LetValue {
            def_id, body_id, ..
        } => {
            let def_string = value_def_to_string(pool.get(*def_id), pool);
            let body = pool.get(*body_id);

            out_string.push_str(&format!(
                "LetValue(def_id: >>{:?}), body_id: >>{:?})",
                def_string, body
            ));
        }
        other => todo!("Implement for {:?}", other),
    }

    out_string.push('\n');
}
/// Renders a type variable as `Var(..)` on its own line, indented one level
/// deeper than `indent_level`.
fn var_to_string(some_var: &Variable, indent_level: usize) -> String {
    let spacing = get_spacing(indent_level + 1);

    format!("{}Var({:?})\n", spacing, some_var)
}

View File

@ -0,0 +1,694 @@
use bumpalo::Bump;
use roc_can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
use roc_can::operator::desugar_expr;
use roc_parse::{ast::Expr, pattern::PatternType};
use roc_problem::can::{Problem, RuntimeError};
use roc_module::symbol::Symbol;
use roc_region::all::{Located, Region};
use crate::lang::core::pattern::flatten_str_literal;
use crate::{lang::{core::expr::expr2::{ExprId, FloatVal, IntStyle, IntVal}, env::Env, scope::Scope}, pool::{pool_str::PoolStr, pool_vec::PoolVec}};
use super::{expr2::Expr2, output::Output};
/// Desugars a located parse expression and converts the result into an `Expr2`.
///
/// Both the incoming expression and the desugared expression value are
/// allocated in `arena`. The given `region` is forwarded to `to_expr2`.
pub fn loc_expr_to_expr2<'a>(
    arena: &'a Bump,
    loc_expr: Located<Expr<'a>>,
    env: &mut Env<'a>,
    scope: &mut Scope,
    region: Region,
) -> (Expr2, Output) {
    let loc_expr_ref: &'a Located<Expr<'a>> = arena.alloc(loc_expr);
    let desugared = desugar_expr(arena, loc_expr_ref);
    let desugared_expr: &'a Expr<'a> = arena.alloc(desugared.value);

    to_expr2(env, scope, desugared_expr, region)
}
/// Zero region attached to the runtime errors `to_expr2` creates for invalid number literals.
const ZERO: Region = Region::zero();
/// Converts a (desugared) parse-tree expression into an `Expr2`, returning it together with
/// the `Output` collected while converting it and its sub-expressions.
///
/// Sub-expressions are converted recursively and stored in `env.pool`; every type variable is
/// drawn fresh from `env.var_store`. Problems are reported through `env.problem`.
///
/// Several error paths, and every expression kind not matched explicitly, currently end in
/// `todo!()`. Sugar nodes that operator desugaring should have removed cause a panic.
pub fn to_expr2<'a>(
env: &mut Env<'a>,
scope: &mut Scope,
parse_expr: &'a roc_parse::ast::Expr<'a>,
region: Region,
) -> (Expr2, self::Output) {
use roc_parse::ast::Expr::*;
match parse_expr {
Float(string) => {
match finish_parsing_float(string) {
Ok(float) => {
let expr = Expr2::Float {
number: FloatVal::F64(float),
var: env.var_store.fresh(),
text: PoolStr::new(string, &mut env.pool),
};
(expr, Output::default())
}
Err((raw, error)) => {
// emit runtime error
let runtime_error = RuntimeError::InvalidFloat(error, ZERO, raw.into());
env.problem(Problem::RuntimeError(runtime_error.clone()));
//
// Expr::RuntimeError(runtime_error)
todo!()
}
}
}
Num(string) => {
match finish_parsing_int(string) {
Ok(int) => {
let expr = Expr2::SmallInt {
number: IntVal::I64(int),
var: env.var_store.fresh(),
// TODO non-hardcode
style: IntStyle::Decimal,
text: PoolStr::new(string, &mut env.pool),
};
(expr, Output::default())
}
Err((raw, error)) => {
// emit runtime error
let runtime_error = RuntimeError::InvalidInt(
error,
roc_parse::ast::Base::Decimal,
ZERO,
raw.into(),
);
env.problem(Problem::RuntimeError(runtime_error.clone()));
//
// Expr::RuntimeError(runtime_error)
todo!()
}
}
}
NonBase10Int {
string,
base,
is_negative,
} => {
match finish_parsing_base(string, *base, *is_negative) {
Ok(int) => {
let expr = Expr2::SmallInt {
number: IntVal::I64(int),
var: env.var_store.fresh(),
// TODO non-hardcode
style: IntStyle::from_base(*base),
text: PoolStr::new(string, &mut env.pool),
};
(expr, Output::default())
}
Err((raw, error)) => {
// emit runtime error
let runtime_error = RuntimeError::InvalidInt(error, *base, ZERO, raw.into());
env.problem(Problem::RuntimeError(runtime_error.clone()));
//
// Expr::RuntimeError(runtime_error)
todo!()
}
}
}
// NOTE(review): the `flatten_str_literal` imported at the top of this file comes from
// `core::pattern`; this call passes `(env, scope, &literal)` and must yield `(Expr2, Output)`
// — confirm that the intended function is in scope here.
Str(literal) => flatten_str_literal(env, scope, &literal),
List { items, .. } => {
let mut output = Output::default();
let output_ref = &mut output;
let elems: PoolVec<ExprId> = PoolVec::with_capacity(items.len() as u32, env.pool);
// Fill each pre-allocated slot with the id of the converted list item.
for (node_id, item) in elems.iter_node_ids().zip(items.iter()) {
let (expr, sub_output) = to_expr2(env, scope, &item.value, item.region);
output_ref.union(sub_output);
let expr_id = env.pool.add(expr);
env.pool[node_id] = expr_id;
}
let expr = Expr2::List {
elem_var: env.var_store.fresh(),
elems,
};
(expr, output)
}
GlobalTag(tag) => {
// a global tag without any arguments
(
Expr2::GlobalTag {
name: PoolStr::new(tag, env.pool),
variant_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
arguments: PoolVec::empty(env.pool),
},
Output::default(),
)
}
PrivateTag(name) => {
// a private tag without any arguments
let ident_id = env.ident_ids.get_or_insert(&(*name).into());
let name = Symbol::new(env.home, ident_id);
(
Expr2::PrivateTag {
name,
variant_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
arguments: PoolVec::empty(env.pool),
},
Output::default(),
)
}
RecordUpdate {
fields,
update: loc_update,
final_comments: _,
} => {
let (can_update, update_out) =
to_expr2(env, scope, &loc_update.value, loc_update.region);
if let Expr2::Var(symbol) = &can_update {
match canonicalize_fields(env, scope, fields) {
Ok((can_fields, mut output)) => {
output.references.union_mut(update_out.references);
let answer = Expr2::Update {
record_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
symbol: *symbol,
updates: can_fields,
};
(answer, output)
}
Err(CanonicalizeRecordProblem::InvalidOptionalValue {
field_name: _,
field_region: _,
record_region: _,
}) => {
// let runtime_error = roc_problem::can::RuntimeError::InvalidOptionalValue {
// field_name,
// field_region,
// record_region,
// };
//
// env.problem(Problem::RuntimeError(runtime_error));
todo!()
}
}
} else {
// only (optionally qualified) variables can be updated, not arbitrary expressions
// let error = roc_problem::can::RuntimeError::InvalidRecordUpdate {
// region: can_update.region,
// };
//
// let answer = Expr::RuntimeError(error.clone());
//
// env.problems.push(Problem::RuntimeError(error));
//
// (answer, Output::default())
todo!()
}
}
Record {
fields,
final_comments: _,
} => {
if fields.is_empty() {
(Expr2::EmptyRecord, Output::default())
} else {
match canonicalize_fields(env, scope, fields) {
Ok((can_fields, output)) => (
Expr2::Record {
record_var: env.var_store.fresh(),
fields: can_fields,
},
output,
),
Err(CanonicalizeRecordProblem::InvalidOptionalValue {
field_name: _,
field_region: _,
record_region: _,
}) => {
// let runtime_error = RuntimeError::InvalidOptionalValue {
// field_name,
// field_region,
// record_region,
// };
//
// env.problem(runtime_error);
// (
// Expr::RuntimeError(
// ),
// Output::default(),
//
// )
todo!()
}
}
}
}
Access(record_expr, field) => {
// TODO
// Shadows the `region` parameter with the placeholder region for this branch.
let region = ZERO;
let (record_expr_id, output) = to_expr_id(env, scope, record_expr, region);
(
Expr2::Access {
record_var: env.var_store.fresh(),
field_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
expr: record_expr_id,
field: PoolStr::new(field, env.pool),
},
output,
)
}
AccessorFunction(field) => (
Expr2::Accessor {
function_var: env.var_store.fresh(),
record_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
closure_var: env.var_store.fresh(),
field_var: env.var_store.fresh(),
field: PoolStr::new(field, env.pool),
},
Output::default(),
),
If(branches, final_else) => {
let mut new_branches = Vec::with_capacity(branches.len());
let mut output = Output::default();
// Only the references of each condition/branch are merged into `output` here.
for (condition, then_branch) in branches.iter() {
let (cond, cond_output) = to_expr2(env, scope, &condition.value, condition.region);
let (then_expr, then_output) =
to_expr2(env, scope, &then_branch.value, then_branch.region);
output.references.union_mut(cond_output.references);
output.references.union_mut(then_output.references);
new_branches.push((env.pool.add(cond), env.pool.add(then_expr)));
}
let (else_expr, else_output) =
to_expr2(env, scope, &final_else.value, final_else.region);
output.references.union_mut(else_output.references);
let expr = Expr2::If {
cond_var: env.var_store.fresh(),
expr_var: env.var_store.fresh(),
branches: PoolVec::new(new_branches.into_iter(), env.pool),
final_else: env.pool.add(else_expr),
};
(expr, output)
}
When(loc_cond, branches) => {
// Infer the condition expression's type.
let cond_var = env.var_store.fresh();
let (can_cond, mut output) = to_expr2(env, scope, &loc_cond.value, loc_cond.region);
// the condition can never be a tail-call
output.tail_call = None;
let can_branches = PoolVec::with_capacity(branches.len() as u32, env.pool);
for (node_id, branch) in can_branches.iter_node_ids().zip(branches.iter()) {
let (can_when_branch, branch_references) =
canonicalize_when_branch(env, scope, *branch, &mut output);
output.references.union_mut(branch_references);
env.pool[node_id] = can_when_branch;
}
// A "when" with no branches is a runtime error, but it will mess things up
// if code gen mistakenly thinks this is a tail call just because its condition
// happened to be one. (The condition gave us our initial output value.)
if branches.is_empty() {
output.tail_call = None;
}
// Incorporate all three expressions into a combined Output value.
let expr = Expr2::When {
expr_var: env.var_store.fresh(),
cond_var,
cond: env.pool.add(can_cond),
branches: can_branches,
};
(expr, output)
}
Closure(loc_arg_patterns, loc_body_expr) => {
// The globally unique symbol that will refer to this closure once it gets converted
// into a top-level procedure for code gen.
//
// In the Foo module, this will look something like Foo.$1 or Foo.$2.
let symbol = env
.closure_name_symbol
.unwrap_or_else(|| env.gen_unique_symbol());
env.closure_name_symbol = None;
// The body expression gets a new scope for canonicalization.
// Shadow `scope` to make sure we don't accidentally use the original one for the
// rest of this block, but keep the original around for later diffing.
let original_scope = scope;
let mut scope = original_scope.shallow_clone();
let can_args = PoolVec::with_capacity(loc_arg_patterns.len() as u32, env.pool);
let mut output = Output::default();
let mut bound_by_argument_patterns = MutSet::default();
for (node_id, loc_pattern) in can_args.iter_node_ids().zip(loc_arg_patterns.iter()) {
let (new_output, can_arg) = to_pattern2(
env,
&mut scope,
roc_parse::pattern::PatternType::FunctionArg,
&loc_pattern.value,
loc_pattern.region,
);
bound_by_argument_patterns
.extend(new_output.references.bound_symbols.iter().copied());
output.union(new_output);
let pattern_id = env.add(can_arg, loc_pattern.region);
env.pool[node_id] = (env.var_store.fresh(), pattern_id);
}
let (body_expr, new_output) =
to_expr2(env, &mut scope, &loc_body_expr.value, loc_body_expr.region);
let mut captured_symbols: MutSet<Symbol> =
new_output.references.lookups.iter().copied().collect();
// filter out the closure's name itself
captured_symbols.remove(&symbol);
// symbols bound either in this pattern or deeper down are not captured!
captured_symbols.retain(|s| !new_output.references.bound_symbols.contains(s));
captured_symbols.retain(|s| !bound_by_argument_patterns.contains(s));
// filter out top-level symbols
// those will be globally available, and don't need to be captured
captured_symbols.retain(|s| !env.top_level_symbols.contains(s));
// filter out imported symbols
// those will be globally available, and don't need to be captured
captured_symbols.retain(|s| s.module_id() == env.home);
// TODO any Closure that has an empty `captured_symbols` list could be excluded!
output.union(new_output);
// filter out aliases
captured_symbols.retain(|s| !output.references.referenced_aliases.contains(s));
// filter out functions that don't close over anything
captured_symbols.retain(|s| !output.non_closures.contains(s));
// Now that we've collected all the references, check to see if any of the args we defined
// went unreferenced. If any did, report them as unused arguments.
for (sub_symbol, region) in scope.symbols() {
if !original_scope.contains_symbol(sub_symbol) {
if !output.references.has_lookup(sub_symbol) {
// The body never referenced this argument we declared. It's an unused argument!
env.problem(Problem::UnusedArgument(symbol, sub_symbol, region));
}
// We shouldn't ultimately count arguments as referenced locals. Otherwise,
// we end up with weird conclusions like the expression (\x -> x + 1)
// references the (nonexistent) local variable x!
output.references.lookups.remove(&sub_symbol);
}
}
env.register_closure(symbol, output.references.clone());
let mut captured_symbols: Vec<_> = captured_symbols
.into_iter()
.map(|s| (s, env.var_store.fresh()))
.collect();
// sort symbols, so we know the order in which they're stored in the closure record
captured_symbols.sort();
// store that this function doesn't capture anything. It will be promoted to a
// top-level function, and does not need to be captured by other surrounding functions.
if captured_symbols.is_empty() {
output.non_closures.insert(symbol);
}
let captured_symbols = PoolVec::new(captured_symbols.into_iter(), env.pool);
let extra = ClosureExtra {
return_type: env.var_store.fresh(), // 4B
captured_symbols, // 8B
closure_type: env.var_store.fresh(), // 4B
closure_ext_var: env.var_store.fresh(), // 4B
};
(
Expr2::Closure {
function_type: env.var_store.fresh(),
name: symbol,
recursive: Recursive::NotRecursive,
args: can_args,
body: env.add(body_expr, loc_body_expr.region),
extra: env.pool.add(extra),
},
output,
)
}
Apply(loc_fn, loc_args, application_style) => {
// The expression that evaluates to the function being called, e.g. `foo` in
// (foo) bar baz
let fn_region = loc_fn.region;
// Canonicalize the function expression and its arguments
let (fn_expr, mut output) = to_expr2(env, scope, &loc_fn.value, fn_region);
// The function's return type
let args = PoolVec::with_capacity(loc_args.len() as u32, env.pool);
for (node_id, loc_arg) in args.iter_node_ids().zip(loc_args.iter()) {
let (arg_expr_id, arg_out) = to_expr_id(env, scope, &loc_arg.value, loc_arg.region);
env.pool[node_id] = (env.var_store.fresh(), arg_expr_id);
output.references.union_mut(arg_out.references);
}
// Default: We're not tail-calling a symbol (by name), we're tail-calling a function value.
output.tail_call = None;
let expr = match fn_expr {
Expr2::Var(ref symbol) => {
output.references.calls.insert(*symbol);
// we're tail-calling a symbol by name, check if it's the tail-callable symbol
output.tail_call = match &env.tailcallable_symbol {
Some(tc_sym) if *tc_sym == *symbol => Some(*symbol),
Some(_) | None => None,
};
// IDEA: Expr2::CallByName?
let fn_expr_id = env.add(fn_expr, fn_region);
Expr2::Call {
args,
expr: fn_expr_id,
expr_var: env.var_store.fresh(),
fn_var: env.var_store.fresh(),
closure_var: env.var_store.fresh(),
called_via: *application_style,
}
}
Expr2::RuntimeError() => {
// We can't call a runtime error; bail out by propagating it!
return (fn_expr, output);
}
// Applying a tag: rebuild the tag with the converted arguments attached.
Expr2::GlobalTag {
variant_var,
ext_var,
name,
..
} => Expr2::GlobalTag {
variant_var,
ext_var,
name,
arguments: args,
},
Expr2::PrivateTag {
variant_var,
ext_var,
name,
..
} => Expr2::PrivateTag {
variant_var,
ext_var,
name,
arguments: args,
},
_ => {
// This could be something like ((if True then fn1 else fn2) arg1 arg2).
let fn_expr_id = env.add(fn_expr, fn_region);
Expr2::Call {
args,
expr: fn_expr_id,
expr_var: env.var_store.fresh(),
fn_var: env.var_store.fresh(),
closure_var: env.var_store.fresh(),
called_via: *application_style,
}
}
};
(expr, output)
}
Defs(loc_defs, loc_ret) => {
let (unsorted, mut scope, defs_output, symbols_introduced) = canonicalize_defs(
env,
Output::default(),
&scope,
loc_defs,
PatternType::DefExpr,
);
// The def as a whole is a tail call iff its return expression is a tail call.
// Use its output as a starting point because its tail_call already has the right answer!
let (ret_expr, mut output) = to_expr2(env, &mut scope, &loc_ret.value, loc_ret.region);
output
.introduced_variables
.union(&defs_output.introduced_variables);
output.references.union_mut(defs_output.references);
// Now that we've collected all the references, check to see if any of the new idents
// we defined went unused by the return expression. If any were unused, report it.
for (symbol, region) in symbols_introduced {
if !output.references.has_lookup(symbol) {
env.problem(Problem::UnusedDef(symbol, region));
}
}
let (can_defs, output) = sort_can_defs(env, unsorted, output);
match can_defs {
Ok(decls) => {
// Wrap the return expression in one let per declaration, innermost (last) first.
let mut expr = ret_expr;
for declaration in decls.into_iter().rev() {
expr = decl_to_let(env.pool, env.var_store, declaration, expr);
}
(expr, output)
}
Err(_err) => {
// TODO: fix this to be something from Expr2
// (RuntimeError(err), output)
todo!()
}
}
}
PrecedenceConflict { .. } => {
// use roc_problem::can::RuntimeError::*;
//
// let problem = PrecedenceProblem::BothNonAssociative(
// *whole_region,
// binop1.clone(),
// binop2.clone(),
// );
//
// env.problem(Problem::PrecedenceProblem(problem.clone()));
//
// (
// RuntimeError(InvalidPrecedence(problem, region)),
// Output::default(),
// )
todo!()
}
MalformedClosure => {
// use roc_problem::can::RuntimeError::*;
// (RuntimeError(MalformedClosure(region)), Output::default())
todo!()
}
MalformedIdent(_name, _problem) => {
// use roc_problem::can::RuntimeError::*;
//
// let problem = MalformedIdentifier((*name).into(), region);
// env.problem(Problem::RuntimeError(problem.clone()));
//
// (RuntimeError(problem), Output::default())
todo!()
}
Var { module_name, ident } => canonicalize_lookup(env, scope, module_name, ident, region),
// Below this point, we shouldn't see any of these nodes anymore because
// operator desugaring should have removed them!
bad_expr @ ParensAround(_) => {
panic!(
"A ParensAround did not get removed during operator desugaring somehow: {:#?}",
bad_expr
);
}
bad_expr @ SpaceBefore(_, _) => {
panic!(
"A SpaceBefore did not get removed during operator desugaring somehow: {:#?}",
bad_expr
);
}
bad_expr @ SpaceAfter(_, _) => {
panic!(
"A SpaceAfter did not get removed during operator desugaring somehow: {:#?}",
bad_expr
);
}
bad_expr @ BinOps { .. } => {
panic!(
"A binary operator chain did not get desugared somehow: {:#?}",
bad_expr
);
}
bad_expr @ UnaryOp(_, _) => {
panic!(
"A unary operator did not get desugared somehow: {:#?}",
bad_expr
);
}
rest => todo!("not yet implemented {:?}", rest),
}
}
/// Converts `parse_expr` with [`to_expr2`], registers the result (with `region`) through
/// `env.add`, and returns the resulting id together with the conversion's `Output`.
pub fn to_expr_id<'a>(
    env: &mut Env<'a>,
    scope: &mut Scope,
    parse_expr: &'a roc_parse::ast::Expr<'a>,
    region: Region,
) -> (ExprId, Output) {
    let (expr2, output) = to_expr2(env, scope, parse_expr, region);
    let expr_id = env.add(expr2, region);

    (expr_id, output)
}

View File

@ -0,0 +1,52 @@
use roc_types::subs::{Variable};
use roc_collections::all::{MutMap};
use roc_module::ident::{Lowercase};
use roc_module::symbol::{Symbol};
/// Type variables collected while processing annotations: wildcards, named variables
/// (indexed in both directions) and variables for host-exposed aliases.
#[derive(Clone, Debug, PartialEq, Default)]
pub struct IntroducedVariables {
// Rigids must be unique within a type annotation.
// E.g. in `identity : a -> a`, there should only be one
// variable (a rigid one, with name "a").
// Hence `var_by_name : Map<Lowercase, Variable>`
//
// But then between annotations, the same name can occur multiple times,
// but a variable can only have one name. Therefore
// `name_by_var : Map<Variable, Lowercase>`.
pub wildcards: Vec<Variable>,
pub var_by_name: MutMap<Lowercase, Variable>,
pub name_by_var: MutMap<Variable, Lowercase>,
pub host_exposed_aliases: MutMap<Symbol, Variable>,
}
impl IntroducedVariables {
    /// Registers a named variable in both lookup tables (name -> variable, variable -> name).
    pub fn insert_named(&mut self, name: Lowercase, var: Variable) {
        let lookup_key = name.clone();

        self.var_by_name.insert(lookup_key, var);
        self.name_by_var.insert(var, name);
    }

    /// Appends a wildcard variable.
    pub fn insert_wildcard(&mut self, var: Variable) {
        self.wildcards.push(var);
    }

    /// Associates a host-exposed alias symbol with its variable.
    pub fn insert_host_exposed_alias(&mut self, symbol: Symbol, var: Variable) {
        self.host_exposed_aliases.insert(symbol, var);
    }

    /// Merges everything from `other` into `self`. Wildcards are appended; for the maps,
    /// entries from `other` overwrite entries in `self` that share a key.
    pub fn union(&mut self, other: &Self) {
        let Self {
            wildcards,
            var_by_name,
            name_by_var,
            host_exposed_aliases,
        } = other;

        self.wildcards.extend_from_slice(wildcards);

        for (name, var) in var_by_name.iter() {
            self.var_by_name.insert(name.clone(), *var);
        }

        for (var, name) in name_by_var.iter() {
            self.name_by_var.insert(*var, name.clone());
        }

        for (symbol, var) in host_exposed_aliases.iter() {
            self.host_exposed_aliases.insert(*symbol, *var);
        }
    }

    /// Looks up the variable registered under `name`, if any.
    pub fn var_by_name(&self, name: &Lowercase) -> Option<&Variable> {
        let table = &self.var_by_name;
        table.get(name)
    }

    /// Looks up the name registered for `var`, if any.
    pub fn name_by_var(&self, var: Variable) -> Option<&Lowercase> {
        let table = &self.name_by_var;
        table.get(&var)
    }
}

View File

@ -0,0 +1,5 @@
pub mod expr2;
pub mod expr2_to_string;
pub (crate) mod output;
mod introduced_vars;
pub (crate) mod expr_to_expr2;

View File

@ -0,0 +1,28 @@
use crate::{lang::core::{def::def::References, types::{Alias}}, pool::pool::NodeId};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::Symbol;
use super::introduced_vars::IntroducedVariables;
/// Side information accumulated while converting expressions and patterns; returned next to
/// each converted node and merged upwards with `Output::union`.
#[derive(Clone, Default, Debug, PartialEq)]
pub struct Output {
// Symbols referenced, bound, or called by the converted node.
pub references: References,
// The symbol being tail-called by the converted node, if any.
pub tail_call: Option<Symbol>,
// Type variables introduced while converting the node.
pub introduced_variables: IntroducedVariables,
pub aliases: MutMap<Symbol, NodeId<Alias>>,
// Symbols of functions known not to capture anything.
pub non_closures: MutSet<Symbol>,
}
impl Output {
    /// Merges `other` into `self`.
    ///
    /// * `references`, `aliases` and `non_closures` are unioned.
    /// * `introduced_variables` are unioned as well, so that variables introduced by a
    ///   sub-expression or sub-pattern are not silently dropped when outputs are combined.
    /// * `tail_call` keeps the value already present in `self`; `other`'s tail call is only
    ///   adopted when `self` has none.
    pub fn union(&mut self, other: Self) {
        self.references.union_mut(other.references);

        // An existing tail call takes precedence over the later one.
        if let (None, Some(later)) = (self.tail_call, other.tail_call) {
            self.tail_call = Some(later);
        }

        self.introduced_variables
            .union(&other.introduced_variables);
        self.aliases.extend(other.aliases);
        self.non_closures.extend(other.non_closures);
    }
}

View File

@ -0,0 +1,56 @@
use crate::{lang::rigids::Rigids, pool::{pool::NodeId, pool_vec::PoolVec, shallow_clone::ShallowClone}};
use roc_module::symbol::Symbol;
use roc_types::subs::Variable;
use super::{expr::expr2::ExprId, pattern::PatternId, types::{Type2, TypeId}};
/// A function definition, either with a type annotation (argument and return types are
/// stored as types) or without one (only type variables are stored).
/// The trailing `// NB` comments record the intended size of each field in bytes.
#[derive(Debug)]
pub enum FunctionDef {
WithAnnotation {
name: Symbol, // 8B
arguments: PoolVec<(PatternId, Type2)>, // 8B
rigids: NodeId<Rigids>, // 4B
return_type: TypeId, // 4B
body: ExprId, // 4B
},
NoAnnotation {
name: Symbol, // 8B
arguments: PoolVec<(PatternId, Variable)>, // 8B
return_var: Variable, // 4B
body: ExprId, // 4B
},
}
impl ShallowClone for FunctionDef {
    /// Copies the id/symbol fields by value and shallow-clones the argument vector.
    fn shallow_clone(&self) -> Self {
        match *self {
            Self::WithAnnotation {
                name,
                ref arguments,
                rigids,
                return_type,
                body,
            } => {
                let arguments = arguments.shallow_clone();

                Self::WithAnnotation {
                    name,
                    arguments,
                    rigids,
                    return_type,
                    body,
                }
            }
            Self::NoAnnotation {
                name,
                ref arguments,
                return_var,
                body,
            } => {
                let arguments = arguments.shallow_clone();

                Self::NoAnnotation {
                    name,
                    arguments,
                    return_var,
                    body,
                }
            }
        }
    }
}

View File

@ -0,0 +1,12 @@
use super::expr::expr2::ExprId;
/// The header of an app module: its name, packages base, imports and provided names,
/// plus the id of the AST node associated with the header.
#[derive(Debug)]
pub struct AppHeader {
pub app_name: String,
pub packages_base: String,
pub imports: Vec<String>,
pub provides: Vec<String>,
pub ast_node_id: ExprId, // TODO probably want to create and use HeaderId
}

8
ast/src/lang/core/mod.rs Normal file
View File

@ -0,0 +1,8 @@
pub mod def;
pub mod expr;
pub mod header;
pub mod ast;
mod val_def;
mod fun_def;
mod pattern;
pub mod types;

View File

@ -0,0 +1,626 @@
#![allow(clippy::all)]
#![allow(dead_code)]
#![allow(unused_imports)]
use bumpalo::collections::Vec as BumpVec;
use roc_can::expr::unescape_char;
use roc_can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
use roc_collections::all::BumpMap;
use roc_module::symbol::{Interns, Symbol};
use roc_parse::ast::{StrLiteral, StrSegment};
use roc_parse::pattern::PatternType;
use roc_problem::can::{MalformedPatternProblem, Problem, RuntimeError};
use roc_region::all::Region;
use roc_types::subs::Variable;
use crate::constrain::Constraint;
use crate::lang::core::expr::expr_to_expr2::to_expr_id;
use crate::lang::env::Env;
use crate::lang::scope::Scope;
use crate::pool::pool::{NodeId, Pool};
use crate::pool::pool_str::PoolStr;
use crate::pool::pool_vec::PoolVec;
use crate::pool::shallow_clone::ShallowClone;
use crate::ast_error::{ASTResult, UnexpectedPattern2Variant};
use super::expr::expr2::{ExprId, FloatVal, IntVal};
use super::expr::output::Output;
use super::types::Type2;
pub type PatternId = NodeId<Pattern2>;
/// A pattern as produced by `to_pattern2`. Nested patterns and strings are referenced
/// through pool-backed ids/containers (`PatternId`, `PoolVec`, `PoolStr`).
/// The trailing `// NB` comments record the intended size of each payload in bytes.
#[derive(Debug)]
pub enum Pattern2 {
Identifier(Symbol), // 8B
NumLiteral(Variable, i64), // 4B + 8B
IntLiteral(IntVal), // 16B
FloatLiteral(FloatVal), // 16B
StrLiteral(PoolStr), // 8B
Underscore, // 0B
GlobalTag {
whole_var: Variable, // 4B
ext_var: Variable, // 4B
tag_name: PoolStr, // 8B
arguments: PoolVec<(Variable, PatternId)>, // 8B
},
PrivateTag {
whole_var: Variable, // 4B
ext_var: Variable, // 4B
tag_name: Symbol, // 8B
arguments: PoolVec<(Variable, PatternId)>, // 8B
},
RecordDestructure {
whole_var: Variable, // 4B
ext_var: Variable, // 4B
destructs: PoolVec<RecordDestruct>, // 8B
},
// Runtime Exceptions
// TODO: figure out how to better handle regions
// to keep this member under 32. With 2 Regions
// it ends up at size 40
Shadowed {
shadowed_ident: PoolStr,
// definition: Region,
// shadowed_at: Region,
},
/// Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments!
UnsupportedPattern(Region),
// parse error patterns
MalformedPattern(MalformedPatternProblem, Region),
}
impl ShallowClone for Pattern2 {
// Not implemented yet: calling this panics via `todo!()`.
fn shallow_clone(&self) -> Self {
todo!()
}
}
/// Accumulator holding headers (symbol -> type), variables and constraints for patterns.
/// `vars` and `constraints` are bump-allocated vectors with lifetime `'a`.
#[derive(Debug)]
pub struct PatternState2<'a> {
pub headers: BumpMap<Symbol, Type2>,
pub vars: BumpVec<'a, Variable>,
pub constraints: BumpVec<'a, Constraint<'a>>,
}
/// One field of a record destructure pattern: the field's label, the symbol associated with
/// it, its type variable, and the kind of destructure (stored in the pool as `DestructType`).
#[derive(Debug)]
pub struct RecordDestruct {
pub var: Variable, // 4B
pub label: PoolStr, // 8B
pub symbol: Symbol, // 8B
pub typ: NodeId<DestructType>, // 4B
}
/// How a record field is destructured: plainly (`Required`), with a default expression
/// (`Optional`), or against a nested pattern (`Guard`).
#[derive(Clone, Debug)]
pub enum DestructType {
Required,
Optional(Variable, ExprId), // 4B + 4B
Guard(Variable, PatternId), // 4B + 4B
}
/// Converts `pattern` with [`to_pattern2`] and stores the result in the already-existing pool
/// slot `pattern_id`, recording `region` for that slot. Returns the conversion's `Output`.
pub fn as_pattern_id<'a>(
    env: &mut Env<'a>,
    scope: &mut Scope,
    pattern_id: PatternId,
    pattern_type: PatternType,
    pattern: &roc_parse::ast::Pattern<'a>,
    region: Region,
) -> Output {
    let (pattern_output, pattern2) = to_pattern2(env, scope, pattern_type, pattern, region);

    // Overwrite the caller-provided slot instead of allocating a new one.
    env.pool[pattern_id] = pattern2;
    env.set_region(pattern_id, region);

    pattern_output
}
/// Converts `pattern` with [`to_pattern2`], adds the result to the pool as a new node,
/// records `region` for it, and returns the conversion's `Output` with the new id.
pub fn to_pattern_id<'a>(
    env: &mut Env<'a>,
    scope: &mut Scope,
    pattern_type: PatternType,
    pattern: &roc_parse::ast::Pattern<'a>,
    region: Region,
) -> (Output, PatternId) {
    let (pattern_output, pattern2) = to_pattern2(env, scope, pattern_type, pattern, region);

    let new_id = env.pool.add(pattern2);
    env.set_region(new_id, region);

    (pattern_output, new_id)
}
/// Converts a parse-tree pattern into a `Pattern2`, returning it together with the `Output`
/// collected along the way (e.g. the symbols bound by the pattern).
///
/// `pattern_type` says where the pattern occurs; literal patterns are only accepted in
/// `WhenBranch` position and underscores only in `WhenBranch`/`FunctionArg` position —
/// elsewhere a problem is reported and `Pattern2::UnsupportedPattern` is produced.
/// Shadowing inside record destructures currently ends in `todo!()`.
pub fn to_pattern2<'a>(
env: &mut Env<'a>,
scope: &mut Scope,
pattern_type: PatternType,
pattern: &roc_parse::ast::Pattern<'a>,
region: Region,
) -> (Output, Pattern2) {
use roc_parse::ast::Pattern::*;
use PatternType::*;
let mut output = Output::default();
let can_pattern = match pattern {
Identifier(name) => match scope.introduce(
(*name).into(),
&env.exposed_ident_ids,
&mut env.ident_ids,
region,
) {
Ok(symbol) => {
output.references.bound_symbols.insert(symbol);
Pattern2::Identifier(symbol)
}
Err((original_region, shadow)) => {
env.problem(Problem::RuntimeError(RuntimeError::Shadowing {
original_region,
shadow: shadow.clone(),
}));
let name: &str = shadow.value.as_ref();
Pattern2::Shadowed {
shadowed_ident: PoolStr::new(name, env.pool),
}
}
},
QualifiedIdentifier { .. } => {
let problem = MalformedPatternProblem::QualifiedIdentifier;
malformed_pattern(env, problem, region)
}
Underscore(_) => match pattern_type {
WhenBranch | FunctionArg => Pattern2::Underscore,
TopLevelDef | DefExpr => underscore_in_def(env, region),
},
FloatLiteral(ref string) => match pattern_type {
WhenBranch => match finish_parsing_float(string) {
Err(_error) => {
let problem = MalformedPatternProblem::MalformedFloat;
malformed_pattern(env, problem, region)
}
Ok(float) => Pattern2::FloatLiteral(FloatVal::F64(float)),
},
ptype => unsupported_pattern(env, ptype, region),
},
NumLiteral(string) => match pattern_type {
WhenBranch => match finish_parsing_int(string) {
Err(_error) => {
let problem = MalformedPatternProblem::MalformedInt;
malformed_pattern(env, problem, region)
}
Ok(int) => Pattern2::NumLiteral(env.var_store.fresh(), int),
},
ptype => unsupported_pattern(env, ptype, region),
},
NonBase10Literal {
string,
base,
is_negative,
} => match pattern_type {
WhenBranch => match finish_parsing_base(string, *base, *is_negative) {
Err(_error) => {
let problem = MalformedPatternProblem::MalformedBase(*base);
malformed_pattern(env, problem, region)
}
Ok(int) => {
// NOTE(review): `is_negative` was already passed to `finish_parsing_base`; if that
// helper applies the sign itself, negating again here flips it back, and `-int`
// overflows for i64::MIN — confirm against `roc_can::num`.
if *is_negative {
Pattern2::IntLiteral(IntVal::I64(-int))
} else {
Pattern2::IntLiteral(IntVal::I64(int))
}
}
},
ptype => unsupported_pattern(env, ptype, region),
},
StrLiteral(literal) => match pattern_type {
WhenBranch => flatten_str_literal(env.pool, literal),
ptype => unsupported_pattern(env, ptype, region),
},
GlobalTag(name) => {
// Canonicalize the tag's name.
Pattern2::GlobalTag {
whole_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
tag_name: PoolStr::new(name, env.pool),
arguments: PoolVec::empty(env.pool),
}
}
PrivateTag(name) => {
let ident_id = env.ident_ids.get_or_insert(&(*name).into());
// Canonicalize the tag's name.
Pattern2::PrivateTag {
whole_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
tag_name: Symbol::new(env.home, ident_id),
arguments: PoolVec::empty(env.pool),
}
}
Apply(tag, patterns) => {
let can_patterns = PoolVec::with_capacity(patterns.len() as u32, env.pool);
// Convert each argument pattern and store (fresh var, pattern id) in its pre-allocated slot.
for (loc_pattern, node_id) in (*patterns).iter().zip(can_patterns.iter_node_ids()) {
let (new_output, can_pattern) = to_pattern2(
env,
scope,
pattern_type,
&loc_pattern.value,
loc_pattern.region,
);
output.union(new_output);
let can_pattern_id = env.pool.add(can_pattern);
env.pool[node_id] = (env.var_store.fresh(), can_pattern_id);
}
match tag.value {
GlobalTag(name) => Pattern2::GlobalTag {
whole_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
tag_name: PoolStr::new(name, env.pool),
arguments: can_patterns,
},
PrivateTag(name) => {
let ident_id = env.ident_ids.get_or_insert(&name.into());
Pattern2::PrivateTag {
whole_var: env.var_store.fresh(),
ext_var: env.var_store.fresh(),
tag_name: Symbol::new(env.home, ident_id),
arguments: can_patterns,
}
}
_ => unreachable!("Other patterns cannot be applied"),
}
}
RecordDestructure(patterns) => {
let ext_var = env.var_store.fresh();
let whole_var = env.var_store.fresh();
let destructs = PoolVec::with_capacity(patterns.len() as u32, env.pool);
// Never reassigned at the moment: the shadowing arms below end in `todo!()`,
// so this is always `None` when it is read after the loop.
let opt_erroneous = None;
for (node_id, loc_pattern) in destructs.iter_node_ids().zip((*patterns).iter()) {
match loc_pattern.value {
Identifier(label) => {
// NOTE(review): this passes the whole destructure's `region`, while the slot's
// region is set from `loc_pattern.region` — confirm which one is intended.
match scope.introduce(
label.into(),
&env.exposed_ident_ids,
&mut env.ident_ids,
region,
) {
Ok(symbol) => {
output.references.bound_symbols.insert(symbol);
let destruct = RecordDestruct {
var: env.var_store.fresh(),
label: PoolStr::new(label, env.pool),
symbol,
typ: env.pool.add(DestructType::Required),
};
env.pool[node_id] = destruct;
env.set_region(node_id, loc_pattern.region);
}
Err((original_region, shadow)) => {
env.problem(Problem::RuntimeError(RuntimeError::Shadowing {
original_region,
shadow: shadow.clone(),
}));
// let shadowed = Pattern2::Shadowed {
// definition: original_region,
// shadowed_at: loc_pattern.region,
// shadowed_ident: shadow.value,
// };
// No matter what the other patterns
// are, we're definitely shadowed and will
// get a runtime exception as soon as we
// encounter the first bad pattern.
// opt_erroneous = Some();
// env.pool[node_id] = sha;
// env.set_region(node_id, loc_pattern.region);
todo!("we must both report/store the problem, but also not lose any information")
}
};
}
RequiredField(label, loc_guard) => {
// a guard does not introduce the label into scope!
let symbol = scope.ignore(label.into(), &mut env.ident_ids);
let (new_output, can_guard) = to_pattern_id(
env,
scope,
pattern_type,
&loc_guard.value,
loc_guard.region,
);
let destruct = RecordDestruct {
var: env.var_store.fresh(),
label: PoolStr::new(label, env.pool),
symbol,
typ: env
.pool
.add(DestructType::Guard(env.var_store.fresh(), can_guard)),
};
output.union(new_output);
env.pool[node_id] = destruct;
env.set_region(node_id, loc_pattern.region);
}
OptionalField(label, loc_default) => {
// an optional DOES introduce the label into scope!
match scope.introduce(
label.into(),
&env.exposed_ident_ids,
&mut env.ident_ids,
region,
) {
Ok(symbol) => {
let (can_default, expr_output) =
to_expr_id(env, scope, &loc_default.value, loc_default.region);
// an optional field binds the symbol!
output.references.bound_symbols.insert(symbol);
output.union(expr_output);
let destruct = RecordDestruct {
var: env.var_store.fresh(),
label: PoolStr::new(label, env.pool),
symbol,
typ: env.pool.add(DestructType::Optional(
env.var_store.fresh(),
can_default,
)),
};
env.pool[node_id] = destruct;
env.set_region(node_id, loc_pattern.region);
}
Err((original_region, shadow)) => {
env.problem(Problem::RuntimeError(RuntimeError::Shadowing {
original_region,
shadow: shadow.clone(),
}));
// No matter what the other patterns
// are, we're definitely shadowed and will
// get a runtime exception as soon as we
// encounter the first bad pattern.
// opt_erroneous = Some(Pattern::Shadowed(original_region, shadow));
todo!("must report problem but also not loose any information")
}
};
}
_ => unreachable!("Any other pattern should have given a parse error"),
}
}
// If we encountered an erroneous pattern (e.g. one with shadowing),
// use the resulting RuntimeError. Otherwise, return a successful record destructure.
opt_erroneous.unwrap_or(Pattern2::RecordDestructure {
whole_var,
ext_var,
destructs,
})
}
RequiredField(_name, _loc_pattern) => {
unreachable!("should have been handled in RecordDestructure");
}
OptionalField(_name, _loc_pattern) => {
unreachable!("should have been handled in RecordDestructure");
}
Malformed(_str) => {
let problem = MalformedPatternProblem::Unknown;
malformed_pattern(env, problem, region)
}
MalformedIdent(_str, bad_ident) => {
let problem = MalformedPatternProblem::BadIdent(*bad_ident);
malformed_pattern(env, problem, region)
}
// Whitespace/comment wrappers are transparent: convert the wrapped pattern directly.
SpaceBefore(sub_pattern, _) | SpaceAfter(sub_pattern, _) => {
return to_pattern2(env, scope, pattern_type, sub_pattern, region)
}
};
(output, can_pattern)
}
pub fn symbols_from_pattern(pool: &Pool, initial: &Pattern2) -> Vec<Symbol> {
use Pattern2::*;
let mut symbols = Vec::new();
let mut stack = vec![initial];
while let Some(pattern) = stack.pop() {
match pattern {
Identifier(symbol) => {
symbols.push(*symbol);
}
GlobalTag { arguments, .. } | PrivateTag { arguments, .. } => {
for (_, pat_id) in arguments.iter(pool) {
let pat = pool.get(*pat_id);
stack.push(pat);
}
}
RecordDestructure { destructs, .. } => {
for destruct in destructs.iter(pool) {
let destruct_type = pool.get(destruct.typ);
if let DestructType::Guard(_, subpattern_id) = &destruct_type {
let subpattern = pool.get(*subpattern_id);
stack.push(subpattern);
} else {
symbols.push(destruct.symbol);
}
}
}
NumLiteral(_, _)
| IntLiteral(_)
| FloatLiteral(_)
| StrLiteral(_)
| Underscore
| MalformedPattern(_, _)
| Shadowed { .. }
| UnsupportedPattern(_) => {}
}
}
symbols
}
/// Returns the identifier text of a `Pattern2::Identifier`, resolved through `interns`.
///
/// # Errors
///
/// Fails with `UnexpectedPattern2Variant` (carrying the debug rendering of `pattern`) when
/// `pattern` is any other variant.
pub fn get_identifier_string(pattern: &Pattern2, interns: &Interns) -> ASTResult<String> {
    if let Pattern2::Identifier(symbol) = pattern {
        return Ok(symbol.ident_str(interns).to_string());
    }

    UnexpectedPattern2Variant {
        required_pattern2: "Identifier".to_string(),
        encountered_pattern2: format!("{:?}", pattern),
    }
    .fail()
}
pub fn symbols_and_variables_from_pattern(
pool: &Pool,
initial: &Pattern2,
initial_var: Variable,
) -> Vec<(Symbol, Variable)> {
use Pattern2::*;
let mut symbols = Vec::new();
let mut stack = vec![(initial_var, initial)];
while let Some((variable, pattern)) = stack.pop() {
match pattern {
Identifier(symbol) => {
symbols.push((*symbol, variable));
}
GlobalTag { arguments, .. } | PrivateTag { arguments, .. } => {
for (var, pat_id) in arguments.iter(pool) {
let pat = pool.get(*pat_id);
stack.push((*var, pat));
}
}
RecordDestructure { destructs, .. } => {
for destruct in destructs.iter(pool) {
let destruct_type = pool.get(destruct.typ);
if let DestructType::Guard(_, subpattern_id) = &destruct_type {
let subpattern = pool.get(*subpattern_id);
stack.push((destruct.var, subpattern));
} else {
symbols.push((destruct.symbol, destruct.var));
}
}
}
NumLiteral(_, _)
| IntLiteral(_)
| FloatLiteral(_)
| StrLiteral(_)
| Underscore
| MalformedPattern(_, _)
| Shadowed { .. }
| UnsupportedPattern(_) => {}
}
}
symbols
}
/// Reports an unsupported pattern to `env` and returns the matching error pattern.
///
/// Example: `5 = 1 + 2` is unsupported because an Int pattern cannot be assigned to.
/// The problem is recorded as `BadPattern::Unsupported(pattern_type)` at `region`.
fn unsupported_pattern<'a>(
    env: &mut Env<'a>,
    pattern_type: PatternType,
    region: Region,
) -> Pattern2 {
    let bad_pattern = roc_problem::can::BadPattern::Unsupported(pattern_type);
    env.problem(Problem::UnsupportedPattern(bad_pattern, region));

    Pattern2::UnsupportedPattern(region)
}
/// Reports a `BadPattern::UnderscoreInDef` problem at `region` to `env` and
/// returns an `UnsupportedPattern` for that region.
fn underscore_in_def<'a>(env: &mut Env<'a>, region: Region) -> Pattern2 {
    let bad_pattern = roc_problem::can::BadPattern::UnderscoreInDef;
    env.problem(Problem::UnsupportedPattern(bad_pattern, region));

    Pattern2::UnsupportedPattern(region)
}
pub (crate) fn flatten_str_literal(pool: &mut Pool, literal: &StrLiteral<'_>) -> Pattern2 {
use roc_parse::ast::StrLiteral::*;
match literal {
PlainLine(str_slice) => Pattern2::StrLiteral(PoolStr::new(str_slice, pool)),
Line(segments) => flatten_str_lines(pool, &[segments]),
Block(lines) => flatten_str_lines(pool, lines),
}
}
pub (crate) fn flatten_str_lines(pool: &mut Pool, lines: &[&[StrSegment<'_>]]) -> Pattern2 {
use StrSegment::*;
let mut buf = String::new();
for line in lines {
for segment in line.iter() {
match segment {
Plaintext(string) => {
buf.push_str(string);
}
Unicode(loc_digits) => {
todo!("parse unicode digits {:?}", loc_digits);
}
Interpolated(loc_expr) => {
return Pattern2::UnsupportedPattern(loc_expr.region);
}
EscapedChar(escaped) => buf.push(unescape_char(escaped)),
}
}
}
Pattern2::StrLiteral(PoolStr::new(&buf, pool))
}
/// Reports a malformed pattern such as `3.X` or `0b5` to `env` as a runtime
/// error and returns a `MalformedPattern` carrying the same problem and region.
fn malformed_pattern<'a>(
    env: &mut Env<'a>,
    problem: MalformedPatternProblem,
    region: Region,
) -> Pattern2 {
    let runtime_error = RuntimeError::MalformedPattern(problem, region);
    env.problem(Problem::RuntimeError(runtime_error));

    Pattern2::MalformedPattern(problem, region)
}

867
ast/src/lang/core/types.rs Normal file
View File

@ -0,0 +1,867 @@
#![allow(clippy::all)]
#![allow(dead_code)]
#![allow(unused_imports)]
// use roc_can::expr::Output;
use roc_collections::all::{MutMap, MutSet};
use roc_module::ident::{Ident, TagName};
use roc_module::symbol::Symbol;
use roc_region::all::{Located, Region};
use roc_types::types::{Problem, RecordField};
use roc_types::{subs::Variable, types::ErrorType};
use crate::lang::env::Env;
use crate::lang::scope::Scope;
use crate::pool::pool::{NodeId, Pool};
use crate::pool::pool_str::PoolStr;
use crate::pool::pool_vec::PoolVec;
use crate::pool::shallow_clone::ShallowClone;
/// Handle to a `Type2` node stored in the `Pool`.
pub type TypeId = NodeId<Type2>;
/// Type representation whose nested data is referenced through pool handles
/// (`TypeId`, `PoolVec`, `PoolStr`) rather than owned boxes.
///
/// The trailing `// NB = ...` comments are the authors' size notes for each variant.
#[derive(Debug)]
pub enum Type2 {
/// A type variable.
Variable(Variable),
/// An alias application: alias symbol, named type arguments, and the aliased ("actual") type.
Alias(Symbol, PoolVec<(PoolStr, TypeId)>, TypeId), // 24B = 8B + 12B + 4B
/// An alias introduced inline with `as` in an annotation; same layout as `Alias`.
AsAlias(Symbol, PoolVec<(PoolStr, TypeId)>, TypeId), // 24B = 8B + 12B + 4B
// 32B
HostExposedAlias {
name: Symbol, // 8B
arguments: PoolVec<(PoolStr, TypeId)>, // 12B
actual_var: Variable, // 4B
actual: TypeId, // 4B
},
EmptyTagUnion,
/// Tags (name plus argument types) and the extension type.
TagUnion(PoolVec<(TagName, PoolVec<Type2>)>, TypeId), // 16B = 12B + 4B
/// Like `TagUnion`, with a leading recursion variable.
RecursiveTagUnion(Variable, PoolVec<(TagName, PoolVec<Type2>)>, TypeId), // 20B = 4B + 12B + 4B
EmptyRec,
/// Labelled fields and the extension type.
Record(PoolVec<(PoolStr, RecordField<TypeId>)>, TypeId), // 16B = 12B + 4B
/// Argument types, closure type, and return type.
Function(PoolVec<Type2>, TypeId, TypeId), // 20B = 12B + 4B + 4B
/// Application of a (non-alias) type symbol to argument types.
Apply(Symbol, PoolVec<Type2>), // 20B = 8B + 12B
/// A type that could not be produced; carries the reason.
Erroneous(Problem2),
}
/// Reasons carried by `Type2::Erroneous`.
///
/// The trailing `// NB` comments are the authors' size notes.
#[derive(Debug)]
pub enum Problem2 {
CanonicalizationProblem,
CircularType(Symbol, NodeId<ErrorType>), // 12B = 8B + 4B
CyclicAlias(Symbol, PoolVec<Symbol>), // 20B = 8B + 12B
UnrecognizedIdent(PoolStr), // 8B
Shadowed(Located<PoolStr>),
/// An alias was applied to the wrong number of type arguments.
BadTypeArguments {
symbol: Symbol, // 8B
type_got: u8, // 1B
alias_needs: u8, // 1B
},
InvalidModule,
SolvedTypeError,
}
impl ShallowClone for Type2 {
fn shallow_clone(&self) -> Self {
match self {
Self::Variable(var) => Self::Variable(*var),
Self::Alias(symbol, args, alias_type_id) => {
Self::Alias(*symbol, args.shallow_clone(), alias_type_id.clone())
}
Self::Record(fields, ext_id) => Self::Record(fields.shallow_clone(), ext_id.clone()),
Self::Function(args, closure_type_id, ret_type_id) => Self::Function(
args.shallow_clone(),
closure_type_id.clone(),
ret_type_id.clone(),
),
rest => todo!("{:?}", rest),
}
}
}
impl Type2 {
fn substitute(_pool: &mut Pool, _subs: &MutMap<Variable, TypeId>, _type_id: TypeId) {
todo!()
}
pub fn variables(&self, pool: &mut Pool) -> MutSet<Variable> {
use Type2::*;
let mut stack = vec![self];
let mut result = MutSet::default();
while let Some(this) = stack.pop() {
match this {
Variable(v) => {
result.insert(*v);
}
Alias(_, _, actual) | AsAlias(_, _, actual) => {
stack.push(pool.get(*actual));
}
HostExposedAlias {
actual_var, actual, ..
} => {
result.insert(*actual_var);
stack.push(pool.get(*actual));
}
EmptyTagUnion | EmptyRec | Erroneous(_) => {}
TagUnion(tags, ext) => {
for (_, args) in tags.iter(pool) {
stack.extend(args.iter(pool));
}
stack.push(pool.get(*ext));
}
RecursiveTagUnion(rec, tags, ext) => {
for (_, args) in tags.iter(pool) {
stack.extend(args.iter(pool));
}
stack.push(pool.get(*ext));
result.insert(*rec);
}
Record(fields, ext) => {
for (_, field) in fields.iter(pool) {
stack.push(pool.get(*field.as_inner()));
}
stack.push(pool.get(*ext));
}
Function(args, closure, result) => {
stack.extend(args.iter(pool));
stack.push(pool.get(*closure));
stack.push(pool.get(*result));
}
Apply(_, args) => {
stack.extend(args.iter(pool));
}
}
}
result
}
pub fn contains_symbol(&self, _pool: &mut Pool, _needle: Symbol) -> bool {
todo!()
}
pub fn substitute_alias(&self, _pool: &mut Pool, _needle: Symbol, _actual: Self) {
todo!()
}
}
impl NodeId<Type2> {
/// Not implemented yet: always panics via `todo!()`.
pub fn variables(&self, _pool: &mut Pool) -> MutSet<Variable> {
todo!()
}
}
/// A temporary data structure to return a bunch of values to Def construction
pub enum Signature {
/// The annotation is an alias (possibly nested) that resolves to a function type;
/// `annotation` keeps the original aliased type.
FunctionWithAliases {
annotation: Type2,
arguments: PoolVec<Type2>,
closure_type_id: TypeId,
return_type_id: TypeId,
},
/// The annotation is directly a function type.
Function {
arguments: PoolVec<Type2>,
closure_type_id: TypeId,
return_type_id: TypeId,
},
/// Any annotation that is not a function type.
Value {
annotation: Type2,
},
}
/// Result of canonicalizing a type annotation (see `to_annotation2`).
pub enum Annotation2<'a> {
/// A successfully canonicalized annotation.
Annotation {
/// Type variables that were written by name in the annotation.
named_rigids: MutMap<&'a str, Variable>,
/// Variables created for wildcards and malformed annotations.
unnamed_rigids: MutSet<Variable>,
/// Type symbols referenced by the annotation.
symbols: MutSet<Symbol>,
signature: Signature,
},
Erroneous(roc_types::types::Problem),
}
/// Canonicalizes a parsed type annotation into an `Annotation2`.
///
/// The annotation is first converted with `to_type2`. A function type yields
/// `Signature::Function`, an alias is handed to `shallow_dealias` to find out
/// what it resolves to, and anything else yields `Signature::Value`.
pub fn to_annotation2<'a>(
    env: &mut Env,
    scope: &mut Scope,
    annotation: &'a roc_parse::ast::TypeAnnotation<'a>,
    region: Region,
) -> Annotation2<'a> {
    let mut references = References::default();
    let annotation = to_type2(env, scope, &mut references, annotation, region);

    let signature = match annotation {
        Type2::Alias(_, _, _) => {
            // Most annotations are not aliases, so this comparatively slower
            // path (following the alias chain) is the uncommon one.
            return shallow_dealias(env, references, annotation);
        }
        Type2::Function(arguments, closure_type_id, return_type_id) => Signature::Function {
            arguments,
            closure_type_id,
            return_type_id,
        },
        _ => Signature::Value { annotation },
    };

    let References {
        named,
        unnamed,
        symbols,
        ..
    } = references;

    Annotation2::Annotation {
        named_rigids: named,
        unnamed_rigids: unnamed,
        symbols,
        signature,
    }
}
/// Follows a chain of aliases until a non-alias type is reached and builds the
/// matching annotation.
///
/// If the chain ends in a function type the result is
/// `Signature::FunctionWithAliases` (keeping the original aliased `annotation`);
/// otherwise it is `Signature::Value`.
fn shallow_dealias<'a>(
    env: &mut Env,
    references: References<'a>,
    annotation: Type2,
) -> Annotation2<'a> {
    let References {
        named,
        unnamed,
        symbols,
        ..
    } = references;

    let mut current = &annotation;
    let signature = loop {
        match current {
            Type2::Alias(_, _, actual) => {
                current = env.pool.get(*actual);
            }
            Type2::Function(arguments, closure_type_id, return_type_id) => {
                // Copy everything needed out of the borrowed node before
                // `annotation` itself is moved into the signature.
                let arguments = arguments.shallow_clone();
                let closure_type_id = *closure_type_id;
                let return_type_id = *return_type_id;

                break Signature::FunctionWithAliases {
                    annotation,
                    arguments,
                    closure_type_id,
                    return_type_id,
                };
            }
            _ => break Signature::Value { annotation },
        }
    };

    Annotation2::Annotation {
        named_rigids: named,
        unnamed_rigids: unnamed,
        symbols,
        signature,
    }
}
/// Bookkeeping gathered while canonicalizing a type annotation.
#[derive(Default)]
pub struct References<'a> {
// type variables written by name, keyed by that name
named: MutMap<&'a str, Variable>,
// variables created for wildcards and malformed annotations
unnamed: MutSet<Variable>,
// NOTE(review): not written anywhere in the visible part of this file
hidden: MutSet<Variable>,
// type symbols referenced by the annotation
symbols: MutSet<Symbol>,
}
/// Canonicalizes `annotation` with `to_type2` and stores the result as a new
/// pool node via `env.add`, returning that node's id.
pub fn to_type_id<'a>(
    env: &mut Env,
    scope: &mut Scope,
    rigids: &mut References<'a>,
    annotation: &roc_parse::ast::TypeAnnotation<'a>,
    region: Region,
) -> TypeId {
    let canonical = to_type2(env, scope, rigids, annotation, region);

    env.add(canonical, region)
}
/// Canonicalizes `annotation` with `to_type2` and writes the result into the
/// already-allocated pool slot `type_id`, then passes `region` to
/// `env.set_region` for that slot.
pub fn as_type_id<'a>(
    env: &mut Env,
    scope: &mut Scope,
    rigids: &mut References<'a>,
    type_id: TypeId,
    annotation: &roc_parse::ast::TypeAnnotation<'a>,
    region: Region,
) {
    let canonical = to_type2(env, scope, rigids, annotation, region);

    env.pool[type_id] = canonical;
    env.set_region(type_id, region);
}
/// Converts a parsed type annotation into a `Type2`, allocating nested types in `env.pool`.
///
/// Named type variables, wildcard variables and referenced type symbols are
/// recorded in `references` along the way. Several error paths are still
/// `todo!()` and will panic (an unresolved type application, and a shadowed
/// alias name in an `as` annotation).
pub fn to_type2<'a>(
env: &mut Env,
scope: &mut Scope,
references: &mut References<'a>,
annotation: &roc_parse::ast::TypeAnnotation<'a>,
region: Region,
) -> Type2 {
use roc_parse::ast::TypeAnnotation::*;
match annotation {
Apply(module_name, ident, targs) => {
match to_type_apply(env, scope, references, module_name, ident, targs, region) {
TypeApply::Apply(symbol, args) => {
references.symbols.insert(symbol);
Type2::Apply(symbol, args)
}
TypeApply::Alias(symbol, args, actual) => {
references.symbols.insert(symbol);
Type2::Alias(symbol, args, actual)
}
TypeApply::Erroneous(_problem) => {
// Type2::Erroneous(problem)
todo!()
}
}
}
Function(argument_types, return_type) => {
// Reserve one pool slot per argument, then fill each slot in place.
let arguments = PoolVec::with_capacity(argument_types.len() as u32, env.pool);
for (type_id, loc_arg) in arguments.iter_node_ids().zip(argument_types.iter()) {
as_type_id(
env,
scope,
references,
type_id,
&loc_arg.value,
loc_arg.region,
);
}
let return_type_id = to_type_id(
env,
scope,
references,
&return_type.value,
return_type.region,
);
// The closure type is not part of the written annotation; use a fresh variable.
let closure_type = Type2::Variable(env.var_store.fresh());
let closure_type_id = env.pool.add(closure_type);
Type2::Function(arguments, closure_type_id, return_type_id)
}
BoundVariable(v) => {
// a rigid type variable
match references.named.get(v) {
Some(var) => Type2::Variable(*var),
None => {
let var = env.var_store.fresh();
references.named.insert(v, var);
Type2::Variable(var)
}
}
}
Wildcard | Malformed(_) => {
// Both get a fresh variable that is tracked as an unnamed rigid.
let var = env.var_store.fresh();
references.unnamed.insert(var);
Type2::Variable(var)
}
Record { fields, ext, .. } => {
let field_types_map = can_assigned_fields(env, scope, references, fields, region);
let field_types = PoolVec::with_capacity(field_types_map.len() as u32, env.pool);
for (node_id, (label, field)) in field_types.iter_node_ids().zip(field_types_map) {
let poolstr = PoolStr::new(label, env.pool);
// Move the field's type into the pool, keeping the same kind of field.
let rec_field = match field {
RecordField::Optional(_) => {
let field_id = env.pool.add(field.into_inner());
RecordField::Optional(field_id)
}
RecordField::Demanded(_) => {
let field_id = env.pool.add(field.into_inner());
RecordField::Demanded(field_id)
}
RecordField::Required(_) => {
let field_id = env.pool.add(field.into_inner());
RecordField::Required(field_id)
}
};
env.pool[node_id] = (poolstr, rec_field);
}
// Without an explicit extension, the record is closed with `EmptyRec`.
let ext_type = match ext {
Some(loc_ann) => to_type_id(env, scope, references, &loc_ann.value, region),
None => env.add(Type2::EmptyRec, region),
};
Type2::Record(field_types, ext_type)
}
TagUnion { tags, ext, .. } => {
let tag_types_vec = can_tags(env, scope, references, tags, region);
let tag_types = PoolVec::with_capacity(tag_types_vec.len() as u32, env.pool);
for (node_id, (tag_name, field)) in tag_types.iter_node_ids().zip(tag_types_vec) {
env.pool[node_id] = (tag_name, field);
}
// Without an explicit extension, the union is closed with `EmptyTagUnion`.
let ext_type = match ext {
Some(loc_ann) => to_type_id(env, scope, references, &loc_ann.value, region),
None => env.add(Type2::EmptyTagUnion, region),
};
Type2::TagUnion(tag_types, ext_type)
}
As(loc_inner, _spaces, loc_as) => {
// e.g. `{ x : Int, y : Int } as Point`
match loc_as.value {
Apply(module_name, ident, loc_vars) if module_name.is_empty() => {
// Introduce the alias name into scope; shadowing is not handled yet.
let symbol = match scope.introduce(
ident.into(),
&env.exposed_ident_ids,
&mut env.ident_ids,
region,
) {
Ok(symbol) => symbol,
Err((_original_region, _shadow)) => {
// let problem = Problem2::Shadowed(original_region, shadow.clone());
// env.problem(roc_problem::can::Problem::ShadowingInAnnotation {
// original_region,
// shadow,
// });
// return Type2::Erroneous(problem);
todo!();
}
};
let inner_type = to_type2(env, scope, references, &loc_inner.value, region);
// `vars` holds (name, type id) pairs for the resulting `AsAlias`;
// `lowercase_vars` holds (name, variable) pairs for the alias stored in scope.
let vars = PoolVec::with_capacity(loc_vars.len() as u32, env.pool);
let lowercase_vars = PoolVec::with_capacity(loc_vars.len() as u32, env.pool);
for ((loc_var, named_id), var_id) in loc_vars
.iter()
.zip(lowercase_vars.iter_node_ids())
.zip(vars.iter_node_ids())
{
match loc_var.value {
BoundVariable(ident) => {
let var_name = ident;
// Reuse the variable if this name was seen before, otherwise create one.
if let Some(var) = references.named.get(&var_name) {
let poolstr = PoolStr::new(var_name, env.pool);
let type_id = env.pool.add(Type2::Variable(*var));
env.pool[var_id] = (poolstr.shallow_clone(), type_id);
env.pool[named_id] = (poolstr, *var);
env.set_region(named_id, loc_var.region);
} else {
let var = env.var_store.fresh();
references.named.insert(var_name.clone(), var);
let poolstr = PoolStr::new(var_name, env.pool);
let type_id = env.pool.add(Type2::Variable(var));
env.pool[var_id] = (poolstr.shallow_clone(), type_id);
env.pool[named_id] = (poolstr, var);
env.set_region(named_id, loc_var.region);
}
}
_ => {
// If anything other than a lowercase identifier
// appears here, the whole annotation is invalid.
return Type2::Erroneous(Problem2::CanonicalizationProblem);
}
}
}
let alias_actual = inner_type;
// TODO instantiate recursive tag union
// let alias_actual = if let Type2::TagUnion(tags, ext) = inner_type {
// let rec_var = env.var_store.fresh();
//
// let mut new_tags = Vec::with_capacity(tags.len());
// for (tag_name, args) in tags {
// let mut new_args = Vec::with_capacity(args.len());
// for arg in args {
// let mut new_arg = arg.clone();
// new_arg.substitute_alias(symbol, &Type2::Variable(rec_var));
// new_args.push(new_arg);
// }
// new_tags.push((tag_name.clone(), new_args));
// }
// Type2::RecursiveTagUnion(rec_var, new_tags, ext)
// } else {
// inner_type
// };
// Variables of the aliased type that are not among the alias's own parameters.
// NOTE(review): `hidden_variables` is computed here but not passed on to
// `scope.add_alias` below — confirm whether that is intended.
let mut hidden_variables = MutSet::default();
hidden_variables.extend(alias_actual.variables(env.pool));
for (_, var) in lowercase_vars.iter(env.pool) {
hidden_variables.remove(var);
}
let alias_actual_id = env.pool.add(alias_actual);
scope.add_alias(env.pool, symbol, lowercase_vars, alias_actual_id);
let alias = scope.lookup_alias(symbol).unwrap();
// local_aliases.insert(symbol, alias.clone());
// TODO host-exposed
// if vars.is_empty() && env.home == symbol.module_id() {
// let actual_var = env.var_store.fresh();
// rigids.host_exposed.insert(symbol, actual_var);
// Type::HostExposedAlias {
// name: symbol,
// arguments: vars,
// actual: Box::new(alias.typ.clone()),
// actual_var,
// }
// } else {
// Type::Alias(symbol, vars, Box::new(alias.typ.clone()))
// }
Type2::AsAlias(symbol, vars, alias.actual)
}
_ => {
// This is a syntactically invalid type alias.
Type2::Erroneous(Problem2::CanonicalizationProblem)
}
}
}
SpaceBefore(nested, _) | SpaceAfter(nested, _) => {
// Whitespace/comment wrappers carry no type information; look inside.
to_type2(env, scope, references, nested, region)
}
}
}
// TODO trim down these arguments!
#[allow(clippy::too_many_arguments)]
fn can_assigned_fields<'a>(
env: &mut Env,
scope: &mut Scope,
rigids: &mut References<'a>,
fields: &&[Located<roc_parse::ast::AssignedField<'a, roc_parse::ast::TypeAnnotation<'a>>>],
region: Region,
) -> MutMap<&'a str, RecordField<Type2>> {
use roc_parse::ast::AssignedField::*;
use roc_types::types::RecordField::*;
// SendMap doesn't have a `with_capacity`
let mut field_types = MutMap::default();
// field names we've seen so far in this record
let mut seen = std::collections::HashMap::with_capacity(fields.len());
'outer: for loc_field in fields.iter() {
let mut field = &loc_field.value;
// use this inner loop to unwrap the SpaceAfter/SpaceBefore
// when we find the name of this field, break out of the loop
// with that value, so we can check whether the field name is
// a duplicate
let new_name = 'inner: loop {
match field {
RequiredValue(field_name, _, annotation) => {
let field_type =
to_type2(env, scope, rigids, &annotation.value, annotation.region);
let label = field_name.value;
field_types.insert(label, Required(field_type));
break 'inner label;
}
OptionalValue(field_name, _, annotation) => {
let field_type =
to_type2(env, scope, rigids, &annotation.value, annotation.region);
let label = field_name.value;
field_types.insert(label.clone(), Optional(field_type));
break 'inner label;
}
LabelOnly(loc_field_name) => {
// Interpret { a, b } as { a : a, b : b }
let field_name = loc_field_name.value;
let field_type = {
if let Some(var) = rigids.named.get(&field_name) {
Type2::Variable(*var)
} else {
let field_var = env.var_store.fresh();
rigids.named.insert(field_name, field_var);
Type2::Variable(field_var)
}
};
field_types.insert(field_name.clone(), Required(field_type));
break 'inner field_name;
}
SpaceBefore(nested, _) | SpaceAfter(nested, _) => {
// check the nested field instead
field = nested;
continue 'inner;
}
Malformed(_) => {
// TODO report this?
// completely skip this element, advance to the next tag
continue 'outer;
}
}
};
// ensure that the new name is not already in this record:
// note that the right-most tag wins when there are two with the same name
if let Some(replaced_region) = seen.insert(new_name.clone(), loc_field.region) {
env.problem(roc_problem::can::Problem::DuplicateRecordFieldType {
field_name: new_name.into(),
record_region: region,
field_region: loc_field.region,
replaced_region,
});
}
}
field_types
}
/// Canonicalizes the tags of a tag union annotation.
///
/// Returns one `(tag name, argument types)` entry per well-formed tag, in source
/// order. Malformed tags are skipped. When a tag name occurs more than once, a
/// `DuplicateTag` problem is reported; all occurrences stay in the result.
fn can_tags<'a>(
    env: &mut Env,
    scope: &mut Scope,
    rigids: &mut References<'a>,
    tags: &'a [Located<roc_parse::ast::Tag<'a>>],
    region: Region,
) -> Vec<(TagName, PoolVec<Type2>)> {
    use roc_parse::ast::Tag;

    let mut canonical_tags = Vec::with_capacity(tags.len());
    // Region of the most recent occurrence of each tag name seen so far.
    let mut seen_regions = std::collections::HashMap::with_capacity(tags.len());

    'tags: for loc_tag in tags.iter() {
        let mut current = &loc_tag.value;

        // Peel off space wrappers until the actual tag is found; the loop
        // evaluates to the tag's name so duplicates can be checked below.
        let found_name = 'unwrap: loop {
            match current {
                Tag::Global { name, args } => {
                    // Reserve one pool slot per argument and fill each in place.
                    let arg_types = PoolVec::with_capacity(args.len() as u32, env.pool);
                    for (type_id, loc_arg) in arg_types.iter_node_ids().zip(args.iter()) {
                        as_type_id(env, scope, rigids, type_id, &loc_arg.value, loc_arg.region);
                    }

                    let global_name = TagName::Global(name.value.into());
                    canonical_tags.push((global_name.clone(), arg_types));
                    break 'unwrap global_name;
                }
                Tag::Private { name, args } => {
                    // A private tag is identified by a symbol in the home module.
                    let ident_id = env.ident_ids.get_or_insert(&name.value.into());
                    let symbol = Symbol::new(env.home, ident_id);

                    let arg_types = PoolVec::with_capacity(args.len() as u32, env.pool);
                    for (type_id, loc_arg) in arg_types.iter_node_ids().zip(args.iter()) {
                        as_type_id(env, scope, rigids, type_id, &loc_arg.value, loc_arg.region);
                    }

                    let private_name = TagName::Private(symbol);
                    canonical_tags.push((private_name.clone(), arg_types));
                    break 'unwrap private_name;
                }
                Tag::SpaceBefore(nested, _) | Tag::SpaceAfter(nested, _) => {
                    // Inspect the wrapped tag on the next iteration.
                    current = nested;
                    continue 'unwrap;
                }
                Tag::Malformed(_) => {
                    // TODO report this?
                    // Skip this element entirely and move on to the next tag.
                    continue 'tags;
                }
            }
        };

        // Report a tag name that was already present in this tag union.
        if let Some(replaced_region) = seen_regions.insert(found_name.clone(), loc_tag.region) {
            env.problem(roc_problem::can::Problem::DuplicateTag {
                tag_region: loc_tag.region,
                tag_union_region: region,
                replaced_region,
                tag_name: found_name,
            });
        }
    }

    canonical_tags
}
/// Outcome of resolving a type application in `to_type_apply`.
enum TypeApply {
/// The symbol is not a known alias: symbol plus canonicalized argument types.
Apply(Symbol, PoolVec<Type2>),
/// The symbol is a known alias: symbol, named arguments, and the aliased type.
Alias(Symbol, PoolVec<(PoolStr, TypeId)>, TypeId),
/// The identifier could not be resolved or was applied to the wrong number of arguments.
Erroneous(roc_types::types::Problem),
}
/// Resolves the type application `module_name.ident type_arguments...`.
///
/// The identifier is looked up in `scope` (unqualified) or through
/// `env.qualified_lookup` (qualified); a failed lookup is reported to `env` and
/// returned as `TypeApply::Erroneous`. The arguments are canonicalized into pool
/// slots. If the symbol names a known alias, the argument count is checked and
/// a substitution map for the alias's variables is built; otherwise a plain
/// `TypeApply::Apply` is returned.
///
/// NOTE(review): the alias path ends in `Type2::substitute`, which is a
/// `todo!()` stub in this file, so that path currently panics.
#[inline(always)]
fn to_type_apply<'a>(
env: &mut Env,
scope: &mut Scope,
rigids: &mut References<'a>,
module_name: &str,
ident: &str,
type_arguments: &[Located<roc_parse::ast::TypeAnnotation<'a>>],
region: Region,
) -> TypeApply {
let symbol = if module_name.is_empty() {
// Since module_name was empty, this is an unqualified type.
// Look it up in scope!
let ident: Ident = (*ident).into();
match scope.lookup(&ident, region) {
Ok(symbol) => symbol,
Err(problem) => {
env.problem(roc_problem::can::Problem::RuntimeError(problem));
return TypeApply::Erroneous(Problem::UnrecognizedIdent(ident.into()));
}
}
} else {
match env.qualified_lookup(module_name, ident, region) {
Ok(symbol) => symbol,
Err(problem) => {
// Either the module wasn't imported, or
// it was imported but it doesn't expose this ident.
env.problem(roc_problem::can::Problem::RuntimeError(problem));
return TypeApply::Erroneous(Problem::UnrecognizedIdent((*ident).into()));
}
}
};
// Reserve one pool slot per type argument and canonicalize each into its slot.
let argument_type_ids = PoolVec::with_capacity(type_arguments.len() as u32, env.pool);
for (type_id, loc_arg) in argument_type_ids.iter_node_ids().zip(type_arguments.iter()) {
as_type_id(env, scope, rigids, type_id, &loc_arg.value, loc_arg.region);
}
let args = type_arguments;
let opt_alias = scope.lookup_alias(symbol);
match opt_alias {
Some(ref alias) => {
// use a known alias
let actual = alias.actual;
let mut substitutions: MutMap<Variable, TypeId> = MutMap::default();
// The alias must be applied to exactly as many arguments as it declares.
if alias.targs.len() != args.len() {
let error = TypeApply::Erroneous(Problem::BadTypeArguments {
symbol,
region,
alias_needs: alias.targs.len() as u8,
type_got: args.len() as u8,
});
return error;
}
// Pair each alias parameter name with the supplied argument, and record
// which alias variable each argument replaces.
let arguments = PoolVec::with_capacity(type_arguments.len() as u32, env.pool);
let it = arguments.iter_node_ids().zip(
argument_type_ids
.iter_node_ids()
.zip(alias.targs.iter_node_ids()),
);
for (node_id, (type_id, loc_var_id)) in it {
let loc_var = &env.pool[loc_var_id];
let name = loc_var.0.shallow_clone();
let var = loc_var.1;
env.pool[node_id] = (name, type_id);
substitutions.insert(var, type_id);
}
// make sure the recursion variable is freshly instantiated
// have to allocate these outside of the if for lifetime reasons...
let new = env.var_store.fresh();
let fresh = env.pool.add(Type2::Variable(new));
if let Type2::RecursiveTagUnion(rvar, ref tags, ext) = &mut env.pool[actual] {
substitutions.insert(*rvar, fresh);
env.pool[actual] = Type2::RecursiveTagUnion(new, tags.shallow_clone(), *ext);
}
// make sure hidden variables are freshly instantiated
for var_id in alias.hidden_variables.iter_node_ids() {
let var = env.pool[var_id];
let fresh = env.pool.add(Type2::Variable(env.var_store.fresh()));
substitutions.insert(var, fresh);
}
// instantiate variables
Type2::substitute(env.pool, &substitutions, actual);
TypeApply::Alias(symbol, arguments, actual)
}
None => TypeApply::Apply(symbol, argument_type_ids),
}
}
/// A type alias as stored in scope.
#[derive(Debug)]
pub struct Alias {
/// The alias's type parameters: name and the variable standing for it.
pub targs: PoolVec<(PoolStr, Variable)>,
/// The type the alias stands for.
pub actual: TypeId,
/// hidden type variables, like the closure variable in `a -> b`
pub hidden_variables: PoolVec<Variable>,
}
impl ShallowClone for Alias {
fn shallow_clone(&self) -> Self {
Self {
targs: self.targs.shallow_clone(),
hidden_variables: self.hidden_variables.shallow_clone(),
actual: self.actual,
}
}
}

View File

@ -0,0 +1,91 @@
use crate::{lang::{core::expr::expr2_to_string::expr2_to_string, rigids::Rigids}, pool::{pool::{NodeId, Pool}, shallow_clone::ShallowClone}};
use roc_types::subs::{Variable};
use super::{expr::expr2::ExprId, pattern::{Pattern2, PatternId}, types::TypeId};
/// A value definition: a pattern bound to an expression, with or without a type annotation.
///
/// The trailing `// 4B` comments are the authors' size notes.
#[derive(Debug)]
pub enum ValueDef {
/// Definition that carries a type annotation (`type_id`) and its rigid variables.
WithAnnotation {
pattern_id: PatternId, // 4B
expr_id: ExprId, // 4B
type_id: TypeId,
rigids: Rigids,
expr_var: Variable, // 4B
},
/// Definition without a type annotation.
NoAnnotation {
pattern_id: PatternId, // 4B
expr_id: ExprId, // 4B
expr_var: Variable, // 4B
},
}
impl ShallowClone for ValueDef {
fn shallow_clone(&self) -> Self {
match self {
Self::WithAnnotation {
pattern_id,
expr_id,
type_id,
rigids,
expr_var,
} => Self::WithAnnotation {
pattern_id: *pattern_id,
expr_id: *expr_id,
type_id: *type_id,
rigids: rigids.shallow_clone(),
expr_var: *expr_var,
},
Self::NoAnnotation {
pattern_id,
expr_id,
expr_var,
} => Self::NoAnnotation {
pattern_id: *pattern_id,
expr_id: *expr_id,
expr_var: *expr_var,
},
}
}
}
impl ValueDef {
    /// Returns the id of the defined expression, whichever variant this is.
    pub fn get_expr_id(&self) -> ExprId {
        match self {
            ValueDef::WithAnnotation { expr_id, .. } | ValueDef::NoAnnotation { expr_id, .. } => {
                *expr_id
            }
        }
    }

    /// Returns the id of the bound pattern, whichever variant this is.
    pub fn get_pattern_id(&self) -> NodeId<Pattern2> {
        match self {
            ValueDef::WithAnnotation { pattern_id, .. }
            | ValueDef::NoAnnotation { pattern_id, .. } => *pattern_id,
        }
    }
}
/// Renders `val_def` as a debug string, resolving the pattern (and, for
/// annotated defs, the type) through `pool` and rendering the expression with
/// `expr2_to_string`.
pub fn value_def_to_string(val_def: &ValueDef, pool: &Pool) -> String {
    match val_def {
        ValueDef::NoAnnotation {
            pattern_id,
            expr_id,
            expr_var,
        } => {
            let pattern = pool.get(*pattern_id);
            let expr = expr2_to_string(*expr_id, pool);

            format!(
                "NoAnnotation {{ pattern_id: {:?}, expr_id: {:?}, expr_var: {:?}}}",
                pattern, expr, expr_var
            )
        }
        ValueDef::WithAnnotation {
            pattern_id,
            expr_id,
            type_id,
            rigids,
            expr_var,
        } => {
            let pattern = pool.get(*pattern_id);
            let expr = expr2_to_string(*expr_id, pool);
            let annotation = pool.get(*type_id);

            format!(
                "WithAnnotation {{ pattern_id: {:?}, expr_id: {:?}, type_id: {:?}, rigids: {:?}, expr_var: {:?}}}",
                pattern, expr, annotation, rigids, expr_var
            )
        }
    }
}

168
ast/src/lang/env.rs Normal file
View File

@ -0,0 +1,168 @@
use bumpalo::{collections::Vec as BumpVec, Bump};
use roc_collections::all::{MutMap, MutSet};
use roc_module::ident::{Ident, ModuleName};
use roc_module::symbol::{IdentIds, ModuleId, ModuleIds, Symbol};
use roc_problem::can::{Problem, RuntimeError};
use roc_region::all::{Located, Region};
use roc_types::subs::{VarStore};
use crate::pool::pool::{NodeId, Pool};
use super::core::def::def::References;
/// State threaded through canonicalization of one module.
#[derive(Debug)]
pub struct Env<'a> {
/// The module being processed; new symbols are created in this module.
pub home: ModuleId,
pub var_store: &'a mut VarStore,
pub pool: &'a mut Pool,
pub arena: &'a Bump,
/// Problems reported so far via `Env::problem`.
pub problems: BumpVec<'a, Problem>,
/// Identifiers of other modules, consulted for qualified lookups outside `home`.
pub dep_idents: MutMap<ModuleId, IdentIds>,
pub module_ids: &'a ModuleIds,
/// Identifiers of the home module; starts as a copy of `exposed_ident_ids`.
pub ident_ids: IdentIds,
pub exposed_ident_ids: IdentIds,
/// References registered per closure symbol via `Env::register_closure`.
pub closures: MutMap<Symbol, References>,
/// Symbols which were referenced by qualified lookups.
pub qualified_lookups: MutSet<Symbol>,
pub top_level_symbols: MutSet<Symbol>,
pub closure_name_symbol: Option<Symbol>,
pub tailcallable_symbol: Option<Symbol>,
}
impl<'a> Env<'a> {
    /// Creates an environment for the module `home`.
    ///
    /// `ident_ids` starts as a copy of `exposed_ident_ids`; the problem list,
    /// closure map and symbol sets start empty.
    pub fn new(
        home: ModuleId,
        arena: &'a Bump,
        pool: &'a mut Pool,
        var_store: &'a mut VarStore,
        dep_idents: MutMap<ModuleId, IdentIds>,
        module_ids: &'a ModuleIds,
        exposed_ident_ids: IdentIds,
    ) -> Env<'a> {
        Env {
            home,
            arena,
            pool,
            problems: BumpVec::new_in(arena),
            var_store,
            dep_idents,
            module_ids,
            ident_ids: exposed_ident_ids.clone(), // we start with these, but will add more later
            exposed_ident_ids,
            closures: MutMap::default(),
            qualified_lookups: MutSet::default(),
            tailcallable_symbol: None,
            closure_name_symbol: None,
            top_level_symbols: MutSet::default(),
        }
    }

    /// Stores `item` in the pool, passes `region` to `set_region` for the new
    /// node, and returns the node's id.
    pub fn add<T>(&mut self, item: T, region: Region) -> NodeId<T> {
        let id = self.pool.add(item);
        self.set_region(id, region);

        id
    }

    /// Records a canonicalization problem.
    pub fn problem(&mut self, problem: Problem) {
        self.problems.push(problem);
    }

    /// Placeholder for associating a source region with a pool node.
    ///
    /// Regions are not stored yet, so this is currently a no-op.
    pub fn set_region<T>(&mut self, _node_id: NodeId<T>, _region: Region) {
        // TODO: store the region for `_node_id`. This used to be a `dbg!`
        // reminder, which wrote to stderr on every call (i.e. for every node
        // added through `Env::add`).
    }

    /// Remembers the references of the closure named `symbol`.
    pub fn register_closure(&mut self, symbol: Symbol, references: References) {
        self.closures.insert(symbol, references);
    }

    /// Generates a unique, new symbol like "$1" or "$5",
    /// using the home module as the module_id.
    ///
    /// This is used, for example, during canonicalization of an Expr::Closure
    /// to generate a unique symbol to refer to that closure.
    pub fn gen_unique_symbol(&mut self) -> Symbol {
        let ident_id = self.ident_ids.gen_unique();

        Symbol::new(self.home, ident_id)
    }

    /// Resolves the qualified identifier `module_name.ident`.
    ///
    /// On success the symbol is also recorded in `qualified_lookups`.
    ///
    /// # Errors
    ///
    /// - `RuntimeError::ModuleNotImported` if `module_name` is not a known module.
    /// - `RuntimeError::LookupNotInScope` if the module is the home module but
    ///   `ident` is not among its identifiers.
    /// - `RuntimeError::ValueNotExposed` if the module is another module and
    ///   `ident` is not found in its entry in `dep_idents`.
    pub fn qualified_lookup(
        &mut self,
        module_name: &str,
        ident: &str,
        region: Region,
    ) -> Result<Symbol, RuntimeError> {
        debug_assert!(
            !module_name.is_empty(),
            "Called env.qualified_lookup with an unqualified ident: {:?}",
            ident
        );

        let module_name: ModuleName = module_name.into();

        match self.module_ids.get_id(&module_name) {
            Some(&module_id) => {
                let ident: Ident = ident.into();

                // You can do qualified lookups on your own module, e.g.
                // if I'm in the Foo module, I can do a `Foo.bar` lookup.
                if module_id == self.home {
                    match self.ident_ids.get_id(&ident) {
                        Some(ident_id) => {
                            let symbol = Symbol::new(module_id, *ident_id);

                            self.qualified_lookups.insert(symbol);

                            Ok(symbol)
                        }
                        None => Err(RuntimeError::LookupNotInScope(
                            Located {
                                value: ident,
                                region,
                            },
                            // Every identifier known in the home module is
                            // passed along with the error.
                            self.ident_ids
                                .idents()
                                .map(|(_, string)| string.as_ref().into())
                                .collect(),
                        )),
                    }
                } else {
                    match self
                        .dep_idents
                        .get(&module_id)
                        .and_then(|exposed_ids| exposed_ids.get_id(&ident))
                    {
                        Some(ident_id) => {
                            let symbol = Symbol::new(module_id, *ident_id);

                            self.qualified_lookups.insert(symbol);

                            Ok(symbol)
                        }
                        None => Err(RuntimeError::ValueNotExposed {
                            module_name,
                            ident,
                            region,
                        }),
                    }
                }
            }
            None => Err(RuntimeError::ModuleNotImported {
                module_name,
                imported_modules: self
                    .module_ids
                    .available_modules()
                    .map(|string| string.as_ref().into())
                    .collect(),
                region,
            }),
        }
    }
}

4
ast/src/lang/mod.rs Normal file
View File

@ -0,0 +1,4 @@
// Core AST definitions; other files in this crate import `core::ast`, `core::types` and `core::def` from here.
pub mod core;
// Defines `Scope`.
mod scope;
// Defines `Rigids`.
mod rigids;
// Defines `Env`.
mod env;

80
ast/src/lang/rigids.rs Normal file
View File

@ -0,0 +1,80 @@
use std::{collections::{HashMap, HashSet}, hash::BuildHasherDefault};
use crate::pool::{pool::Pool, pool_str::PoolStr, pool_vec::PoolVec, shallow_clone::ShallowClone};
use roc_collections::all::WyHash;
use roc_types::subs::{Variable};
/// The rigid type variables of an annotation, stored in the pool.
#[derive(Debug)]
pub struct Rigids {
/// One entry per rigid variable; the name is `None` for unnamed rigids.
pub names: PoolVec<(Option<PoolStr>, Variable)>, // 8B
// NOTE(review): purpose not visible here — presumably pads the struct to a fixed size; confirm.
padding: [u8; 1],
}
#[allow(clippy::needless_collect)]
impl Rigids {
    /// Stores the given rigids in the pool: all named ones first (with their
    /// name), followed by the unnamed ones (with `None` as name).
    pub fn new(
        named: HashMap<&str, Variable, BuildHasherDefault<WyHash>>,
        unnamed: HashSet<Variable, BuildHasherDefault<WyHash>>,
        pool: &mut Pool,
    ) -> Self {
        let names = PoolVec::with_capacity((named.len() + unnamed.len()) as u32, pool);

        let entries: Vec<(Option<&str>, Variable)> = named
            .iter()
            .map(|(name, var)| (Some(*name), *var))
            .chain(unnamed.iter().map(|var| (None, *var)))
            .collect();

        // Fill the reserved slots one by one.
        for (node_id, (opt_name, variable)) in names.iter_node_ids().zip(entries) {
            let pooled_name = opt_name.map(|name| PoolStr::new(name, pool));
            pool[node_id] = (pooled_name, variable);
        }

        Self {
            names,
            padding: [0; 1],
        }
    }

    /// Returns a new pooled vector with the `(name, variable)` pairs of all named rigids.
    pub fn named(&self, pool: &mut Pool) -> PoolVec<(PoolStr, Variable)> {
        // Collected first because building the result needs `pool` mutably.
        let with_names: Vec<(PoolStr, Variable)> = self
            .names
            .iter(pool)
            .filter_map(|(opt_pool_str, var)| {
                opt_pool_str.as_ref().map(|pool_str| (*pool_str, *var))
            })
            .collect();

        PoolVec::new(with_names.into_iter(), pool)
    }

    /// Returns a new pooled vector with the variables of all unnamed rigids.
    pub fn unnamed(&self, pool: &mut Pool) -> PoolVec<Variable> {
        // Collected first because building the result needs `pool` mutably.
        let without_names: Vec<Variable> = self
            .names
            .iter(pool)
            .filter(|(opt_pool_str, _)| opt_pool_str.is_none())
            .map(|(_, var)| *var)
            .collect();

        PoolVec::new(without_names.into_iter(), pool)
    }
}
impl ShallowClone for Rigids {
fn shallow_clone(&self) -> Self {
Self {
names: self.names.shallow_clone(),
padding: self.padding,
}
}
}

330
ast/src/lang/scope.rs Normal file
View File

@ -0,0 +1,330 @@
#![allow(clippy::all)]
#![allow(dead_code)]
#![allow(unused_imports)]
use crate::pool::pool::Pool;
use crate::pool::pool_str::PoolStr;
use crate::pool::pool_vec::PoolVec;
use crate::pool::shallow_clone::ShallowClone;
use roc_collections::all::{MutMap, MutSet};
use roc_module::ident::{Ident, Lowercase};
use roc_module::symbol::{IdentIds, ModuleId, Symbol};
use roc_problem::can::RuntimeError;
use roc_region::all::{Located, Region};
use roc_types::{
builtin_aliases,
solved_types::{BuiltinAlias, FreeVars, SolvedType},
subs::{VarId, VarStore, Variable},
};
use super::core::types::{Alias, Type2, TypeId};
fn solved_type_to_type_id(
pool: &mut Pool,
solved_type: &SolvedType,
free_vars: &mut FreeVars,
var_store: &mut VarStore,
) -> TypeId {
let typ2 = to_type2(pool, solved_type, free_vars, var_store);
pool.add(typ2)
}
/// Recursively converts a `SolvedType` into a pool-allocated `Type2`.
///
/// Only `Alias`, `TagUnion`, `Flex` and `EmptyTagUnion` are handled; any
/// other variant hits `todo!`. Flex variables are mapped to `Variable`s via
/// `var_id_to_flex_var`, so the same `VarId` always yields the same variable.
fn to_type2(
    pool: &mut Pool,
    solved_type: &SolvedType,
    free_vars: &mut FreeVars,
    var_store: &mut VarStore,
) -> Type2 {
    match solved_type {
        SolvedType::Alias(symbol, solved_type_variables, _todo, solved_actual) => {
            let arg_count = solved_type_variables.len() as u32;
            let type_variables = PoolVec::with_capacity(arg_count, pool);
            let slots = type_variables.iter_node_ids();

            for (slot, (lowercase, solved_arg)) in slots.zip(solved_type_variables.iter()) {
                let arg_type = to_type2(pool, solved_arg, free_vars, var_store);
                let arg_id = pool.add(arg_type);
                let arg_name = PoolStr::new(lowercase.as_str(), pool);

                pool[slot] = (arg_name, arg_id);
            }

            let actual_type = to_type2(pool, solved_actual, free_vars, var_store);
            let actual_id = pool.add(actual_type);

            Type2::Alias(*symbol, type_variables, actual_id)
        }
        SolvedType::TagUnion(tags, ext) => {
            let new_tags = PoolVec::with_capacity(tags.len() as u32, pool);

            for (tag_slot, (tag_name, args)) in new_tags.iter_node_ids().zip(tags.iter()) {
                let new_args: PoolVec<Type2> = PoolVec::with_capacity(args.len() as u32, pool);

                for (arg_slot, arg) in new_args.iter_node_ids().zip(args.iter()) {
                    let converted_arg = to_type2(pool, arg, free_vars, var_store);

                    pool[arg_slot] = converted_arg;
                }

                pool[tag_slot] = (tag_name.clone(), new_args);
            }

            let ext_type = to_type2(pool, ext, free_vars, var_store);
            let ext_id = pool.add(ext_type);

            Type2::TagUnion(new_tags, ext_id)
        }
        SolvedType::Flex(var_id) => {
            let variable = var_id_to_flex_var(*var_id, free_vars, var_store);

            Type2::Variable(variable)
        }
        SolvedType::EmptyTagUnion => Type2::EmptyTagUnion,
        rest => todo!("{:?}", rest),
    }
}
/// Returns the variable already recorded for `var_id` in
/// `free_vars.unnamed_vars`; if there is none, takes a fresh variable from
/// `var_store`, records it under `var_id`, and returns it.
fn var_id_to_flex_var(
    var_id: VarId,
    free_vars: &mut FreeVars,
    var_store: &mut VarStore,
) -> Variable {
    if let Some(existing) = free_vars.unnamed_vars.get(&var_id) {
        return *existing;
    }

    let fresh = var_store.fresh();
    free_vars.unnamed_vars.insert(var_id, fresh);

    fresh
}
/// The identifiers, symbols and type aliases visible while processing one module.
#[derive(Debug)]
pub struct Scope {
/// All the identifiers in scope, mapped to where they were defined and
/// the Symbol they resolve to.
idents: MutMap<Ident, (Symbol, Region)>,
/// A cache of all the symbols in scope. This makes lookups much
/// faster when checking for unused defs and unused arguments.
symbols: MutMap<Symbol, Region>,
/// The type aliases currently in scope
aliases: MutMap<Symbol, Alias>,
/// The current module being processed. This will be used to turn
/// unqualified idents into Symbols.
home: ModuleId,
}
impl Scope {
    /// Creates the scope for module `home`.
    ///
    /// The scope starts with every alias returned by
    /// `builtin_aliases::aliases()` (converted into pool-allocated types, using
    /// `var_store` for fresh variables) and the identifiers returned by
    /// `Symbol::default_in_scope()`. The symbol cache starts out empty.
    pub fn new(home: ModuleId, pool: &mut Pool, var_store: &mut VarStore) -> Scope {
        let solved_aliases = builtin_aliases::aliases();
        let mut aliases = MutMap::default();

        for (symbol, builtin_alias) in solved_aliases {
            let BuiltinAlias { vars, typ, .. } = builtin_alias;

            let mut free_vars = FreeVars::default();
            let actual = solved_type_to_type_id(pool, &typ, &mut free_vars, var_store);

            // make sure to sort these variables to make them line up with the type arguments
            let mut type_variables: Vec<_> = free_vars.unnamed_vars.into_iter().collect();
            type_variables.sort();

            debug_assert_eq!(vars.len(), type_variables.len());
            let variables = PoolVec::with_capacity(vars.len() as u32, pool);

            let it = variables
                .iter_node_ids()
                .zip(vars.iter())
                .zip(type_variables);

            for ((node_id, loc_name), (_, var)) in it {
                // TODO region is ignored, but "fake" anyway. How to resolve?
                let name = PoolStr::new(loc_name.value.as_str(), pool);
                pool[node_id] = (name, var);
            }

            let alias = Alias {
                actual,
                // We know that builtin aliases have no hidden variables (e.g. in closures)
                hidden_variables: PoolVec::empty(pool),
                targs: variables,
            };

            aliases.insert(symbol, alias);
        }

        let idents = Symbol::default_in_scope();
        let idents: MutMap<_, _> = idents.into_iter().collect();

        Scope {
            home,
            idents,
            symbols: MutMap::default(),
            aliases,
        }
    }

    /// Iterates over every identifier in scope together with the symbol it
    /// resolves to and the region recorded for it.
    pub fn idents(&self) -> impl Iterator<Item = (&Ident, &(Symbol, Region))> {
        self.idents.iter()
    }

    /// Iterates over the cached symbols and their regions, by value.
    pub fn symbols(&self) -> impl Iterator<Item = (Symbol, Region)> + '_ {
        self.symbols.iter().map(|(x, y)| (*x, *y))
    }

    /// Returns `true` if `ident` is currently in scope.
    pub fn contains_ident(&self, ident: &Ident) -> bool {
        self.idents.contains_key(ident)
    }

    /// Returns `true` if `symbol` is in the symbol cache.
    pub fn contains_symbol(&self, symbol: Symbol) -> bool {
        self.symbols.contains_key(&symbol)
    }

    /// Number of identifiers currently in scope.
    pub fn num_idents(&self) -> usize {
        self.idents.len()
    }

    /// Resolves `ident` to its symbol.
    ///
    /// Returns `RuntimeError::LookupNotInScope` — carrying the ident located
    /// at `region` and the names of all idents that are in scope — when
    /// `ident` is unknown. The lookup only reads the scope, so it takes
    /// `&self`.
    pub fn lookup(&self, ident: &Ident, region: Region) -> Result<Symbol, RuntimeError> {
        match self.idents.get(ident) {
            Some((symbol, _)) => Ok(*symbol),
            None => Err(RuntimeError::LookupNotInScope(
                Located {
                    region,
                    value: ident.clone().into(),
                },
                self.idents.keys().map(|v| v.as_ref().into()).collect(),
            )),
        }
    }

    /// Returns the alias registered under `symbol`, if any.
    pub fn lookup_alias(&self, symbol: Symbol) -> Option<&Alias> {
        self.aliases.get(&symbol)
    }

    /// Introduce a new ident to scope.
    ///
    /// Returns Err if this would shadow an existing ident, including the
    /// Region of the ident we already had in scope under that name and the
    /// shadowing ident located at `region`.
    pub fn introduce(
        &mut self,
        ident: Ident,
        exposed_ident_ids: &IdentIds,
        all_ident_ids: &mut IdentIds,
        region: Region,
    ) -> Result<Symbol, (Region, Located<Ident>)> {
        match self.idents.get(&ident) {
            Some((_, original_region)) => {
                let shadow = Located {
                    value: ident,
                    region,
                };

                Err((*original_region, shadow))
            }
            None => {
                // If this IdentId was already added previously
                // when the value was exposed in the module header,
                // use that existing IdentId. Otherwise, create a fresh one.
                let ident_id = match exposed_ident_ids.get_id(&ident) {
                    Some(ident_id) => *ident_id,
                    None => all_ident_ids.add(ident.clone().into()),
                };

                let symbol = Symbol::new(self.home, ident_id);

                self.symbols.insert(symbol, region);
                self.idents.insert(ident, (symbol, region));

                Ok(symbol)
            }
        }
    }

    /// Ignore an identifier.
    ///
    /// Used for record guards like { x: Just _ }
    ///
    /// The ident gets an id in `all_ident_ids` and a symbol in the home
    /// module, but is not added to this scope's maps.
    pub fn ignore(&mut self, ident: Ident, all_ident_ids: &mut IdentIds) -> Symbol {
        let ident_id = all_ident_ids.add(ident.into());
        Symbol::new(self.home, ident_id)
    }

    /// Import a Symbol from another module into this module's top-level scope.
    ///
    /// Returns Err if this would shadow an existing ident, including the
    /// Symbol and Region of the ident we already had in scope under that name.
    pub fn import(
        &mut self,
        ident: Ident,
        symbol: Symbol,
        region: Region,
    ) -> Result<(), (Symbol, Region)> {
        match self.idents.get(&ident) {
            Some(shadowed) => Err(*shadowed),
            None => {
                self.symbols.insert(symbol, region);
                self.idents.insert(ident, (symbol, region));

                Ok(())
            }
        }
    }

    /// Registers the alias `name` with type arguments `vars` and body `typ`.
    ///
    /// Every variable reported by `typ.variables(pool)` that is not one of the
    /// declared `vars` is stored as a hidden variable of the alias. An
    /// existing alias under the same name is replaced.
    pub fn add_alias(
        &mut self,
        pool: &mut Pool,
        name: Symbol,
        vars: PoolVec<(PoolStr, Variable)>,
        typ: TypeId,
    ) {
        let mut hidden_variables = MutSet::default();
        hidden_variables.extend(typ.variables(pool));

        for loc_var in vars.iter(pool) {
            hidden_variables.remove(&loc_var.1);
        }

        let hidden_variables_vec = PoolVec::with_capacity(hidden_variables.len() as u32, pool);

        for (node_id, var) in hidden_variables_vec.iter_node_ids().zip(hidden_variables) {
            pool[node_id] = var;
        }

        let alias = Alias {
            targs: vars,
            hidden_variables: hidden_variables_vec,
            actual: typ,
        };

        self.aliases.insert(name, alias);
    }

    /// Returns `true` if an alias is registered under `name`. This is a
    /// read-only query, so it takes `&self`.
    pub fn contains_alias(&self, name: Symbol) -> bool {
        self.aliases.contains_key(&name)
    }
}
impl ShallowClone for Scope {
fn shallow_clone(&self) -> Self {
Self {
idents: self.idents.clone(),
symbols: self.symbols.clone(),
aliases: self
.aliases
.iter()
.map(|(s, a)| (*s, a.shallow_clone()))
.collect(),
home: self.home,
}
}
}

5
ast/src/lib.rs Normal file
View File

@ -0,0 +1,5 @@
pub mod lang;
pub mod pool;
mod constrain;
mod canonicalize;
mod ast_error;

14
ast/src/mod.rs Normal file
View File

@ -0,0 +1,14 @@
pub mod ast;
mod constrain;
pub mod lang;
mod module;
pub mod parse;
mod pattern;
pub mod pool;
pub mod roc_file;
mod scope;
mod solve;
mod types;
mod rigids;
mod canonicalize;
mod ast_error;

View File

@ -0,0 +1,34 @@
/// Parses `code_str` into an `AST`.
///
/// The text before the first blank line (`"\n\n"`) is treated as the header;
/// everything from that blank line onwards is parsed into definitions, each
/// of which is stored in `env.pool`.
///
/// # Panics
///
/// Panics if `code_str` contains no double newline.
pub fn parse_from_string<'a>(
    code_str: &'a str,
    env: &mut Env<'a>,
    ast_arena: &'a Bump,
) -> Result<AST, SyntaxError<'a>> {
    let split_at = code_str
        .find("\n\n")
        .expect("I was expecting a double newline to split header and rest of code.");

    // The tail keeps the blank line itself, exactly like slicing at the index.
    let (header_str, tail_str) = code_str.split_at(split_at);

    let mut scope = Scope::new(env.home, env.pool, env.var_store);
    let region = Region::new(0, 0, 0, 0);

    let def_ids: Vec<DefId> = str_to_def2(ast_arena, tail_str, env, &mut scope, region)?
        .into_iter()
        .map(|def2| env.pool.add(def2))
        .collect();

    // The header is anchored to a blank placeholder expression node.
    let ast_node_id = env.pool.add(Expr2::Blank);
    let header = AppHeader::parse_from_string(header_str, ast_node_id);

    Ok(AST { header, def_ids })
}

View File

View File

@ -0,0 +1,11 @@
// TODO don't use mock struct and actually parse string
/// Returns a hard-coded placeholder header; `_header_str` is currently ignored.
pub fn parse_from_string(_header_str: &str, ast_node_id: ExprId) -> AppHeader {
    let app_name = String::from("\"untitled-app\"");
    let packages_base = String::from("\"platform\"");
    let provides = vec![String::from("main")];

    AppHeader {
        app_name,
        packages_base,
        imports: Vec::new(),
        provides,
        ast_node_id,
    }
}

4
ast/src/pool/mod.rs Normal file
View File

@ -0,0 +1,4 @@
pub mod pool;
pub mod pool_str;
pub mod pool_vec;
pub mod shallow_clone;

228
ast/src/pool/pool.rs Normal file
View File

@ -0,0 +1,228 @@
/// A memory pool of 32-byte nodes. The node value 0 is reserved for the pool's
/// use, and valid nodes may never have that value.
///
/// Internally, the pool is divided into pages of 4096 bytes. It stores nodes
/// into one page at a time, and when it runs out, it uses mmap to reserve an
/// anonymous memory page in which to store nodes.
///
/// Since nodes are 32 bytes, one page can store 128 nodes; you can access a
/// particular node by its NodeId, which is an opaque wrapper around a u32 index
/// into the pool's nodes.
///
/// Pages also use the node value 0 (all 0 bits) to mark nodes as unoccupied.
/// This is important for performance.
use libc::{c_void, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
use std::any::type_name;
use std::marker::PhantomData;
use std::mem::size_of;
use std::ptr::null;
/// Size in bytes of a single pool node (slot); the pool's storage is addressed
/// as an array of `[u8; NODE_BYTES]`.
pub const NODE_BYTES: usize = 32;
// Each page has 128 slots. Each slot holds one 32B node
// This means each page is 4096B, which is the size of a memory page
// on typical systems where the compiler will be run.
//
// Nice things about this system include:
// * Allocating a new page is as simple as asking the OS for a memory page.
// * Since each node is 32B, each node's memory address will be a multiple of 16.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the page, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
// Keep in mind that we can't use the Mesh Allocator itself because it returns
// usize pointers, which would be too big for us to have 16B nodes.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
//
// With this system, we can allocate up to 4B nodes. If we wanted to keep
// a generational index in there, like https://crates.io/crates/sharded-slab
// does, we could use some of the 32 bits for that. For example, if we wanted
// to have a 5-bit generational index (supporting up to 32 generations), then
// we would have 27 bits remaining, meaning we could only support at most
// 134M nodes. Since the editor has a separate Pool for each module, is that
// enough for any single module we'll encounter in practice? Probably, and
// especially if we allocate super large collection literals on the heap instead
// of in the pool.
//
// Another possible design is to try to catch reuse bugs using an "ASan" like
// approach: in development builds, whenever we "free" a particular slot, we
// can add it to a dev-build-only "freed nodes" list and don't hand it back
// out (so, we leak the memory.) Then we can (again, in development builds only)
// check to see if we're about to store something in zeroed-out memory; if so, check
// to see if it was in the "freed nodes" list, which would indicate a reuse bug.
/// Typed handle to a node stored in a `Pool`.
#[derive(Debug, Eq)]
pub (crate) struct NodeId<T> {
// Position of the node, counted in `NODE_BYTES`-sized slots from the start
// of the pool's storage.
pub (super) index: u32,
// Records the node type `T` without storing a `T`.
_phantom: PhantomData<T>,
}
impl<T> Clone for NodeId<T> {
fn clone(&self) -> Self {
NodeId {
index: self.index,
_phantom: PhantomData::default(),
}
}
}
impl<T> PartialEq for NodeId<T> {
    /// Two ids are equal exactly when they hold the same index.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.index, other.index);

        lhs == rhs
    }
}
// Ids are plain indices, so they are `Copy` for every `T` (no `T: Copy` bound).
impl<T> Copy for NodeId<T> {}
/// Storage for fixed-size nodes, addressed through `NodeId`s.
#[derive(Debug)]
pub struct Pool {
// Start of the pool's memory, viewed as an array of `NODE_BYTES`-byte slots.
pub (super) nodes: *mut [u8; NODE_BYTES],
// Number of slots handed out so far; also the index of the next slot to reserve.
num_nodes: u32,
// Total number of slots available in the pool's memory.
capacity: u32,
// free_1node_slots: Vec<NodeId<T>>,
}
impl Pool {
    /// Creates a pool with room for at least `nodes` nodes.
    ///
    /// The backing memory is an anonymous private mapping whose size is the
    /// requested byte count rounded up to a whole number of OS pages (and to
    /// at least one page), so the real capacity may exceed `nodes`.
    ///
    /// # Panics
    ///
    /// Panics if the operating system refuses the mapping.
    pub fn with_capacity(nodes: u32) -> Self {
        // round up number of nodes requested to nearest page size in bytes
        let bytes_per_page = page_size::get();
        let node_bytes = NODE_BYTES * nodes as usize;
        let leftover = node_bytes % bytes_per_page;
        let bytes_to_mmap = if node_bytes == 0 {
            // mmap rejects zero-length mappings, so always map at least one page.
            bytes_per_page
        } else if leftover == 0 {
            node_bytes
        } else {
            node_bytes + bytes_per_page - leftover
        };

        let mapping = unsafe {
            // mmap anonymous memory pages - that is, contiguous virtual memory
            // addresses from the OS which will be lazily translated into
            // physical memory one 4096-byte page at a time, once we actually
            // try to read or write in that page's address range.
            //
            // The fd is -1 because some platforms require that for
            // MAP_ANONYMOUS mappings.
            libc::mmap(
                null::<c_void>() as *mut c_void,
                bytes_to_mmap,
                PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS,
                -1,
                0,
            )
        };

        // Without this check a failed mmap would hand us MAP_FAILED as the
        // base pointer, and every later node access would be out of bounds.
        assert!(
            mapping != libc::MAP_FAILED,
            "mmap failed to reserve {} bytes for the node pool",
            bytes_to_mmap
        );

        // This is our actual capacity, in nodes.
        // It might be higher than the requested capacity due to rounding up
        // to nearest page size. Clamp instead of truncating if the rounded-up
        // count does not fit in a u32.
        let capacity = (bytes_to_mmap / NODE_BYTES).min(u32::MAX as usize) as u32;

        Pool {
            nodes: mapping as *mut [u8; NODE_BYTES],
            num_nodes: 0,
            capacity,
        }
    }

    /// Stores `node` in the next free slot and returns its id.
    ///
    /// `T` must fit in a single node (`NODE_BYTES` bytes); this is only
    /// checked in debug builds.
    pub fn add<T>(&mut self, node: T) -> NodeId<T> {
        // It's only safe to store this if T fits in S.
        debug_assert!(
            size_of::<T>() <= NODE_BYTES,
            "{} has a size of {}, but it needs to be at most {}",
            type_name::<T>(),
            size_of::<T>(),
            NODE_BYTES
        );

        let node_id = self.reserve(1);
        let node_ptr = unsafe { self.nodes.offset(node_id.index as isize) } as *mut T;

        // SAFETY: `reserve` returned an index inside the mapping. The slot
        // holds no initialized `T` yet, so it is written with `write`, which
        // does not drop the old contents (plain assignment would).
        unsafe { node_ptr.write(node) };

        node_id
    }

    /// Reserves the given number of contiguous node slots, and returns
    /// the NodeId of the first one. At most `u32::MAX` slots can be reserved
    /// in one call.
    ///
    /// The reserved slots are not initialized by this function.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!`) when the pool does not have `nodes` free slots
    /// left; growing the pool is not implemented yet.
    pub(super) fn reserve<T>(&mut self, nodes: u32) -> NodeId<T> {
        // TODO once we have a free list, look in there for an open slot first!
        let index = self.num_nodes;

        // The whole range [index, index + nodes) must fit, not just its first
        // slot; otherwise callers would write past the end of the mapping.
        match index.checked_add(nodes) {
            Some(end) if end <= self.capacity => {
                self.num_nodes = end;

                NodeId {
                    index,
                    _phantom: PhantomData::default(),
                }
            }
            _ => {
                todo!("pool ran out of capacity. TODO reallocate the nodes pointer to map to a bigger space. Can use mremap on Linux, but must memcpy lots of bytes on macOS and Windows.");
            }
        }
    }

    /// Returns a reference to the node stored under `node_id`.
    ///
    /// NOTE(review): the returned lifetime `'b` is not tied to `&self`, so
    /// the compiler does not stop the reference from outliving the pool or
    /// from overlapping a later mutation; callers must uphold that themselves.
    pub fn get<'a, 'b, T>(&'a self, node_id: NodeId<T>) -> &'b T {
        unsafe {
            let node_ptr = self.nodes.offset(node_id.index as isize) as *const T;

            &*node_ptr
        }
    }

    /// Returns a mutable reference to the node stored under `node_id`.
    pub fn get_mut<T>(&mut self, node_id: NodeId<T>) -> &mut T {
        unsafe {
            let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T;

            &mut *node_ptr
        }
    }

    /// Overwrites the node stored under `node_id` with `element`.
    ///
    /// This is a plain assignment, so the value previously in the slot is
    /// dropped first.
    pub fn set<T>(&mut self, node_id: NodeId<T>, element: T) {
        unsafe {
            let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T;

            *node_ptr = element;
        }
    }

    // A node is available iff its bytes are all zeroes
    #[allow(dead_code)]
    fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
        debug_assert_eq!(size_of::<T>(), NODE_BYTES);

        unsafe {
            let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES];

            *node_ptr == [0; NODE_BYTES]
        }
    }
}
impl<T> std::ops::Index<NodeId<T>> for Pool {
type Output = T;
fn index(&self, node_id: NodeId<T>) -> &Self::Output {
self.get(node_id)
}
}
impl<T> std::ops::IndexMut<NodeId<T>> for Pool {
fn index_mut(&mut self, node_id: NodeId<T>) -> &mut Self::Output {
self.get_mut(node_id)
}
}
impl Drop for Pool {
    /// Unmaps the pool's memory. The nodes themselves are not dropped, and
    /// the result of `munmap` is ignored.
    fn drop(&mut self) {
        let mapped_bytes = NODE_BYTES * self.capacity as usize;
        let base = self.nodes as *mut c_void;

        unsafe {
            libc::munmap(base, mapped_bytes);
        }
    }
}

88
ast/src/pool/pool_str.rs Normal file
View File

@ -0,0 +1,88 @@
use super::pool::{NodeId, Pool, NODE_BYTES};
use super::shallow_clone::ShallowClone;
use libc::{c_void};
use std::marker::PhantomData;
use std::mem::size_of;
/// A string containing at most 2^32 pool-allocated bytes.
///
/// The value is only a handle (first node index plus byte length); the bytes
/// themselves live in a `Pool`.
#[derive(Debug, Copy, Clone)]
pub struct PoolStr {
// Id of the first pool node holding the string's bytes.
first_node_id: NodeId<()>,
// Length of the string in bytes.
len: u32,
}
#[test]
fn pool_str_size() {
    // A `PoolStr` should stay a compact 8-byte handle.
    let actual_size = size_of::<PoolStr>();

    assert_eq!(actual_size, 8);
}
impl PoolStr {
    /// Copies `string` into `pool` and returns a handle to the copy.
    ///
    /// The UTF-8 bytes are stored contiguously, starting at a freshly
    /// reserved node. An empty string reserves nothing and uses node index 0
    /// as a placeholder.
    pub fn new(string: &str, pool: &mut Pool) -> Self {
        debug_assert!(string.len() <= u32::MAX as usize);

        // The string is stored as raw UTF-8 bytes (not as 4-byte `char`s), so
        // every node holds NODE_BYTES bytes of it.
        let bytes_per_node = NODE_BYTES / size_of::<u8>();

        // Ceiling division without going through floating point.
        let full_nodes = string.len() / bytes_per_node;
        let has_partial_node = string.len() % bytes_per_node != 0;
        let number_of_nodes = (full_nodes + usize::from(has_partial_node)) as u32;

        if number_of_nodes > 0 {
            let first_node_id = pool.reserve(number_of_nodes);
            let index = first_node_id.index as isize;
            let next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut c_void;

            // SAFETY: `number_of_nodes` nodes were reserved starting at
            // `index`, which is at least `string.len()` bytes, and the source
            // string cannot overlap the freshly reserved nodes.
            unsafe {
                libc::memcpy(
                    next_node_ptr,
                    string.as_ptr() as *const c_void,
                    string.len(),
                );
            }

            PoolStr {
                first_node_id,
                len: string.len() as u32,
            }
        } else {
            PoolStr {
                first_node_id: NodeId {
                    index: 0,
                    _phantom: PhantomData::default(),
                },
                len: 0,
            }
        }
    }

    /// Returns the string's contents, read from `pool`.
    ///
    /// `pool` must be the pool this string was created in.
    ///
    /// NOTE(review): by lifetime elision the returned `&str` is tied to
    /// `&self`, not to `pool`, although the bytes live in the pool — confirm
    /// callers never keep it across a change to the pool.
    pub fn as_str(&self, pool: &Pool) -> &str {
        unsafe {
            let node_ptr = pool.nodes.offset(self.first_node_id.index as isize) as *const u8;

            let node_slice: &[u8] = std::slice::from_raw_parts(node_ptr, self.len as usize);

            // The bytes were copied from a `&str` in `new`, so they are valid UTF-8.
            std::str::from_utf8_unchecked(node_slice)
        }
    }

    /// Length of the string in bytes.
    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self, pool: &Pool) -> usize {
        self.as_str(pool).len()
    }

    /// Returns `true` if the string has no bytes.
    pub fn is_empty(&self, pool: &Pool) -> bool {
        self.len(pool) == 0
    }
}
impl ShallowClone for PoolStr {
    /// Copies the handle only; the copy refers to the same bytes in the pool.
    fn shallow_clone(&self) -> Self {
        // Question: should this fully clone, or is a shallow copy
        // (and the aliasing it entails) OK?
        *self
    }
}

324
ast/src/pool/pool_vec.rs Normal file
View File

@ -0,0 +1,324 @@
use super::pool::{NodeId, Pool, NODE_BYTES};
use super::shallow_clone::ShallowClone;
use libc::{c_void};
use std::marker::PhantomData;
use std::mem::size_of;
use std::any::type_name;
use std::cmp::Ordering;
/// An array of at most 2^32 pool-allocated nodes.
///
/// The value is only a handle (first node id plus element count); the
/// elements live in consecutive nodes of a `Pool`, one element per node.
#[derive(Debug)]
pub struct PoolVec<T> {
// Id of the node holding the first element.
first_node_id: NodeId<T>,
// Number of elements.
len: u32,
}
#[test]
fn pool_vec_size() {
    // A `PoolVec` handle should stay 8 bytes regardless of its element type.
    let actual_size = size_of::<PoolVec<()>>();

    assert_eq!(actual_size, 8);
}
impl<'a, T: 'a + Sized> PoolVec<T> {
    /// Creates a vector with no elements; nothing is reserved in `pool`.
    pub fn empty(pool: &mut Pool) -> Self {
        Self::new(std::iter::empty(), pool)
    }

    /// Reserves `len` consecutive nodes and returns a vector covering them.
    ///
    /// The nodes are not initialized here; the caller is expected to fill
    /// every slot (e.g. by zipping `iter_node_ids` with the values).
    pub fn with_capacity(len: u32, pool: &mut Pool) -> Self {
        debug_assert!(
            size_of::<T>() <= NODE_BYTES,
            "{} has a size of {}",
            type_name::<T>(),
            size_of::<T>()
        );

        if len == 0 {
            Self::empty(pool)
        } else {
            let first_node_id = pool.reserve(len);

            PoolVec { first_node_id, len }
        }
    }

    /// Number of elements.
    pub fn len(&self) -> usize {
        self.len as usize
    }

    /// Returns `true` if the vector has no elements.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Moves the items of `nodes` into consecutive, freshly reserved nodes of
    /// `pool`, one item per node.
    ///
    /// An empty iterator reserves nothing and uses node index 0 as a
    /// placeholder.
    pub fn new<I: ExactSizeIterator<Item = T>>(nodes: I, pool: &mut Pool) -> Self {
        debug_assert!(nodes.len() <= u32::MAX as usize);
        debug_assert!(size_of::<T>() <= NODE_BYTES);

        let len = nodes.len() as u32;

        if len > 0 {
            let first_node_id = pool.reserve(len);
            let index = first_node_id.index as isize;
            let mut written: u32 = 0;

            // `ExactSizeIterator::len` is a hint from safe code, so never
            // write more items than were reserved, and remember how many were
            // really written in case the iterator yields fewer.
            for (offset, node) in nodes.take(len as usize).enumerate() {
                // SAFETY: `offset < len`, so the slot lies inside the range
                // reserved above. The slot holds no initialized `T`, so it is
                // written with `write`, which does not drop old contents.
                unsafe {
                    let node_ptr = pool.nodes.offset(index + offset as isize) as *mut T;

                    node_ptr.write(node);
                }

                written += 1;
            }

            PoolVec {
                first_node_id,
                len: written,
            }
        } else {
            PoolVec {
                first_node_id: NodeId {
                    index: 0,
                    _phantom: PhantomData::default(),
                },
                len: 0,
            }
        }
    }

    /// Iterates over shared references to the elements stored in `pool`.
    pub fn iter(&self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
        self.pool_list_iter(pool)
    }

    /// Iterates over mutable references to the elements stored in `pool`.
    pub fn iter_mut(&self, pool: &'a mut Pool) -> impl ExactSizeIterator<Item = &'a mut T> {
        self.pool_list_iter_mut(pool)
    }

    /// Iterates over the node ids of the elements, without touching the pool.
    pub fn iter_node_ids(&self) -> impl ExactSizeIterator<Item = NodeId<T>> {
        self.pool_list_iter_node_ids()
    }

    /// Private version of into_iter which exposes the implementation detail
    /// of PoolVecIter. We don't want that struct to be public, but we
    /// actually do want to have this separate function for code reuse
    /// in the iterator's next() method.
    #[inline(always)]
    fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
        PoolVecIter {
            pool,
            current_node_id: self.first_node_id,
            len_remaining: self.len,
        }
    }

    /// Private counterpart of `pool_list_iter` for mutable iteration.
    #[inline(always)]
    fn pool_list_iter_mut(&self, pool: &'a Pool) -> PoolVecIterMut<'a, T> {
        PoolVecIterMut {
            pool,
            current_node_id: self.first_node_id,
            len_remaining: self.len,
        }
    }

    /// Private counterpart of `pool_list_iter` that yields node ids.
    #[inline(always)]
    fn pool_list_iter_node_ids(&self) -> PoolVecIterNodeIds<T> {
        PoolVecIterNodeIds {
            current_node_id: self.first_node_id,
            len_remaining: self.len,
        }
    }

    /// Consumes the vector and zeroes the nodes it occupied.
    ///
    /// The elements are not dropped, and the nodes are not yet returned to
    /// the pool for reuse. The type parameter `S` is unused.
    pub fn free<S>(self, pool: &'a mut Pool) {
        // zero out the memory
        unsafe {
            let index = self.first_node_id.index as isize;
            let node_ptr = pool.nodes.offset(index) as *mut c_void;
            let bytes = self.len as usize * NODE_BYTES;

            libc::memset(node_ptr, 0, bytes);
        }

        // TODO insert it into the pool's free list
    }
}
impl<T> ShallowClone for PoolVec<T> {
fn shallow_clone(&self) -> Self {
// Question: should this fully clone, or is a shallow copy
// (and the aliasing it entails) OK?
Self {
first_node_id: self.first_node_id,
len: self.len,
}
}
}
/// Iterator over shared references to the elements of a `PoolVec`.
struct PoolVecIter<'a, T> {
// Pool the elements are read from.
pool: &'a Pool,
// Id of the next element to yield.
current_node_id: NodeId<T>,
// Number of elements not yet yielded.
len_remaining: u32,
}
impl<'a, T: 'a> ExactSizeIterator for PoolVecIter<'a, T> {
    /// Number of elements not yet yielded.
    fn len(&self) -> usize {
        let remaining: u32 = self.len_remaining;

        remaining as usize
    }
}
impl<'a, T: 'a> Iterator for PoolVecIter<'a, T> {
    type Item = &'a T;

    /// Yields a reference to the current element, or `None` once all
    /// elements have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.len_remaining == 0 {
            return None;
        }

        // Locate the current node before moving on.
        let index = self.current_node_id.index;
        let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T;

        self.len_remaining -= 1;

        // Only step to the next node while elements remain: after the last
        // element the id is left untouched, because advancing might go past
        // the end of the page.
        if self.len_remaining > 0 {
            self.current_node_id = NodeId {
                index: index + 1,
                _phantom: PhantomData::default(),
            };
        }

        Some(unsafe { &*node_ptr })
    }
}
/// Iterator over mutable references to the elements of a `PoolVec`.
struct PoolVecIterMut<'a, T> {
// Pool the elements live in. NOTE(review): this is a shared reference even
// though the iterator hands out `&mut T` — confirm this is intended.
pool: &'a Pool,
// Id of the next element to yield.
current_node_id: NodeId<T>,
// Number of elements not yet yielded.
len_remaining: u32,
}
impl<'a, T: 'a> ExactSizeIterator for PoolVecIterMut<'a, T> {
    /// Number of elements not yet yielded.
    fn len(&self) -> usize {
        let remaining: u32 = self.len_remaining;

        remaining as usize
    }
}
impl<'a, T: 'a> Iterator for PoolVecIterMut<'a, T> {
    type Item = &'a mut T;

    /// Yields a mutable reference to the current element, or `None` once all
    /// elements have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.len_remaining == 0 {
            return None;
        }

        // Locate the current node before moving on.
        let index = self.current_node_id.index;
        let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T;

        self.len_remaining -= 1;

        // Only step to the next node while elements remain: after the last
        // element the id is left untouched, because advancing might go past
        // the end of the page.
        if self.len_remaining > 0 {
            self.current_node_id = NodeId {
                index: index + 1,
                _phantom: PhantomData::default(),
            };
        }

        Some(unsafe { &mut *node_ptr })
    }
}
/// Iterator over the node ids of a `PoolVec`'s elements; it does not access the pool.
struct PoolVecIterNodeIds<T> {
// Id that will be yielded next.
current_node_id: NodeId<T>,
// Number of ids not yet yielded.
len_remaining: u32,
}
impl<T> ExactSizeIterator for PoolVecIterNodeIds<T> {
    /// Number of ids not yet yielded.
    fn len(&self) -> usize {
        let remaining: u32 = self.len_remaining;

        remaining as usize
    }
}
impl<T> Iterator for PoolVecIterNodeIds<T> {
    type Item = NodeId<T>;

    /// Yields the current node id, or `None` once all ids have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.len_remaining == 0 {
            return None;
        }

        let current = self.current_node_id;

        self.len_remaining -= 1;

        // Only step to the next id while ids remain: after the last one the
        // id is left untouched, because advancing might go past the end of
        // the page.
        if self.len_remaining > 0 {
            self.current_node_id = NodeId {
                index: current.index + 1,
                _phantom: PhantomData::default(),
            };
        }

        Some(current)
    }
}
#[test]
fn pool_vec_iter_test() {
    // Round trip: values stored through `PoolVec::new` must come back from
    // `iter` unchanged and in the same order.
    let expected: Vec<usize> = vec![2, 4, 8, 16];

    let mut pool = Pool::with_capacity(1024);
    let stored = PoolVec::new(expected.clone().into_iter(), &mut pool);

    let actual: Vec<usize> = stored.iter(&pool).copied().collect();

    assert_eq!(actual, expected);
}

View File

@ -0,0 +1,33 @@
use roc_can::expected::Expected;
use roc_can::expected::PExpected;
/// Clones the outer node, but does not clone what its node ids refer to:
/// the copy shares the underlying pool nodes with the original.
pub trait ShallowClone {
// Returns a copy of `self` that still refers to the same pool nodes.
fn shallow_clone(&self) -> Self;
}
impl<T: ShallowClone> ShallowClone for Expected<T> {
fn shallow_clone(&self) -> Self {
use Expected::*;
match self {
NoExpectation(t) => NoExpectation(t.shallow_clone()),
ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region),
FromAnnotation(loc_pat, n, source, t) => {
FromAnnotation(loc_pat.clone(), *n, *source, t.shallow_clone())
}
}
}
}
impl<T: ShallowClone> ShallowClone for PExpected<T> {
fn shallow_clone(&self) -> Self {
use PExpected::*;
match self {
NoExpectation(t) => NoExpectation(t.shallow_clone()),
ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region),
}
}
}

133
ast/src/roc_file.rs Normal file
View File

@ -0,0 +1,133 @@
use bumpalo::collections::Vec;
use bumpalo::Bump;
use roc_fmt::def::fmt_def;
use roc_fmt::module::fmt_module;
use roc_parse::ast::{Def, Module};
use roc_parse::module::module_defs;
use roc_parse::parser;
use roc_parse::parser::{Parser, SyntaxError};
use roc_region::all::Located;
use std::ffi::OsStr;
use std::path::Path;
use std::{fs, io};
/// A `.roc` source file that has been read and parsed: where it came from,
/// its module header, and the definitions that follow the header.
#[derive(Debug)]
pub struct File<'a> {
// Path the file was read from.
path: &'a Path,
// Parsed module header.
module_header: Module<'a>,
// Parsed definitions following the header.
content: Vec<'a, Located<Def<'a>>>,
}
/// Reasons why `File::read` can fail.
#[derive(Debug)]
pub enum ReadError<'a> {
// Reading the file from disk failed.
Read(std::io::Error),
// The header parsed, but the definitions after it did not.
ParseDefs(SyntaxError<'a>),
// The module header did not parse.
ParseHeader(SyntaxError<'a>),
// The path's extension is not `roc`.
DoesntHaveRocExtension,
}
impl<'a> File<'a> {
pub fn read(path: &'a Path, arena: &'a Bump) -> Result<File<'a>, ReadError<'a>> {
if path.extension() != Some(OsStr::new("roc")) {
return Err(ReadError::DoesntHaveRocExtension);
}
let bytes = fs::read(path).map_err(ReadError::Read)?;
let allocation = arena.alloc(bytes);
let module_parse_state = parser::State::new(allocation);
let parsed_module = roc_parse::module::parse_header(arena, module_parse_state);
match parsed_module {
Ok((module, state)) => {
let parsed_defs = module_defs().parse(arena, state);
match parsed_defs {
Ok((_, defs, _)) => Ok(File {
path,
module_header: module,
content: defs,
}),
Err((_, error, _)) => Err(ReadError::ParseDefs(error)),
}
}
Err(error) => Err(ReadError::ParseHeader(SyntaxError::Header(error))),
}
}
pub fn fmt(&self) -> String {
let arena = Bump::new();
let mut formatted_file = String::new();
let mut module_header_buf = bumpalo::collections::String::new_in(&arena);
fmt_module(&mut module_header_buf, &self.module_header);
formatted_file.push_str(module_header_buf.as_str());
for def in &self.content {
let mut def_buf = bumpalo::collections::String::new_in(&arena);
fmt_def(&mut def_buf, &def.value, 0);
formatted_file.push_str(def_buf.as_str());
}
formatted_file
}
pub fn fmt_then_write_to(&self, write_path: &'a Path) -> io::Result<()> {
let formatted_file = self.fmt();
fs::write(write_path, formatted_file)
}
pub fn fmt_then_write_with_name(&self, new_name: &str) -> io::Result<()> {
self.fmt_then_write_to(
self.path
.with_file_name(new_name)
.with_extension("roc")
.as_path(),
)
}
pub fn fmt_then_write(&self) -> io::Result<()> {
self.fmt_then_write_to(self.path)
}
}
// Tests for reading and formatting `.roc` files.
#[cfg(test)]
mod test_file {
use crate::lang::roc_file;
use bumpalo::Bump;
use std::path::Path;
// Reads the unformatted fixture module and checks that `File::fmt` produces
// the expected formatted source.
// NOTE(review): `indoc!` is not imported in the visible part of this module —
// confirm the macro is in scope where this file is compiled.
#[test]
fn read_and_fmt_simple_roc_module() {
let simple_module_path = Path::new("./tests/modules/SimpleUnformatted.roc");
let arena = Bump::new();
let file = roc_file::File::read(simple_module_path, &arena)
.expect("Could not read SimpleUnformatted.roc in test_file test");
assert_eq!(
file.fmt(),
indoc!(
r#"
interface Simple
exposes [
v, x
]
imports []
v : Str
v = "Value!"
x : Int
x = 4"#
)
);
}
}

1752
ast/src/solve_type.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,6 @@ authors = ["The Roc Contributors"]
license = "UPL-1.0"
edition = "2018"
description = "An editor for Roc"
exclude = ["src/shaders/*.spv"]
[dependencies]
roc_collections = { path = "../compiler/collections" }

View File

@ -26,29 +26,7 @@ pub const NODE_BYTES: usize = 32;
// on typical systems where the compiler will be run.
//
// Nice things about this system include:
// * Allocating a new page is as simple as asking the OS for a memory page.
// * Since each node is 32B, each node's memory address will be a multiple of 16.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the page, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
// Keep in mind that we can't use the Mesh Allocator itself because it returns
// usize pointers, which would be too big for us to have 16B nodes.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
//
// With this system, we can allocate up to 4B nodes. If we wanted to keep
// a generational index in there, like https://crates.io/crates/sharded-slab
// does, we could use some of the 32 bits for that. For example, if we wanted
// to have a 5-bit generational index (supporting up to 32 generations), then
// * Allocating a new pagShallowCloneal index (supporting up to 32 generations), then
// we would have 27 bits remaining, meaning we could only support at most
// 134M nodes. Since the editor has a separate Pool for each module, is that
// enough for any single module we'll encounter in practice? Probably, and