From 199558b66136bc6647c78c50b9743777e89ca80b Mon Sep 17 00:00:00 2001 From: Folkert Date: Mon, 1 Apr 2024 21:36:39 +0200 Subject: [PATCH] basic transitive inference --- crates/compiler/mono/src/borrow.rs | 348 ++++++++++++++++++-- crates/compiler/mono/src/inc_dec.rs | 22 +- crates/compiler/mono/src/ir.rs | 10 + examples/platform-switching/rocLovesZig.roc | 4 +- 4 files changed, 347 insertions(+), 37 deletions(-) diff --git a/crates/compiler/mono/src/borrow.rs b/crates/compiler/mono/src/borrow.rs index 9e9b9cc427..0d307555a6 100644 --- a/crates/compiler/mono/src/borrow.rs +++ b/crates/compiler/mono/src/borrow.rs @@ -1,20 +1,159 @@ use bumpalo::{collections::Vec, Bump}; +use roc_collections::{MutMap, ReferenceMatrix}; +use roc_error_macros::todo_lambda_erasure; use roc_module::symbol::Symbol; use crate::{ inc_dec::Ownership, - ir::{Call, CallType, Expr, Proc, Stmt}, - layout::{Builtin, InLayout, LayoutInterner, LayoutRepr}, + ir::{Call, CallType, Expr, Proc, ProcLayout, Stmt}, + layout::{Builtin, InLayout, LayoutInterner, LayoutRepr, Niche}, }; -#[allow(unused)] -pub(crate) fn infer_borrow_signature<'a>( +#[derive(Clone, Copy)] +pub(crate) struct BorrowSignature(u64); + +impl std::fmt::Debug for BorrowSignature { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut f = &mut f.debug_struct("BorrowSignature"); + + dbg!(self.0); + + for (i, ownership) in self.iter().enumerate() { + f = f.field(&format!("_{i}"), &ownership); + } + + f.finish() + } +} + +impl BorrowSignature { + fn new(len: usize) -> Self { + assert!(len < 64 - 8); + + Self(len as _) + } + + fn len(&self) -> usize { + (self.0 & 0xFF) as usize + } + + fn get(&self, index: usize) -> Option<&Ownership> { + if index >= self.len() { + return None; + } + + match self.0 & (1 << (index + 8)) { + 0 => Some(&Ownership::Borrowed), + _ => Some(&Ownership::Owned), + } + } + + fn set(&mut self, index: usize, ownership: Ownership) { + assert!(index < self.len()); + + let mask = 1 << (index + 8); + + match ownership { + Ownership::Owned => self.0 |= mask, + Ownership::Borrowed => self.0 &= !mask, + } + } + + fn iter(&self) -> impl Iterator + '_ { + let mut i = 0; + + std::iter::from_fn(move || { + let value = self.get(i)?; + i += 1; + Some(*value) + }) + } +} + +impl std::ops::Index for BorrowSignature { + type Output = Ownership; + + fn index(&self, index: usize) -> &Self::Output { + self.get(index).unwrap() + } +} + +pub(crate) type BorrowSignatures<'a> = MutMap<(Symbol, ProcLayout<'a>), BorrowSignature>; + +pub(crate) fn infer_borrow_signatures<'a>( arena: &'a Bump, interner: &impl LayoutInterner<'a>, + procs: &'a MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, +) -> BorrowSignatures<'a> { + let host_exposed_procs = &[]; + + let mut borrow_signatures = procs + .iter() + .map(|(key, proc)| { + let mut signature = BorrowSignature::new(proc.args.len()); + + for (i, in_layout) in key.1.arguments.iter().enumerate() { + signature.set(i, layout_to_ownership(*in_layout, interner)); + } + + (*key, signature) + }) + .collect(); + + // next we first partition the functions into strongly connected components, then do a + // topological sort on these components, finally run the fix-point borrow analysis on each + // component (in top-sorted order, from primitives (std-lib) to main) + + let matrix = construct_reference_matrix(arena, procs); + let sccs = matrix.strongly_connected_components_all(); + + let mut env = (); + + for (group, _) in sccs.groups() { + // This is a fixed-point analysis + // + // all functions initially own all their parameters + // through a series of checks and heuristics, some arguments are set to borrowed + // when that doesn't lead to conflicts the change is kept, otherwise it may be reverted + // + // when the signatures no longer change, the analysis stops and returns the signatures + loop { + for index in group.iter_ones() { + let (key, proc) = procs.iter().nth(index).unwrap(); + + if proc.args.is_empty() { + continue; + } + + // host-exposed functions must always own their arguments. + let is_host_exposed = host_exposed_procs.contains(&key.0); + + let mut state = State::new(arena, interner, &mut borrow_signatures, proc); + state.inspect_stmt(&mut borrow_signatures, &proc.body); + + borrow_signatures.insert(*key, state.borrow_signature); + } + + // if there were no modifications, we're done + // if !env.modified { + if true { + break; + } + } + } + + borrow_signatures +} + +#[allow(unused)] +fn infer_borrow_signature<'a>( + arena: &'a Bump, + interner: &impl LayoutInterner<'a>, + borrow_signatures: &'a mut BorrowSignatures<'a>, proc: &'a Proc<'a>, -) -> &'a [Ownership] { - let mut state = State::new(arena, interner, proc); - state.inspect_stmt(&proc.body); +) -> BorrowSignature { + let mut state = State::new(arena, interner, borrow_signatures, proc); + state.inspect_stmt(borrow_signatures, &proc.body); state.borrow_signature } @@ -22,7 +161,7 @@ struct State<'a> { /// Argument symbols with a layout of `List *` or `Str`, i.e. the layouts /// for which borrow inference might decide to pass as borrowed args: &'a [(InLayout<'a>, Symbol)], - borrow_signature: &'a mut [Ownership], + borrow_signature: BorrowSignature, } fn layout_to_ownership<'a>( @@ -39,32 +178,45 @@ fn layout_to_ownership<'a>( } impl<'a> State<'a> { - fn new(arena: &'a Bump, interner: &impl LayoutInterner<'a>, proc: &'a Proc<'a>) -> Self { - let borrow_signature = Vec::from_iter_in( - proc.args - .iter() - .map(|(in_layout, _)| layout_to_ownership(*in_layout, interner)), - arena, - ) - .into_bump_slice_mut(); + fn new( + arena: &'a Bump, + interner: &impl LayoutInterner<'a>, + borrow_signatures: &mut BorrowSignatures<'a>, + proc: &'a Proc<'a>, + ) -> Self { + let key = (proc.name.name(), proc.proc_layout(arena)); + + // initialize the borrow signature based on the layout if first time + let borrow_signature = borrow_signatures.entry(key).or_insert_with(|| { + let mut borrow_signature = BorrowSignature::new(proc.args.len()); + + for (i, in_layout) in key.1.arguments.iter().enumerate() { + borrow_signature.set(i, layout_to_ownership(*in_layout, interner)); + } + + borrow_signature + }); Self { args: proc.args, - borrow_signature, + borrow_signature: *borrow_signature, } } + /// Mark the given argument symbol as Owned if the symbol participates in borrow inference + /// + /// Currently argument symbols participate if `layout_to_ownership` returns `Borrowed` for their layout. fn mark_owned(&mut self, symbol: Symbol) { if let Some(index) = self.args.iter().position(|(_, s)| *s == symbol) { - self.borrow_signature[index] = Ownership::Owned; + self.borrow_signature.set(index, Ownership::Owned); } } - fn inspect_stmt(&mut self, stmt: &'a Stmt<'a>) { + fn inspect_stmt(&mut self, borrow_signatures: &mut BorrowSignatures<'a>, stmt: &'a Stmt<'a>) { match stmt { Stmt::Let(_, expr, _, stmt) => { - self.inspect_expr(expr); - self.inspect_stmt(stmt); + self.inspect_expr(borrow_signatures, expr); + self.inspect_stmt(borrow_signatures, stmt); } Stmt::Switch { branches, @@ -72,12 +224,16 @@ impl<'a> State<'a> { .. } => { for (_, _, stmt) in branches.iter() { - self.inspect_stmt(stmt); + self.inspect_stmt(borrow_signatures, stmt); } - self.inspect_stmt(default_branch.1); + self.inspect_stmt(borrow_signatures, default_branch.1); } - Stmt::Ret(_) => todo!(), - Stmt::Refcounting(_, _) => todo!(), + Stmt::Ret(s) => { + // to return a value we must own it + // (with the current implementation anyway) + self.mark_owned(*s); + } + Stmt::Refcounting(_, _) => unreachable!("not inserted yet"), Stmt::Expect { .. } | Stmt::ExpectFx { .. } => { // TODO do we rely on values being passed by-value here? // it would be better to pass by-reference in general @@ -89,31 +245,48 @@ impl<'a> State<'a> { Stmt::Join { body, remainder, .. } => { - self.inspect_stmt(body); - self.inspect_stmt(remainder); + self.inspect_stmt(borrow_signatures, body); + self.inspect_stmt(borrow_signatures, remainder); } Stmt::Jump(_, _) | Stmt::Crash(_, _) => { /* not relevant for ownership */ } } } - fn inspect_expr(&mut self, expr: &'a Expr<'a>) { + fn inspect_expr(&mut self, borrow_signatures: &mut BorrowSignatures<'a>, expr: &'a Expr<'a>) { if let Expr::Call(call) = expr { - self.inspect_call(call) + self.inspect_call(borrow_signatures, call) } } - fn inspect_call(&mut self, call: &'a Call<'a>) { + fn inspect_call(&mut self, borrow_signatures: &mut BorrowSignatures<'a>, call: &'a Call<'a>) { let Call { call_type, arguments, } = call; match call_type { - CallType::ByName { name: _, .. } => { - // TODO ownership should depend on the borrow signature of the called function - for argument in arguments.iter() { - self.mark_owned(*argument) + CallType::ByName { + name, + arg_layouts, + ret_layout, + .. + } => { + let proc_layout = ProcLayout { + arguments: arg_layouts, + result: *ret_layout, + niche: Niche::NONE, + }; + + let borrow_signature = match borrow_signatures.get(&(name.name(), proc_layout)) { + Some(s) => s, + None => todo!("no borrow signature for function/layout"), + }; + + for (argument, ownership) in arguments.iter().zip(borrow_signature.iter()) { + if let Ownership::Owned = ownership { + self.mark_owned(*argument); + } } } CallType::LowLevel { op, .. } => { @@ -134,3 +307,110 @@ impl<'a> State<'a> { } } } + +fn construct_reference_matrix<'a>( + arena: &'a Bump, + procs: &MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, +) -> ReferenceMatrix { + let mut matrix = ReferenceMatrix::new(procs.len()); + + let mut call_info = CallInfo::new(arena); + + for (row, proc) in procs.values().enumerate() { + call_info.clear(); + call_info.stmt(arena, &proc.body); + + for key in call_info.keys.iter() { + // the same symbol can be in `keys` multiple times (with different layouts) + for (col, (k, _)) in procs.keys().enumerate() { + if k == key { + matrix.set_row_col(row, col, true); + } + } + } + } + + matrix +} + +struct CallInfo<'a> { + keys: Vec<'a, Symbol>, +} + +impl<'a> CallInfo<'a> { + fn new(arena: &'a Bump) -> Self { + CallInfo { + keys: Vec::new_in(arena), + } + } + + fn clear(&mut self) { + self.keys.clear() + } + + fn call(&mut self, call: &crate::ir::Call<'a>) { + use crate::ir::CallType::*; + use crate::ir::HigherOrderLowLevel; + use crate::ir::PassedFunction; + + match call.call_type { + ByName { name, .. } => { + self.keys.push(name.name()); + } + ByPointer { .. } => { + todo_lambda_erasure!() + } + Foreign { .. } => {} + LowLevel { .. } => {} + HigherOrder(HigherOrderLowLevel { + passed_function: PassedFunction { name, .. }, + .. + }) => { + self.keys.push(name.name()); + } + } + } + + fn stmt(&mut self, arena: &'a Bump, stmt: &Stmt<'a>) { + use Stmt::*; + + let mut stack = bumpalo::vec![in arena; stmt]; + + while let Some(stmt) = stack.pop() { + match stmt { + Join { + remainder: v, + body: b, + .. + } => { + stack.push(v); + stack.push(b); + } + Let(_, expr, _, cont) => { + if let Expr::Call(call) = expr { + self.call(call); + } + stack.push(cont); + } + Switch { + branches, + default_branch, + .. + } => { + stack.extend(branches.iter().map(|b| &b.2)); + stack.push(default_branch.1); + } + + Dbg { remainder, .. } => stack.push(remainder), + Expect { remainder, .. } => stack.push(remainder), + ExpectFx { remainder, .. } => stack.push(remainder), + + Refcounting(_, _) => unreachable!("these have not been introduced yet"), + + Ret(_) | Jump(_, _) | Crash(..) => { + // these are terminal, do nothing + } + } + } + } +} diff --git a/crates/compiler/mono/src/inc_dec.rs b/crates/compiler/mono/src/inc_dec.rs index beb0af0d5c..55460329b7 100644 --- a/crates/compiler/mono/src/inc_dec.rs +++ b/crates/compiler/mono/src/inc_dec.rs @@ -15,6 +15,7 @@ use roc_module::low_level::LowLevel; use roc_module::{low_level::LowLevelWrapperType, symbol::Symbol}; use crate::ir::ErasedField; +use crate::layout::LambdaName; use crate::{ ir::{ BranchInfo, Call, CallType, Expr, HigherOrderLowLevel, JoinPointId, ListLiteralElement, @@ -30,8 +31,19 @@ Insert the reference count operations for procedures. pub fn insert_inc_dec_operations<'a>( arena: &'a Bump, layout_interner: &STLayoutInterner<'a>, - procedures: &mut HashMap<(Symbol, ProcLayout), Proc<'a>, BuildHasherDefault>, + procedures: &mut HashMap<(Symbol, ProcLayout<'a>), Proc<'a>, BuildHasherDefault>, ) { + // TODO remove this clone? + let x = procedures.clone(); + let ps = arena.alloc(x); + + let borrow_signatures = crate::borrow::infer_borrow_signatures(arena, layout_interner, ps); + let borrow_signatures = arena.alloc(borrow_signatures); + + for ((s, _), sig) in borrow_signatures.iter() { + dbg!((s, sig)); + } + // All calls to lowlevels are wrapped in another function to help with type inference and return/parameter layouts. // But this lowlevel might get inlined into the caller of the wrapper and thus removing any reference counting operations. // Thus, these rc operations are performed on the caller of the wrapper instead, and we skip rc on the lowlevel. @@ -43,7 +55,7 @@ pub fn insert_inc_dec_operations<'a>( LowLevelWrapperType::NotALowLevelWrapper ) { let symbol_rc_types_env = SymbolRcTypesEnv::from_layout_interner(layout_interner); - insert_inc_dec_operations_proc(arena, symbol_rc_types_env, proc); + insert_inc_dec_operations_proc(arena, symbol_rc_types_env, borrow_signatures, proc); } } } @@ -245,6 +257,8 @@ Type containing data about the symbols consumption of a join point. */ type JoinPointConsumption = MutSet; +type Key<'a> = (LambdaName<'a>, InLayout<'a>, &'a [InLayout<'a>]); + /** The environment for the reference counting pass. Contains the symbols rc types and the ownership. @@ -256,6 +270,8 @@ struct RefcountEnvironment<'v> { // The Koka implementation assumes everything that is not owned to be borrowed. symbols_ownership: SymbolsOwnership, jointpoint_closures: MutMap, + // inferred borrow signatures of roc functions + borrow_signatures: &'v crate::borrow::BorrowSignatures<'v>, } impl<'v> RefcountEnvironment<'v> { @@ -403,6 +419,7 @@ impl<'v> RefcountEnvironment<'v> { fn insert_inc_dec_operations_proc<'a>( arena: &'a Bump, mut symbol_rc_types_env: SymbolRcTypesEnv<'a, '_>, + borrow_signatures: &'a crate::borrow::BorrowSignatures<'a>, proc: &mut Proc<'a>, ) { // Clone the symbol_rc_types_env and insert the symbols in the current procedure. @@ -413,6 +430,7 @@ fn insert_inc_dec_operations_proc<'a>( symbols_rc_types: &symbol_rc_types_env.symbols_rc_type, symbols_ownership: MutMap::default(), jointpoint_closures: MutMap::default(), + borrow_signatures, }; // Add all arguments to the environment (if they are reference counted) diff --git a/crates/compiler/mono/src/ir.rs b/crates/compiler/mono/src/ir.rs index a813d43420..a6d9621c55 100644 --- a/crates/compiler/mono/src/ir.rs +++ b/crates/compiler/mono/src/ir.rs @@ -396,6 +396,16 @@ impl<'a> Proc<'a> { w.push(b'\n'); String::from_utf8(w).unwrap() } + + pub fn proc_layout(&self, arena: &'a Bump) -> ProcLayout<'a> { + let args = Vec::from_iter_in(self.args.iter().map(|(a, b)| *a), arena); + + ProcLayout { + arguments: args.into_bump_slice(), + result: self.ret_layout, + niche: Niche::NONE, + } + } } /// A host-exposed function must be specialized; it's a seed for subsequent specializations diff --git a/examples/platform-switching/rocLovesZig.roc b/examples/platform-switching/rocLovesZig.roc index aa0526fe97..d44a860ae8 100644 --- a/examples/platform-switching/rocLovesZig.roc +++ b/examples/platform-switching/rocLovesZig.roc @@ -1,3 +1,5 @@ app [main] { pf: platform "zig-platform/main.roc" } -main = "Roc <3 Zig!\n" +helper = \a, b -> Str.concat a b + +main = helper "Roc <" "3 Zig!\n"