basic transitive inference

2024-10-04 14:17:28 +03:00 · 2024-04-01 21:36:39 +02:00 · 2024-04-01 21:36:39 +02:00 · 199558b661
commit 199558b661
parent 1bcf30391b
4 changed files with 347 additions and 37 deletions
--- a/crates/compiler/mono/src/borrow.rs
+++ b/crates/compiler/mono/src/borrow.rs
@ -1,20 +1,159 @@
 use bumpalo::{collections::Vec, Bump};
+use roc_collections::{MutMap, ReferenceMatrix};
+use roc_error_macros::todo_lambda_erasure;
 use roc_module::symbol::Symbol;

 use crate::{
    inc_dec::Ownership,
-    ir::{Call, CallType, Expr, Proc, Stmt},
-    layout::{Builtin, InLayout, LayoutInterner, LayoutRepr},
+    ir::{Call, CallType, Expr, Proc, ProcLayout, Stmt},
+    layout::{Builtin, InLayout, LayoutInterner, LayoutRepr, Niche},
 };

-#[allow(unused)]
-pub(crate) fn infer_borrow_signature<'a>(
+#[derive(Clone, Copy)]
+pub(crate) struct BorrowSignature(u64);
+
+impl std::fmt::Debug for BorrowSignature {
+    /// Formats the signature as a `BorrowSignature` struct with one `_{index}` field per
+    /// argument, each showing that argument's `Ownership`.
+    ///
+    /// Formatting has no side effects: nothing is printed besides what is written to `f`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut debug_struct = f.debug_struct("BorrowSignature");
+
+        for (i, ownership) in self.iter().enumerate() {
+            debug_struct.field(&format!("_{i}"), &ownership);
+        }
+
+        debug_struct.finish()
+    }
+}
+
+impl BorrowSignature {
+    /// Creates a signature for `len` arguments.
+    ///
+    /// The low 8 bits of the packed word hold `len`; all ownership bits start cleared,
+    /// which `get` reports as `Ownership::Borrowed`.
+    ///
+    /// Panics when `len` is not below `64 - 8`.
+    fn new(len: usize) -> Self {
+        assert!(len < 64 - 8);
+
+        Self(len as u64)
+    }
+
+    /// Number of arguments described by this signature (the low 8 bits of the word).
+    fn len(&self) -> usize {
+        (self.0 & 0xFF) as usize
+    }
+
+    /// Bit that stores the ownership of the argument at `index`.
+    ///
+    /// Ownership bits sit directly above the 8 length bits.
+    fn ownership_bit(index: usize) -> u64 {
+        1 << (index + 8)
+    }
+
+    /// Returns the ownership of the argument at `index`, or `None` when `index` is out of
+    /// range. A set bit means `Owned`, a cleared bit means `Borrowed`.
+    fn get(&self, index: usize) -> Option<&Ownership> {
+        if index >= self.len() {
+            return None;
+        }
+
+        let is_owned = (self.0 & Self::ownership_bit(index)) != 0;
+
+        if is_owned {
+            Some(&Ownership::Owned)
+        } else {
+            Some(&Ownership::Borrowed)
+        }
+    }
+
+    /// Records `ownership` for the argument at `index`.
+    ///
+    /// Panics when `index` is out of range.
+    fn set(&mut self, index: usize, ownership: Ownership) {
+        assert!(index < self.len());
+
+        let bit = Self::ownership_bit(index);
+
+        self.0 = match ownership {
+            Ownership::Owned => self.0 | bit,
+            Ownership::Borrowed => self.0 & !bit,
+        };
+    }
+
+    /// Iterates over the ownership of every argument, in argument order.
+    fn iter(&self) -> impl Iterator<Item = Ownership> + '_ {
+        (0..self.len()).map_while(move |position| self.get(position).copied())
+    }
+}
+
+impl std::ops::Index<usize> for BorrowSignature {
+    type Output = Ownership;
+
+    /// Returns the ownership of the argument at `index`.
+    ///
+    /// Panics when `index` is out of range, i.e. when `get` returns `None`.
+    fn index(&self, index: usize) -> &Self::Output {
+        let ownership = self.get(index);
+
+        ownership.unwrap()
+    }
+}
+
+pub(crate) type BorrowSignatures<'a> = MutMap<(Symbol, ProcLayout<'a>), BorrowSignature>;
+
+/// Infers a borrow signature for every proc in `procs`.
+///
+/// Each signature is first seeded from the argument layouts in the proc's key (via
+/// `layout_to_ownership`). The procs are then grouped into strongly connected components of
+/// the call graph, and every component is re-analysed until none of its signatures change.
+///
+/// Returns a map from `(symbol, layout)` to the inferred signature.
+pub(crate) fn infer_borrow_signatures<'a>(
    arena: &'a Bump,
    interner: &impl LayoutInterner<'a>,
+    procs: &'a MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
+) -> BorrowSignatures<'a> {
+    let mut borrow_signatures: BorrowSignatures<'a> = procs
+        .iter()
+        .map(|(key, proc)| {
+            let mut signature = BorrowSignature::new(proc.args.len());
+
+            for (i, in_layout) in key.1.arguments.iter().enumerate() {
+                signature.set(i, layout_to_ownership(*in_layout, interner));
+            }
+
+            (*key, signature)
+        })
+        .collect();
+
+    // Snapshot the procs in the map's iteration order, so that an index coming out of the
+    // reference matrix can be resolved directly instead of re-walking the map every time.
+    let indexed_procs = Vec::from_iter_in(procs.iter(), arena);
+
+    // Next we partition the functions into strongly connected components, topologically sort
+    // these components, and finally run the fixed-point borrow analysis on each component
+    // (in top-sorted order, from primitives (std-lib) to main).
+
+    let matrix = construct_reference_matrix(arena, procs);
+    let sccs = matrix.strongly_connected_components_all();
+
+    // TODO host-exposed functions must always own their arguments; no host-exposed procs
+    // are tracked here yet.
+
+    for (group, _) in sccs.groups() {
+        // This is a fixed-point analysis
+        //
+        // every signature starts out as derived from the argument layouts
+        // inspecting a proc body can only move arguments from borrowed to owned
+        //
+        // when the signatures no longer change, the analysis of this component stops
+        loop {
+            let mut modified = false;
+
+            for index in group.iter_ones() {
+                let (key, proc) = indexed_procs[index];
+
+                if proc.args.is_empty() {
+                    continue;
+                }
+
+                let mut state = State::new(arena, interner, &mut borrow_signatures, proc);
+                state.inspect_stmt(&mut borrow_signatures, &proc.body);
+
+                let updated = state.borrow_signature;
+                let previous = borrow_signatures.insert(*key, updated);
+
+                // a signature that was missing or held a different bit pattern counts as a change
+                modified |= previous.map_or(true, |old| old.0 != updated.0);
+            }
+
+            // if there were no modifications, we're done
+            if !modified {
+                break;
+            }
+        }
+    }
+
+    borrow_signatures
+}
+
+#[allow(unused)]
+fn infer_borrow_signature<'a>(
+    arena: &'a Bump,
+    interner: &impl LayoutInterner<'a>,
+    borrow_signatures: &'a mut BorrowSignatures<'a>,
    proc: &'a Proc<'a>,
-) -> &'a [Ownership] {
-    let mut state = State::new(arena, interner, proc);
-    state.inspect_stmt(&proc.body);
+) -> BorrowSignature {
+    let mut state = State::new(arena, interner, borrow_signatures, proc);
+    state.inspect_stmt(borrow_signatures, &proc.body);
    state.borrow_signature
 }

@ -22,7 +161,7 @@ struct State<'a> {
    /// Argument symbols with a layout of `List *` or `Str`, i.e. the layouts
    /// for which borrow inference might decide to pass as borrowed
    args: &'a [(InLayout<'a>, Symbol)],
-    borrow_signature: &'a mut [Ownership],
+    borrow_signature: BorrowSignature,
 }

 fn layout_to_ownership<'a>(
@ -39,32 +178,45 @@ fn layout_to_ownership<'a>(
 }

 impl<'a> State<'a> {
-    fn new(arena: &'a Bump, interner: &impl LayoutInterner<'a>, proc: &'a Proc<'a>) -> Self {
-        let borrow_signature = Vec::from_iter_in(
-            proc.args
-                .iter()
-                .map(|(in_layout, _)| layout_to_ownership(*in_layout, interner)),
-            arena,
-        )
-        .into_bump_slice_mut();
+    fn new(
+        arena: &'a Bump,
+        interner: &impl LayoutInterner<'a>,
+        borrow_signatures: &mut BorrowSignatures<'a>,
+        proc: &'a Proc<'a>,
+    ) -> Self {
+        let key = (proc.name.name(), proc.proc_layout(arena));
+
+        // initialize the borrow signature based on the layout if first time
+        let borrow_signature = borrow_signatures.entry(key).or_insert_with(|| {
+            let mut borrow_signature = BorrowSignature::new(proc.args.len());
+
+            for (i, in_layout) in key.1.arguments.iter().enumerate() {
+                borrow_signature.set(i, layout_to_ownership(*in_layout, interner));
+            }
+
+            borrow_signature
+        });

        Self {
            args: proc.args,
-            borrow_signature,
+            borrow_signature: *borrow_signature,
        }
    }

+    /// Mark the given argument symbol as Owned if the symbol participates in borrow inference
+    ///
+    /// Currently argument symbols participate if `layout_to_ownership` returns `Borrowed` for their layout.
    fn mark_owned(&mut self, symbol: Symbol) {
        if let Some(index) = self.args.iter().position(|(_, s)| *s == symbol) {
-            self.borrow_signature[index] = Ownership::Owned;
+            self.borrow_signature.set(index, Ownership::Owned);
        }
    }

-    fn inspect_stmt(&mut self, stmt: &'a Stmt<'a>) {
+    fn inspect_stmt(&mut self, borrow_signatures: &mut BorrowSignatures<'a>, stmt: &'a Stmt<'a>) {
        match stmt {
            Stmt::Let(_, expr, _, stmt) => {
-                self.inspect_expr(expr);
-                self.inspect_stmt(stmt);
+                self.inspect_expr(borrow_signatures, expr);
+                self.inspect_stmt(borrow_signatures, stmt);
            }
            Stmt::Switch {
                branches,
@ -72,12 +224,16 @@ impl<'a> State<'a> {
                ..
            } => {
                for (_, _, stmt) in branches.iter() {
-                    self.inspect_stmt(stmt);
+                    self.inspect_stmt(borrow_signatures, stmt);
                }
-                self.inspect_stmt(default_branch.1);
+                self.inspect_stmt(borrow_signatures, default_branch.1);
            }
-            Stmt::Ret(_) => todo!(),
-            Stmt::Refcounting(_, _) => todo!(),
+            Stmt::Ret(s) => {
+                // to return a value we must own it
+                // (with the current implementation anyway)
+                self.mark_owned(*s);
+            }
+            Stmt::Refcounting(_, _) => unreachable!("not inserted yet"),
            Stmt::Expect { .. } | Stmt::ExpectFx { .. } => {
                // TODO do we rely on values being passed by-value here?
                // it would be better to pass by-reference in general
@ -89,31 +245,48 @@ impl<'a> State<'a> {
            Stmt::Join {
                body, remainder, ..
            } => {
-                self.inspect_stmt(body);
-                self.inspect_stmt(remainder);
+                self.inspect_stmt(borrow_signatures, body);
+                self.inspect_stmt(borrow_signatures, remainder);
            }

            Stmt::Jump(_, _) | Stmt::Crash(_, _) => { /* not relevant for ownership */ }
        }
    }

-    fn inspect_expr(&mut self, expr: &'a Expr<'a>) {
+    fn inspect_expr(&mut self, borrow_signatures: &mut BorrowSignatures<'a>, expr: &'a Expr<'a>) {
        if let Expr::Call(call) = expr {
-            self.inspect_call(call)
+            self.inspect_call(borrow_signatures, call)
        }
    }

-    fn inspect_call(&mut self, call: &'a Call<'a>) {
+    fn inspect_call(&mut self, borrow_signatures: &mut BorrowSignatures<'a>, call: &'a Call<'a>) {
        let Call {
            call_type,
            arguments,
        } = call;

        match call_type {
-            CallType::ByName { name: _, .. } => {
-                // TODO ownership should depend on the borrow signature of the called function
-                for argument in arguments.iter() {
-                    self.mark_owned(*argument)
+            CallType::ByName {
+                name,
+                arg_layouts,
+                ret_layout,
+                ..
+            } => {
+                let proc_layout = ProcLayout {
+                    arguments: arg_layouts,
+                    result: *ret_layout,
+                    niche: Niche::NONE,
+                };
+
+                let borrow_signature = match borrow_signatures.get(&(name.name(), proc_layout)) {
+                    Some(s) => s,
+                    None => todo!("no borrow signature for function/layout"),
+                };
+
+                for (argument, ownership) in arguments.iter().zip(borrow_signature.iter()) {
+                    if let Ownership::Owned = ownership {
+                        self.mark_owned(*argument);
+                    }
                }
            }
            CallType::LowLevel { op, .. } => {
@ -134,3 +307,110 @@ impl<'a> State<'a> {
        }
    }
 }
+
+/// Builds the call matrix of `procs`: entry `(row, col)` is set when the proc at position
+/// `row` calls the symbol of the proc at position `col`.
+///
+/// Positions follow the iteration order of `procs`.
+fn construct_reference_matrix<'a>(
+    arena: &'a Bump,
+    procs: &MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
+) -> ReferenceMatrix {
+    let mut matrix = ReferenceMatrix::new(procs.len());
+    let mut call_info = CallInfo::new(arena);
+
+    for (row, proc) in procs.values().enumerate() {
+        // gather the symbols called from this proc's body
+        call_info.clear();
+        call_info.stmt(arena, &proc.body);
+
+        // the same symbol can occur in several keys (with different layouts), so one called
+        // symbol may mark more than one column
+        for (col, (symbol, _)) in procs.keys().enumerate() {
+            if call_info.keys.contains(symbol) {
+                matrix.set_row_col(row, col, true);
+            }
+        }
+    }
+
+    matrix
+}
+
+/// Collects the symbols of the functions called from a statement tree.
+struct CallInfo<'a> {
+    /// Called symbols, in the order they were encountered; may contain duplicates.
+    keys: Vec<'a, Symbol>,
+}
+
+impl<'a> CallInfo<'a> {
+    /// Creates an empty collector whose storage lives in `arena`.
+    fn new(arena: &'a Bump) -> Self {
+        let keys = Vec::new_in(arena);
+
+        CallInfo { keys }
+    }
+
+    /// Forgets every symbol collected so far.
+    fn clear(&mut self) {
+        self.keys.clear()
+    }
+
+    /// Records the function targeted by `call`, if it names one.
+    ///
+    /// By-name calls and the function passed to a higher-order lowlevel are recorded;
+    /// foreign and lowlevel calls are ignored. By-pointer calls are not supported yet.
+    fn call(&mut self, call: &crate::ir::Call<'a>) {
+        use crate::ir::{CallType::*, HigherOrderLowLevel, PassedFunction};
+
+        match call.call_type {
+            ByName { name, .. } => {
+                self.keys.push(name.name());
+            }
+            HigherOrder(HigherOrderLowLevel {
+                passed_function: PassedFunction { name, .. },
+                ..
+            }) => {
+                self.keys.push(name.name());
+            }
+            ByPointer { .. } => {
+                todo_lambda_erasure!()
+            }
+            Foreign { .. } | LowLevel { .. } => {}
+        }
+    }
+
+    /// Walks `stmt` and all statements reachable from it, recording every call found in a
+    /// `Let` binding.
+    ///
+    /// The walk uses an explicit work list allocated in `arena` rather than recursion.
+    fn stmt(&mut self, arena: &'a Bump, stmt: &Stmt<'a>) {
+        use Stmt::*;
+
+        let mut pending = bumpalo::vec![in arena; stmt];
+
+        while let Some(current) = pending.pop() {
+            match current {
+                Let(_, expr, _, cont) => {
+                    if let Expr::Call(call) = expr {
+                        self.call(call);
+                    }
+                    pending.push(cont);
+                }
+                Join {
+                    remainder, body, ..
+                } => {
+                    pending.push(remainder);
+                    pending.push(body);
+                }
+                Switch {
+                    branches,
+                    default_branch,
+                    ..
+                } => {
+                    for branch in branches.iter() {
+                        pending.push(&branch.2);
+                    }
+                    pending.push(default_branch.1);
+                }
+
+                Dbg { remainder, .. } => pending.push(remainder),
+                Expect { remainder, .. } => pending.push(remainder),
+                ExpectFx { remainder, .. } => pending.push(remainder),
+
+                Refcounting(_, _) => unreachable!("these have not been introduced yet"),
+
+                Ret(_) | Jump(_, _) | Crash(..) => {
+                    // terminal statements: nothing further to visit
+                }
+            }
+        }
+    }
+}
--- a/crates/compiler/mono/src/inc_dec.rs
+++ b/crates/compiler/mono/src/inc_dec.rs
@ -15,6 +15,7 @@ use roc_module::low_level::LowLevel;
 use roc_module::{low_level::LowLevelWrapperType, symbol::Symbol};

 use crate::ir::ErasedField;
+use crate::layout::LambdaName;
 use crate::{
    ir::{
        BranchInfo, Call, CallType, Expr, HigherOrderLowLevel, JoinPointId, ListLiteralElement,
@ -30,8 +31,19 @@ Insert the reference count operations for procedures.
 pub fn insert_inc_dec_operations<'a>(
    arena: &'a Bump,
    layout_interner: &STLayoutInterner<'a>,
-    procedures: &mut HashMap<(Symbol, ProcLayout), Proc<'a>, BuildHasherDefault<WyHash>>,
+    procedures: &mut HashMap<(Symbol, ProcLayout<'a>), Proc<'a>, BuildHasherDefault<WyHash>>,
 ) {
+    // TODO remove this clone?
+    let x = procedures.clone();
+    let ps = arena.alloc(x);
+
+    let borrow_signatures = crate::borrow::infer_borrow_signatures(arena, layout_interner, ps);
+    let borrow_signatures = arena.alloc(borrow_signatures);
+
+    for ((s, _), sig) in borrow_signatures.iter() {
+        dbg!((s, sig));
+    }
+
    // All calls to lowlevels are wrapped in another function to help with type inference and return/parameter layouts.
    // But this lowlevel might get inlined into the caller of the wrapper and thus removing any reference counting operations.
    // Thus, these rc operations are performed on the caller of the wrapper instead, and we skip rc on the lowlevel.
@ -43,7 +55,7 @@ pub fn insert_inc_dec_operations<'a>(
            LowLevelWrapperType::NotALowLevelWrapper
        ) {
            let symbol_rc_types_env = SymbolRcTypesEnv::from_layout_interner(layout_interner);
-            insert_inc_dec_operations_proc(arena, symbol_rc_types_env, proc);
+            insert_inc_dec_operations_proc(arena, symbol_rc_types_env, borrow_signatures, proc);
        }
    }
 }
@ -245,6 +257,8 @@ Type containing data about the symbols consumption of a join point.
 */
 type JoinPointConsumption = MutSet<Symbol>;

+type Key<'a> = (LambdaName<'a>, InLayout<'a>, &'a [InLayout<'a>]);
+
 /**
 The environment for the reference counting pass.
 Contains the symbols rc types and the ownership.
@ -256,6 +270,8 @@ struct RefcountEnvironment<'v> {
    // The Koka implementation assumes everything that is not owned to be borrowed.
    symbols_ownership: SymbolsOwnership,
    jointpoint_closures: MutMap<JoinPointId, JoinPointConsumption>,
+    // inferred borrow signatures of roc functions
+    borrow_signatures: &'v crate::borrow::BorrowSignatures<'v>,
 }

 impl<'v> RefcountEnvironment<'v> {
@ -403,6 +419,7 @@ impl<'v> RefcountEnvironment<'v> {
 fn insert_inc_dec_operations_proc<'a>(
    arena: &'a Bump,
    mut symbol_rc_types_env: SymbolRcTypesEnv<'a, '_>,
+    borrow_signatures: &'a crate::borrow::BorrowSignatures<'a>,
    proc: &mut Proc<'a>,
 ) {
    // Clone the symbol_rc_types_env and insert the symbols in the current procedure.
@ -413,6 +430,7 @@ fn insert_inc_dec_operations_proc<'a>(
        symbols_rc_types: &symbol_rc_types_env.symbols_rc_type,
        symbols_ownership: MutMap::default(),
        jointpoint_closures: MutMap::default(),
+        borrow_signatures,
    };

    // Add all arguments to the environment (if they are reference counted)
--- a/crates/compiler/mono/src/ir.rs
+++ b/crates/compiler/mono/src/ir.rs
@ -396,6 +396,16 @@ impl<'a> Proc<'a> {
        w.push(b'\n');
        String::from_utf8(w).unwrap()
    }
+
+    /// Builds the `ProcLayout` of this proc from its argument layouts and return layout.
+    ///
+    /// The argument layouts are copied into a slice allocated in `arena`. The niche is
+    /// always `Niche::NONE`.
+    pub fn proc_layout(&self, arena: &'a Bump) -> ProcLayout<'a> {
+        let args = Vec::from_iter_in(self.args.iter().map(|(layout, _)| *layout), arena);
+
+        ProcLayout {
+            arguments: args.into_bump_slice(),
+            result: self.ret_layout,
+            niche: Niche::NONE,
+        }
+    }
 }

 /// A host-exposed function must be specialized; it's a seed for subsequent specializations
--- a/examples/platform-switching/rocLovesZig.roc
+++ b/examples/platform-switching/rocLovesZig.roc
@ -1,3 +1,5 @@
 app [main] { pf: platform "zig-platform/main.roc" }

-main = "Roc <3 Zig!\n"
+helper = \a, b -> Str.concat a b
+
+main = helper "Roc <" "3 Zig!\n"