diff --git a/rust/iron-planet/.gitignore b/rust/.gitignore similarity index 100% rename from rust/iron-planet/.gitignore rename to rust/.gitignore diff --git a/rust/iron-planet/Cargo.lock b/rust/iron-planet/Cargo.lock index e7a41d6..5f2326b 100644 --- a/rust/iron-planet/Cargo.lock +++ b/rust/iron-planet/Cargo.lock @@ -2,34 +2,144 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitvec" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1489fcb93a5bb47da0462ca93ad252ad6af2145cce58d10d46a83931ba9f016b" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "intmap" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b357564d111300f8a33b79e06795235529a627a1f7078d2b1db7f7dcdf032874" + [[package]] name = "iron-planet" version = "0.1.0" dependencies = [ + "bitvec", + "either", + "intmap", + "libc", "memmap", - "noun", + "murmur3", + "num-derive", + "num-traits", ] [[package]] name = "libc" -version = "0.2.120" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad5c14e80759d0939d013e6ca49930e59fc53dd8e5009132f76240c179380c09" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" [[package]] name = "memmap" -version = "0.6.2" +version = "0.7.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ "libc", "winapi", ] [[package]] -name = "noun" -version = "0.1.0" -source = "git+https://github.com/mcevoypeter/urbit.git?branch=main#cca52babc9a080199083519cfb347de8cd3024e0" +name = "murmur3" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ead5388e485d38e622630c6b05afd3761a6701ff15c55b279ea5b31dcb62cff" + +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "proc-macro2" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "syn" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +dependencies = [ + "proc-macro2", + "quote", + 
"unicode-ident", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "unicode-ident" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" [[package]] name = "winapi" @@ -52,3 +162,12 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "wyz" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b31594f29d27036c383b53b59ed3476874d518f0efb151b27a4c275141390e" +dependencies = [ + "tap", +] diff --git a/rust/iron-planet/Cargo.toml b/rust/iron-planet/Cargo.toml index ff678e6..3f793ec 100644 --- a/rust/iron-planet/Cargo.toml +++ b/rust/iron-planet/Cargo.toml @@ -7,5 +7,11 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -memmap = "0.6.2" -noun = { git = "https://github.com/mcevoypeter/urbit.git", branch = "main" } +bitvec = "1.0.0" +either = "1.6.1" +libc = "0.2.126" +murmur3 = "0.5.1" +memmap = "0.7.0" +intmap = "1.1.0" +num-traits = "0.2" +num-derive = "0.3" diff --git a/rust/iron-planet/src/interpreter.rs b/rust/iron-planet/src/interpreter.rs new file mode 100644 index 0000000..fee95ba --- /dev/null +++ b/rust/iron-planet/src/interpreter.rs @@ -0,0 +1,597 @@ +use self::NockWork::*; +use crate::mem::unifying_equality; +use crate::mem::NockStack; +use crate::noun::{Atom, Cell, DirectAtom, IndirectAtom, Noun}; +use bitvec::prelude::{BitSlice, Lsb0}; +use either::Either::*; +use num_traits::cast::{FromPrimitive, ToPrimitive}; + +#[derive(Copy, Clone, FromPrimitive, ToPrimitive)] +#[repr(u64)] +enum 
NockWork { + Done, + NockCellComputeHead, + NockCellComputeTail, + NockCellCons, + Nock0Axis, + Nock1Constant, + Nock2ComputeSubject, + Nock2ComputeFormula, + Nock2ComputeResult, + Nock2RestoreSubject, + Nock3ComputeChild, + Nock3ComputeType, + Nock4ComputeChild, + Nock4Increment, + Nock5ComputeLeftChild, + Nock5ComputeRightChild, + Nock5TestEquals, + Nock6ComputeTest, + Nock6ComputeBranch, + Nock6Done, + Nock7ComputeSubject, + Nock7ComputeResult, + Nock7RestoreSubject, + Nock8ComputeSubject, + Nock8ComputeResult, + Nock8RestoreSubject, + Nock9ComputeCore, + Nock9ComputeResult, + Nock9RestoreSubject, + Nock10ComputeTree, + Nock10ComputePatch, + Nock10Edit, + Nock11ComputeHint, + Nock11ComputeResult, + Nock11Done, +} + +fn work_to_noun(work: NockWork) -> Noun { + unsafe { + DirectAtom::new_unchecked(work.to_u64().expect("IMPOSSIBLE: work does not fit in u64")) + .as_atom() + .as_noun() + } +} + +fn noun_to_work(noun: Noun) -> NockWork { + if let Left(direct) = noun.as_either_direct_allocated() { + NockWork::from_u64(direct.data()).expect("Invalid work") + } else { + panic!("Work should always be a direct atom.") + } +} + +pub fn interpret(stack: &mut NockStack, mut subject: Noun, formula: Noun) -> Noun { + let mut res = unsafe { DirectAtom::new_unchecked(0).as_atom().as_noun() }; + stack.push(1); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Done); + } + push_formula(stack, formula); + loop { + match unsafe { noun_to_work(*(stack.local_noun_pointer(0))) } { + Done => { + break; + } + NockCellComputeHead => { + unsafe { + *stack.local_noun_pointer(0) = work_to_noun(NockCellComputeTail); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + NockCellComputeTail => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(NockCellCons); + *(stack.local_noun_pointer(1)) = res; + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }; + } + NockCellCons => { + unsafe { + let head = 
*stack.local_noun_pointer(1); + res = Cell::new(stack, head, res).as_noun(); + }; + stack.pop(&mut res); + } + Nock0Axis => { + if let Ok(atom) = unsafe { (*(stack.local_noun_pointer(1))).as_atom() } { + res = axis(subject, atom.as_bitslice()); + stack.pop(&mut res); + } else { + panic!("Axis must be atom"); + }; + } + Nock1Constant => { + unsafe { + res = *(stack.local_noun_pointer(1)); + } + stack.pop(&mut res); + } + Nock2ComputeSubject => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock2ComputeFormula); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + Nock2ComputeFormula => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock2ComputeResult); + *(stack.local_noun_pointer(1)) = res; + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }; + } + Nock2ComputeResult => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock2RestoreSubject); + *(stack.local_noun_pointer(2)) = subject; + subject = *(stack.local_noun_pointer(1)); + push_formula(stack, res); + }; + } + Nock2RestoreSubject => unsafe { + subject = *(stack.local_noun_pointer(2)); + stack.pop(&mut res); + }, + Nock3ComputeChild => unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock3ComputeType); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }, + Nock3ComputeType => { + res = unsafe { + if res.is_cell() { + DirectAtom::new_unchecked(0).as_atom().as_noun() + } else { + DirectAtom::new_unchecked(1).as_atom().as_noun() + } + }; + stack.pop(&mut res); + } + Nock4ComputeChild => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock4Increment); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + Nock4Increment => { + if let Ok(atom) = res.as_atom() { + res = inc(stack, atom).as_noun(); + stack.pop(&mut res); + } else { + panic!("Cannot increment (Nock 4) a cell"); + }; + } + Nock5ComputeLeftChild => { + unsafe { + 
*(stack.local_noun_pointer(0)) = work_to_noun(Nock5ComputeRightChild); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + Nock5ComputeRightChild => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock5TestEquals); + *(stack.local_noun_pointer(1)) = res; + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }; + } + Nock5TestEquals => { + unsafe { + let saved_value_ptr = stack.local_noun_pointer(1); + res = if unifying_equality(stack, &mut res, saved_value_ptr) { + DirectAtom::new_unchecked(0).as_atom().as_noun() + } else { + DirectAtom::new_unchecked(1).as_atom().as_noun() + }; + stack.pop(&mut res); + }; + } + Nock6ComputeTest => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock6ComputeBranch); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + Nock6ComputeBranch => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock6Done); + if let Left(direct) = res.as_either_direct_allocated() { + if direct.data() == 0 { + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + } else if direct.data() == 1 { + let formula = *stack.local_noun_pointer(3); + push_formula(stack, formula); + } else { + panic!("Test branch of Nock 6 must return 0 or 1"); + }; + } else { + panic!("Test branch of Nock 6 must return a direct atom"); + } + }; + } + Nock6Done => { + stack.pop(&mut res); + } + Nock7ComputeSubject => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock7ComputeResult); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + Nock7ComputeResult => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock7RestoreSubject); + *(stack.local_noun_pointer(1)) = subject; + subject = res; + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }; + } + Nock7RestoreSubject => { + unsafe { + subject = *(stack.local_noun_pointer(1)); + 
stack.pop(&mut res); + }; + } + Nock8ComputeSubject => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock8ComputeResult); + let formula = *stack.local_noun_pointer(1); + push_formula(stack, formula); + }; + } + Nock8ComputeResult => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock8RestoreSubject); + *(stack.local_noun_pointer(1)) = subject; + subject = Cell::new(stack, res, subject).as_noun(); + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }; + } + Nock8RestoreSubject => { + unsafe { + subject = *(stack.local_noun_pointer(2)); + stack.pop(&mut res); + }; + } + Nock9ComputeCore => { + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock9ComputeResult); + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }; + } + Nock9ComputeResult => { + unsafe { + if let Ok(formula_axis) = (*(stack.local_noun_pointer(1))).as_atom() { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock9RestoreSubject); + *(stack.local_noun_pointer(2)) = subject; + subject = res; + push_formula(stack, axis(subject, formula_axis.as_bitslice())); + } else { + panic!("Axis into core must be atom"); + } + }; + } + Nock9RestoreSubject => unsafe { + subject = *(stack.local_noun_pointer(2)); + stack.pop(&mut res); + }, + Nock10ComputeTree => unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock10ComputePatch); + let formula = *stack.local_noun_pointer(3); + push_formula(stack, formula); + }, + Nock10ComputePatch => unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock10Edit); + *(stack.local_noun_pointer(3)) = res; + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }, + Nock10Edit => unsafe { + if let Ok(edit_axis) = (*stack.local_noun_pointer(1)).as_atom() { + let tree = *stack.local_noun_pointer(3); + res = edit(stack, edit_axis.as_bitslice(), res, tree); + stack.pop(&mut res); + } + }, + Nock11ComputeHint => unsafe { + let hint = *stack.local_noun_pointer(1); 
+ if let Ok(hint_cell) = hint.as_cell() { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock11ComputeResult); + push_formula(stack, hint_cell.tail()); + } else { + panic!("IMPOSSIBLE: tried to compute a dynamic hint but hint is an atom"); + } + }, + Nock11ComputeResult => unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock11Done); + let formula = *stack.local_noun_pointer(2); + push_formula(stack, formula); + }, + Nock11Done => { + stack.pop(&mut res); + } + }; + } + res +} + +fn push_formula(stack: &mut NockStack, formula: Noun) { + if let Ok(formula_cell) = formula.as_cell() { + // Formula + match formula_cell.head().as_either_atom_cell() { + Right(_cell) => { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(NockCellComputeHead); + *(stack.local_noun_pointer(1)) = formula_cell.head(); + *(stack.local_noun_pointer(2)) = formula_cell.tail(); + } + } + Left(atom) => { + if let Ok(direct) = atom.as_direct() { + match direct.data() { + 0 => { + stack.push(2); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock0Axis); + *(stack.local_noun_pointer(1)) = formula_cell.tail(); + }; + } + 1 => { + stack.push(2); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock1Constant); + *(stack.local_noun_pointer(1)) = formula_cell.tail(); + }; + } + 2 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(Nock2ComputeSubject); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = arg_cell.tail(); + }; + } else { + panic!("Argument for Nock 2 must be cell"); + }; + } + 3 => { + stack.push(2); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock3ComputeChild); + *(stack.local_noun_pointer(1)) = formula_cell.tail(); + }; + } + 4 => { + stack.push(2); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock4ComputeChild); + *(stack.local_noun_pointer(1)) = formula_cell.tail(); + }; + } + 5 => 
{ + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(Nock5ComputeLeftChild); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = arg_cell.tail(); + }; + } else { + panic!("Argument for Nock 5 must be cell"); + }; + } + 6 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + if let Ok(branch_cell) = arg_cell.tail().as_cell() { + stack.push(4); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(Nock6ComputeTest); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = branch_cell.head(); + *(stack.local_noun_pointer(3)) = branch_cell.tail(); + } + } else { + panic!("Argument tail for Nock 6 must be cell"); + }; + } else { + panic!("Argument for Nock 6 must be cell"); + } + } + 7 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(Nock7ComputeSubject); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = arg_cell.tail(); + } + } else { + panic!("Argument for Nock 7 must be cell"); + }; + } + 8 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(Nock8ComputeSubject); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = arg_cell.tail(); + }; + } else { + panic!("Argument for Nock 8 must be cell"); + }; + } + 9 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = work_to_noun(Nock9ComputeCore); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = arg_cell.tail(); + }; + } else { + panic!("Argument for Nock 9 must be cell"); + }; + } + 10 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + if let Ok(patch_cell) = arg_cell.head().as_cell() { + 
stack.push(4); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(Nock10ComputeTree); + *(stack.local_noun_pointer(1)) = patch_cell.head(); + *(stack.local_noun_pointer(2)) = patch_cell.tail(); + *(stack.local_noun_pointer(3)) = arg_cell.tail(); + }; + } else { + panic!("Argument head for Nock 10 must be cell"); + }; + } else { + panic!("Argument for Nock 10 must be cell"); + }; + } + 11 => { + if let Ok(arg_cell) = formula_cell.tail().as_cell() { + stack.push(3); + unsafe { + *(stack.local_noun_pointer(0)) = + work_to_noun(if arg_cell.head().is_cell() { + Nock11ComputeHint + } else { + Nock11ComputeResult + }); + *(stack.local_noun_pointer(1)) = arg_cell.head(); + *(stack.local_noun_pointer(2)) = arg_cell.tail(); + }; + } else { + panic!("Argument for Nock 11 must be cell"); + }; + } + _ => { + panic!("Invalid opcode"); + } + } + } else { + panic!("Invalid opcode"); + } + } + } + } else { + panic!("Bad formula: atoms are not formulas"); + } +} + +fn axis(mut noun: Noun, axis: &BitSlice) -> Noun { + let mut cursor = if let Some(x) = axis.last_one() { + x + } else { + panic!("0 is not allowed as an axis") + }; + loop { + if cursor == 0 { + break; + }; + cursor -= 1; + if let Ok(cell) = noun.as_cell() { + if axis[cursor] { + noun = cell.tail(); + } else { + noun = cell.head(); + } + } else { + panic!("Axis tried to descend through atom."); + }; + } + noun +} + +fn edit( + stack: &mut NockStack, + edit_axis: &BitSlice, + patch: Noun, + mut tree: Noun, +) -> Noun { + let mut res = patch; + let mut dest: *mut Noun = &mut res; + let mut cursor = edit_axis + .last_one() + .expect("0 is not allowed as an edit axis"); + loop { + if cursor == 0 { + unsafe { + *dest = patch; + } + break; + }; + if let Ok(tree_cell) = tree.as_cell() { + cursor -= 1; + if edit_axis[cursor] { + unsafe { + let (cell, cellmem) = Cell::new_raw_mut(stack); + *dest = cell.as_noun(); + (*cellmem).head = tree_cell.head(); + dest = &mut ((*cellmem).tail); + } + tree = tree_cell.tail(); + } 
else { + unsafe { + let (cell, cellmem) = Cell::new_raw_mut(stack); + *dest = cell.as_noun(); + (*cellmem).tail = tree_cell.tail(); + dest = &mut ((*cellmem).head); + } + tree = tree_cell.tail(); + } + } else { + panic!("Invalid axis for edit"); + }; + } + res +} + +fn inc(stack: &mut NockStack, atom: Atom) -> Atom { + match atom.as_either() { + Left(direct) => Atom::new(stack, direct.data() + 1), + Right(indirect) => { + let indirect_slice = indirect.as_bitslice(); + match indirect_slice.first_zero() { + None => { + // all ones, make an indirect one word bigger + let (new_indirect, new_slice) = + unsafe { IndirectAtom::new_raw_mut_bitslice(stack, indirect.size() + 1) }; + new_slice.set(indirect_slice.len(), true); + new_indirect.as_atom() + } + Some(first_zero) => { + let (new_indirect, new_slice) = + unsafe { IndirectAtom::new_raw_mut_bitslice(stack, indirect.size()) }; + new_slice.set(first_zero, true); + new_slice[first_zero + 1..] + .copy_from_bitslice(&indirect_slice[first_zero + 1..]); + new_indirect.as_atom() + } + } + } + } +} diff --git a/rust/iron-planet/src/lib.rs b/rust/iron-planet/src/lib.rs index d9cb81c..55d0089 100644 --- a/rust/iron-planet/src/lib.rs +++ b/rust/iron-planet/src/lib.rs @@ -1,2 +1,7 @@ -pub mod memory; +#[macro_use] +extern crate num_derive; +pub mod interpreter; +pub mod mem; +pub mod mug; pub mod noun; +pub mod serialization; diff --git a/rust/iron-planet/src/main.rs b/rust/iron-planet/src/main.rs index e7a11a9..3957153 100644 --- a/rust/iron-planet/src/main.rs +++ b/rust/iron-planet/src/main.rs @@ -1,3 +1,48 @@ -fn main() { - println!("Hello, world!"); +use std::env; +use std::fs::File; +use std::fs::OpenOptions; +use std::mem; +use memmap::Mmap; +use memmap::MmapMut; +use std::ptr::copy_nonoverlapping; +use std::ptr::write_bytes; +use iron_planet::mem::NockStack; +use iron_planet::serialization::{cue,jam}; +use iron_planet::interpreter::interpret; +use iron_planet::noun::IndirectAtom; +use std::io; + +fn main() -> io::Result<()> 
{ + let filename = env::args().nth(1).expect("Must provide input filename"); + let output_filename = format!("{}.out", filename.clone()); + let f = File::open(filename)?; + let in_len = f.metadata()?.len(); + println!("in_len: {:?}", in_len); + let mut stack = NockStack::new( 8 << 10 << 10, 0 ); + let jammed_input = unsafe { + let in_map = Mmap::map(&f)?; + let word_len = (in_len + 7) >> 3; + println!("word_len: {:?}", word_len); + let (mut atom, dest) = IndirectAtom::new_raw_mut(&mut stack, word_len as usize); + write_bytes(dest.add(word_len as usize - 1), 0, 8); + copy_nonoverlapping(in_map.as_ptr(), dest as *mut u8, in_len as usize); + println!("dest[0]: {:?}", *(dest as *const u8)); + println!("dest[3]: {:?}", *((dest as *const u8).add(3))); + println!("dest[7]: {:?}", *((dest as *const u8).add(7))); + println!("dest[10]: {:?}", *((dest as *const u8).add(10))); + mem::drop(in_map); + atom.normalize_as_atom() + }; + let input = cue(&mut stack, jammed_input); + let input_cell = input.as_cell().expect("Input must be jam of subject/formula pair"); + let result = interpret(&mut stack, input_cell.head(), input_cell.tail()); + let jammed_result = jam(&mut stack, result); + let f_out = OpenOptions::new().write(true).open(output_filename)?; + f_out.set_len((jammed_result.size() << 3) as u64)?; + unsafe { + let mut out_map = MmapMut::map_mut(&f_out)?; + copy_nonoverlapping(jammed_result.data_pointer(), out_map.as_mut_ptr() as *mut u64, jammed_result.size()); + out_map.flush()?; + }; + Ok(()) } diff --git a/rust/iron-planet/src/mem.rs b/rust/iron-planet/src/mem.rs new file mode 100644 index 0000000..8f09618 --- /dev/null +++ b/rust/iron-planet/src/mem.rs @@ -0,0 +1,759 @@ +use crate::noun::{CellMemory, IndirectAtom, Noun, NounAllocator }; +use either::Either::{self, Left, Right}; +use libc::{c_void, memcmp}; +use std::mem; +use std::ptr; +use std::ptr::copy_nonoverlapping; +use memmap::MmapMut; + +/** Utility function to get size in words */ +pub const fn word_size_of() 
/** Utility function to get the size of a type in 64-bit words, rounding up.
 *
 * A zero-sized type occupies 0 words; any type of 1 to 8 bytes occupies 1 word, and so on.
 */
pub const fn word_size_of<T>() -> usize {
    (mem::size_of::<T>() + 7) >> 3
}
+ */ + pub fn new(size: usize, top_slots: usize) -> NockStack { + let mut memory = MmapMut::map_anon(size << 3).expect("Mapping memory for nockstack failed"); + let start = memory.as_ptr() as *const u64; + let frame_pointer = memory.as_mut_ptr() as *mut u64; + let stack_pointer = unsafe { frame_pointer.add(top_slots + 2) }; + unsafe { + *frame_pointer = frame_pointer.add(size) as u64; + *frame_pointer.add(1) = ptr::null::() as u64; + }; + NockStack { + start: start, + size: size, + polarity: Polarity::West, + stack_pointer: stack_pointer, + frame_pointer: frame_pointer, + memory: memory, + } + } + + /** Size **in 64-bit words** of this NockStack */ + pub fn size(&self) -> usize { + self.size + } + + /** Mutable pointer to a slot in a stack frame: east stack */ + unsafe fn slot_pointer_east(&mut self, slot: usize) -> *mut u64 { + self.frame_pointer.sub(slot + 1) + } + + /** Mutable pointer to a slot in a stack frame: west stack */ + unsafe fn slot_pointer_west(&mut self, slot: usize) -> *mut u64 { + self.frame_pointer.add(slot) + } + + /** Mutable pointer to a slot in a stack frame */ + unsafe fn slot_pointer(&mut self, slot: usize) -> *mut u64 { + match &self.polarity { + Polarity::East => self.slot_pointer_east(slot), + Polarity::West => self.slot_pointer_west(slot), + } + } + + /** Pointer to a local slot typed as Noun */ + pub unsafe fn local_noun_pointer(&mut self, local: usize) -> *mut Noun { + self.slot_pointer(local + 2) as *mut Noun + } + + /** Save the stack pointer for the previous frame in a slot of an east frame */ + unsafe fn save_prev_stack_pointer_to_local_east(&mut self, local: usize) { + *(self.slot_pointer_east(local + 2) as *mut *mut u64) = + *(self.previous_stack_pointer_pointer_east()) + } + + /** Save the stack pointer for the previous frame in a slot of a west frame */ + unsafe fn save_prev_stack_pointer_to_local_west(&mut self, local: usize) { + *(self.slot_pointer_west(local + 2) as *mut *mut u64) = + 
*(self.previous_stack_pointer_pointer_west()) + } + + /** Save the stack pointer for the previous frame in a slot */ + pub unsafe fn save_prev_stack_pointer_to_local(&mut self, local: usize) { + match &self.polarity { + Polarity::East => self.save_prev_stack_pointer_to_local_east(local), + Polarity::West => self.save_prev_stack_pointer_to_local_west(local), + } + } + + unsafe fn restore_prev_stack_pointer_from_local_east(&mut self, local: usize) { + *(self.previous_stack_pointer_pointer_east()) = + *(self.slot_pointer_east(local + 2) as *mut *mut u64); + } + + unsafe fn restore_prev_stack_pointer_from_local_west(&mut self, local: usize) { + *(self.previous_stack_pointer_pointer_east()) = + *(self.slot_pointer_east(local + 2) as *mut *mut u64); + } + + unsafe fn restore_prev_stack_pointer_from_local(&mut self, local: usize) { + match &self.polarity { + Polarity::East => self.restore_prev_stack_pointer_from_local_east(local), + Polarity::West => self.restore_prev_stack_pointer_from_local_west(local), + } + } + + unsafe fn prev_stack_pointer_equals_local_east(&mut self, local: usize) -> bool { + *(self.slot_pointer_east(local + 2) as *const *mut u64) == + *(self.previous_stack_pointer_pointer_east()) + } + + unsafe fn prev_stack_pointer_equals_local_west(&mut self, local: usize) -> bool { + *(self.slot_pointer_west(local + 2) as *const *mut u64) == + *(self.previous_stack_pointer_pointer_west()) + } + + /** Test the stack pointer for the previous frame against a slot */ + pub unsafe fn prev_stack_pointer_equals_local(&mut self, local: usize) -> bool { + match &self.polarity { + Polarity::East => self.prev_stack_pointer_equals_local_east(local), + Polarity::West => self.prev_stack_pointer_equals_local_west(local), + } + } + + unsafe fn alloc_in_previous_frame_west(&mut self) -> *mut T { + let prev_stack_pointer_pointer = self.previous_stack_pointer_pointer_west(); + // note that the allocation is on the east frame, and thus resembles raw_alloc_east + 
*prev_stack_pointer_pointer = (*prev_stack_pointer_pointer).sub(word_size_of::()); + *prev_stack_pointer_pointer as *mut T + } + + unsafe fn alloc_in_previous_frame_east(&mut self) -> *mut T { + let prev_stack_pointer_pointer = self.previous_stack_pointer_pointer_east(); + // note that the allocation is on the west frame, and thus resembles raw_alloc_west + let alloc = *(prev_stack_pointer_pointer); + *prev_stack_pointer_pointer = (*prev_stack_pointer_pointer).add(word_size_of::()); + alloc as *mut T + } + + pub unsafe fn alloc_in_previous_frame(&mut self) -> *mut T { + match &self.polarity { + Polarity::East => self.alloc_in_previous_frame_east(), + Polarity::West => self.alloc_in_previous_frame_west(), + } + } + + unsafe fn reclaim_in_previous_frame_east(&mut self) { + let prev_stack_pointer_pointer = self.previous_stack_pointer_pointer_east(); + *prev_stack_pointer_pointer = (*prev_stack_pointer_pointer).sub(word_size_of::()); + } + + unsafe fn reclaim_in_previous_frame_west(&mut self) { + let prev_stack_pointer_pointer = self.previous_stack_pointer_pointer_west(); + *prev_stack_pointer_pointer = (*prev_stack_pointer_pointer).add(word_size_of::()); + } + + /** Reclaim allocated space at the end of the previous stack frame. 
+ * This is unsafe because if we're not checking against a saved pointer, we could reclaim + * space used for noun allocations and cause them to be overwritten + */ + pub unsafe fn reclaim_in_previous_frame(&mut self) { + match &self.polarity { + Polarity::East => self.reclaim_in_previous_frame_east::(), + Polarity::West => self.reclaim_in_previous_frame_west::(), + } + } + + unsafe fn top_in_previous_frame_east(&mut self) -> *mut T { + let prev_stack_pointer_pointer = self.previous_stack_pointer_pointer_east(); + (*prev_stack_pointer_pointer).sub(word_size_of::()) as *mut T + } + + unsafe fn top_in_previous_frame_west(&mut self) -> *mut T { + let prev_stack_pointer_pointer = self.previous_stack_pointer_pointer_east(); + *prev_stack_pointer_pointer as *mut T + } + + /** Get a pointer to the top entry in the previous stack frame. + * + * Note that if the there are no entries the behavior is undefined. + */ + pub unsafe fn top_in_previous_frame(&mut self) -> *mut T { + match &self.polarity { + Polarity::East => self.top_in_previous_frame_east::(), + Polarity::West => self.top_in_previous_frame_west::(), + } + } + + /** Pointer to where the previous (west) stack pointer is saved in an east frame */ + unsafe fn previous_stack_pointer_pointer_east(&mut self) -> *mut *mut u64 { + self.slot_pointer_east(0) as *mut *mut u64 + } + + /** Pointer to where the previous (east) stack pointer is saved in a west frame */ + unsafe fn previous_stack_pointer_pointer_west(&mut self) -> *mut *mut u64 { + self.slot_pointer_west(0) as *mut *mut u64 + } + + /** Pointer to where the previous (west) frame pointer is saved in an east frame */ + unsafe fn previous_frame_pointer_pointer_east(&mut self) -> *mut *mut u64 { + self.slot_pointer_east(1) as *mut *mut u64 + } + + /** Pointer to where the previous (east) frame pointer is saved in a west frame */ + unsafe fn previous_frame_pointer_pointer_west(&mut self) -> *mut *mut u64 { + self.slot_pointer_west(1) as *mut *mut u64 + } + + /** Bump 
the stack pointer for an east frame to make space for an allocation */ + unsafe fn raw_alloc_east(&mut self, words: usize) -> *mut u64 { + self.stack_pointer = self.stack_pointer.sub(words); + self.stack_pointer + } + + /** Bump the stack pointer for a west frame to make space for an allocation */ + unsafe fn raw_alloc_west(&mut self, words: usize) -> *mut u64 { + let alloc = self.stack_pointer; + self.stack_pointer = self.stack_pointer.add(words); + alloc + } + + /** Allocate space for an indirect pointer in an east frame */ + unsafe fn indirect_alloc_east(&mut self, words: usize) -> *mut u64 { + self.raw_alloc_east(words + 2) + } + + /** Allocate space for an indirect pointer in a west frame */ + unsafe fn indirect_alloc_west(&mut self, words: usize) -> *mut u64 { + self.raw_alloc_west(words + 2) + } + + /** Allocate space for an indirect pointer in a stack frame */ + unsafe fn indirect_alloc(&mut self, words: usize) -> *mut u64 { + match &self.polarity { + Polarity::East => self.indirect_alloc_east(words), + Polarity::West => self.indirect_alloc_west(words), + } + } + + unsafe fn struct_alloc_east(&mut self, count: usize) -> *mut T { + self.raw_alloc_east(word_size_of::() * count) as *mut T + } + + unsafe fn struct_alloc_west(&mut self, count: usize) -> *mut T { + self.raw_alloc_west(word_size_of::() * count) as *mut T + } + + unsafe fn struct_alloc(&mut self, count: usize) -> *mut T { + match &self.polarity { + Polarity::East => self.struct_alloc_east::(count), + Polarity::West => self.struct_alloc_west::(count), + } + } + + /** Copy a result noun and its subnouns from an east frame to its parent west frame + * + * This is a fairly standard copying collector algorithm where the from arena is the current + * (east) frame, and the to arena is the parent (west) frame. + * + * There can be references outside the current frame, but since only the current frame will be + * discarded these can be left in place and not copied. 
Since there are no recursive or mutable + * references, there cannot be references from outside the current frame into the current + * frame. Thus, once we have copied out nouns which are reachable from the given result noun + * and are in the current frame, we are done. + * + * Since our to-space is the previous frame, we maintain a work stack at the end of the current + * frame, past the allocations. This is inverse from when we do a noun traversal generally, + * where we may want to allocate on the current frame, so we maintain a work stack adjacent to + * the previous frame. + */ + unsafe fn copy_east(&mut self, noun: &mut Noun) { + let noun_ptr = noun as *mut Noun; + let work_start = self.stack_pointer; + let mut other_stack_pointer = *self.previous_stack_pointer_pointer_east(); + self.stack_pointer = self.stack_pointer.sub(2); + *(self.stack_pointer as *mut Noun) = *noun; + *(self.stack_pointer.add(1) as *mut *mut Noun) = noun_ptr; + loop { + if self.stack_pointer == work_start { + break; + } + + // Pop a noun to copy from the stack + let next_noun = *(self.stack_pointer as *const Noun); + let next_dest = *(self.stack_pointer.add(1) as *const *mut Noun); + self.stack_pointer = self.stack_pointer.add(2); + + // If it's a direct atom, just write it to the destination + // Otherwise we have allocations to make + match next_noun.as_either_direct_allocated() { + Either::Left(_direct) => { + *next_dest = next_noun; + } + Either::Right(allocated) => { + // If it's an allocated noun with a forwarding pointer, just write the + // noun resulting from the forwarding pointer to the destination + // + // Otherwise, we have to allocate space for and copy the allocated noun + match allocated.forwarding_pointer() { + Option::Some(new_allocated) => { + *next_dest = new_allocated.as_noun(); + } + Option::None => { + if (allocated.to_raw_pointer() as *const u64) > work_start + && (allocated.to_raw_pointer() as *const u64) < self.frame_pointer + { + match allocated.as_either() { 
+ Either::Left(mut indirect) => { + // Make space for the atom + let new_indirect_alloc = other_stack_pointer; + other_stack_pointer = + other_stack_pointer.add(indirect_raw_size(indirect)); + + // Indirect atoms can be copied directly + copy_nonoverlapping( + indirect.to_raw_pointer(), + new_indirect_alloc, + indirect_raw_size(indirect), + ); + + // Set a forwarding pointer so we don't create duplicates from other + // references + indirect.set_forwarding_pointer(new_indirect_alloc); + + *next_dest = + IndirectAtom::from_raw_pointer(new_indirect_alloc) + .as_noun(); + } + Either::Right(mut cell) => { + // Make space for the cell + let new_cell_alloc = other_stack_pointer as *mut CellMemory; + other_stack_pointer = + other_stack_pointer.add(word_size_of::()); + + // Copy the cell metadata + (*new_cell_alloc).metadata = + (*cell.to_raw_pointer()).metadata; + + // Set the forwarding pointer + cell.set_forwarding_pointer(new_cell_alloc); + + // Push the tail and the head to the work stack + self.stack_pointer = self.stack_pointer.sub(4); + *(self.stack_pointer as *mut Noun) = cell.tail(); + *(self.stack_pointer.add(1) as *mut *mut Noun) = + &mut (*new_cell_alloc).tail; + *(self.stack_pointer.add(2) as *mut Noun) = cell.head(); + *(self.stack_pointer.add(3) as *mut *mut Noun) = + &mut (*new_cell_alloc).head; + } + } + } else { + *next_dest = allocated.as_noun(); // Don't copy references outside the current frame + } + } + } + } + } + } + *self.previous_stack_pointer_pointer_east() = other_stack_pointer; + } + + /** Copy a result noun and its subnouns from a west frame to its parent east frame + * + * This is a fairly standard copying collector algorithm where the from arena is the current + * (west) frame, and the to arena is the parent (east) frame. + * + * There can be references outside the current frame, but since only the current frame will be + * discarded these can be left in place and not copied. 
Since there are no recursive or mutable + * references, there cannot be references from outside the current frame into the current + * frame. Thus, once we have copied out nouns which are reachable from the given result noun + * and are in the current frame, we are done. + * + * Since our to-space is the previous frame, we maintain a work stack at the end of the current + * frame, past the allocations. This is inverse from when we do a noun traversal generally, + * where we may want to allocate on the current frame, so we maintain a work stack adjacent to + * the previous frame. + */ + unsafe fn copy_west(&mut self, noun: &mut Noun) { + let noun_ptr = noun as *mut Noun; + let work_start = self.stack_pointer; + let mut other_stack_pointer = *self.previous_stack_pointer_pointer_west(); + self.stack_pointer = self.stack_pointer.add(2); + *(self.stack_pointer.sub(2) as *mut Noun) = *noun; + *(self.stack_pointer.sub(1) as *mut *mut Noun) = noun_ptr; + loop { + if self.stack_pointer == work_start { + break; + } + + // Pop a noun to copy from the stack + let next_noun = *(self.stack_pointer.sub(2) as *const Noun); + let next_dest = *(self.stack_pointer.sub(1) as *const *mut Noun); + self.stack_pointer = self.stack_pointer.sub(2); + + // If it's a direct atom, just write it to the destination. + // Otherwise we have allocations to make. 
+ match next_noun.as_either_direct_allocated() { + Either::Left(_direct) => { + *next_dest = next_noun; + } + Either::Right(allocated) => { + // If it's an allocated noun with a forwarding pointer, just write the + // noun resulting from the forwarding pointer to the destination + // + // Otherwise, we have to allocate space for and copy the allocated noun + match allocated.forwarding_pointer() { + Option::Some(new_allocated) => { + *next_dest = new_allocated.as_noun(); + } + Option::None => { + if (allocated.to_raw_pointer() as *const u64) < work_start + && (allocated.to_raw_pointer() as *const u64) > self.frame_pointer + { + match allocated.as_either() { + Either::Left(mut indirect) => { + // Make space for the atom + other_stack_pointer = + other_stack_pointer.sub(indirect_raw_size(indirect)); + let new_indirect_alloc = other_stack_pointer; + + // Indirect atoms can be copied directly + copy_nonoverlapping( + indirect.to_raw_pointer(), + new_indirect_alloc, + indirect_raw_size(indirect), + ); + + // Set a forwarding pointer so we don't create duplicates + // from other references + indirect.set_forwarding_pointer(new_indirect_alloc); + + *next_dest = + IndirectAtom::from_raw_pointer(new_indirect_alloc) + .as_noun(); + } + Either::Right(mut cell) => { + // Make space for the cell + other_stack_pointer = + other_stack_pointer.sub(word_size_of::()); + let new_cell_alloc = other_stack_pointer as *mut CellMemory; + + // Copy the cell metadata + (*new_cell_alloc).metadata = + (*cell.to_raw_pointer()).metadata; + + // Set the forwarding pointer + cell.set_forwarding_pointer(new_cell_alloc); + + *(self.stack_pointer as *mut Noun) = cell.tail(); + *(self.stack_pointer.add(1) as *mut *mut Noun) = + &mut (*new_cell_alloc).tail; + *(self.stack_pointer.add(2) as *mut Noun) = cell.head(); + *(self.stack_pointer.add(3) as *mut *mut Noun) = + &mut (*new_cell_alloc).head; + self.stack_pointer = self.stack_pointer.add(4); + } + } + } else { + *next_dest = allocated.as_noun(); // 
Don't copy references outside the current frame + } + } + } + } + } + } + *self.previous_stack_pointer_pointer_west() = other_stack_pointer; + } + + /** Pop a frame from the (east) stack, providing a result, which will be copied to the return target + * (west) frame. */ + unsafe fn pop_east(&mut self, result: &mut Noun) { + self.copy_east(result); + self.pop_no_copy_east(); + } + + unsafe fn pop_no_copy_east(&mut self) { + self.stack_pointer = *self.previous_stack_pointer_pointer_east(); + self.frame_pointer = *self.previous_frame_pointer_pointer_east(); + self.polarity = Polarity::West; + } + + /** Pop a frame from the (west) stack, providing a result, which will be copied to the return target + * (east) frame. */ + unsafe fn pop_west(&mut self, result: &mut Noun) { + self.copy_west(result); + self.pop_no_copy_west(); + } + + unsafe fn pop_no_copy_west(&mut self) { + self.stack_pointer = *self.previous_stack_pointer_pointer_west(); + self.frame_pointer = *self.previous_frame_pointer_pointer_west(); + self.polarity = Polarity::East; + } + + pub unsafe fn pop_no_copy(&mut self) { + match &self.polarity { + Polarity::East => self.pop_no_copy_east(), + Polarity::West => self.pop_no_copy_west(), + } + } + + /** Pop a frame from the stack, providing a result, which will be copied to the return target + * frame. */ + pub fn pop(&mut self, result: &mut Noun) { + unsafe { + match &self.polarity { + Polarity::East => self.pop_east(result), + Polarity::West => self.pop_west(result), + }; + } + } + + /** Push a frame onto the west stack with 0 or more local variable slots. + * + * (The method is `push_east` because the naming convention refers to the beginning state of + * the stack, not the final state.) 
+ */ + unsafe fn push_east(&mut self, num_locals: usize) { + let previous_stack_pointer: *mut u64 = *self.previous_stack_pointer_pointer_east(); + *previous_stack_pointer = self.stack_pointer as u64; + *(previous_stack_pointer.add(1)) = self.frame_pointer as u64; + self.stack_pointer = previous_stack_pointer.add(num_locals + 2); + self.frame_pointer = previous_stack_pointer; + self.polarity = Polarity::West; + } + + /** Push a frame onto the east stack with 0 or more local variable slots. + * + * (The method is `push_west` because the naming convention refers to the beginning state of the + * stack, not the final state.) + */ + unsafe fn push_west(&mut self, num_locals: usize) { + let previous_stack_pointer: *mut u64 = *self.previous_stack_pointer_pointer_west(); + *(previous_stack_pointer.sub(1)) = self.stack_pointer as u64; + *(previous_stack_pointer.sub(2)) = self.frame_pointer as u64; + self.stack_pointer = previous_stack_pointer.sub(num_locals + 2); + self.frame_pointer = previous_stack_pointer; + self.polarity = Polarity::East; + } + + /** Push a frame onto the stack with 0 or more local variable slots. */ + pub fn push(&mut self, num_locals: usize) { + unsafe { + match &self.polarity { + Polarity::East => self.push_east(num_locals), + Polarity::West => self.push_west(num_locals), + } + } + } +} + +/** Unifying equality compares nouns for equality in the obvious way, and replaces a noun pointing + * to a more junior allocation with a noun pointing to a more senior allocation if the two are + * equal. + * + * This function is unsafe because it demands that all atoms be normalized: direct and indirect atoms + * will be considered non-equal without comparing their values, and indirects of different sizes + * will be considered non-equal. 
+ * + * TODO: we really should try to tie lifetimes into the stack and use mut references instead of raw + * pointers wherever we can, but this is hard and delaying progress right now + */ +pub unsafe fn unifying_equality(stack: &mut NockStack, a: *mut Noun, b: *mut Noun) -> bool { + stack.push(1); + stack.save_prev_stack_pointer_to_local(0); + *(stack.alloc_in_previous_frame()) = (a, b); + loop { + if stack.prev_stack_pointer_equals_local(0) { + break; + } else { + let (x, y) : (*mut Noun, *mut Noun) = *(stack.top_in_previous_frame()); + match ( + (*x).as_either_direct_allocated(), + (*y).as_either_direct_allocated(), + ) { + (Left(x_direct), Left(y_direct)) => { + if x_direct.data() == y_direct.data() { + stack.reclaim_in_previous_frame::<(*mut Noun, *mut Noun)>(); + continue; + } else { + break; + } + } + (Right(x_alloc), Right(y_alloc)) => { + match (x_alloc.get_cached_mug(), y_alloc.get_cached_mug()) { + (Some(x_mug), Some(y_mug)) => { + if x_mug != y_mug { + break; // short-circuit, the mugs differ therefore the nouns must differ + } + } + _ => {} + }; + match (x_alloc.as_either(), y_alloc.as_either()) { + (Left(x_indirect), Left(y_indirect)) => { + let x_as_ptr = x_indirect.to_raw_pointer(); + let y_as_ptr = y_indirect.to_raw_pointer(); + if x_as_ptr == y_as_ptr { + stack.reclaim_in_previous_frame::<(*mut Noun, *mut Noun)>(); + continue; + } else if x_indirect.size() == y_indirect.size() + && memcmp( + x_indirect.data_pointer() as *const c_void, + y_indirect.data_pointer() as *const c_void, + indirect_raw_size(x_indirect), + ) == 0 + { + let (_senior, junior) = + senior_pointer_first(stack, x_as_ptr, y_as_ptr); + // unify + if x_as_ptr == junior { + *x = *y; + } else { + *y = *x; + } + stack.reclaim_in_previous_frame::<(*mut Noun, *mut Noun)>(); + continue; + } else { + break; + } + } + (Right(x_cell), Right(y_cell)) => { + let x_as_ptr = x_cell.to_raw_pointer(); + let y_as_ptr = y_cell.to_raw_pointer(); + if x_as_ptr == y_as_ptr { + continue; + } else { + 
if x_cell.head().raw_equals(y_cell.head()) + && x_cell.tail().raw_equals(y_cell.tail()) + { + let (_senior, junior) = + senior_pointer_first(stack, x_as_ptr, y_as_ptr); + if x_as_ptr == junior { + *x = *y; + } else { + *y = *x; + } + stack.pop_no_copy(); + continue; + } else { + *(stack.alloc_in_previous_frame()) = (x_cell.tail_as_mut(), y_cell.tail_as_mut()); + *(stack.alloc_in_previous_frame()) = (x_cell.head_as_mut(), y_cell.tail_as_mut()); + continue; + } + } + } + (_, _) => { + break; + } + } + } + (_, _) => { + break; + } + } + } + } + stack.restore_prev_stack_pointer_from_local(0); + stack.pop_no_copy(); + (*a).raw_equals(*b) +} + +unsafe fn senior_pointer_first( + stack: &NockStack, + a: *const T, + b: *const T, +) -> (*const T, *const T) { + let mut polarity = stack.polarity; + let mut frame_pointer = stack.frame_pointer as *const u64; + let (mut high_pointer, mut low_pointer) = match polarity { + Polarity::East => ( + stack.frame_pointer as *const T, + stack.stack_pointer as *const T, + ), + Polarity::West => ( + stack.stack_pointer as *const T, + stack.frame_pointer as *const T, + ), + }; + loop { + if a < high_pointer && a >= low_pointer { + // a is in the current frame + if b < high_pointer && b >= low_pointer { + // so is b, pick arbitrarily + break (a, b); + } else { + // b is not, so b must be further up, b is senior + break (b, a); + } + } else { + // a is not in the current frame + if b < high_pointer && b >= low_pointer { + // b is, a is senior + break (a, b); + } else { + // chase up the stack + if (frame_pointer as *const u64) == stack.start { + // we found the top of the stack! 
+ break (a, b); // both are out of the stack, pick arbitrarily + } else { + match polarity { + Polarity::East => { + high_pointer = *(frame_pointer.sub(2)) as *const T; + low_pointer = *(frame_pointer.sub(1)) as *const T; + frame_pointer = *(frame_pointer.sub(1)) as *const u64; + polarity = Polarity::West; + continue; + } + Polarity::West => { + high_pointer = *frame_pointer as *const T; + low_pointer = *(frame_pointer.add(1)) as *const T; + frame_pointer = *frame_pointer as *const u64; + polarity = Polarity::West; + continue; + } + } + } + } + } + } +} + +impl NounAllocator for NockStack { + unsafe fn alloc_indirect(&mut self, words: usize) -> *mut u64 { + self.indirect_alloc(words) + } + + unsafe fn alloc_cell(&mut self) -> *mut CellMemory { + self.struct_alloc::(1) + } +} + diff --git a/rust/iron-planet/src/memory.rs b/rust/iron-planet/src/memory.rs deleted file mode 100644 index 287d8dc..0000000 --- a/rust/iron-planet/src/memory.rs +++ /dev/null @@ -1,582 +0,0 @@ -#![allow(dead_code)] - -use memmap::MmapMut; -use std::{mem, ptr::copy_nonoverlapping}; - -/// Tag bits for a direct atom -const DIRECT: u64 = 0x0; - -/// Tag mask for a direct atom ( -const DIRECT_MASK: u64 = 0x8000000000000000; - -/// Maximum direct atom -const DIRECT_MAX: u64 = 0x7FFFFFFFFFFFFFFF; - -/// Tag bits for an indirect atom -const INDIRECT: u64 = 0x8000000000000000; - -/// Tag mask for an indirect atom -const INDIRECT_MASK: u64 = 0xC000000000000000; - -/// Tag bits for a cell -const CELL: u64 = 0xC000000000000000; - -/// Tag mask for a cell -const CELL_MASK: u64 = 0xE000000000000000; - -/// Tag bits for a forwarding pointer -const FORWARD: u64 = 0xE000000000000000; - -/// Tag mask for a forwarding pointer -const FORWARD_MASK: u64 = 0xE000000000000000; - -/// Mask to extract a pointer if not shifting -const PTR_MASK: u64 = 0x1FFFFFFFFFFFFFFF; - -/// Various pointer-related methods. 
-trait Ptr { - fn as_ptr(&self) -> *const u64; - - fn as_mut_ptr(&self) -> *mut u64 { - self.as_ptr() as *mut u64 - } - - /// Extract a forwarding pointer. - fn forward_ptr(&self) -> Option; -} - -/// Annotated 64-bit direct atom pointer. -#[derive(Clone, Copy)] -struct DirectAtom(u64); - -impl DirectAtom { - // Peter: this fn replaces direct(). - fn new(val: u64) -> Result { - if val <= DIRECT_MAX { - Ok(Self(val)) - } else { - Err(()) - } - } -} - -/// Annotated 64-bit indirect atom pointer. -#[derive(Clone, Copy)] -struct IndirectAtom(u64); - -impl IndirectAtom { - // Peter: this fn replaces indirect_1(). - fn new(stack: &mut NockStack, atom: u64) -> Self { - let indirect_dest = stack.alloc(2); - unsafe { - *indirect_dest = 8; - *(indirect_dest.add(1)) = atom; - } - Self((indirect_dest as u64) >> 3 | INDIRECT) - } - - /// Size in 64-bit words. - // Peter: this fn replaces indirect_size_unchecked(). - fn size(&self) -> u64 { - unsafe { *self.as_ptr() << 3 } - } - - // Peter: this fn replaces indirect_data_unchecked(). - fn data(&self) -> *const u64 { - unsafe { self.as_ptr().add(1) } - } -} - -impl Ptr for IndirectAtom { - fn as_ptr(&self) -> *const u64 { - (self.0 << 3) as *const u64 - } - - // Peter: this fn replaces is_forward() and indirect_forwarded_unchecked(). - fn forward_ptr(&self) -> Option { - let raw_sz = unsafe { *self.as_ptr() }; - if raw_sz & FORWARD_MASK == FORWARD { - Some(raw_sz & PTR_MASK | INDIRECT) - } else { - None - } - } -} - -/// Annotated 64-bit cell pointer. -#[derive(Clone, Copy)] -struct Cell(u64); - -impl Cell { - // Peter: this fn replaces cell(). - fn new(stack: &mut NockStack, head: Noun, tail: Noun) -> Self { - let cell_dest = stack.alloc(2); - unsafe { - *cell_dest = head.raw; - *(cell_dest.add(1)) = tail.raw; - } - Self((cell_dest as u64) >> 3 | CELL) - } - - // Peter: this fn replaces cell_head_unchecked(). 
- fn head(&self) -> Noun { - let raw = unsafe { *((self.0 << 3) as *const u64) }; - Noun { raw } - } - - // Peter: this fn replaces cell_tail_unchecked(). - fn tail(&self) -> Noun { - let raw = unsafe { *(((self.0 << 3) as *const u64).add(1)) }; - Noun { raw } - } -} - -impl Ptr for Cell { - fn as_ptr(&self) -> *const u64 { - (self.0 << 3) as *const u64 - } - - // Peter: this fn replaces is_forward() and cell_forwarded_unchecked(). - fn forward_ptr(&self) -> Option { - let head = unsafe { self.head().raw }; - if head & FORWARD_MASK == FORWARD { - Some(head & PTR_MASK | INDIRECT) - } else { - None - } - } -} - -/// Annotated 64-bit pointer. -#[derive(Clone, Copy)] -#[repr(C)] -union Noun { - raw: u64, - direct_atom: DirectAtom, - indirect_atom: IndirectAtom, - cell: Cell, -} - -impl Noun { - // Peter: this fn replaces direct(). - fn is_direct_atom(&self) -> bool { - unsafe { self.raw & DIRECT_MASK == DIRECT } - } - - fn as_direct_atom(&self) -> Result { - if self.is_direct_atom() { - unsafe { Ok(self.direct_atom) } - } else { - Err(()) - } - } - - // Peter: this fn replaces indirect_1(). - fn is_indirect_atom(&self) -> bool { - unsafe { self.raw & INDIRECT_MASK == INDIRECT } - } - - fn as_indirect_atom(&self) -> Result { - if self.is_indirect_atom() { - unsafe { Ok(self.indirect_atom) } - } else { - Err(()) - } - } - - // Peter: this fn replaces is_cell(). 
- fn is_cell(&self) -> bool { - unsafe { self.raw & CELL_MASK == CELL } - } - - fn as_cell(&self) -> Result { - if self.is_cell() { - unsafe { Ok(self.cell) } - } else { - Err(()) - } - } -} - -impl Ptr for Noun { - fn as_ptr(&self) -> *const u64 { - unsafe { (self.raw << 3) as *const u64 } - } - - fn forward_ptr(&self) -> Option { - None - } -} - -/// Current direction of the stack -enum Polarity { - /// Current frame is lowest in high memory - East, - /// Current frame is highest in low memory - West, -} - -/// Structure representing a Nock computational stack (1 per thread) -struct NockStack { - sp: *mut u64, - fp: *mut u64, - polarity: Polarity, - _map: MmapMut, -} - -impl NockStack { - /// Given the size *in noun-words*, memory-map space for a Nock stack. - // Peter: this fn replaces map_nock_stack(). - fn new(size: usize) -> Result { - let bytesize = size * mem::size_of::(); - let mut map = MmapMut::map_anon(bytesize)?; - unsafe { - let fp: *mut u64 = map.as_mut_ptr() as *mut u64; - let sp: *mut u64 = fp.add(2); - // Peter: does it make more sense to store `size` at the base of the stack rather than - // the end address of the stack? - *fp = fp.add(size) as u64; - Ok(Self { - sp: sp, - fp: fp, - polarity: Polarity::West, - _map: map, - }) - } - } - - // Peter: this fn replaces slot_west(). - fn slot_west(&self, slot: usize) -> *mut u64 { - unsafe { self.fp.add(slot) } - } - - // Peter: this fn replaces slot_east(). - fn slot_east(&self, slot: usize) -> *mut u64 { - unsafe { self.fp.sub(slot + 1) } - } - - /// Get a pointer to a slot in a frame - // Peter: this fn replaces slot(). - fn slot(&self, slot: usize) -> *mut u64 { - match self.polarity { - Polarity::West => self.slot_west(slot), - Polarity::East => self.slot_east(slot), - } - } - - /// Get a pointer to a local variable slot in a frame - // Peter: this fn replaces local(). - fn local(&self, local: usize) -> *mut u64 { - self.slot(local + 2) - } - - // Peter: this fn replaces push_west(). 
- fn push_west(&mut self, slots: usize) { - unsafe { - let east_sp_new_fp: *mut u64 = *(self.slot_west(0)) as *mut u64; - let new_east_sp: *mut u64 = east_sp_new_fp.sub(slots + 2); - *(east_sp_new_fp.sub(1)) = self.sp as u64; - *(east_sp_new_fp.sub(2)) = self.fp as u64; - self.fp = east_sp_new_fp; - self.sp = new_east_sp; - self.polarity = Polarity::East; - } - } - - // Peter: this fn replaces push_east(). - fn push_east(&mut self, slots: usize) { - unsafe { - let west_sp_new_fp: *mut u64 = *(self.slot_east(0)) as *mut u64; - let new_west_sp: *mut u64 = west_sp_new_fp.add(slots + 2); - *(west_sp_new_fp) = self.sp as u64; - *(west_sp_new_fp.add(1)) = self.fp as u64; - self.fp = west_sp_new_fp; - self.sp = new_west_sp; - self.polarity = Polarity::West; - } - } - - /// Push a new frame - // Peter: this fn replaces push(). - fn push(&mut self, slots: usize) { - match self.polarity { - Polarity::West => self.push_west(slots), - Polarity::East => self.push_east(slots), - } - } - - // Peter: this fn replaces alloc_west(). - fn alloc_west(&mut self, size: usize) -> *mut u64 { - unsafe { - let base = self.sp; - self.sp = self.sp.add(size); - base - } - } - - // Peter: this fn replaces alloc_east(). - fn alloc_east(&mut self, size: usize) -> *mut u64 { - unsafe { - let base = self.sp.sub(size); - self.sp = base; - base - } - } - - /// Allocate on the stack - // Peter: this fn replaces alloc(). - fn alloc(&mut self, size: usize) -> *mut u64 { - match self.polarity { - Polarity::West => self.alloc_west(size), - Polarity::East => self.alloc_east(size), - } - } - - // Peter: this fn replaces copy_east(). 
- fn copy_east(&mut self, root: Noun) -> Noun { - unsafe { - let mut west_sp = *(self.slot_east(0)) as *mut u64; - let lower_bound_inclusive: *const u64 = self.sp; - let upper_bound_exclusive: *const u64 = self.fp; - let mut copy_stack_top: *mut u64 = self.sp; - let res = if let Ok(_) = root.as_direct_atom() { - root - } else if root.as_ptr() < lower_bound_inclusive { - root - } else if root.as_ptr() >= upper_bound_exclusive { - root - } else if let Ok(atom) = root.as_indirect_atom() { - let sz: usize = (atom.size() + 1) as usize; - let base: *mut u64 = west_sp; - west_sp = west_sp.add(sz); - copy_nonoverlapping(atom.as_mut_ptr(), base, sz); - *(atom.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - Noun { - raw: (base as u64) >> 3 | INDIRECT, - } - } else if let Ok(cell) = root.as_cell() { - let base: *mut u64 = west_sp; - west_sp = west_sp.add(2); - copy_stack_top = copy_stack_top.sub(4); - *copy_stack_top = cell.head().raw; - *(copy_stack_top.add(1)) = base as u64; - *(copy_stack_top.add(2)) = cell.tail().raw; - *(copy_stack_top.add(3)) = (base.add(1)) as u64; - *(cell.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - Noun { - raw: (base as u64) >> 3 | CELL, - } - } else { - panic!("no tag matches"); - }; - loop { - if (copy_stack_top as *const u64) == lower_bound_inclusive { - break; - } - let noun = Noun { - raw: *copy_stack_top, - }; - let dest: *mut u64 = *(copy_stack_top.add(1)) as *mut u64; - copy_stack_top = copy_stack_top.add(2); - if let Ok(atom) = noun.as_direct_atom() { - *dest = atom.0 - } else if noun.as_ptr() < lower_bound_inclusive { - *dest = noun.raw - } else if noun.as_ptr() >= upper_bound_exclusive { - *dest = noun.raw - } else if let Ok(atom) = noun.as_indirect_atom() { - match atom.forward_ptr() { - Some(fwd) => *dest = fwd, - None => { - let sz: usize = (atom.size() + 1) as usize; - let base: *mut u64 = west_sp; - west_sp = west_sp.add(sz); - copy_nonoverlapping(atom.as_mut_ptr(), base, sz); - *(atom.as_mut_ptr()) = (base as u64) >> 3 | 
FORWARD; - *dest = (base as u64) >> 3 | INDIRECT; - } - } - } else if let Ok(cell) = noun.as_cell() { - match cell.forward_ptr() { - Some(fwd) => *dest = fwd, - None => { - let base: *mut u64 = west_sp; - west_sp = west_sp.add(2); - copy_stack_top = copy_stack_top.sub(4); - *copy_stack_top = cell.head().raw; - *(copy_stack_top.add(1)) = base as u64; - *(copy_stack_top.add(2)) = cell.tail().raw; - *(copy_stack_top.add(3)) = (base.add(1)) as u64; - *(cell.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - *dest = (base as u64) >> 3 | CELL; - } - } - } else { - panic!("no tag matches"); - } - } - *(self.slot_east(0)) = west_sp as u64; - res - } - } - - // Peter: this fn replaces copy_west(). - fn copy_west(&mut self, root: Noun) -> Noun { - unsafe { - let mut east_sp: *mut u64 = *(self.slot_west(0)) as *mut u64; - let lower_bound_inclusive: *const u64 = self.fp; - let upper_bound_exclusive: *const u64 = self.sp; - let mut copy_stack_top: *mut u64 = self.sp; - let res = if let Ok(_) = root.as_direct_atom() { - root - } else if root.as_ptr() < lower_bound_inclusive { - root - } else if root.as_ptr() >= upper_bound_exclusive { - root - } else if let Ok(atom) = root.as_indirect_atom() { - let sz: usize = (atom.size() + 1) as usize; - east_sp = east_sp.sub(sz); - let base: *mut u64 = east_sp; - copy_nonoverlapping(atom.as_mut_ptr(), base, sz); - *(atom.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - Noun { - raw: (base as u64) >> 3 | INDIRECT, - } - } else if let Ok(cell) = root.as_cell() { - east_sp = east_sp.sub(2); - let base: *mut u64 = east_sp; - copy_stack_top = copy_stack_top.add(4); - *copy_stack_top = root.cell.head().raw; - *(copy_stack_top.add(1)) = base as u64; - *(copy_stack_top.add(2)) = cell.tail().raw; - *(copy_stack_top.add(3)) = (base.add(1)) as u64; - *(cell.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - Noun { - raw: (base as u64) >> 3 | CELL, - } - } else { - panic!("no tag matches") - }; - loop { - if (copy_stack_top as *const u64) == 
upper_bound_exclusive { - break; - } - let noun = Noun { - raw: *copy_stack_top, - }; - let dest: *mut u64 = *(copy_stack_top.add(1)) as *mut u64; - copy_stack_top = copy_stack_top.sub(2); - if let Ok(atom) = noun.as_direct_atom() { - *dest = atom.0 - } else if noun.as_ptr() < lower_bound_inclusive { - *dest = noun.raw - } else if noun.as_ptr() >= upper_bound_exclusive { - *dest = noun.raw - } else if let Ok(atom) = noun.as_indirect_atom() { - match atom.forward_ptr() { - Some(fwd) => *dest = fwd, - None => { - let sz: usize = (atom.size() + 1) as usize; - east_sp = east_sp.sub(sz); - let base: *mut u64 = east_sp; - copy_nonoverlapping(atom.as_mut_ptr(), base, sz); - *(atom.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - *dest = (base as u64) >> 3 | INDIRECT; - } - } - } else if let Ok(cell) = noun.as_cell() { - match cell.forward_ptr() { - Some(fwd) => *dest = fwd, - None => { - east_sp = east_sp.sub(2); - let base: *mut u64 = east_sp; - copy_stack_top = copy_stack_top.add(4); - *copy_stack_top = cell.head().raw; - *(copy_stack_top.add(1)) = base as u64; - *(copy_stack_top.add(2)) = cell.tail().raw; - *(copy_stack_top.add(3)) = (base.add(1)) as u64; - *(cell.as_mut_ptr()) = (base as u64) >> 3 | FORWARD; - *dest = (base as u64) >> 3 | CELL; - } - } - } else { - panic!("no tag matches") - } - } - *(self.slot_west(0)) = east_sp as u64; - res - } - } - - // Peter: this fn replaces pop_west(). - fn pop_west(&mut self, root: Noun) -> Noun { - unsafe { - let res = self.copy_west(root); - self.sp = *(self.slot_west(0)) as *mut u64; - self.fp = *(self.slot_west(1)) as *mut u64; - self.polarity = Polarity::East; - res - } - } - - // Peter: this fn replaces pop_east(). - fn pop_east(&mut self, root: Noun) -> Noun { - unsafe { - let res = self.copy_east(root); - self.sp = *(self.slot_east(0)) as *mut u64; - self.fp = *(self.slot_east(1)) as *mut u64; - self.polarity = Polarity::West; - res - } - } - - // Peter: this fn replaces pop(). 
- fn pop(&mut self, root: Noun) -> Noun { - match self.polarity { - Polarity::East => self.pop_east(root), - Polarity::West => self.pop_west(root), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn type_size() { - // DirectAtom. - { - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - let a = DirectAtom(107); - assert_eq!(std::mem::size_of::(), std::mem::size_of_val(&a)); - } - - // IndirectAtom. - { - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - let a = IndirectAtom(110); - assert_eq!(std::mem::size_of::(), std::mem::size_of_val(&a)); - } - - // Cell. - { - assert_eq!(std::mem::size_of::(), std::mem::size_of::()); - let c = Cell(140); - assert_eq!(std::mem::size_of::(), std::mem::size_of_val(&c)); - } - - // Noun. - { - assert_eq!(std::mem::size_of::(), std::mem::size_of::()); - let n = Noun { raw: 242 }; - assert_eq!(std::mem::size_of::(), std::mem::size_of_val(&n)); - } - } -} diff --git a/rust/iron-planet/src/mug.rs b/rust/iron-planet/src/mug.rs new file mode 100644 index 0000000..32c9815 --- /dev/null +++ b/rust/iron-planet/src/mug.rs @@ -0,0 +1,235 @@
use crate::mem::*;
use crate::noun::{Allocated, Atom, DirectAtom, Noun};
use either::Either::*;
use murmur3::murmur3_32;
use std::cmp::min;
use std::io::{Read, Result};
use std::ptr::{copy_nonoverlapping, write_bytes};

/** Where the bytes of the atom being read live. */
enum AtomSource {
    /// A direct atom, owned by the reader so its bytes stay valid for the reader's lifetime.
    Direct(DirectAtom),
    /// Pointer to the data words of an indirect atom, viewed as bytes.
    Indirect(*const u8),
}

/** A reader for an atom which pads the atom out to a given length */
struct PaddedReadAtom {
    atom_bytes: usize,       // actual size of the stored atom
    atom_source: AtomSource, // where the atom data is read from
    atom_cursor: usize,      // How many bytes we have read
    atom_len: usize,         // The total padded length
}

impl PaddedReadAtom {
    /** Build a reader over `atom` that yields exactly `len` bytes, zero-padded past the atom. */
    fn new(atom: Atom, len: usize) -> Self {
        match atom.as_either() {
            // The direct atom is moved into the reader. The earlier code stored a pointer to
            // the local `direct` binding, which dangled as soon as `new` returned.
            Left(direct) => PaddedReadAtom {
                atom_bytes: 8,
                atom_source: AtomSource::Direct(direct),
                atom_cursor: 0,
                atom_len: len,
            },
            Right(indirect) => PaddedReadAtom {
                atom_bytes: indirect.size() << 3, // size is in 64 bit words, multiply by 8 to get bytes
                atom_source: AtomSource::Indirect(indirect.data_pointer() as *const u8), // data pointer, but for bytes
                atom_cursor: 0,
                atom_len: len,
            },
        }
    }

    /** Pointer to the first byte of the atom data. Valid only while `self` is not moved. */
    fn atom_base(&self) -> *const u8 {
        match &self.atom_source {
            AtomSource::Direct(direct) => (direct as *const DirectAtom) as *const u8,
            AtomSource::Indirect(base) => *base,
        }
    }
}

impl Read for PaddedReadAtom {
    /** Copy atom bytes, then zero padding, into `buf`; returns `Ok(0)` once `atom_len` bytes
     * have been produced. A single call never mixes atom bytes and padding. */
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        if self.atom_cursor >= self.atom_len {
            Ok(0) // we are done
        } else {
            let req = buf.len(); // How many bytes does the reading caller want?
            if self.atom_cursor < self.atom_bytes {
                // are we still reading bytes from the atom?
                let len = min(
                    self.atom_len - self.atom_cursor,
                    min(self.atom_bytes - self.atom_cursor, req),
                );
                // copy out bytes into the buffer, not running over the atom length itself, the
                // padded length, or the buffer length
                unsafe {
                    copy_nonoverlapping(
                        self.atom_base().add(self.atom_cursor),
                        buf.as_mut_ptr(),
                        len,
                    );
                }
                self.atom_cursor += len;
                Ok(len)
            } else {
                // We are past the atom and into padding
                let len = min(self.atom_len - self.atom_cursor, req);
                // write 0s until we hit the buffer length or the padded length
                unsafe {
                    write_bytes(buf.as_mut_ptr(), 0, len);
                }
                self.atom_cursor += len;
                Ok(len)
            }
        }
    }
}

// Murmur3 hash an atom with a given padded length
fn muk_u32(syd: u32, len: usize, key: Atom) -> u32 {
    murmur3_32(&mut PaddedReadAtom::new(key, len), syd).expect("Murmur3 hashing failed.")
}

/** Byte size of an atom.
+ * + * Assumes atom is normalized + */ +fn met3_usize(atom: Atom) -> usize { + match atom.as_either() { + Left(direct) => (64 - (direct.data().leading_zeros() as usize) + 7) >> 3, + Right(indirect) => { + let last_word = unsafe { *(indirect.data_pointer().add(indirect.size() - 1)) }; + let last_word_bytes = (64 - (last_word.leading_zeros() as usize) + 7) >> 3; + ((indirect.size() - 1) << 3) + last_word_bytes + } + } +} + +fn mum_u32(syd: u32, fal: u32, key: Atom) -> u32 { + let wyd = met3_usize(key); + let mut i = 0; + loop { + if i == 8 { + break fal; + } else { + let haz = muk_u32(syd, wyd, key); + let ham = (haz >> 31) ^ (haz & !(1 << 31)); + if ham == 0 { + i += 1; + continue; + } else { + break ham; + } + } + } +} + +fn calc_atom_mug_u32(atom: Atom) -> u32 { + mum_u32(0xCAFEBABE, 0x7FFF, atom) +} + +/** Unsafe because this passes a direct atom to mum_u32 made by concatenating the two mugs, + * so we must ensure that the tail_mug conforms to the mug invariant and is only 31 bits + */ +unsafe fn calc_cell_mug_u32(head_mug: u32, tail_mug: u32) -> u32 { + let cat_mugs = (head_mug as u64) | ((tail_mug as u64) << 32); + mum_u32( + 0xDEADBEEF, + 0xFFFE, + DirectAtom::new_unchecked(cat_mugs).as_atom(), + ) // this is safe on mugs since mugs are 31 bits +} + +pub fn get_mug(noun: Noun) -> Option { + match noun.as_either_direct_allocated() { + Left(direct) => Some(calc_atom_mug_u32(direct.as_atom())), + Right(allocated) => allocated.get_cached_mug(), + } +} + +const MASK_OUT_MUG: u64 = !(u32::MAX as u64); + +unsafe fn set_mug(allocated: Allocated, mug: u32) { + let metadata = allocated.get_metadata(); + allocated.set_metadata((metadata & MASK_OUT_MUG) | (mug as u64)); +} + +/** Calculate and cache the mug for a noun, but do *not* recursively calculate cache mugs for + * children of cells. + * + * If called on a cell with no mug cached for the head or tail, this function will return `None`. 
+ */ +pub fn allocated_mug_u32_one(allocated: Allocated) -> Option { + match allocated.get_cached_mug() { + Some(mug) => Some(mug), + None => match allocated.as_either() { + Left(indirect) => { + let mug = calc_atom_mug_u32(indirect.as_atom()); + unsafe { + set_mug(allocated, mug); + } + Some(mug) + } + Right(cell) => match (get_mug(cell.head()), get_mug(cell.tail())) { + (Some(head_mug), Some(tail_mug)) => { + let mug = unsafe { calc_cell_mug_u32(head_mug, tail_mug) }; + unsafe { + set_mug(allocated, mug); + } + Some(mug) + } + _ => None, + }, + }, + } +} + +pub fn mug_u32_one(noun: Noun) -> Option { + match noun.as_either_direct_allocated() { + Left(direct) => Some(calc_atom_mug_u32(direct.as_atom())), + Right(allocated) => allocated_mug_u32_one(allocated), + } +} + +pub fn mug_u32(stack: &mut NockStack, noun: Noun) -> u32 { + stack.push(1); + unsafe { + stack.save_prev_stack_pointer_to_local(0); + *(stack.alloc_in_previous_frame()) = noun; + } + loop { + if unsafe { stack.prev_stack_pointer_equals_local(0) } { + break; + } else { + let noun : Noun = unsafe { *(stack.top_in_previous_frame()) }; + match noun.as_either_direct_allocated() { + Left(_direct) => { + unsafe { stack.reclaim_in_previous_frame::(); } + continue; + } // no point in calculating a direct mug here as we wont cache it + Right(allocated) => match allocated.get_cached_mug() { + Some(_mug) => { + unsafe { stack.reclaim_in_previous_frame::(); } + continue; + } + None => match allocated.as_either() { + Left(indirect) => unsafe { + set_mug(allocated, calc_atom_mug_u32(indirect.as_atom())); + stack.reclaim_in_previous_frame::(); + continue; + }, + Right(cell) => unsafe { + match (get_mug(cell.head()), get_mug(cell.tail())) { + (Some(head_mug), Some(tail_mug)) => { + set_mug(allocated, calc_cell_mug_u32(head_mug, tail_mug)); + stack.reclaim_in_previous_frame::(); + continue; + }, + _ => { + *(stack.alloc_in_previous_frame()) = cell.tail(); + *(stack.alloc_in_previous_frame()) = cell.head(); + continue; 
+ }, + } + }, + }, + }, + } + } + } + unsafe { + stack.pop_no_copy(); + get_mug(noun).expect("Noun should have a mug once it is mugged.") + } +} + +pub fn mug(stack: &mut NockStack, noun: Noun) -> DirectAtom { + unsafe { + DirectAtom::new_unchecked(mug_u32(stack, noun) as u64) + } +} diff --git a/rust/iron-planet/src/noun.rs b/rust/iron-planet/src/noun.rs index c4818e6..d79c495 100644 --- a/rust/iron-planet/src/noun.rs +++ b/rust/iron-planet/src/noun.rs @@ -1,87 +1,594 @@ -use noun::atom; -use noun::Noun; -use std::rc::Rc; +use bitvec::prelude::{BitSlice, Lsb0}; +use either::Either; +use std::ptr; +use std::slice::{from_raw_parts, from_raw_parts_mut}; -use std::ptr::copy_nonoverlapping; +/** Tag for a direct atom. */ +const DIRECT_TAG: u64 = 0x0; -use crate::memory; +/** Tag mask for a direct atom. */ +const DIRECT_MASK: u64 = !(u64::MAX >> 1); -/* -enum Work { - Head(Rc, Rc), - Tail(Rc), - Cell, +/** Maximum value of a direct atom. Values higher than this must be represented by indirect atoms. */ +pub const DIRECT_MAX: u64 = u64::MAX >> 1; + +/** Tag for an indirect atom. */ +const INDIRECT_TAG: u64 = u64::MAX & DIRECT_MASK; + +/** Tag mask for an indirect atom. */ +const INDIRECT_MASK: u64 = !(u64::MAX >> 2); + +/** Tag for a cell. */ +const CELL_TAG: u64 = u64::MAX & INDIRECT_MASK; + +/** Tag mask for a cell. */ +const CELL_MASK: u64 = !(u64::MAX >> 3); + +/** Tag for a forwarding pointer */ +const FORWARDING_TAG: u64 = u64::MAX & CELL_MASK; + +/** Tag mask for a forwarding pointer */ +const FORWARDING_MASK: u64 = CELL_MASK; + +/** Test if a noun is a direct atom. 
*/ +fn is_direct_atom(noun: u64) -> bool { + noun & DIRECT_MASK == DIRECT_TAG } -fn atom_onto_stack(stack: &mut memory::NockStack, atom: &atom::Atom) -> u64 { - if atom.v().len() == 0 { - 0 - } else if atom.v().len() == 1 && atom.v()[0] <= memory::DIRECT_MAX { - atom.v()[0] - } else { - let indirect_dest: *mut u64 = memory::alloc(stack, atom.v().len() + 1); - unsafe { - *indirect_dest = atom.v().len() as u64; - copy_nonoverlapping(atom.v().as_ptr(), indirect_dest.add(1), atom.v().len()); +/** Test if a noun is an indirect atom. */ +fn is_indirect_atom(noun: u64) -> bool { + noun & INDIRECT_MASK == INDIRECT_TAG +} + +/** Test if a noun is a cell. */ +fn is_cell(noun: u64) -> bool { + noun & CELL_MASK == CELL_TAG +} + +/** A direct atom. + * + * Direct atoms represent an atom up to and including DIRECT_MAX as a machine word. + */ +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub struct DirectAtom(u64); + +impl DirectAtom { + /** Create a new direct atom, or panic if the value is greater than DIRECT_MAX */ + pub const fn new_panic(value: u64) -> Self { + if value > DIRECT_MAX { + panic!("Number is greater than DIRECT_MAX") + } else { + DirectAtom(value) } - ((indirect_dest as u64) >> 3) | memory::INDIRECT + } + + /** Create a new direct atom, or return Err if the value is greater than DIRECT_MAX */ + pub const fn new(value: u64) -> Result { + if value > DIRECT_MAX { + Err(()) + } else { + Ok(DirectAtom(value)) + } + } + + /** Create a new direct atom. This is unsafe because the value is not checked. + * + * Attempting to create a direct atom with a value greater than DIRECT_MAX will + * result in this value being interpreted by the runtime as a cell or indirect atom, + * with corresponding memory accesses. Thus, this function is marked as unsafe. 
+ */ + pub const unsafe fn new_unchecked(value: u64) -> Self { + DirectAtom(value) + } + + pub fn as_atom(self) -> Atom { + Atom { direct: self } + } + + pub fn data(self) -> u64 { + self.0 + } + + pub fn as_bitslice<'a>(&'a self) -> &'a BitSlice { + &(BitSlice::from_element(&self.0)) } } -pub fn noun_onto_stack(stack: &mut memory::NockStack, noun: &Noun) -> u64 { - let mut work: Vec = Vec::new(); - let mut results: Vec = Vec::new(); - match noun { - Noun::Atom(atom) => { - results.push(atom_onto_stack(stack, atom)); - } - Noun::Cell(cell) => work.push(Work::Head(cell.h(), cell.t())), +/** An indirect atom. + * + * Indirect atoms represent atoms above DIRECT_MAX as a tagged pointer to a memory buffer whose first word + * is the number of machine words necessary to store the atom, and whose remaining memory is the + * atom in little-endian byte order + */ +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub struct IndirectAtom(u64); + +impl IndirectAtom { + /** Tag the pointer and type it as an indirect atom. */ + pub unsafe fn from_raw_pointer(ptr: *const u64) -> Self { + IndirectAtom((ptr as u64) >> 3 | INDIRECT_TAG) } - loop { - match work.pop() { - None => { + + /** Strip the tag from an indirect atom and return it as a mutable pointer to its memory buffer. */ + unsafe fn to_raw_pointer_mut(&mut self) -> *mut u64 { + (self.0 << 3) as *mut u64 + } + + /** Strip the tag from an indirect atom and return it as a pointer to its memory buffer. */ + pub unsafe fn to_raw_pointer(&self) -> *const u64 { + (self.0 << 3) as *const u64 + } + + pub unsafe fn set_forwarding_pointer(&mut self, new_me: *const u64) { + // This is OK because the size is stored as 64 bit words, not bytes. 
+ // Thus, a true size value will never be larger than U64::MAX >> 3, and so + // any of the high bits set as an MSB + *self.to_raw_pointer_mut().add(1) = (new_me as u64) >> 3 | FORWARDING_TAG; + } + + pub unsafe fn forwarding_pointer(&self) -> Option { + let size_raw = *self.to_raw_pointer().add(1); + if size_raw | FORWARDING_MASK == FORWARDING_TAG { + // we can replace this by masking out thge forwarding pointer and putting in the + // indirect tag + Some(Self::from_raw_pointer((size_raw << 3) as *const u64)) + } else { + None + } + } + + /** Make an indirect atom by copying from other memory. + * + * The size is specified in 64 bit words, not in bytes. + */ + pub unsafe fn new_raw( + allocator: &mut dyn NounAllocator, + size: usize, + data: *const u64, + ) -> Self { + let (mut indirect, buffer) = Self::new_raw_mut(allocator, size); + ptr::copy_nonoverlapping(data, buffer.add(2), size); + *(indirect.normalize()) + } + + /** Make an indirect atom that can be written into. Return the atom (which should not be used + * until it is written and normalized) and a mutable pointer which can be used as a + * destination. + */ + pub unsafe fn new_raw_mut(allocator: &mut dyn NounAllocator, size: usize) -> (Self, *mut u64) { + let buffer = allocator.alloc_indirect(size); + *buffer = 0; + *buffer.add(1) = size as u64; + (Self::from_raw_pointer(buffer), buffer.add(2)) + } + + /** Make an indirect atom that can be written into as a bitslice. 
The constraints of + * [new_raw_mut] also apply here + */ + pub unsafe fn new_raw_mut_bitslice<'a>( + allocator: &mut dyn NounAllocator, + size: usize, + ) -> (Self, &'a mut BitSlice) { + let (noun, ptr) = Self::new_raw_mut(allocator, size); + ( + noun, + BitSlice::from_slice_mut(from_raw_parts_mut(ptr, size)), + ) + } + + /** Size of an indirect atom in 64-bit words */ + pub fn size(&self) -> usize { + unsafe { *(self.to_raw_pointer().add(1)) as usize } + } + + /** Pointer to data for indirect atom */ + pub fn data_pointer(&self) -> *const u64 { + unsafe { self.to_raw_pointer().add(2) as *const u64 } + } + + pub fn as_slice<'a>(&'a self) -> &'a [u64] { + unsafe { from_raw_parts(self.data_pointer(), self.size()) } + } + + pub fn as_byte_size<'a>(&'a self) -> &'a [u8] { + unsafe { from_raw_parts(self.data_pointer() as *const u8, self.size() << 3) } + } + + /** BitSlice view on an indirect atom, with lifetime tied to reference to indirect atom. */ + pub fn as_bitslice<'a>(&'a self) -> &'a BitSlice { + BitSlice::from_slice(self.as_slice()) + } + + /** Ensure that the size does not contain any trailing 0 words */ + pub unsafe fn normalize(&mut self) -> &Self { + let mut index = self.size() - 1; + let data = self.data_pointer().add(1); + loop { + if index == 0 || *(data.add(index)) != 0 { break; } - Some(work_type) => match work_type { - Work::Head(head_noun, tail_noun) => { - work.push(Work::Tail(tail_noun)); - match &*head_noun { - Noun::Atom(atom) => { - results.push(atom_onto_stack(stack, &atom)); - } - Noun::Cell(cell) => work.push(Work::Head(cell.h(), cell.t())), - } - } - Work::Tail(tail_noun) => { - work.push(Work::Cell); - match &*tail_noun { - Noun::Atom(atom) => { - results.push(atom_onto_stack(stack, &atom)); - } - Noun::Cell(cell) => work.push(Work::Head(cell.h(), cell.t())), - } - } - Work::Cell => match results.pop() { - None => { - panic!("Shouldn't happen: no results when making a cell"); - } - Some(tail_noun) => match results.pop() { - None => { - 
panic!("Shouldn't happen: no results when making a cell"); - } - Some(head_noun) => { - results.push(memory::cell(stack, head_noun, tail_noun)); - } - }, - }, - }, + index = index - 1; + } + *(self.to_raw_pointer_mut()) = (index + 1) as u64; + self + } + + /** Normalize, but convert to direct atom if it will fit */ + pub unsafe fn normalize_as_atom(&mut self) -> Atom { + self.normalize(); + if self.size() == 1 && *(self.data_pointer()) <= DIRECT_MAX { + Atom { + direct: DirectAtom(*(self.data_pointer())), + } + } else { + Atom { indirect: *self } } } - match results.pop() { - None => { - panic!("shouldn't happen: no result to return") - } - Some(result) => result, + + pub fn as_atom(self) -> Atom { + Atom { indirect: self } + } + + pub fn as_allocated(self) -> Allocated { + Allocated { indirect: self } + } + + pub fn as_noun(self) -> Noun { + Noun { indirect: self } } } -*/ + +/** + * A cell. + * + * A cell is represented by a tagged pointer to a memory buffer with metadata, a word describing + * the noun which is the cell's head, and a word describing a noun which is the cell's tail, each + * at a fixed offset. 
+ */ +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub struct Cell(u64); + +impl Cell { + pub unsafe fn from_raw_pointer(ptr: *const CellMemory) -> Self { + Cell((ptr as u64) >> 3 | CELL_TAG) + } + + pub unsafe fn to_raw_pointer(&self) -> *const CellMemory { + (self.0 << 3) as *const CellMemory + } + + unsafe fn to_raw_pointer_mut(&mut self) -> *mut CellMemory { + (self.0 << 3) as *mut CellMemory + } + + pub unsafe fn head_as_mut(mut self) -> *mut Noun { + &mut (*self.to_raw_pointer_mut()).head as *mut Noun + } + + pub unsafe fn tail_as_mut<'a>(mut self) -> *mut Noun { + &mut (*self.to_raw_pointer_mut()).tail as *mut Noun + } + + pub unsafe fn set_forwarding_pointer(&mut self, new_me: *const CellMemory) { + (*self.to_raw_pointer_mut()).head = Noun { + raw: (new_me as u64) >> 3 | FORWARDING_TAG, + } + } + + pub unsafe fn forwarding_pointer(&self) -> Option { + let head_raw = (*self.to_raw_pointer()).head.raw; + if head_raw | FORWARDING_MASK == FORWARDING_TAG { + // we can replace this by masking out the forwarding pointer and putting in the cell + // tag + Some(Self::from_raw_pointer((head_raw << 3) as *const CellMemory)) + } else { + None + } + } + + pub fn new(allocator: &mut dyn NounAllocator, head: Noun, tail: Noun) -> Cell { + unsafe { + let (cell, memory) = Self::new_raw_mut(allocator); + (*memory).head = head; + (*memory).tail = tail; + cell + } + } + + pub unsafe fn new_raw_mut(allocator: &mut dyn NounAllocator) -> (Cell, *mut CellMemory) { + let memory = allocator.alloc_cell(); + (*memory).metadata = 0; + (Self::from_raw_pointer(memory), memory) + } + + pub fn head(&self) -> Noun { + unsafe { (*self.to_raw_pointer()).head } + } + + pub fn tail(&self) -> Noun { + unsafe { (*self.to_raw_pointer()).tail } + } + + pub fn as_allocated(&self) -> Allocated { + Allocated { cell: *self } + } + + pub fn as_noun(&self) -> Noun { + Noun { cell: *self } + } +} + +/** + * Memory representation of the contents of a cell + */ +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub 
struct CellMemory { + pub metadata: u64, + pub head: Noun, + pub tail: Noun, +} + +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub union Atom { + raw: u64, + direct: DirectAtom, + indirect: IndirectAtom, +} + +impl Atom { + pub fn new(allocator: &mut dyn NounAllocator, value: u64) -> Atom { + if value <= DIRECT_MAX { + unsafe { DirectAtom::new_unchecked(value).as_atom() } + } else { + unsafe { IndirectAtom::new_raw(allocator, 1, &value).as_atom() } + } + } + pub fn is_direct(&self) -> bool { + unsafe { is_direct_atom(self.raw) } + } + + pub fn is_indirect(&self) -> bool { + unsafe { is_indirect_atom(self.raw) } + } + + pub fn as_direct(&self) -> Result { + if self.is_direct() { + unsafe { Ok(self.direct) } + } else { + Err(()) + } + } + + pub fn as_indirect(&self) -> Result { + if self.is_indirect() { + unsafe { Ok(self.indirect) } + } else { + Err(()) + } + } + + pub fn as_either(&self) -> Either { + if self.is_indirect() { + unsafe { Either::Right(self.indirect) } + } else { + unsafe { Either::Left(self.direct) } + } + } + + pub fn as_bitslice<'a>(&'a self) -> &'a BitSlice { + if self.is_indirect() { + unsafe { self.indirect.as_bitslice() } + } else { + unsafe { &(self.direct.as_bitslice()) } + } + } + + pub fn size(&self) -> usize { + match self.as_either() { + Either::Left(_direct) => 1, + Either::Right(indirect) => indirect.size(), + } + } + + pub fn data_pointer(&self) -> *const u64 { + match self.as_either() { + Either::Left(direct) => &(direct.0), + Either::Right(indirect) => indirect.data_pointer(), + } + } + + pub unsafe fn normalize(&mut self) -> Atom { + if self.is_indirect() { + self.indirect.normalize_as_atom() + } else { + *self + } + } + + pub fn as_noun(self) -> Noun { + Noun { atom: self } + } +} + +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub union Allocated { + raw: u64, + indirect: IndirectAtom, + cell: Cell, +} + +impl Allocated { + pub fn is_indirect(&self) -> bool { + unsafe { is_indirect_atom(self.raw) } + } + + pub fn is_cell(&self) -> 
bool { + unsafe { is_cell(self.raw) } + } + + pub unsafe fn to_raw_pointer(&self) -> *const u64 { + (self.raw << 3) as *const u64 + } + + pub unsafe fn to_raw_pointer_mut(&mut self) -> *mut u64 { + (self.raw << 3) as *mut u64 + } + + unsafe fn const_to_raw_pointer_mut(self) -> *mut u64 { + (self.raw << 3) as *mut u64 + } + + pub unsafe fn forwarding_pointer(&self) -> Option { + match self.as_either() { + Either::Left(indirect) => indirect.forwarding_pointer().map(|i| i.as_allocated()), + Either::Right(cell) => cell.forwarding_pointer().map(|c| c.as_allocated()), + } + } + + pub unsafe fn get_metadata(&self) -> u64 { + *(self.to_raw_pointer() as *const u64) + } + + pub unsafe fn set_metadata(self, metadata: u64) { + *(self.const_to_raw_pointer_mut() as *mut u64) = metadata; + } + + pub fn as_either(&self) -> Either { + if self.is_indirect() { + unsafe { Either::Left(self.indirect) } + } else { + unsafe { Either::Right(self.cell) } + } + } + + pub fn as_noun(&self) -> Noun { + Noun { allocated: *self } + } + + pub fn get_cached_mug(self: Allocated) -> Option { + unsafe { + let bottom_metadata = self.get_metadata() as u32 & 0x7FFFFFFF; // magic number: LS 31 bits + if bottom_metadata > 0 { + Some(bottom_metadata) + } else { + None + } + } + } +} + +#[derive(Copy, Clone)] +#[repr(packed(8))] +pub union Noun { + raw: u64, + direct: DirectAtom, + indirect: IndirectAtom, + atom: Atom, + cell: Cell, + allocated: Allocated, +} + +impl Noun { + pub fn is_direct(&self) -> bool { + unsafe { is_direct_atom(self.raw) } + } + + pub fn is_indirect(&self) -> bool { + unsafe { is_indirect_atom(self.raw) } + } + + pub fn is_atom(&self) -> bool { + self.is_direct() || self.is_indirect() + } + + pub fn is_allocated(&self) -> bool { + self.is_indirect() || self.is_cell() + } + + pub fn is_cell(&self) -> bool { + unsafe { is_cell(self.raw) } + } + + pub fn as_direct(&self) -> Result { + if self.is_direct() { + unsafe { Ok(self.direct) } + } else { + Err(()) + } + } + + pub fn 
as_indirect(&self) -> Result { + if self.is_indirect() { + unsafe { Ok(self.indirect) } + } else { + Err(()) + } + } + + pub fn as_cell(&self) -> Result { + if self.is_cell() { + unsafe { Ok(self.cell) } + } else { + Err(()) + } + } + + pub fn as_atom(&self) -> Result { + if self.is_atom() { + unsafe { Ok(self.atom) } + } else { + Err(()) + } + } + + pub fn as_allocated(&self) -> Result { + if self.is_allocated() { + unsafe { Ok(self.allocated) } + } else { + Err(()) + } + } + + pub fn as_either_atom_cell(&self) -> Either { + if self.is_cell() { + unsafe { Either::Right(self.cell) } + } else { + unsafe { Either::Left(self.atom) } + } + } + + pub fn as_either_direct_allocated(&self) -> Either { + if self.is_direct() { + unsafe { Either::Left(self.direct) } + } else { + unsafe { Either::Right(self.allocated) } + } + } + + /** Are these the same noun */ + pub unsafe fn raw_equals(self, other: Noun) -> bool { + self.raw == other.raw + } +} + +/** + * An allocation object (probably a mem::NockStack) which can allocate a memory buffer sized to + * a certain number of nouns + */ +pub trait NounAllocator { + /** Allocate memory for some multiple of the size of a noun + * + * This should allocate *two more* `u64`s than `words` to make space for the size and metadata + */ + unsafe fn alloc_indirect(&mut self, words: usize) -> *mut u64; + + /** Allocate memory for a cell */ + unsafe fn alloc_cell(&mut self) -> *mut CellMemory; +} + diff --git a/rust/iron-planet/src/serialization.rs b/rust/iron-planet/src/serialization.rs new file mode 100644 index 0000000..fe713be --- /dev/null +++ b/rust/iron-planet/src/serialization.rs @@ -0,0 +1,348 @@ +use crate::mem::NockStack; +use crate::mem::unifying_equality; +use crate::mug::mug_u32; +use crate::noun::{Atom, Cell, DirectAtom, IndirectAtom, Noun}; +use bitvec::prelude::{BitSlice, Lsb0}; +use either::Either::{Left, Right}; +use intmap::IntMap; + +pub fn met0_usize(atom: Atom) -> usize { + let atom_bitslice = atom.as_bitslice(); + 
match atom_bitslice.last_one() { + Some(last_one) => last_one + 1, + None => 0, + } +} + +pub fn met0_u64_to_usize(x: u64) -> usize { + let usize_bitslice = BitSlice::::from_element(&x); + match usize_bitslice.last_one() { + Some(last_one) => last_one + 1, + None => 0, + } +} + +pub fn cue(stack: &mut NockStack, buffer: Atom) -> Noun { + let buffer_bitslice = buffer.as_bitslice(); + let mut cursor: usize = 0; + let mut backref_map = IntMap::::new(); + stack.push(2); + unsafe { + stack.save_prev_stack_pointer_to_local(0); + *(stack.alloc_in_previous_frame()) = stack.local_noun_pointer(1); + }; + loop { + if unsafe { stack.prev_stack_pointer_equals_local(0) } { + let result = unsafe { + *(stack.local_noun_pointer(1)) + }; + unsafe { stack.pop_no_copy(); }; + break result; + } else { + let dest_ptr: *mut Noun = unsafe { *(stack.top_in_previous_frame()) }; + if buffer_bitslice[cursor] { + // 1 bit + if buffer_bitslice[cursor + 1] { + // 11 bits - cue backreference + cursor += 2; + unsafe { + *dest_ptr = *(backref_map + .get(rub_backref(&mut cursor, buffer_bitslice)) + .expect("Invalid backref in cue")); + stack.reclaim_in_previous_frame::<*mut Noun>(); + } + continue; + } else { + // 10 bits - cue cell + let backref = cursor; + cursor += 2; + unsafe { + let (cell, cell_mem_ptr) = Cell::new_raw_mut(stack); + *dest_ptr = cell.as_noun(); + backref_map.insert(backref as u64, *dest_ptr); + stack.reclaim_in_previous_frame::<*mut Noun>(); + *(stack.alloc_in_previous_frame::<*mut Noun>()) = &mut ((*cell_mem_ptr).tail); + *(stack.alloc_in_previous_frame::<*mut Noun>()) = &mut ((*cell_mem_ptr).head); + } + continue; + } + } else { + // 0 bit - cue atom + let backref = cursor; + cursor += 1; + unsafe { + *dest_ptr = + rub_atom(stack, &mut cursor, buffer_bitslice).as_noun(); + backref_map.insert(backref as u64, *dest_ptr); + stack.reclaim_in_previous_frame::<*mut Noun>(); + }; + continue; + } + }; + } +} + +// TODO: use first_zero() on a slice of the buffer +fn get_size(cursor: 
&mut usize, buffer: &BitSlice) -> usize { + let mut bitsize: usize = 0; + loop { + if buffer[*cursor + bitsize] { + bitsize += 1; + continue; + } else { + break; + } + } + if bitsize == 0 { + *cursor += 1; + 0 + } else { + let mut size: u64 = 0; + BitSlice::from_element_mut(&mut size)[0..bitsize - 1] + .copy_from_bitslice(&buffer[*cursor + bitsize + 1..*cursor + bitsize + bitsize]); + *cursor += bitsize + bitsize; + (size as usize) + (1 << (bitsize - 1)) + } +} + +fn rub_atom( + stack: &mut NockStack, + cursor: &mut usize, + buffer: &BitSlice, +) -> Atom { + let size = get_size(cursor, buffer); + if size == 0 { + unsafe { DirectAtom::new_unchecked(0).as_atom() } + } else if size < 64 { + // fits in a direct atom + let mut direct_raw = 0; + BitSlice::from_element_mut(&mut direct_raw)[0..size] + .copy_from_bitslice(&buffer[*cursor..*cursor + size]); + *cursor += size; + unsafe { DirectAtom::new_unchecked(direct_raw).as_atom() } + } else { + // need an indirect atom + let wordsize = (size + 63) >> 6; + let (atom, slice) = unsafe { IndirectAtom::new_raw_mut_bitslice(stack, wordsize) }; // fast round to wordsize + slice[0..size].copy_from_bitslice(&buffer[*cursor..*cursor + size]); + slice[size..wordsize << 6].fill(false); + atom.as_atom() + } +} + +fn rub_backref(cursor: &mut usize, buffer: &BitSlice) -> u64 { + let size = get_size(cursor, buffer); + if size == 0 { + 0 + } else if size <= 64 { + let mut backref: u64 = 0; + BitSlice::from_element_mut(&mut backref)[0..size] + .copy_from_bitslice(&buffer[*cursor..*cursor + size]); + *cursor += size; + backref + } else { + panic!("Backreference size too big for vere") + } +} + +struct JamState<'a> { + cursor: usize, + size: usize, + atom: IndirectAtom, + slice: &'a mut BitSlice, +} + +pub fn jam(stack: &mut NockStack, noun: Noun) -> Atom { + let mut backref_map: IntMap> = IntMap::new(); + let size = 8; + let (atom, slice) = unsafe { IndirectAtom::new_raw_mut_bitslice(stack, size) }; + let mut state = JamState { + cursor: 
0, + size: size, + atom: atom, + slice: slice, + }; + stack.push(1); + unsafe { + stack.save_prev_stack_pointer_to_local(0); + *(stack.alloc_in_previous_frame()) = noun; + }; + 'jam: loop { + if unsafe { stack.prev_stack_pointer_equals_local(0) } { + break; + } else { + let mut noun = unsafe { *(stack.top_in_previous_frame()) }; + let mug = mug_u32(stack, noun); + match backref_map.get_mut(mug as u64) { + None => {} + Some(backref_chain) => { + for (mut key, backref) in backref_chain { + if unsafe { unifying_equality(stack, &mut noun, &mut key) } { + match noun.as_either_atom_cell() { + Left(atom) => { + let atom_size = met0_usize(atom); + let backref_size = met0_u64_to_usize(*backref); + if atom_size <= backref_size { + jam_atom(stack, &mut state, atom); + } else { + jam_backref(stack, &mut state, *backref); + } + } + Right(_cell) => { + jam_backref(stack, &mut state, *backref); + } + } + unsafe { + stack.reclaim_in_previous_frame::(); + } + continue 'jam; + } + } + } + }; + match noun.as_either_atom_cell() { + Left(atom) => { + let backref = state.cursor; + match backref_map.get_mut(mug as u64) { + None => { + backref_map.insert(mug as u64, vec![(noun, backref as u64)]); + }, + Some(vec) => { + vec.push((noun, backref as u64)); + }, + }; + jam_atom(stack, &mut state, atom); + unsafe { + stack.reclaim_in_previous_frame::(); + }; + continue; + } + Right(cell) => { + let backref = state.cursor; + match backref_map.get_mut(mug as u64) { + None => { + backref_map.insert(mug as u64, vec![(noun, backref as u64)]); + }, + Some(vec) => { + vec.push((noun, backref as u64)); + }, + }; + jam_cell(stack, &mut state); + unsafe { + stack.reclaim_in_previous_frame::(); + *(stack.alloc_in_previous_frame()) = cell.tail(); + *(stack.alloc_in_previous_frame()) = cell.head(); + }; + continue; + } + } + } + } + let mut result = unsafe { state.atom.normalize_as_atom().as_noun() }; + stack.pop(&mut result); + result.as_atom().expect("IMPOSSIBLE: result was coerced from an atom so should 
not fail coercion to an atom") +} + +fn jam_atom( + traversal: &mut NockStack, + state: &mut JamState, + atom: Atom, +) { + loop { + if state.cursor + 1 > state.slice.len() { + double_atom_size(traversal, state); + } else { + break; + } + } + state.slice.set(state.cursor, false); + state.cursor += 1; + loop { + if let Ok(()) = mat(traversal, state, atom) { + break; + } else { + double_atom_size(traversal, state); + } + } +} + +fn jam_cell(traversal: &mut NockStack, state: &mut JamState) { + loop { + if state.cursor + 2 > state.slice.len() { + double_atom_size(traversal, state); + } else { + break; + } + } + state.slice.set(state.cursor, true); + state.slice.set(state.cursor + 1, false); + state.cursor += 2; +} + +fn jam_backref( + traversal: &mut NockStack, + state: &mut JamState, + backref: u64, +) { + loop { + if state.cursor + 2 > state.slice.len() { + double_atom_size(traversal, state); + } else { + break; + } + } + state.slice.set(state.cursor, true); + state.slice.set(state.cursor + 1, true); + state.cursor += 2; + let backref_atom = Atom::new(traversal, backref); + loop { + if let Ok(()) = mat(traversal, state, backref_atom) { + break; + } else { + double_atom_size(traversal, state); + } + } +} + +fn double_atom_size(traversal: &mut NockStack, state: &mut JamState) { + let new_size = state.size + state.size; + let (new_atom, new_slice) = unsafe { IndirectAtom::new_raw_mut_bitslice(traversal, new_size) }; + new_slice[0..state.cursor].copy_from_bitslice(&state.slice[0..state.cursor]); + state.size = new_size; + state.atom = new_atom; + state.slice = new_slice; +} + +// INVARIANT: mat must not modify state.cursor unless it will also return `Ok(())` +fn mat( + traversal: &mut NockStack, + state: &mut JamState, + atom: Atom, +) -> Result<(), ()> { + let b_atom_size = met0_usize(atom); + let b_atom_size_atom = Atom::new(traversal, b_atom_size as u64); + if b_atom_size == 0 { + if state.cursor + 1 > state.slice.len() { + Err(()) + } else { + 
state.slice.set(state.cursor, true); + state.cursor += 1; + Ok(()) + } + } else { + let c_b_size = met0_usize(b_atom_size_atom); + if state.cursor + c_b_size + c_b_size + b_atom_size > state.slice.len() { + Err(()) + } else { + state.slice[state.cursor..state.cursor + c_b_size + 1].fill(true); // a 1 bit for each bit in the atom size + state.slice.set(state.cursor + c_b_size + 1, false); // a terminating 0 bit + state.slice[state.cursor + c_b_size + 2..state.cursor + c_b_size + c_b_size] + .copy_from_bitslice(&b_atom_size_atom.as_bitslice()[0..c_b_size - 1]); // the atom size excepting the most significant 1 (since we know where that is from the size-of-the-size) + state.slice[state.cursor + c_b_size + c_b_size + ..state.cursor + c_b_size + c_b_size + b_atom_size] + .copy_from_bitslice(atom.as_bitslice()); // the atom itself + state.cursor += c_b_size + c_b_size + b_atom_size; + Ok(()) + } + } +} diff --git a/rust/nix/sources.json b/rust/nix/sources.json new file mode 100644 index 0000000..3f87af9 --- /dev/null +++ b/rust/nix/sources.json @@ -0,0 +1,14 @@ +{ + "nixpkgs": { + "branch": "release-22.05", + "description": "Nix Packages collection", + "homepage": "", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9ff65cb28c43236eac4cbbd20a5781581d9cbbf6", + "sha256": "1vfz9xdkpwp1bbp7p7abwl64sfdsg0g10hbvxsvma1jdz2pnxl5h", + "type": "tarball", + "url": "https://github.com/NixOS/nixpkgs/archive/9ff65cb28c43236eac4cbbd20a5781581d9cbbf6.tar.gz", + "url_template": "https://github.com///archive/.tar.gz" + } +} diff --git a/rust/nix/sources.nix b/rust/nix/sources.nix new file mode 100644 index 0000000..9a01c8a --- /dev/null +++ b/rust/nix/sources.nix @@ -0,0 +1,194 @@ +# This file has been generated by Niv. + +let + + # + # The fetchers. fetch_ fetches specs of type . 
+ # + + fetch_file = pkgs: name: spec: + let + name' = sanitizeName name + "-src"; + in + if spec.builtin or true then + builtins_fetchurl { inherit (spec) url sha256; name = name'; } + else + pkgs.fetchurl { inherit (spec) url sha256; name = name'; }; + + fetch_tarball = pkgs: name: spec: + let + name' = sanitizeName name + "-src"; + in + if spec.builtin or true then + builtins_fetchTarball { name = name'; inherit (spec) url sha256; } + else + pkgs.fetchzip { name = name'; inherit (spec) url sha256; }; + + fetch_git = name: spec: + let + ref = + if spec ? ref then spec.ref else + if spec ? branch then "refs/heads/${spec.branch}" else + if spec ? tag then "refs/tags/${spec.tag}" else + abort "In git source '${name}': Please specify `ref`, `tag` or `branch`!"; + submodules = if spec ? submodules then spec.submodules else false; + submoduleArg = + let + nixSupportsSubmodules = builtins.compareVersions builtins.nixVersion "2.4" >= 0; + emptyArgWithWarning = + if submodules == true + then + builtins.trace + ( + "The niv input \"${name}\" uses submodules " + + "but your nix's (${builtins.nixVersion}) builtins.fetchGit " + + "does not support them" + ) + {} + else {}; + in + if nixSupportsSubmodules + then { inherit submodules; } + else emptyArgWithWarning; + in + builtins.fetchGit + ({ url = spec.repo; inherit (spec) rev; inherit ref; } // submoduleArg); + + fetch_local = spec: spec.path; + + fetch_builtin-tarball = name: throw + ''[${name}] The niv type "builtin-tarball" is deprecated. You should instead use `builtin = true`. + $ niv modify ${name} -a type=tarball -a builtin=true''; + + fetch_builtin-url = name: throw + ''[${name}] The niv type "builtin-url" will soon be deprecated. You should instead use `builtin = true`. 
+ $ niv modify ${name} -a type=file -a builtin=true''; + + # + # Various helpers + # + + # https://github.com/NixOS/nixpkgs/pull/83241/files#diff-c6f540a4f3bfa4b0e8b6bafd4cd54e8bR695 + sanitizeName = name: + ( + concatMapStrings (s: if builtins.isList s then "-" else s) + ( + builtins.split "[^[:alnum:]+._?=-]+" + ((x: builtins.elemAt (builtins.match "\\.*(.*)" x) 0) name) + ) + ); + + # The set of packages used when specs are fetched using non-builtins. + mkPkgs = sources: system: + let + sourcesNixpkgs = + import (builtins_fetchTarball { inherit (sources.nixpkgs) url sha256; }) { inherit system; }; + hasNixpkgsPath = builtins.any (x: x.prefix == "nixpkgs") builtins.nixPath; + hasThisAsNixpkgsPath = == ./.; + in + if builtins.hasAttr "nixpkgs" sources + then sourcesNixpkgs + else if hasNixpkgsPath && ! hasThisAsNixpkgsPath then + import {} + else + abort + '' + Please specify either (through -I or NIX_PATH=nixpkgs=...) or + add a package called "nixpkgs" to your sources.json. + ''; + + # The actual fetching function. + fetch = pkgs: name: spec: + + if ! builtins.hasAttr "type" spec then + abort "ERROR: niv spec ${name} does not have a 'type' attribute" + else if spec.type == "file" then fetch_file pkgs name spec + else if spec.type == "tarball" then fetch_tarball pkgs name spec + else if spec.type == "git" then fetch_git name spec + else if spec.type == "local" then fetch_local spec + else if spec.type == "builtin-tarball" then fetch_builtin-tarball name + else if spec.type == "builtin-url" then fetch_builtin-url name + else + abort "ERROR: niv spec ${name} has unknown type ${builtins.toJSON spec.type}"; + + # If the environment variable NIV_OVERRIDE_${name} is set, then use + # the path directly as opposed to the fetched source. 
+ replace = name: drv: + let + saneName = stringAsChars (c: if isNull (builtins.match "[a-zA-Z0-9]" c) then "_" else c) name; + ersatz = builtins.getEnv "NIV_OVERRIDE_${saneName}"; + in + if ersatz == "" then drv else + # this turns the string into an actual Nix path (for both absolute and + # relative paths) + if builtins.substring 0 1 ersatz == "/" then /. + ersatz else /. + builtins.getEnv "PWD" + "/${ersatz}"; + + # Ports of functions for older nix versions + + # a Nix version of mapAttrs if the built-in doesn't exist + mapAttrs = builtins.mapAttrs or ( + f: set: with builtins; + listToAttrs (map (attr: { name = attr; value = f attr set.${attr}; }) (attrNames set)) + ); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 + range = first: last: if first > last then [] else builtins.genList (n: first + n) (last - first + 1); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 + stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 + stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); + concatMapStrings = f: list: concatStrings (map f list); + concatStrings = builtins.concatStringsSep ""; + + # https://github.com/NixOS/nixpkgs/blob/8a9f58a375c401b96da862d969f66429def1d118/lib/attrsets.nix#L331 + optionalAttrs = cond: as: if cond then as else {}; + + # fetchTarball version that is compatible between all the versions of Nix + builtins_fetchTarball = { url, name ? 
null, sha256 }@attrs: + let + inherit (builtins) lessThan nixVersion fetchTarball; + in + if lessThan nixVersion "1.12" then + fetchTarball ({ inherit url; } // (optionalAttrs (!isNull name) { inherit name; })) + else + fetchTarball attrs; + + # fetchurl version that is compatible between all the versions of Nix + builtins_fetchurl = { url, name ? null, sha256 }@attrs: + let + inherit (builtins) lessThan nixVersion fetchurl; + in + if lessThan nixVersion "1.12" then + fetchurl ({ inherit url; } // (optionalAttrs (!isNull name) { inherit name; })) + else + fetchurl attrs; + + # Create the final "sources" from the config + mkSources = config: + mapAttrs ( + name: spec: + if builtins.hasAttr "outPath" spec + then abort + "The values in sources.json should not have an 'outPath' attribute" + else + spec // { outPath = replace name (fetch config.pkgs name spec); } + ) config.sources; + + # The "config" used by the fetchers + mkConfig = + { sourcesFile ? if builtins.pathExists ./sources.json then ./sources.json else null + , sources ? if isNull sourcesFile then {} else builtins.fromJSON (builtins.readFile sourcesFile) + , system ? builtins.currentSystem + , pkgs ? mkPkgs sources system + }: rec { + # The sources, i.e. the attribute set of spec name to spec + inherit sources; + + # The "pkgs" (evaluated nixpkgs) to use for e.g. non-builtin fetchers + inherit pkgs; + }; + +in +mkSources (mkConfig {}) // { __functor = _: settings: mkSources (mkConfig settings); } diff --git a/rust/shell.nix b/rust/shell.nix new file mode 100644 index 0000000..556ee49 --- /dev/null +++ b/rust/shell.nix @@ -0,0 +1,4 @@ +{ sources ? import ./nix/sources.nix, pkgs ? import sources.nixpkgs {} }: +pkgs.mkShell { + packages = with pkgs; [ rustc cargo cargo-watch rustfmt ]; +}