Finish initial version of hvm-lang to hvm-core compilation

This commit is contained in:
Nicolas Abril 2023-08-31 20:35:41 +02:00
parent 9f1ef68ea3
commit 7731d2487c
11 changed files with 260 additions and 54 deletions

29
Cargo.lock generated
View File

@ -161,6 +161,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "either"
version = "1.9.0"
@ -196,7 +202,9 @@ dependencies = [
"ariadne",
"chumsky",
"clap",
"itertools 0.11.0",
"logos",
"pretty_assertions",
"shrinkwraprs",
]
@ -209,6 +217,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.147"
@ -253,6 +270,16 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "pretty_assertions"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "proc-macro2"
version = "1.0.66"
@ -293,7 +320,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e63e6744142336dfb606fe2b068afa2e1cca1ee6a5d8377277a92945d81fa331"
dependencies = [
"bitflags",
"itertools",
"itertools 0.8.2",
"proc-macro2",
"quote",
"syn 1.0.109",

View File

@ -8,5 +8,9 @@ anyhow = "1.0.72"
ariadne = "0.3.0"
chumsky = "1.0.0-alpha.4"
clap = { version = "4.4.1", features = ["derive"] }
itertools = "0.11.0"
logos = "0.13.0"
shrinkwraprs = "0.3.0"
[dev-dependencies]
pretty_assertions = "1.4.0"

View File

@ -1,6 +1,7 @@
use super::DefId;
use itertools::Itertools;
use std::{collections::HashMap, fmt};
use super::DefId;
pub struct Book {
pub defs: HashMap<DefId, LNet>,
}
@ -9,8 +10,8 @@ pub struct Book {
#[derive(Debug)]
pub struct LNet {
root: LTree,
acts: LActs,
pub root: LTree,
pub acts: LActs,
}
#[derive(Debug)]
@ -83,10 +84,10 @@ pub fn u32_to_name(num: u32) -> String {
impl fmt::Display for LNet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, " $ {}", self.root)?;
write!(f, "$ {}", self.root)?;
for (a, b) in &self.acts {
writeln!(f, " & {}", a)?;
writeln!(f, " ~ {}", b)?;
write!(f, "\n& {}", a)?;
write!(f, "\n~ {}", b)?;
}
Ok(())
}
@ -108,24 +109,23 @@ impl fmt::Display for Book {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (id, net) in &self.defs {
writeln!(f, "{} =", u32_to_name(**id))?;
writeln!(f, "{}\n", net)?;
writeln!(f, "{}\n", net.to_string().split('\n').map(|x| format!(" {x}")).join("\n"))?;
}
Ok(())
}
}
pub type Tag = u8;
pub type Val = u32;
pub const NIL: Tag = 0x0; // empty node
pub const REF: Tag = 0x1; // reference to a definition (closed net)
pub const NUM: Tag = 0x2; // unboxed number
pub const ERA: Tag = 0x3; // unboxed eraser
pub const VRR: Tag = 0x4; // variable pointing to root
pub const VR1: Tag = 0x5; // variable pointing to aux1 port of node
pub const VR2: Tag = 0x6; // variable pointing to aux2 port of node
pub const RDT: Tag = 0x7; // redirection to root
pub const RD1: Tag = 0x8; // redirection to aux1 port of node
pub const RD2: Tag = 0x9; // redirection to aux2 port of node
// Not used in hvm-core AST
// pub const NIL: Tag = 0x0; // empty node
// pub const REF: Tag = 0x1; // reference to a definition (closed net)
// pub const NUM: Tag = 0x2; // unboxed number
// pub const ERA: Tag = 0x3; // unboxed eraser
// pub const VRR: Tag = 0x4; // variable pointing to root
// pub const VR1: Tag = 0x5; // variable pointing to aux1 port of node
// pub const VR2: Tag = 0x6; // variable pointing to aux2 port of node
// pub const RDT: Tag = 0x7; // redirection to root
// pub const RD1: Tag = 0x8; // redirection to aux1 port of node
// pub const RD2: Tag = 0x9; // redirection to aux2 port of node
pub const CON: Tag = 0xA; // points to main port of con node
pub const DUP: Tag = 0xB; // points to main port of dup node; higher labels also dups

View File

@ -21,9 +21,9 @@ pub struct Rule {
#[derive(Debug, Clone)]
pub enum Pattern {
Ctr(Name, Vec<Pattern>),
Num(Number),
Var(Name),
_Ctr(Name, Vec<Pattern>),
_Num(Number),
_Var(Name),
}
#[derive(Debug, Clone)]

4
src/lib.rs Normal file
View File

@ -0,0 +1,4 @@
pub mod ast;
pub mod loader;
pub mod parser;
pub mod to_core;

View File

@ -1,10 +1,10 @@
use clap::Parser;
use std::path::PathBuf;
mod ast;
mod loader;
mod parser;
mod to_core;
pub mod ast;
pub mod loader;
pub mod parser;
pub mod to_core;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]

View File

@ -1,4 +1,4 @@
mod lexer;
mod parser;
pub use parser::parse_definition_book;
pub use parser::{parse_definition_book, parse_term};

View File

@ -14,15 +14,6 @@ use chumsky::{
};
use logos::Logos;
pub fn parse_definition_book(code: &str) -> Result<DefinitionBook, Vec<Rich<Token>>> {
let token_iter = Token::lexer(code).spanned().map(|(token, span)| match token {
Ok(t) => (t, SimpleSpan::from(span)),
Err(e) => (Token::Error(e), SimpleSpan::from(span)),
});
let token_stream = Stream::from_iter(token_iter).spanned(SimpleSpan::from(code.len() .. code.len()));
book_parser().parse(token_stream).into_result()
}
// TODO: Pattern matching on rules
// TODO: Other types of numbers
/// <Name> ::= <name_token> // [_a-zA-Z][_a-zA-Z0-9]*
@ -39,8 +30,28 @@ pub fn parse_definition_book(code: &str) -> Result<DefinitionBook, Vec<Rich<Toke
/// <Rule> ::= "(" <Name> ")" "=" <newline_token>* <Term> <newline_token>*
/// <Def> ::= (<Rule> <NewLine>)+
/// <Book> ::= <Def>+
pub fn parse_definition_book(code: &str) -> Result<DefinitionBook, Vec<Rich<Token>>> {
let token_iter = Token::lexer(code).spanned().map(|(token, span)| match token {
Ok(t) => (t, SimpleSpan::from(span)),
Err(e) => (Token::Error(e), SimpleSpan::from(span)),
});
let token_stream = Stream::from_iter(token_iter).spanned(SimpleSpan::from(code.len() .. code.len()));
book_parser().parse(token_stream).into_result()
}
fn rule_parser<'a, I>() -> impl Parser<'a, I, Rule, extra::Err<Rich<'a, Token>>>
pub fn parse_term(code: &str) -> Result<Term, Vec<Rich<Token>>> {
// TODO: Make a function that calls a parser. I couldn't figure out how to type it correctly.
let token_iter = Token::lexer(code).spanned().map(|(token, span)| match token {
Ok(t) => (t, SimpleSpan::from(span)),
Err(e) => (Token::Error(e), SimpleSpan::from(span)),
});
let token_stream = Stream::from_iter(token_iter).spanned(SimpleSpan::from(code.len() .. code.len()));
term_parser().parse(token_stream).into_result()
}
// Parsers
fn term_parser<'a, I>() -> impl Parser<'a, I, Term, extra::Err<Rich<'a, Token>>>
where
I: ValueInput<'a, Token = Token, Span = SimpleSpan>,
{
@ -69,7 +80,7 @@ where
let var = name.map(|name| Term::Var { nam: name });
let term = recursive(|term| {
recursive(|term| {
let nested = term
.clone()
.delimited_by(just(Token::LParen).then(new_line.clone()), just(Token::RParen).then(new_line.clone()));
@ -112,13 +123,21 @@ where
});
choice((lam, app, dup, let_, num_op, item))
});
})
}
fn rule_parser<'a, I>() -> impl Parser<'a, I, Rule, extra::Err<Rich<'a, Token>>>
where
I: ValueInput<'a, Token = Token, Span = SimpleSpan>,
{
let name = select!(Token::Name(name) => Name(name.to_string()));
let new_line = just(Token::NewLine).repeated();
just(Token::LParen)
.ignore_then(name)
.then_ignore(just(Token::RParen))
.then_ignore(just(Token::Equals))
.then(term.delimited_by(new_line.clone(), new_line.clone()))
.then(term_parser().delimited_by(new_line.clone(), new_line.clone()))
.map(|(name, body)| Rule { name, pats: vec![], body })
}

View File

@ -1,6 +1,10 @@
use std::collections::HashMap;
use crate::ast::{DefId, Name, Term};
use crate::ast::{
self,
core::{LNet, LTree},
DefId, Name, Term,
};
#[derive(Clone, Debug)]
/// Net representation used only as an intermediate for converting to hvm-core format
@ -209,3 +213,128 @@ fn encode_term(
}
}
}
pub fn compat_net_to_core(inet: &INet) -> anyhow::Result<LNet> {
let tree_roots = get_tree_roots(inet);
let mut port_to_var_id: HashMap<Port, VarId> = HashMap::new();
let root = compat_tree_to_hvm_tree(inet, tree_roots[1], &mut port_to_var_id);
let mut acts = vec![];
for active_pair in tree_roots[2 ..].chunks_exact(2) {
let act1 = compat_tree_to_hvm_tree(inet, active_pair[0], &mut port_to_var_id);
let act2 = compat_tree_to_hvm_tree(inet, active_pair[1], &mut port_to_var_id);
acts.push((act1, act2));
}
Ok(LNet { root, acts })
}
type TreeId = NodeId;
type VarId = u32;
/// Returns a list of all the tree node roots in the compat inet.
fn get_tree_roots(inet: &INet) -> Vec<NodeId> {
let mut node_to_tree_id: Vec<Option<TreeId>> = vec![None; inet.nodes.len() / 4];
let mut tree_roots: Vec<NodeId> = vec![addr(ROOT), addr(enter(inet, ROOT))];
let mut side_links: Vec<Port> = vec![]; // Links between trees
// Start by mapping the root tree
go_down_tree(inet, tree_roots[1], 0, &mut node_to_tree_id, &mut side_links);
// Check each side-link for a possible new tree pair;
while let Some(dest_port) = side_links.pop() {
let dest_node = addr(dest_port);
// Only go up unmarked trees
if node_to_tree_id[dest_node as usize].is_none() {
let (new_tree1, new_tree2) = go_up_tree(inet, dest_node);
go_down_tree(inet, new_tree1, tree_roots.len() as TreeId, &mut node_to_tree_id, &mut side_links);
tree_roots.push(new_tree1);
go_down_tree(inet, new_tree2, tree_roots.len() as TreeId, &mut node_to_tree_id, &mut side_links);
tree_roots.push(new_tree2);
}
}
tree_roots
}
/// Go down a node tree, marking all nodes with the tree_id and storing any side_links found.
fn go_down_tree(
inet: &INet,
root: NodeId,
tree_id: TreeId,
node_to_tree_id: &mut [Option<NodeId>],
side_links: &mut Vec<Port>,
) {
let mut nodes_to_check = vec![root];
while let Some(node) = nodes_to_check.pop() {
node_to_tree_id[node as usize] = Some(tree_id);
for down_slot in [1, 2] {
let down_port = enter(inet, port(node, down_slot));
if slot(down_port) == 0 {
// If this down-link is to a main port, this is a node of the same tree
nodes_to_check.push(addr(down_port));
} else {
// Otherwise it's a side-link
side_links.push(down_port);
}
}
}
}
/// Goes up a node tree, starting from some given node.
/// Returns the root of this tree and the root of its active pair.
fn go_up_tree(inet: &INet, start_node: NodeId) -> (NodeId, NodeId) {
let mut crnt_node = start_node;
loop {
let up_port = enter(inet, port(crnt_node, 0));
let up_node = addr(up_port);
if slot(up_port) == 0 {
return (crnt_node, up_node);
} else {
crnt_node = up_node;
}
}
}
fn compat_tree_to_hvm_tree(inet: &INet, root: NodeId, port_to_var_id: &mut HashMap<Port, VarId>) -> LTree {
let kind = kind(inet, root);
let tag = kind & TAG_MASK;
let label = kind & LABEL_MASK; // TODO: Check if label too high, do something about it.
match tag {
ERA => LTree::Era,
CON => LTree::Nod {
tag: ast::core::CON,
lft: Box::new(var_or_subtree(inet, port(root, 1), port_to_var_id)),
rgt: Box::new(var_or_subtree(inet, port(root, 2), port_to_var_id)),
},
DUP => LTree::Nod {
tag: ast::core::DUP + label as u8,
lft: Box::new(var_or_subtree(inet, port(root, 1), port_to_var_id)),
rgt: Box::new(var_or_subtree(inet, port(root, 2), port_to_var_id)),
},
REF => LTree::Ref { nam: label },
NUM => LTree::Num { val: enter(inet, port(root, 1)) },
NUMOP => todo!(), // TODO: HVM2 doesn't have numeric operator atm.
_ => unreachable!(),
}
}
fn var_or_subtree(inet: &INet, src_port: Port, port_to_var_id: &mut HashMap<Port, VarId>) -> LTree {
let dst_port = enter(inet, src_port);
if slot(dst_port) == 0 {
// Subtree
compat_tree_to_hvm_tree(inet, addr(dst_port), port_to_var_id)
} else {
// Var
if let Some(&var_id) = port_to_var_id.get(&src_port) {
// Previously found var
LTree::Var { nam: var_id_to_name(var_id) }
} else {
// New var
let var_id = port_to_var_id.len() as u32;
port_to_var_id.insert(dst_port, var_id);
LTree::Var { nam: var_id_to_name(var_id) }
}
}
}
fn var_id_to_name(var_id: VarId) -> String {
format!("x{var_id}")
}

View File

@ -1,11 +1,7 @@
mod compat_net;
use self::compat_net::term_to_compat_net;
use crate::ast::{
self,
core::{name_to_u32, LNet},
DefId, Definition, DefinitionBook, Name,
};
use self::compat_net::{compat_net_to_core, term_to_compat_net};
use crate::ast::{self, core::name_to_u32, DefId, Definition, DefinitionBook, Name, Term};
use std::collections::HashMap;
pub fn book_to_hvm_core(book: &DefinitionBook) -> anyhow::Result<ast::core::Book> {
@ -18,15 +14,15 @@ pub fn book_to_hvm_core(book: &DefinitionBook) -> anyhow::Result<ast::core::Book
Ok(core_book)
}
fn definition_to_hvm_core(
pub fn definition_to_hvm_core(
definition: &Definition,
def_to_id: &HashMap<Name, DefId>,
) -> anyhow::Result<ast::core::LNet> {
let term = &definition.rules[0].body;
let compat_net = term_to_compat_net(term, def_to_id)?;
compat_net_to_core(compat_net)
// TODO: Multiple rules, pattern matching, etc.
term_to_hvm_core(&definition.rules[0].body, def_to_id)
}
fn compat_net_to_core(compat_net: compat_net::INet) -> anyhow::Result<LNet> {
todo!()
pub fn term_to_hvm_core(term: &Term, def_to_id: &HashMap<Name, DefId>) -> anyhow::Result<ast::core::LNet> {
let compat_net = term_to_compat_net(term, def_to_id)?;
compat_net_to_core(&compat_net)
}

27
tests/compile_terms.rs Normal file
View File

@ -0,0 +1,27 @@
use hvm_lang::{parser::parse_term, to_core::term_to_hvm_core};
use pretty_assertions::assert_eq;
use std::collections::HashMap;
fn check_single_term_compilation(code: &str, expected: &str) {
let term = parse_term(code).unwrap();
let net = term_to_hvm_core(&term, &HashMap::new()).unwrap();
assert_eq!(net.to_string(), expected)
}
#[test]
fn single_def_terms() {
let terms = [
("id", "λa a", "$ (0 x0 x0)"),
("dup apply", "λa dup a1 a2 = a; (a1 a2)", "$ (0 (1 (0 x0 x1) x0) x1)"),
("erased dup", "λa dup a1 a2 = a; a1", "$ (0 (1 x0 *) x0)"),
("c_z", "λs λz z", "$ (0 * (0 x0 x0))"),
(
"c_s",
"λk λs dup s0 s1 = s; λz (s0 ((k s1) z))",
"$ (0 (0 x0 (0 x1 x2)) (0 (1 (0 x2 x3) x0) (0 x1 x3)))",
),
];
for (_name, code, expected) in terms {
check_single_term_compilation(code, expected)
}
}