Remove pattern matching passes and tests

This commit is contained in:
Nicolas Abril 2023-10-12 21:44:01 +02:00
parent 6e96263f47
commit cd8a2f4008
14 changed files with 5 additions and 642 deletions

View File

@ -1,234 +0,0 @@
use crate::term::{DefId, DefNames, Definition, DefinitionBook, Name, Pattern, Rule, Term};
use hvmc::Val;
use std::collections::HashSet;
impl DefinitionBook {
/// Splits definitions with nested pattern matching into multiple ones so that the maximum pattern matching depth is 1.
pub fn flatten_rules(&mut self) {
for i in 0 .. self.defs.len() {
let (old_def, mut new_defs) = flatten_def(&self.defs[i], &mut self.def_names);
// Replace the definition with its flattened version and add the extracted rules to the book.
// We add the names for those definitions when they're first defined, so no need to add the names to the book here.
self.defs[i] = old_def;
self.defs.append(&mut new_defs);
}
}
}
/// Flattens a single definition.
/// Returns the original definition but flattened,
/// as well as the new ones that were split from the nested one, all of them also flattened.
fn flatten_def(def: &Definition, def_names: &mut DefNames) -> (Definition, Vec<Definition>) {
let mut new_defs: Vec<Definition> = Vec::new();
// For each extracted nested pattern, we create a new definition with a name based on this counter.
let mut name_count = 0;
// The indices of rules that have already been processed by the flattening step of a previous rule in this def.
let mut skip: HashSet<usize> = HashSet::default();
// The flattened rules we're building for this definition.
// Old refers to the already existing definition, while new is the ones we're extracting from nested patterns.
let mut old_def_rules: Vec<Rule> = Vec::new();
for i in 0 .. def.rules.len() {
if !skip.contains(&i) {
let rule = &def.rules[i];
if must_split(rule) {
let (old_rule, new_def, split_defs) =
split_rule(&def.rules, i, &mut name_count, &mut skip, def_names);
old_def_rules.push(old_rule);
new_defs.push(new_def);
new_defs.extend(split_defs);
} else {
old_def_rules.push(def.rules[i].clone());
}
}
}
let old_def = Definition { def_id: def.def_id, rules: old_def_rules };
(old_def, new_defs)
}
/// Returns true if a rule needs to be split to be made flat.
/// If a rule has nested patterns (matchable pattern inside another matchable pattern) then it must be split.
/// Ex: (Rule (CtrA (CtrB))) has a constructor inside a constructor
/// It must be separated into a new rule (Rule$0 (CtrB)) while changing the old pattern to (Rule (CtrA .x0)).
fn must_split(rule: &Rule) -> bool {
for pat in &rule.pats {
if let Pattern::Ctr(_, args) = &pat {
for arg in args {
if matches!(arg, Pattern::Ctr(..) | Pattern::Num(..)) {
return true;
}
}
}
}
false
}
/// Returns whether when 'a' matches an expression 'b' will also always match together.
fn matches_together(a: &Rule, b: &Rule) -> bool {
let mut a_implies_b = true;
for (a_pat, b_pat) in a.pats.iter().zip(&b.pats) {
match (&a_pat, &b_pat) {
(Pattern::Var(..), _) => (),
// The hvm also has this clause, but it's used to generate extra incorrect rules.
// On the hvm this doesn't matter, but here it would mess with the ADTs.
//(_, Pattern::Var(..)) => b_implies_a = false,
(Pattern::Ctr(an, ..), Pattern::Ctr(bn, ..)) if an == bn => (),
(Pattern::Ctr(..), Pattern::Ctr(..)) => {
a_implies_b = false;
break;
}
(Pattern::Num(a), Pattern::Num(b)) if a == b => (),
(Pattern::Num(..), Pattern::Num(..)) => {
a_implies_b = false;
break;
}
// Other cases are when the pattern types don't match (like constructor with number)
_ => {
a_implies_b = false;
break;
}
}
}
a_implies_b
}
/// Flattens a rule with nested pattern matching.
/// This creates at least one new definition from the top layer of flattening.
/// The new extracted definition might still have nested patterns,
/// so we flatten those until all patterns have only 1 layer of pattern matching.
/// Those recursive flattening steps return a vector of new subdefinitions which are also returned here.
/// If we encounter
fn split_rule(
rules: &[Rule],
rule_idx: usize,
name_count: &mut Val,
skip: &mut HashSet<usize>,
def_names: &mut DefNames,
) -> (Rule, Definition, Vec<Definition>) {
let mut var_count: Val = 0;
let new_def_id = make_split_def_name(rules[rule_idx].def_id, name_count, def_names);
let new_def = make_split_def(rules, rule_idx, new_def_id, &mut var_count, skip);
let old_rule = make_rule_calling_split(&rules[rule_idx], new_def_id);
// Recursively flatten the newly created definition
let (new_def, split_defs) = flatten_def(&new_def, def_names);
(old_rule, new_def, split_defs)
}
/// Generates a unique name for the definition we're extracting from a nested pattern matching of the old definition.
/// Inserts the name in the DefBook's name registry and return its Id.
fn make_split_def_name(old_def_id: DefId, name_count: &mut Val, def_names: &mut DefNames) -> DefId {
let num = *name_count;
*name_count += 1;
let old_def_name = def_names.name(&old_def_id).unwrap();
let new_def_name = Name(format!("{}${}$", old_def_name, num));
def_names.insert(new_def_name)
}
/// Make the definition coming from splitting a rule with nested patterns.
fn make_split_def(
rules: &[Rule],
rule_idx: usize,
new_def_id: DefId,
var_count: &mut Val,
skip: &mut HashSet<usize>,
) -> Definition {
let mut new_def_rules: Vec<Rule> = Vec::new();
let rule = &rules[rule_idx];
// For each rule that also matches with this one, we create a rule in the newly extracted definition.
// We don't look back though, since all previous rules were already taken care of.
for (j, other) in rules.iter().enumerate().skip(rule_idx) {
let matches_together = matches_together(rule, other);
if matches_together {
skip.insert(j);
let mut new_rule_pats = Vec::new();
let mut new_rule_body = other.body.clone();
for (rule_pat, other_pat) in rule.pats.iter().zip(&other.pats) {
match (rule_pat, other_pat) {
(Pattern::Ctr(..), Pattern::Ctr(_, pat_args)) => {
new_rule_pats.extend(pat_args.clone());
}
(Pattern::Ctr(ctr_name, ctr_args), Pattern::Var(opat_name)) => {
let mut new_ctr_args = vec![];
for _ in 0 .. ctr_args.len() {
let new_arg = Pattern::Var(Some(Name(format!(".x{}", var_count))));
*var_count += 1;
new_ctr_args.push(Term::from(&new_arg));
new_rule_pats.push(new_arg);
}
let new_ctr = Term::call(Term::Var { nam: ctr_name.clone() }, new_ctr_args);
if let Some(opat_name) = opat_name {
new_rule_body.subst(opat_name, &new_ctr);
}
}
(Pattern::Var(..), other_pat) => {
new_rule_pats.push(other_pat.clone());
}
(Pattern::Num(..), Pattern::Num(..)) => (),
(Pattern::Num(num), Pattern::Var(name)) => {
if let Some(name) = name {
new_rule_body.subst(name, &Term::Num { val: *num });
}
}
// Not possible since we know it matches
_ => {
panic!("Internal error. Please report.");
}
}
}
let new_rule = Rule { def_id: new_def_id, pats: new_rule_pats, body: new_rule_body };
new_def_rules.push(new_rule);
}
}
debug_assert!(!new_def_rules.is_empty());
Definition { def_id: new_def_id, rules: new_def_rules }
}
/// Make a rule that calls the new split definition to be used in place of the rules with nested patterns.
fn make_rule_calling_split(old_rule: &Rule, new_def_id: DefId) -> Rule {
let mut var_count = 0;
let mut old_rule_pats: Vec<Pattern> = Vec::new();
let mut old_rule_body_args: Vec<Term> = Vec::new();
for pat in &old_rule.pats {
match &pat {
Pattern::Var(name) => {
old_rule_pats.push(pat.clone());
// TODO: Test this
old_rule_body_args.push(Term::Var { nam: name.as_ref().cloned().unwrap_or(Name::new("_")) });
}
Pattern::Ctr(name, args) => {
let mut new_pat_args = Vec::new();
for field in args {
let arg = match &field {
Pattern::Ctr(..) => {
let nam = Name(format!(".x{}", var_count));
var_count += 1;
Pattern::Var(Some(nam))
}
Pattern::Var(..) => field.clone(),
Pattern::Num(..) => {
let nam = Name(format!(".x{}", var_count));
var_count += 1;
Pattern::Var(Some(nam))
}
};
old_rule_body_args.push(Term::from(&arg));
new_pat_args.push(arg);
}
old_rule_pats.push(Pattern::Ctr(name.clone(), new_pat_args));
}
Pattern::Num(..) => {
old_rule_pats.push(pat.clone());
}
}
}
let old_rule_body = Term::call(Term::Ref { def_id: new_def_id }, old_rule_body_args);
Rule { def_id: old_rule.def_id, pats: old_rule_pats, body: old_rule_body }
}

View File

@ -1,71 +0,0 @@
use crate::term::{DefinitionBook, Name};
use itertools::Itertools;
use std::{collections::HashMap, fmt};
pub mod flatten;
pub mod type_inference;
impl DefinitionBook {
/// Checks whether all rules of a definition have the same number of arguments
pub fn check_rule_arities(&self) -> anyhow::Result<()> {
for def in &self.defs {
let expected_arity = def.arity();
// TODO: Return all errors, don't stop at the first one
for rule in &def.rules {
let found_arity = rule.arity();
if expected_arity != found_arity {
return Err(anyhow::anyhow!(
"Inconsistent arity on definition '{}'. Expected {} patterns, found {}",
self.def_names.name(&def.def_id).unwrap(),
expected_arity,
found_arity
));
}
}
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub enum Type {
Any,
Adt(AdtId),
U32,
I32,
}
type AdtId = usize;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Adt {
// Constructor names and their arities
ctrs: HashMap<Name, usize>,
others: bool,
}
impl Adt {
pub fn from_ctr(nam: Name, arity: usize) -> Self {
Adt { ctrs: HashMap::from_iter([(nam, arity)]), others: false }
}
pub fn new() -> Self {
Default::default()
}
}
impl fmt::Display for Adt {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"[{}{}]",
self
.ctrs
.iter()
.sorted()
.map(|(nam, arity)| format!("({}{})", nam, (0 .. *arity).map(|_| format!(" _")).join("")))
.join(", "),
if self.others { ", ..." } else { "" }
)
}
}

View File

@ -1,177 +0,0 @@
use super::{Adt, AdtId, Type};
use crate::term::{DefId, Definition, DefinitionBook, Name, Pattern};
use anyhow::anyhow;
use itertools::Itertools;
use std::collections::HashMap;
type Adts = HashMap<AdtId, Adt>;
type Types = Vec<Vec<Type>>;
impl DefinitionBook {
/// Infers ADTs from the patterns of the rules in a book.
/// Returns the infered type of the patterns of each definition.
/// Returns an error if rules use the types in a inconsistent way.
/// These could be same name in different types, different arities or mixing numbers and ADTs.
/// Precondition: Rules have been flattened, rule arity is correct.
pub fn get_types_from_patterns(&self) -> anyhow::Result<(Adts, Types)> {
// TODO: This algorithm does a lot of unnecessary copying, checking and moving around of data.
// There's a lot of space for optimizing.
// The infered ADTs, possibly shared between multiple defs
let mut adts: HashMap<AdtId, Adt> = HashMap::new();
// The types of each pattern in each def.
let mut types: Vec<Vec<Type>> = vec![];
// Control
let mut pats_using_adt: HashMap<AdtId, Vec<(DefId, usize)>> = HashMap::new();
let mut ctr_name_to_adt: HashMap<Name, AdtId> = HashMap::new();
let mut adt_counter = 0;
for def in &self.defs {
let pat_types = get_types_from_def_patterns(def)?;
// Check if the types in this def share some ctr names with previous types.
// Try to merge them if they do.
for (i, typ) in pat_types.iter().enumerate() {
if let Type::Adt(_) = typ {
let mut crnt_adt = def.get_adt_from_pat(i)?;
eprintln!("crnt_adt: {crnt_adt}");
// Gather the existing types that share constructor names.
// We will try to merge them all together.
let mut to_merge = vec![];
for ctr_name in crnt_adt.ctrs.keys() {
if let Some(old_adt_idx) = ctr_name_to_adt.get(ctr_name) {
to_merge.push(*old_adt_idx);
}
}
if to_merge.is_empty() {
// If nothing to merge, just add the new type and update the control vars
for ctr in crnt_adt.ctrs.keys() {
pats_using_adt.insert(adt_counter, vec![(def.def_id, i)]);
ctr_name_to_adt.insert(ctr.clone(), adt_counter);
}
adts.insert(adt_counter, crnt_adt);
adt_counter += 1;
} else {
// If this adt has to be merged with others, we use the id of the first existing adt to store it.
// We merge all the adts sharing constructor names with our current adt into the one with the first id.
// All the other adts are removed in favor of the merged one that has all the constructors.
// The control variables are updated to now point everything to the merged adt.
let dest_id = to_merge[0];
pats_using_adt.get_mut(&dest_id).unwrap().push((def.def_id, i));
for to_merge in to_merge {
merge_adts(
&mut crnt_adt,
dest_id,
&mut adts,
to_merge,
&mut pats_using_adt,
&mut ctr_name_to_adt,
)?;
}
adts.insert(dest_id, crnt_adt);
}
}
}
types.push(pat_types);
}
// Point the definition types to the correct adts.
for (adt_id, uses) in pats_using_adt {
for (def_id, pat_id) in uses {
if let Type::Adt(type_adt) = &mut types[*def_id as usize][pat_id] {
*type_adt = adt_id;
}
}
}
Ok((adts, types))
}
}
fn get_types_from_def_patterns(def: &Definition) -> anyhow::Result<Vec<Type>> {
let arity = def.arity();
let mut pat_types = vec![];
for pat_idx in 0 .. arity {
let pats = def.rules.iter().map(|x| &x.pats[pat_idx]);
let mut pat_type = Type::Any;
for pat in pats {
if let Pattern::Ctr(..) = pat {
pat_type = Type::Adt(usize::MAX);
}
}
pat_types.push(pat_type);
}
Ok(pat_types)
}
impl Definition {
fn get_adt_from_pat(&self, pat_idx: usize) -> anyhow::Result<Adt> {
let mut adt = Adt::new();
for rule in &self.rules {
match &rule.pats[pat_idx] {
Pattern::Ctr(nam, args) => {
if let Some(expected_arity) = adt.ctrs.get(nam) {
if *expected_arity == self.arity() {
} else {
return Err(anyhow::anyhow!(
"Inconsistent arity used for constructor {nam} across different rules for the same definition argument"
));
}
} else {
adt.ctrs.insert(nam.clone(), args.len());
}
}
Pattern::Var(_) => adt.others = true,
_ => panic!("Expected only Ctr and Var patterns to be called here"),
}
}
Ok(adt)
}
}
fn merge_adts(
this_adt: &mut Adt,
this_id: AdtId,
adts: &mut HashMap<AdtId, Adt>,
other_id: AdtId,
pats_using_adt: &mut HashMap<AdtId, Vec<(DefId, usize)>>,
ctr_name_to_adt: &mut HashMap<Name, AdtId>,
) -> anyhow::Result<()> {
let adts2 = adts.clone();
let other_adt = adts.get_mut(&other_id).unwrap();
// If all constructors of the other ADT are known, check that this ADT doesn't have any extra constructors.
// Also check arity of shared constructors.
for (ctr_name, this_arity) in &this_adt.ctrs {
if let Some(other_arity) = other_adt.ctrs.get(ctr_name) {
if this_arity != other_arity {
return Err(anyhow!(
"Inconsistent arity used for constructor {ctr_name} across different definition arguments"
));
}
} else if !other_adt.others {
eprintln!("Adts: {{{}}}", adts2.iter().map(|(a, b)| format!("{a} => {b}")).join(", "));
return Err(anyhow!(
"Found same constructor {ctr_name} being used in incompatible types {this_adt} and {other_adt}"
));
}
}
// If not all constructors of this ADT are known, move any new constructors from the other ADT to this one.
if this_adt.others {
for (ctr_name, other_arity) in &other_adt.ctrs {
if !other_adt.ctrs.contains_key(ctr_name) {
this_adt.ctrs.insert(ctr_name.clone(), *other_arity);
}
}
}
this_adt.others = this_adt.others && other_adt.others;
if this_id != other_id {
for ctr_name in other_adt.ctrs.keys() {
*ctr_name_to_adt.get_mut(ctr_name).unwrap() = this_id;
}
let mut moved_pats = pats_using_adt.remove(&other_id).unwrap();
pats_using_adt.get_mut(&this_id).unwrap().append(&mut moved_pats);
adts.remove(&other_id);
}
Ok(())
}

View File

@ -80,11 +80,13 @@ fn run_golden_test_dir(test_name: &str, run: &dyn Fn(&Path, &str) -> anyhow::Res
#[test]
fn compile_single_terms() {
run_golden_test_dir(function_name!(), &|_, code| {
let term = parse_term(code).map_err(|errs| {
let mut term = parse_term(code).map_err(|errs| {
let msg = errs.into_iter().map(|e| display_err_for_text(e)).join("\n");
anyhow::anyhow!(msg)
})?;
let term = term.sanitize_vars(&Default::default())?;
term.check_unbound_vars()?;
term.make_var_names_unique();
term.linearize_vars()?;
let net = term_to_hvm_core(&term)?;
Ok(show_lnet(&net))
})
@ -137,43 +139,14 @@ fn readback_lnet() {
#[test]
fn flatten_rules() {
run_golden_test_dir(function_name!(), &|_, code| {
let mut book = parse_definition_book(code).map_err(|errs| {
let book = parse_definition_book(code).map_err(|errs| {
let msg = errs.into_iter().map(|e| display_err_for_text(e)).join("\n");
anyhow::anyhow!(msg)
})?;
book.flatten_rules();
Ok(book.to_string())
})
}
#[test]
fn type_inference() {
run_golden_test_dir(function_name!(), &|_, code| {
let mut book = parse_definition_book(code).map_err(|errs| {
let msg = errs.into_iter().map(|e| display_err_for_text(e)).join("\n");
anyhow::anyhow!(msg)
})?;
book.check_rule_arities()?;
book.flatten_rules();
let (adts, def_types) = book.get_types_from_patterns()?;
let mut out = String::new();
out.push_str("Adts: [\n");
for (adt_id, adt) in adts.iter().sorted_by_key(|x| x.0) {
out.push_str(&format!(" {adt_id} => {adt}\n"));
}
out.push_str("]\nTypes: [\n");
for (def_id, def_types) in def_types.iter().enumerate() {
out.push_str(&format!(
" {} => [{}]\n",
book.def_names.name(&DefId::from(def_id as Val)).unwrap(),
def_types.iter().map(|x| format!("{x:?}")).join(", "),
));
}
out.push_str("]");
Ok(out)
})
}
#[test]
fn error_outputs() {
let _ = miette::set_hook(Box::new(|_| Box::new(miette::JSONReportHandler::new())));

View File

@ -1,19 +0,0 @@
(Rule1) = λx x
(Rule2 a) = λx x
(Rule3 a b c d) = (((a b) c) d)
(Rule4 (CtrA)) = λx x
(Rule4 (CtrB x)) = x
(Rule4 x) = x
(Rule5 (CtrA1 a) b) = (a b)
(Rule5 (CtrA2 a1 a2) b) = ((a1 a2) b)
(Rule5 a (CtrB0)) = a
(Rule5 a (CtrB1 b)) = (a b)
(Rule5 (CtrA3 a) (CtrB3 b)) = (a b)
(Rule5 a b) = (a b)
(Rule6 a) = a
(Rule6 b) = b

View File

@ -1,19 +0,0 @@
Rule1 = λx x
Rule2 a = λx x
Rule3 a b c d = (a b c d)
Rule4 (CtrA) = λx x
Rule4 (CtrB x) = x
Rule4 x = x
Rule5 (CtrA1 a) b = (a b)
Rule5 (CtrA2 a1 a2) b = (a1 a2 b)
Rule5 a (CtrB0) = a
Rule5 a (CtrB1 b) = (a b)
Rule5 (CtrA3 a) (CtrB3 b) = (a b)
Rule5 a b = (a b)
Rule6 a = a
Rule6 b = b

View File

@ -1,11 +0,0 @@
(Data.Bits.dec Data.Bits.e) = Data.Bits.e
(Data.Bits.dec (Data.Bits.o .x0)) = (Data.Bits.dec$0$ .x0)
(Data.Bits.dec (Data.Bits.i .x0)) = (Data.Bits.dec$1$ .x0)
(Data.Bits.dec$0$ (Data.Bits.e)) = Data.Bits.e
(Data.Bits.dec$0$ (Data.Bits.o b.pred)) = (Data.Bits.i (Data.Bits.dec b.pred))
(Data.Bits.dec$0$ (Data.Bits.i b.pred)) = (Data.Bits.i (Data.Bits.dec b.pred))
(Data.Bits.dec$1$ (Data.Bits.e)) = (Data.Bits.o Data.Bits.e)
(Data.Bits.dec$1$ (Data.Bits.o b.pred)) = (Data.Bits.o b.pred)
(Data.Bits.dec$1$ (Data.Bits.i b.pred)) = (Data.Bits.o b.pred)

View File

@ -1,7 +0,0 @@
Data.Bits.dec Data.Bits.e = Data.Bits.e
Data.Bits.dec (Data.Bits.o (Data.Bits.e)) = Data.Bits.e
Data.Bits.dec (Data.Bits.i (Data.Bits.e)) = Data.Bits.o (Data.Bits.e)
Data.Bits.dec (Data.Bits.i (Data.Bits.o b.pred)) = Data.Bits.o b.pred
Data.Bits.dec (Data.Bits.i (Data.Bits.i b.pred)) = Data.Bits.o b.pred
Data.Bits.dec (Data.Bits.o (Data.Bits.o b.pred)) = Data.Bits.i (Data.Bits.dec b.pred)
Data.Bits.dec (Data.Bits.o (Data.Bits.i b.pred)) = Data.Bits.i (Data.Bits.dec b.pred)

View File

@ -1,9 +0,0 @@
(Rule (CtrA a .x0)) = ((Rule$0$ a) .x0)
(Rule (CtrB b)) = b
(Rule x) = x
(Rule$0$ a (CtrB1 b)) = (a b)
(Rule$0$ a (CtrB2 .x0 b)) = (((Rule$0$$0$ a) .x0) b)
(Rule$0$ a b) = (a b)
(Rule$0$$0$ a (CtrC) b) = (a b)

View File

@ -1,5 +0,0 @@
(Rule (CtrA a (CtrB1 b))) = (a b)
(Rule (CtrA a (CtrB2 (CtrC) b))) = (a b)
(Rule (CtrA a b)) = (a b)
(Rule (CtrB b)) = b
(Rule x) = x

View File

@ -1 +0,0 @@
Found same constructor True being used in incompatible types [(False), (True)] and [(False)]

View File

@ -1,31 +0,0 @@
Not (True) = False
Not (False) = True
Not2 (False) = True
Not2 x = False
And (False) b = False
And (True) b = b
Or (False) b = b
Or (True) b = True
//Or2 (True) b = True
//Or2 a (True) = True
Xor (True) (False) = True
Xor (False) (True) = True
Xor a b = False
Xor2 (True) b = Not b
Xor2 (False) b = b
Xnor a b = Not (Xor a b)
If (True) t e = t
If (False) t e = e
If2 cond = cond
//Andc (True) (False) = True
//Andc a b = False

View File

@ -1,15 +0,0 @@
Adts: [
0 => [(Ctr0)]
1 => [(Ctr1 _)]
2 => [(Ctr2 _ _)]
3 => [(Ctr3 _ _ _)]
4 => [(Ctr4 _ _ _ _)]
]
Types: [
Id => [Any]
Record0 => [Adt(0)]
Record1 => [Adt(1)]
Record2 => [Adt(2)]
Record3 => [Adt(3)]
Record4 => [Adt(4)]
]

View File

@ -1,11 +0,0 @@
Id x = x
Record0 (Ctr0) = λx x
Record1 (Ctr1 a) = a
Record2 (Ctr2 a b) = (a b)
Record3 (Ctr3 a b c) = (a b c)
Record4 (Ctr4 a b c d) = (a b c d)