diff --git a/bench/map.carp b/bench/map.carp index 93105214..96f559a3 100644 --- a/bench/map.carp +++ b/bench/map.carp @@ -7,7 +7,7 @@ (Map.put! &m &i &1)))) (defn insert-collisions [] - (let [m (Map.create-with-len 1)] + (let [m {}] (for [i 0 100] (Map.put! &m &1 &1)))) @@ -23,11 +23,6 @@ (defn retrieve [] (Map.get &m &10)) -(defn setup-big-map-collisions [] - (do - (set! m (Map.create-with-len 1)) - (setup-big-map))) - (defn map-tests [] (do (println "Testing single map insert:") @@ -42,10 +37,6 @@ (setup-big-map) (println "Testing map retrieval:") (bench retrieve) - (println "") - (setup-big-map-collisions) - (println "Testing map retrieval with maximum collisions:") - (bench retrieve) (println ""))) @@ -55,7 +46,7 @@ (Set.put! &m &i)))) (defn insert-set-collisions [] - (let [m (Set.create-with-len 1)] + (let [m (Set.create)] (for [i 0 100] (Set.put! &m &i)))) @@ -71,11 +62,6 @@ (defn contains-set [] (Set.contains? &s &10)) -(defn setup-big-set-collisions [] - (do - (set! s (Set.create-with-len 1)) - (setup-big-set))) - (defn set-tests [] (do (println "Testing single set insert:") @@ -90,10 +76,6 @@ (setup-big-set) (println "Testing set contains:") (bench contains-set) - (println "") - (setup-big-set-collisions) - (println "Testing set contains with maximum collisions:") - (bench contains-set) (println ""))) (defn main [] diff --git a/core/Map.carp b/core/Map.carp index 7f879e57..6bc8058e 100644 --- a/core/Map.carp +++ b/core/Map.carp @@ -175,34 +175,72 @@ b)) ) -(deftype (Map a b) [n-buckets Int buckets (Array (Bucket a b))]) +(deftype (Map a b) [len Int n-buckets Int buckets (Array (Bucket a b))]) +(doc Map + "is a hashmap datatype, i.e. a key-value structure. It allows you to put in " + "pairs of keys and values and look them up by key." + "" + "Implementation notes: it is a dense double-array-backed structure that" + "grows and shrinks based on load.") (defmodule Map (hidden dflt-len) (def dflt-len 256) + (doc min-load "The load a map needs to reach in order to shrink.") + (def min-load 20) + (doc max-load "The load a map needs to reach in order to grow.") + (def max-load 80) + (doc create "Create an empty map.") (defn create [] - (init dflt-len (Array.repeat dflt-len &Bucket.empty))) + (init 0 dflt-len (Array.repeat dflt-len &Bucket.empty))) - (doc create-with-len "Create an empty map with a given number of buckets. High numbers reduce the possibility of hash collisions while increasing the memory footprint.") - (defn create-with-len [len] - (init len (Array.repeat len &Bucket.empty))) - - (doc put "Put a a value v into map m, using the key k.") - (defn put [m k v] - (let [idx (Int.positive-mod (hash k) @(n-buckets &m))] - (update-buckets m &(fn [b] - (let [n (Array.unsafe-nth &b idx)] - (Array.aset b idx (Bucket.put @n k v))))))) - - (doc put! "Put a a value v into map m, using the key k, in place.") - (defn put! [m k v] - (let [idx (Int.positive-mod (hash k) @(n-buckets m)) - b (buckets m) - n (Array.unsafe-nth b idx)] + (private put-!) + (hidden put-!) + (defn put-! [m k v] + (let-do [idx (Int.positive-mod (hash k) @(n-buckets m)) + b (buckets m) + n (Array.unsafe-nth b idx)] + (set-len! m (Int.inc @(len m))) (Bucket.put! n k v))) + (doc resize "Resize a map `m` to size `s`.") + (defn resize [m s] + (let-do [n (init 0 s (Array.repeat s &Bucket.empty))] + (for [i 0 @(n-buckets &m)] + (let [bucket (Array.unsafe-nth (buckets &m) i) + len (Array.length (Bucket.entries bucket)) + entries (Bucket.entries bucket)] + (for [j 0 len] + (let [e (Array.unsafe-nth entries j)] + (put-! &n (Pair.a e) (Pair.b e)))))) + n)) + + (doc grow "Grow a map `m`. Should usually be handled automatically.") + (defn grow [m] (resize m (* @(n-buckets &m) 2))) + (doc shrink "Shrink a map `m`. Should usually be handled automatically.") + (defn shrink [m] + (let [new-size (/ @(n-buckets &m) 2)] + (if (< new-size dflt-len) + m + (resize m new-size)))) + + (doc put "Put a value v into map m, using the key k.") + (defn put [m k v] + (if (> (/ (* @(len &m) 100) @(n-buckets &m)) min-load) + (put (grow m) k v) + (let [idx (Int.positive-mod (hash k) @(n-buckets &m))] + (update-len + (update-buckets m &(fn [b] + (let [n (Array.unsafe-nth &b idx)] + (Array.aset b idx (Bucket.put @n k v))))) + &Int.inc)))) + + (doc put! "Put a value v into map m, using the key k, in place.") + (defn put! [m k v] + (put-! m k v)) + (doc get-with-default "Get the value for the key k from map m. If it isn’t found, the default is returned.") (defn get-with-default [m k default-value] (let [idx (Int.positive-mod (hash k) @(n-buckets m))] @@ -240,14 +278,11 @@ (doc length "Get the length of the map m.") (defn length [m] - (let-do [c 0] - (for [i 0 @(n-buckets m)] - (set! c (+ c (Array.length (Bucket.entries (Array.unsafe-nth (buckets m) i)))))) - c)) + @(len m)) (doc empty? "Check whether the map m is empty.") (defn empty? [m] - (= (length m) 0)) + (= @(len m) 0)) (doc contains? "Check whether the map m contains the key k.") (defn contains? [m k] @@ -256,10 +291,14 @@ (doc remove "Remove the value under the key k from the map m.") (defn remove [m k] - (let [idx (Int.positive-mod (hash k) @(n-buckets &m))] - (update-buckets m &(fn [b] - (let [n (Array.unsafe-nth &b idx)] - (Array.aset b idx (Bucket.shrink @n k))))))) + (if (> (/ (* @(len &m) 100) @(n-buckets &m)) min-load) + (remove (shrink m) k) + (let [idx (Int.positive-mod (hash k) @(n-buckets &m))] + (update-len + (update-buckets m &(fn [b] + (let [n (Array.unsafe-nth &b idx)] + (Array.aset b idx (Bucket.shrink @n k))))) + &Int.dec)))) (doc all? "Do all key-value pairs pass the given predicate (of two arguments)?") (defn all? [pred m] @@ -334,7 +373,7 @@ (let [e (Array.unsafe-nth a i) k (Pair.a e) v (Pair.b e)] - (set! m (put m k v)))) + (put! &m k v))) m)) (doc to-array "Convert Map to Array of Pairs") @@ -392,62 +431,105 @@ @b)) ) -(deftype (Set a) [n-buckets Int buckets (Array (SetBucket a))]) +(deftype (Set a) [len Int n-buckets Int buckets (Array (SetBucket a))]) +(doc Set + "is a hashset datatype, i.e. a unique list structure. It allows you to put " + "in values and guarantees uniqueness." + "" + "Implementation notes: it is a dense double-array-backed structure that" + "grows and shrinks based on load, similar to `Map`.") (defmodule Set (hidden dflt-len) (def dflt-len 256) + (doc min-load "The load a set needs to reach in order to shrink.") + (def min-load 20) + (doc max-load "The load a set needs to reach in order to grow.") + (def max-load 80) + (doc create "Create an empty set.") (defn create [] - (init dflt-len (Array.repeat dflt-len &SetBucket.empty))) + (init 0 dflt-len (Array.repeat dflt-len &SetBucket.empty))) - (doc create-with-len "Create an empty set with a given number of buckets. Higher numbers decrease the probability of hash collisions while increasing the memory footprint.") - (defn create-with-len [len] - (init len (Array.repeat len &SetBucket.empty))) + (private put-!) + (hidden put-!) + (defn put-! [s v] + (let-do [idx (Int.positive-mod (hash v) @(n-buckets s)) + b (buckets s) + n (Array.unsafe-nth b idx)] + (set-len! s (Int.inc @(len s))) + (SetBucket.push-back! n @v))) - (doc put "Put a a key k into the set s.") - (defn put [s k] - (let [idx (Int.positive-mod (hash k) @(n-buckets &s)) - ;; The lifetime system really doesn't like this function, had to put in a bunch of copying to make it compile: - ] - (update-buckets s &(fn [b] - (let [n (Array.unsafe-nth &b idx)] - (if (SetBucket.contains? n k) - b - (let [new-k @k] ;; HACK! - (Array.aset b idx (SetBucket.grow n new-k))))))))) + (doc resize "Resize a set `s` to size `size`.") + (defn resize [s size] + (let-do [n (init 0 size (Array.repeat size &SetBucket.empty))] + (for [i 0 @(n-buckets &s)] + (let [bucket (Array.unsafe-nth (buckets &s) i) + len (Array.length (SetBucket.entries bucket)) + entries (SetBucket.entries bucket)] + (for [j 0 len] + (let [e (Array.unsafe-nth entries j)] + (put-! &n e))))) + n)) - (doc put! "Put a a key k into the set s, in place.") - (defn put! [s k] - (let [idx (Int.positive-mod (hash k) @(n-buckets s)) - b (buckets s) - n (Array.unsafe-nth b idx)] - (when (not (SetBucket.contains? n k)) - (SetBucket.push-back! n @k)))) + (doc grow "Grow a set `s`. Should usually be handled automatically.") + (defn grow [s] (resize s (* @(n-buckets &s) 2))) + (doc shrink "Shrink a set `s`. Should usually be handled automatically.") + (defn shrink [s] + (let [new-size (/ @(n-buckets &s) 2)] + (if (< new-size dflt-len) + s + (resize s new-size)))) + + (doc contains? "Check whether the set `s` contains the value `v`.") + (defn contains? [s v] + (let [idx (Int.positive-mod (hash v) @(n-buckets s))] + (SetBucket.contains? (Array.unsafe-nth (buckets s) idx) v))) + + (doc put "Put a value `v` into the set `s`.") + (defn put [s v] + (cond + (contains? &s v) + s + (> (/ (* @(len &s) 100) @(n-buckets &s)) min-load) + (put (grow s) v) + (let [idx (Int.positive-mod (hash v) @(n-buckets &s)) + ;; The lifetime system really doesn't like this function, had to put in a bunch of copying to make it compile: + ] + (update-len + (update-buckets s &(fn [b] + (let [n (Array.unsafe-nth &b idx)] + (let [new-k @v] ;; HACK! + (Array.aset b idx (SetBucket.grow n new-k)))))) + &Int.inc)))) + + (doc put! "Put a value `v` into the set `s`, in place.") + (defn put! [s v] + (unless (contains? s v) + (put-! s v))) (doc length "Get the length of set s.") (defn length [s] - (let-do [c 0] - (for [i 0 @(n-buckets s)] - (set! c (+ c (Array.length (SetBucket.entries (Array.unsafe-nth (buckets s) i)))))) - c)) + @(len s)) (doc empty? "Check whether the set s is empty.") (defn empty? [s] (= (length s) 0)) - (doc contains? "Check whether the set s contains the key k.") - (defn contains? [s k] - (let [idx (Int.positive-mod (hash k) @(n-buckets s))] - (SetBucket.contains? (Array.unsafe-nth (buckets s) idx) k))) - - (doc remove "Remove the key k from the set s.") - (defn remove [s k] - (let [idx (Int.positive-mod (hash k) @(n-buckets &s))] - (update-buckets s &(fn [b] - (let [n (Array.unsafe-nth &b idx)] - (Array.aset b idx (SetBucket.shrink n k))))))) + (doc remove "Remove the value `v` from the set `s`.") + (defn remove [s v] + (cond + (not (contains? &s v)) + s + (> (/ (* @(len &s) 100) @(n-buckets &s)) min-load) + (remove (shrink s) v) + (let [idx (Int.positive-mod (hash v) @(n-buckets &s))] + (update-len + (update-buckets s &(fn [b] + (let [n (Array.unsafe-nth &b idx)] + (Array.aset b idx (SetBucket.shrink n v))))) + &Int.dec)))) (doc all? "Does the predicate hold for all values in this set?") (defn all? [pred set] @@ -484,7 +566,7 @@ (let-do [s (create)] (for [i 0 (Array.length a)] (let [e (Array.unsafe-nth a i)] - (set! s (put s e)))) + (put! &s e))) s)) (doc reduce "Reduce values of the set with function f. Order of reduction is not guaranteed")