Add automatic map resizing (#1071)

* core: add map resizing

* add set resizing

* do not shrink if under dflt-size
This commit is contained in:
Veit Heller 2020-12-19 22:20:52 +01:00 committed by GitHub
parent 5f0ae6819e
commit 5ad1d227f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 151 additions and 87 deletions

View File

@ -7,7 +7,7 @@
(Map.put! &m &i &1)))) (Map.put! &m &i &1))))
(defn insert-collisions [] (defn insert-collisions []
(let [m (Map.create-with-len 1)] (let [m {}]
(for [i 0 100] (for [i 0 100]
(Map.put! &m &1 &1)))) (Map.put! &m &1 &1))))
@ -23,11 +23,6 @@
(defn retrieve [] (defn retrieve []
(Map.get &m &10)) (Map.get &m &10))
(defn setup-big-map-collisions []
(do
(set! m (Map.create-with-len 1))
(setup-big-map)))
(defn map-tests [] (defn map-tests []
(do (do
(println "Testing single map insert:") (println "Testing single map insert:")
@ -42,10 +37,6 @@
(setup-big-map) (setup-big-map)
(println "Testing map retrieval:") (println "Testing map retrieval:")
(bench retrieve) (bench retrieve)
(println "")
(setup-big-map-collisions)
(println "Testing map retrieval with maximum collisions:")
(bench retrieve)
(println ""))) (println "")))
@ -55,7 +46,7 @@
(Set.put! &m &i)))) (Set.put! &m &i))))
(defn insert-set-collisions [] (defn insert-set-collisions []
(let [m (Set.create-with-len 1)] (let [m (Set.create)]
(for [i 0 100] (for [i 0 100]
(Set.put! &m &i)))) (Set.put! &m &i))))
@ -71,11 +62,6 @@
(defn contains-set [] (defn contains-set []
(Set.contains? &s &10)) (Set.contains? &s &10))
(defn setup-big-set-collisions []
(do
(set! s (Set.create-with-len 1))
(setup-big-set)))
(defn set-tests [] (defn set-tests []
(do (do
(println "Testing single set insert:") (println "Testing single set insert:")
@ -90,10 +76,6 @@
(setup-big-set) (setup-big-set)
(println "Testing set contains:") (println "Testing set contains:")
(bench contains-set) (bench contains-set)
(println "")
(setup-big-set-collisions)
(println "Testing set contains with maximum collisions:")
(bench contains-set)
(println ""))) (println "")))
(defn main [] (defn main []

View File

@ -175,34 +175,72 @@
b)) b))
) )
(deftype (Map a b) [n-buckets Int buckets (Array (Bucket a b))]) (deftype (Map a b) [len Int n-buckets Int buckets (Array (Bucket a b))])
(doc Map
"is a hashmap datatype, i.e. a key-value structure. It allows you to put in "
"pairs of keys and values and look them up by key."
""
"Implementation notes: it is a dense double-array-backed structure that"
"grows and shrinks based on load.")
(defmodule Map (defmodule Map
(hidden dflt-len) (hidden dflt-len)
(def dflt-len 256) (def dflt-len 256)
(doc min-load "The load a map needs to reach in order to shrink.")
(def min-load 20)
(doc max-load "The load a map needs to reach in order to grow.")
(def max-load 80)
(doc create "Create an empty map.") (doc create "Create an empty map.")
(defn create [] (defn create []
(init dflt-len (Array.repeat dflt-len &Bucket.empty))) (init 0 dflt-len (Array.repeat dflt-len &Bucket.empty)))
(doc create-with-len "Create an empty map with a given number of buckets. High numbers reduce the possibility of hash collisions while increasing the memory footprint.") (private put-!)
(defn create-with-len [len] (hidden put-!)
(init len (Array.repeat len &Bucket.empty))) (defn put-! [m k v]
(let-do [idx (Int.positive-mod (hash k) @(n-buckets m))
(doc put "Put a a value v into map m, using the key k.") b (buckets m)
(defn put [m k v] n (Array.unsafe-nth b idx)]
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))] (set-len! m (Int.inc @(len m)))
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.put @n k v)))))))
(doc put! "Put a a value v into map m, using the key k, in place.")
(defn put! [m k v]
(let [idx (Int.positive-mod (hash k) @(n-buckets m))
b (buckets m)
n (Array.unsafe-nth b idx)]
(Bucket.put! n k v))) (Bucket.put! n k v)))
(doc resize "Resize a map `m` to size `s`.")
(defn resize [m s]
(let-do [n (init 0 s (Array.repeat s &Bucket.empty))]
(for [i 0 @(n-buckets &m)]
(let [bucket (Array.unsafe-nth (buckets &m) i)
len (Array.length (Bucket.entries bucket))
entries (Bucket.entries bucket)]
(for [j 0 len]
(let [e (Array.unsafe-nth entries j)]
(put-! &n (Pair.a e) (Pair.b e))))))
n))
(doc grow "Grow a map `m`. Should usually be handled automatically.")
(defn grow [m] (resize m (* @(n-buckets &m) 2)))
(doc shrink "Shrink a map `m`. Should usually be handled automatically.")
(defn shrink [m]
(let [new-size (/ @(n-buckets &m) 2)]
(if (< new-size dflt-len)
m
(resize m new-size))))
(doc put "Put a value v into map m, using the key k.")
(defn put [m k v]
(if (> (/ (* @(len &m) 100) @(n-buckets &m)) min-load)
(put (grow m) k v)
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))]
(update-len
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.put @n k v)))))
&Int.inc))))
(doc put! "Put a value v into map m, using the key k, in place.")
(defn put! [m k v]
(put-! m k v))
(doc get-with-default "Get the value for the key k from map m. If it isnt found, the default is returned.") (doc get-with-default "Get the value for the key k from map m. If it isnt found, the default is returned.")
(defn get-with-default [m k default-value] (defn get-with-default [m k default-value]
(let [idx (Int.positive-mod (hash k) @(n-buckets m))] (let [idx (Int.positive-mod (hash k) @(n-buckets m))]
@ -240,14 +278,11 @@
(doc length "Get the length of the map m.") (doc length "Get the length of the map m.")
(defn length [m] (defn length [m]
(let-do [c 0] @(len m))
(for [i 0 @(n-buckets m)]
(set! c (+ c (Array.length (Bucket.entries (Array.unsafe-nth (buckets m) i))))))
c))
(doc empty? "Check whether the map m is empty.") (doc empty? "Check whether the map m is empty.")
(defn empty? [m] (defn empty? [m]
(= (length m) 0)) (= @(len m) 0))
(doc contains? "Check whether the map m contains the key k.") (doc contains? "Check whether the map m contains the key k.")
(defn contains? [m k] (defn contains? [m k]
@ -256,10 +291,14 @@
(doc remove "Remove the value under the key k from the map m.") (doc remove "Remove the value under the key k from the map m.")
(defn remove [m k] (defn remove [m k]
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))] (if (> (/ (* @(len &m) 100) @(n-buckets &m)) min-load)
(update-buckets m &(fn [b] (remove (shrink m) k)
(let [n (Array.unsafe-nth &b idx)] (let [idx (Int.positive-mod (hash k) @(n-buckets &m))]
(Array.aset b idx (Bucket.shrink @n k))))))) (update-len
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.shrink @n k)))))
&Int.dec))))
(doc all? "Do all key-value pairs pass the given predicate (of two arguments)?") (doc all? "Do all key-value pairs pass the given predicate (of two arguments)?")
(defn all? [pred m] (defn all? [pred m]
@ -334,7 +373,7 @@
(let [e (Array.unsafe-nth a i) (let [e (Array.unsafe-nth a i)
k (Pair.a e) k (Pair.a e)
v (Pair.b e)] v (Pair.b e)]
(set! m (put m k v)))) (put! &m k v)))
m)) m))
(doc to-array "Convert Map to Array of Pairs") (doc to-array "Convert Map to Array of Pairs")
@ -392,62 +431,105 @@
@b)) @b))
) )
(deftype (Set a) [n-buckets Int buckets (Array (SetBucket a))]) (deftype (Set a) [len Int n-buckets Int buckets (Array (SetBucket a))])
(doc Set
"is a hashset datatype, i.e. a unique list structure. It allows you to put "
"in values and guarantees uniqueness."
""
"Implementation notes: it is a dense double-array-backed structure that"
"grows and shrinks based on load, similar to `Map`.")
(defmodule Set (defmodule Set
(hidden dflt-len) (hidden dflt-len)
(def dflt-len 256) (def dflt-len 256)
(doc min-load "The load a set needs to reach in order to shrink.")
(def min-load 20)
(doc max-load "The load a set needs to reach in order to grow.")
(def max-load 80)
(doc create "Create an empty set.") (doc create "Create an empty set.")
(defn create [] (defn create []
(init dflt-len (Array.repeat dflt-len &SetBucket.empty))) (init 0 dflt-len (Array.repeat dflt-len &SetBucket.empty)))
(doc create-with-len "Create an empty set with a given number of buckets. Higher numbers decrease the probability of hash collisions while increasing the memory footprint.") (private put-!)
(defn create-with-len [len] (hidden put-!)
(init len (Array.repeat len &SetBucket.empty))) (defn put-! [s v]
(let-do [idx (Int.positive-mod (hash v) @(n-buckets s))
b (buckets s)
n (Array.unsafe-nth b idx)]
(set-len! s (Int.inc @(len s)))
(SetBucket.push-back! n @v)))
(doc put "Put a a key k into the set s.") (doc resize "Resize a set `s` to size `size`.")
(defn put [s k] (defn resize [s size]
(let [idx (Int.positive-mod (hash k) @(n-buckets &s)) (let-do [n (init 0 size (Array.repeat size &SetBucket.empty))]
;; The lifetime system really doesn't like this function, had to put in a bunch of copying to make it compile: (for [i 0 @(n-buckets &s)]
] (let [bucket (Array.unsafe-nth (buckets &s) i)
(update-buckets s &(fn [b] len (Array.length (SetBucket.entries bucket))
(let [n (Array.unsafe-nth &b idx)] entries (SetBucket.entries bucket)]
(if (SetBucket.contains? n k) (for [j 0 len]
b (let [e (Array.unsafe-nth entries j)]
(let [new-k @k] ;; HACK! (put-! &n e)))))
(Array.aset b idx (SetBucket.grow n new-k))))))))) n))
(doc put! "Put a a key k into the set s, in place.") (doc grow "Grow a set `s`. Should usually be handled automatically.")
(defn put! [s k] (defn grow [s] (resize s (* @(n-buckets &s) 2)))
(let [idx (Int.positive-mod (hash k) @(n-buckets s)) (doc shrink "Shrink a set `s`. Should usually be handled automatically.")
b (buckets s) (defn shrink [s]
n (Array.unsafe-nth b idx)] (let [new-size (/ @(n-buckets &s) 2)]
(when (not (SetBucket.contains? n k)) (if (< new-size dflt-len)
(SetBucket.push-back! n @k)))) s
(resize s new-size))))
(doc contains? "Check whether the set `s` contains the value `v`.")
(defn contains? [s v]
(let [idx (Int.positive-mod (hash v) @(n-buckets s))]
(SetBucket.contains? (Array.unsafe-nth (buckets s) idx) v)))
(doc put "Put a value `v` into the set `s`.")
(defn put [s v]
(cond
(contains? &s v)
s
(> (/ (* @(len &s) 100) @(n-buckets &s)) min-load)
(put (grow s) v)
(let [idx (Int.positive-mod (hash v) @(n-buckets &s))
;; The lifetime system really doesn't like this function, had to put in a bunch of copying to make it compile:
]
(update-len
(update-buckets s &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(let [new-k @v] ;; HACK!
(Array.aset b idx (SetBucket.grow n new-k))))))
&Int.inc))))
(doc put! "Put a value `v` into the set `s`, in place.")
(defn put! [s v]
(unless (contains? s v)
(put-! s v)))
(doc length "Get the length of set s.") (doc length "Get the length of set s.")
(defn length [s] (defn length [s]
(let-do [c 0] @(len s))
(for [i 0 @(n-buckets s)]
(set! c (+ c (Array.length (SetBucket.entries (Array.unsafe-nth (buckets s) i))))))
c))
(doc empty? "Check whether the set s is empty.") (doc empty? "Check whether the set s is empty.")
(defn empty? [s] (defn empty? [s]
(= (length s) 0)) (= (length s) 0))
(doc contains? "Check whether the set s contains the key k.") (doc remove "Remove the value `v` from the set `s`.")
(defn contains? [s k] (defn remove [s v]
(let [idx (Int.positive-mod (hash k) @(n-buckets s))] (cond
(SetBucket.contains? (Array.unsafe-nth (buckets s) idx) k))) (not (contains? &s v))
s
(doc remove "Remove the key k from the set s.") (> (/ (* @(len &s) 100) @(n-buckets &s)) min-load)
(defn remove [s k] (remove (shrink s) v)
(let [idx (Int.positive-mod (hash k) @(n-buckets &s))] (let [idx (Int.positive-mod (hash v) @(n-buckets &s))]
(update-buckets s &(fn [b] (update-len
(let [n (Array.unsafe-nth &b idx)] (update-buckets s &(fn [b]
(Array.aset b idx (SetBucket.shrink n k))))))) (let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (SetBucket.shrink n v)))))
&Int.dec))))
(doc all? "Does the predicate hold for all values in this set?") (doc all? "Does the predicate hold for all values in this set?")
(defn all? [pred set] (defn all? [pred set]
@ -484,7 +566,7 @@
(let-do [s (create)] (let-do [s (create)]
(for [i 0 (Array.length a)] (for [i 0 (Array.length a)]
(let [e (Array.unsafe-nth a i)] (let [e (Array.unsafe-nth a i)]
(set! s (put s e)))) (put! &s e)))
s)) s))
(doc reduce "Reduce values of the set with function f. Order of reduction is not guaranteed") (doc reduce "Reduce values of the set with function f. Order of reduction is not guaranteed")