Add automatic map resizing (#1071)

* core: add map resizing

* add set resizing

* do not shrink if under dflt-size
This commit is contained in:
Veit Heller 2020-12-19 22:20:52 +01:00 committed by GitHub
parent 5f0ae6819e
commit 5ad1d227f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 151 additions and 87 deletions

View File

@ -7,7 +7,7 @@
(Map.put! &m &i &1))))
(defn insert-collisions []
(let [m (Map.create-with-len 1)]
(let [m {}]
(for [i 0 100]
(Map.put! &m &1 &1))))
@ -23,11 +23,6 @@
(defn retrieve []
(Map.get &m &10))
(defn setup-big-map-collisions []
(do
(set! m (Map.create-with-len 1))
(setup-big-map)))
(defn map-tests []
(do
(println "Testing single map insert:")
@ -42,10 +37,6 @@
(setup-big-map)
(println "Testing map retrieval:")
(bench retrieve)
(println "")
(setup-big-map-collisions)
(println "Testing map retrieval with maximum collisions:")
(bench retrieve)
(println "")))
@ -55,7 +46,7 @@
(Set.put! &m &i))))
(defn insert-set-collisions []
(let [m (Set.create-with-len 1)]
(let [m (Set.create)]
(for [i 0 100]
(Set.put! &m &i))))
@ -71,11 +62,6 @@
(defn contains-set []
(Set.contains? &s &10))
(defn setup-big-set-collisions []
(do
(set! s (Set.create-with-len 1))
(setup-big-set)))
(defn set-tests []
(do
(println "Testing single set insert:")
@ -90,10 +76,6 @@
(setup-big-set)
(println "Testing set contains:")
(bench contains-set)
(println "")
(setup-big-set-collisions)
(println "Testing set contains with maximum collisions:")
(bench contains-set)
(println "")))
(defn main []

View File

@ -175,34 +175,72 @@
b))
)
(deftype (Map a b) [n-buckets Int buckets (Array (Bucket a b))])
(deftype (Map a b) [len Int n-buckets Int buckets (Array (Bucket a b))])
(doc Map
"is a hashmap datatype, i.e. a key-value structure. It allows you to put in "
"pairs of keys and values and look them up by key."
""
"Implementation notes: it is a dense double-array-backed structure that"
"grows and shrinks based on load.")
(defmodule Map
(hidden dflt-len)
(def dflt-len 256)
(doc min-load "The load a map needs to reach in order to shrink.")
(def min-load 20)
(doc max-load "The load a map needs to reach in order to grow.")
(def max-load 80)
(doc create "Create an empty map.")
(defn create []
(init dflt-len (Array.repeat dflt-len &Bucket.empty)))
(init 0 dflt-len (Array.repeat dflt-len &Bucket.empty)))
(doc create-with-len "Create an empty map with a given number of buckets. High numbers reduce the possibility of hash collisions while increasing the memory footprint.")
(defn create-with-len [len]
(init len (Array.repeat len &Bucket.empty)))
(doc put "Put a a value v into map m, using the key k.")
(defn put [m k v]
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))]
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.put @n k v)))))))
(doc put! "Put a a value v into map m, using the key k, in place.")
(defn put! [m k v]
(let [idx (Int.positive-mod (hash k) @(n-buckets m))
b (buckets m)
n (Array.unsafe-nth b idx)]
(private put-!)
(hidden put-!)
(defn put-! [m k v]
(let-do [idx (Int.positive-mod (hash k) @(n-buckets m))
b (buckets m)
n (Array.unsafe-nth b idx)]
(set-len! m (Int.inc @(len m)))
(Bucket.put! n k v)))
(doc resize "Resize a map `m` to size `s`.")
(defn resize [m s]
(let-do [n (init 0 s (Array.repeat s &Bucket.empty))]
(for [i 0 @(n-buckets &m)]
(let [bucket (Array.unsafe-nth (buckets &m) i)
len (Array.length (Bucket.entries bucket))
entries (Bucket.entries bucket)]
(for [j 0 len]
(let [e (Array.unsafe-nth entries j)]
(put-! &n (Pair.a e) (Pair.b e))))))
n))
(doc grow "Grow a map `m`. Should usually be handled automatically.")
(defn grow [m] (resize m (* @(n-buckets &m) 2)))
(doc shrink "Shrink a map `m`. Should usually be handled automatically.")
(defn shrink [m]
(let [new-size (/ @(n-buckets &m) 2)]
(if (< new-size dflt-len)
m
(resize m new-size))))
(doc put "Put a value v into map m, using the key k.")
(defn put [m k v]
(if (> (/ (* @(len &m) 100) @(n-buckets &m)) min-load)
(put (grow m) k v)
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))]
(update-len
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.put @n k v)))))
&Int.inc))))
(doc put! "Put a value v into map m, using the key k, in place.")
(defn put! [m k v]
(put-! m k v))
(doc get-with-default "Get the value for the key k from map m. If it isnt found, the default is returned.")
(defn get-with-default [m k default-value]
(let [idx (Int.positive-mod (hash k) @(n-buckets m))]
@ -240,14 +278,11 @@
(doc length "Get the length of the map m.")
(defn length [m]
(let-do [c 0]
(for [i 0 @(n-buckets m)]
(set! c (+ c (Array.length (Bucket.entries (Array.unsafe-nth (buckets m) i))))))
c))
@(len m))
(doc empty? "Check whether the map m is empty.")
(defn empty? [m]
(= (length m) 0))
(= @(len m) 0))
(doc contains? "Check whether the map m contains the key k.")
(defn contains? [m k]
@ -256,10 +291,14 @@
(doc remove "Remove the value under the key k from the map m.")
(defn remove [m k]
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))]
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.shrink @n k)))))))
(if (> (/ (* @(len &m) 100) @(n-buckets &m)) min-load)
(remove (shrink m) k)
(let [idx (Int.positive-mod (hash k) @(n-buckets &m))]
(update-len
(update-buckets m &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (Bucket.shrink @n k)))))
&Int.dec))))
(doc all? "Do all key-value pairs pass the given predicate (of two arguments)?")
(defn all? [pred m]
@ -334,7 +373,7 @@
(let [e (Array.unsafe-nth a i)
k (Pair.a e)
v (Pair.b e)]
(set! m (put m k v))))
(put! &m k v)))
m))
(doc to-array "Convert Map to Array of Pairs")
@ -392,62 +431,105 @@
@b))
)
(deftype (Set a) [n-buckets Int buckets (Array (SetBucket a))])
(deftype (Set a) [len Int n-buckets Int buckets (Array (SetBucket a))])
(doc Set
"is a hashset datatype, i.e. a unique list structure. It allows you to put "
"in values and guarantees uniqueness."
""
"Implementation notes: it is a dense double-array-backed structure that"
"grows and shrinks based on load, similar to `Map`.")
(defmodule Set
(hidden dflt-len)
(def dflt-len 256)
(doc min-load "The load a set needs to reach in order to shrink.")
(def min-load 20)
(doc max-load "The load a set needs to reach in order to grow.")
(def max-load 80)
(doc create "Create an empty set.")
(defn create []
(init dflt-len (Array.repeat dflt-len &SetBucket.empty)))
(init 0 dflt-len (Array.repeat dflt-len &SetBucket.empty)))
(doc create-with-len "Create an empty set with a given number of buckets. Higher numbers decrease the probability of hash collisions while increasing the memory footprint.")
(defn create-with-len [len]
(init len (Array.repeat len &SetBucket.empty)))
(private put-!)
(hidden put-!)
(defn put-! [s v]
(let-do [idx (Int.positive-mod (hash v) @(n-buckets s))
b (buckets s)
n (Array.unsafe-nth b idx)]
(set-len! s (Int.inc @(len s)))
(SetBucket.push-back! n @v)))
(doc put "Put a a key k into the set s.")
(defn put [s k]
(let [idx (Int.positive-mod (hash k) @(n-buckets &s))
;; The lifetime system really doesn't like this function, had to put in a bunch of copying to make it compile:
]
(update-buckets s &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(if (SetBucket.contains? n k)
b
(let [new-k @k] ;; HACK!
(Array.aset b idx (SetBucket.grow n new-k)))))))))
(doc resize "Resize a set `s` to size `size`.")
(defn resize [s size]
(let-do [n (init 0 size (Array.repeat size &SetBucket.empty))]
(for [i 0 @(n-buckets &s)]
(let [bucket (Array.unsafe-nth (buckets &s) i)
len (Array.length (SetBucket.entries bucket))
entries (SetBucket.entries bucket)]
(for [j 0 len]
(let [e (Array.unsafe-nth entries j)]
(put-! &n e)))))
n))
(doc put! "Put a a key k into the set s, in place.")
(defn put! [s k]
(let [idx (Int.positive-mod (hash k) @(n-buckets s))
b (buckets s)
n (Array.unsafe-nth b idx)]
(when (not (SetBucket.contains? n k))
(SetBucket.push-back! n @k))))
(doc grow "Grow a set `s`. Should usually be handled automatically.")
(defn grow [s] (resize s (* @(n-buckets &s) 2)))
(doc shrink "Shrink a set `s`. Should usually be handled automatically.")
(defn shrink [s]
(let [new-size (/ @(n-buckets &s) 2)]
(if (< new-size dflt-len)
s
(resize s new-size))))
(doc contains? "Check whether the set `s` contains the value `v`.")
(defn contains? [s v]
(let [idx (Int.positive-mod (hash v) @(n-buckets s))]
(SetBucket.contains? (Array.unsafe-nth (buckets s) idx) v)))
(doc put "Put a value `v` into the set `s`.")
(defn put [s v]
(cond
(contains? &s v)
s
(> (/ (* @(len &s) 100) @(n-buckets &s)) min-load)
(put (grow s) v)
(let [idx (Int.positive-mod (hash v) @(n-buckets &s))
;; The lifetime system really doesn't like this function, had to put in a bunch of copying to make it compile:
]
(update-len
(update-buckets s &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(let [new-k @v] ;; HACK!
(Array.aset b idx (SetBucket.grow n new-k))))))
&Int.inc))))
(doc put! "Put a value `v` into the set `s`, in place.")
(defn put! [s v]
(unless (contains? s v)
(put-! s v)))
(doc length "Get the length of set s.")
(defn length [s]
(let-do [c 0]
(for [i 0 @(n-buckets s)]
(set! c (+ c (Array.length (SetBucket.entries (Array.unsafe-nth (buckets s) i))))))
c))
@(len s))
(doc empty? "Check whether the set s is empty.")
(defn empty? [s]
(= (length s) 0))
(doc contains? "Check whether the set s contains the key k.")
(defn contains? [s k]
(let [idx (Int.positive-mod (hash k) @(n-buckets s))]
(SetBucket.contains? (Array.unsafe-nth (buckets s) idx) k)))
(doc remove "Remove the key k from the set s.")
(defn remove [s k]
(let [idx (Int.positive-mod (hash k) @(n-buckets &s))]
(update-buckets s &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (SetBucket.shrink n k)))))))
(doc remove "Remove the value `v` from the set `s`.")
(defn remove [s v]
(cond
(not (contains? &s v))
s
(> (/ (* @(len &s) 100) @(n-buckets &s)) min-load)
(remove (shrink s) v)
(let [idx (Int.positive-mod (hash v) @(n-buckets &s))]
(update-len
(update-buckets s &(fn [b]
(let [n (Array.unsafe-nth &b idx)]
(Array.aset b idx (SetBucket.shrink n v)))))
&Int.dec))))
(doc all? "Does the predicate hold for all values in this set?")
(defn all? [pred set]
@ -484,7 +566,7 @@
(let-do [s (create)]
(for [i 0 (Array.length a)]
(let [e (Array.unsafe-nth a i)]
(set! s (put s e))))
(put! &s e)))
s))
(doc reduce "Reduce values of the set with function f. Order of reduction is not guaranteed")