mirror of
https://github.com/urbit/shrub.git
synced 2024-12-24 11:24:21 +03:00
Faster Jam/Cue by using heuristics to predict hashtable size.
This commit is contained in:
parent
54dd7a93f5
commit
a7bbc9364c
@ -106,8 +106,9 @@ type Bits = Vector Bool
|
||||
doGet :: Get a -> ByteString -> Either DecodeExn a
|
||||
doGet m bs =
|
||||
unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do
|
||||
-- traceM ("cue size: " <> show (length bs `div` 10))
|
||||
let endPtr = ptr `plusPtr` len
|
||||
tbl <- H.new -- Sized 1000000
|
||||
tbl <- H.newSized (length bs `div` 10)
|
||||
GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
|
||||
pure r
|
||||
|
||||
|
@ -6,7 +6,8 @@
|
||||
{-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-}
|
||||
|
||||
module Noun.Fat ( FatNoun(..)
|
||||
, fatHash, fatCell, fatAtom
|
||||
, fatSize, fatHash
|
||||
, fatCell, fatAtom
|
||||
, toFatNoun, fromFatNoun
|
||||
) where
|
||||
|
||||
@ -26,6 +27,7 @@ import Noun (Noun(Atom, Cell))
|
||||
|
||||
data FatNoun
|
||||
= FatCell {-# UNPACK #-} !Int
|
||||
!Word
|
||||
!FatNoun
|
||||
!FatNoun
|
||||
| FatWord {-# UNPACK #-} !Word
|
||||
@ -50,8 +52,8 @@ instance Eq FatNoun where
|
||||
w1==w2
|
||||
(FatAtom x1 a1, FatAtom x2 a2 ) ->
|
||||
x1==x2 && a1==a2
|
||||
(FatCell x1 h1 t1, FatCell x2 h2 t2) ->
|
||||
x1==x2 && h1==h2 && t1==t2
|
||||
(FatCell x1 s1 h1 t1, FatCell x2 s2 h2 t2) ->
|
||||
s1==s2 && x1==x2 && h1==h2 && t1==t2
|
||||
(_, _ ) ->
|
||||
False
|
||||
{-# INLINE (==) #-}
|
||||
@ -59,12 +61,17 @@ instance Eq FatNoun where
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
fatSize :: FatNoun -> Word
|
||||
fatSize = \case
|
||||
FatCell _ s _ _ -> s
|
||||
_ -> 1
|
||||
|
||||
{-# INLINE fatHash #-}
|
||||
fatHash :: FatNoun -> Int
|
||||
fatHash = \case
|
||||
FatCell h _ _ -> h
|
||||
FatAtom h _ -> h
|
||||
FatWord w -> hash w
|
||||
FatCell h _ _ _ -> h
|
||||
FatAtom h _ -> h
|
||||
FatWord w -> hash w
|
||||
|
||||
{-# INLINE fatAtom #-}
|
||||
fatAtom :: Atom -> FatNoun
|
||||
@ -74,8 +81,9 @@ fatAtom = \case
|
||||
|
||||
{-# INLINE fatCell #-}
|
||||
fatCell :: FatNoun -> FatNoun -> FatNoun
|
||||
fatCell h t = FatCell has h t
|
||||
fatCell h t = FatCell has siz h t
|
||||
where
|
||||
siz = fatSize h + fatSize t
|
||||
has = fatHash h `combine` fatHash t
|
||||
|
||||
{-# INLINE toFatNoun #-}
|
||||
@ -89,9 +97,9 @@ toFatNoun = go
|
||||
fromFatNoun :: FatNoun -> Noun
|
||||
fromFatNoun = go
|
||||
where go = \case
|
||||
FatAtom _ a -> Atom (MkAtom $ NatJ# a)
|
||||
FatCell _ h t -> Cell (go h) (go t)
|
||||
FatWord w -> Atom (fromIntegral w)
|
||||
FatAtom _ a -> Atom (MkAtom $ NatJ# a)
|
||||
FatCell _ _ h t -> Cell (go h) (go t)
|
||||
FatWord w -> Atom (fromIntegral w)
|
||||
|
||||
|
||||
-- Stolen from Hashable Library ------------------------------------------------
|
||||
|
@ -269,9 +269,9 @@ writeNoun :: FatNoun -> Put ()
|
||||
writeNoun n =
|
||||
getRef >>= \case
|
||||
Just bk -> writeBackRef bk
|
||||
Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n)
|
||||
FatWord (W# w) -> writeAtom (MkAtom $ NatS# w)
|
||||
FatCell _ h t -> writeCell h t
|
||||
Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n)
|
||||
FatWord (W# w) -> writeAtom (MkAtom $ NatS# w)
|
||||
FatCell _ _ h t -> writeCell h t
|
||||
|
||||
{-# INLINE writeMat #-}
|
||||
writeMat :: Atom -> Put ()
|
||||
@ -340,18 +340,24 @@ jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
|
||||
compress :: FatNoun -> IO (Word, H.CuckooHashTable Word Word)
|
||||
compress top = do
|
||||
-- traceM "<compress>"
|
||||
nodes :: H.BasicHashTable FatNoun Word <- H.new -- Sized 1000000
|
||||
backs :: H.CuckooHashTable Word Word <- H.new -- Sized 1000000
|
||||
let sz = 10 ^ (floor $ logBase 600 (fromIntegral $ fatSize top))
|
||||
|
||||
-- traceM ("inp(" <> show (fatSize top) <> ")")
|
||||
-- traceM ("sz(" <> show sz <> ")")
|
||||
|
||||
nodes :: H.BasicHashTable FatNoun Word <- H.newSized sz
|
||||
backs :: H.CuckooHashTable Word Word <- H.newSized sz
|
||||
|
||||
let proc :: Word -> FatNoun -> IO Word
|
||||
proc pos = \case
|
||||
n@(FatAtom _ a) -> pure $ atomSz (MkAtom (NatJ# a))
|
||||
FatWord w -> pure (jamWordSz w)
|
||||
FatCell _ h t -> do
|
||||
FatCell _ _ h t -> do
|
||||
!hSz <- go (pos+2) h
|
||||
!tSz <- go (pos+2+hSz) t
|
||||
pure (2+hSz+tSz)
|
||||
|
||||
|
||||
go :: Word -> FatNoun -> IO Word
|
||||
go p inp = do
|
||||
H.lookup nodes inp >>= \case
|
||||
@ -363,7 +369,7 @@ compress top = do
|
||||
doRef = H.insert backs p bak $> rs
|
||||
noRef = proc p inp
|
||||
case inp of
|
||||
FatCell _ _ _ -> doRef
|
||||
FatCell _ _ _ _ -> doRef
|
||||
FatWord w | rs < atomSz (fromIntegral w) -> doRef
|
||||
FatAtom _ a | rs < atomSz (MkAtom (NatJ# a)) -> doRef
|
||||
_ -> noRef
|
||||
|
@ -38,21 +38,17 @@ dumpJam fp = writeFile fp . view (re _CueFatBytes)
|
||||
|
||||
tryCuePill :: PillFile -> IO ()
|
||||
tryCuePill pill =
|
||||
loadNoun (show pill) >>= \case Nothing -> print "nil"
|
||||
loadNoun (show pill) >>= \case Nothing -> print "nil"
|
||||
Just (FatAtom _ _) -> print "atom"
|
||||
Just (FatWord _) -> print "word"
|
||||
_ -> print "cell"
|
||||
Just (FatWord _) -> print "word"
|
||||
_ -> print "cell"
|
||||
|
||||
tryCueJamPill :: PillFile -> IO ()
|
||||
tryCueJamPill pill = do
|
||||
|
||||
n <- loadNoun (show pill) >>= \case
|
||||
Nothing -> do print "failure"
|
||||
pure (FatWord 0)
|
||||
Just n@(FatAtom _ _) -> do print "atom"
|
||||
pure n
|
||||
Just n@(FatCell _ _ _) -> do print "cell"
|
||||
pure n
|
||||
Nothing -> print "failure" >> pure (FatWord 0)
|
||||
Just n@(FatAtom _ _) -> print "atom" >> pure n
|
||||
Just n@(FatCell _ _ _ _) -> print "cell" >> pure n
|
||||
|
||||
bs <- evaluate (force (jamFatBS n))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user