Faster Jam/Cue by using heuristics to predict hashtable size.

This commit is contained in:
Benjamin Summers 2019-07-04 15:06:41 -07:00
parent 54dd7a93f5
commit a7bbc9364c
4 changed files with 39 additions and 28 deletions

View File

@ -106,8 +106,9 @@ type Bits = Vector Bool
doGet :: Get a -> ByteString -> Either DecodeExn a
doGet m bs =
unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do
-- traceM ("cue size: " <> show (length bs `div` 10))
let endPtr = ptr `plusPtr` len
tbl <- H.new -- Sized 1000000
tbl <- H.newSized (length bs `div` 10)
GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
pure r

View File

@ -6,7 +6,8 @@
{-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-}
module Noun.Fat ( FatNoun(..)
, fatHash, fatCell, fatAtom
, fatSize, fatHash
, fatCell, fatAtom
, toFatNoun, fromFatNoun
) where
@ -26,6 +27,7 @@ import Noun (Noun(Atom, Cell))
data FatNoun
= FatCell {-# UNPACK #-} !Int
!Word
!FatNoun
!FatNoun
| FatWord {-# UNPACK #-} !Word
@ -50,8 +52,8 @@ instance Eq FatNoun where
w1==w2
(FatAtom x1 a1, FatAtom x2 a2 ) ->
x1==x2 && a1==a2
(FatCell x1 h1 t1, FatCell x2 h2 t2) ->
x1==x2 && h1==h2 && t1==t2
(FatCell x1 s1 h1 t1, FatCell x2 s2 h2 t2) ->
s1==s2 && x1==x2 && h1==h2 && t1==t2
(_, _ ) ->
False
{-# INLINE (==) #-}
@ -59,10 +61,15 @@ instance Eq FatNoun where
--------------------------------------------------------------------------------
fatSize :: FatNoun -> Word
fatSize = \case
FatCell _ s _ _ -> s
_ -> 1
{-# INLINE fatHash #-}
fatHash :: FatNoun -> Int
fatHash = \case
FatCell h _ _ -> h
FatCell h _ _ _ -> h
FatAtom h _ -> h
FatWord w -> hash w
@ -74,8 +81,9 @@ fatAtom = \case
{-# INLINE fatCell #-}
fatCell :: FatNoun -> FatNoun -> FatNoun
fatCell h t = FatCell has h t
fatCell h t = FatCell has siz h t
where
siz = fatSize h + fatSize t
has = fatHash h `combine` fatHash t
{-# INLINE toFatNoun #-}
@ -90,7 +98,7 @@ fromFatNoun :: FatNoun -> Noun
fromFatNoun = go
where go = \case
FatAtom _ a -> Atom (MkAtom $ NatJ# a)
FatCell _ h t -> Cell (go h) (go t)
FatCell _ _ h t -> Cell (go h) (go t)
FatWord w -> Atom (fromIntegral w)

View File

@ -271,7 +271,7 @@ writeNoun n =
Just bk -> writeBackRef bk
Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n)
FatWord (W# w) -> writeAtom (MkAtom $ NatS# w)
FatCell _ h t -> writeCell h t
FatCell _ _ h t -> writeCell h t
{-# INLINE writeMat #-}
writeMat :: Atom -> Put ()
@ -340,18 +340,24 @@ jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
compress :: FatNoun -> IO (Word, H.CuckooHashTable Word Word)
compress top = do
-- traceM "<compress>"
nodes :: H.BasicHashTable FatNoun Word <- H.new -- Sized 1000000
backs :: H.CuckooHashTable Word Word <- H.new -- Sized 1000000
let sz = 10 ^ (floor $ logBase 600 (fromIntegral $ fatSize top))
-- traceM ("inp(" <> show (fatSize top) <> ")")
-- traceM ("sz(" <> show sz <> ")")
nodes :: H.BasicHashTable FatNoun Word <- H.newSized sz
backs :: H.CuckooHashTable Word Word <- H.newSized sz
let proc :: Word -> FatNoun -> IO Word
proc pos = \case
n@(FatAtom _ a) -> pure $ atomSz (MkAtom (NatJ# a))
FatWord w -> pure (jamWordSz w)
FatCell _ h t -> do
FatCell _ _ h t -> do
!hSz <- go (pos+2) h
!tSz <- go (pos+2+hSz) t
pure (2+hSz+tSz)
go :: Word -> FatNoun -> IO Word
go p inp = do
H.lookup nodes inp >>= \case
@ -363,7 +369,7 @@ compress top = do
doRef = H.insert backs p bak $> rs
noRef = proc p inp
case inp of
FatCell _ _ _ -> doRef
FatCell _ _ _ _ -> doRef
FatWord w | rs < atomSz (fromIntegral w) -> doRef
FatAtom _ a | rs < atomSz (MkAtom (NatJ# a)) -> doRef
_ -> noRef

View File

@ -45,14 +45,10 @@ tryCuePill pill =
tryCueJamPill :: PillFile -> IO ()
tryCueJamPill pill = do
n <- loadNoun (show pill) >>= \case
Nothing -> do print "failure"
pure (FatWord 0)
Just n@(FatAtom _ _) -> do print "atom"
pure n
Just n@(FatCell _ _ _) -> do print "cell"
pure n
Nothing -> print "failure" >> pure (FatWord 0)
Just n@(FatAtom _ _) -> print "atom" >> pure n
Just n@(FatCell _ _ _ _) -> print "cell" >> pure n
bs <- evaluate (force (jamFatBS n))