Faster Jam/Cue by using heuristics to predict hashtable size.

This commit is contained in:
Benjamin Summers 2019-07-04 15:06:41 -07:00
parent 54dd7a93f5
commit a7bbc9364c
4 changed files with 39 additions and 28 deletions

View File

@ -106,8 +106,9 @@ type Bits = Vector Bool
doGet :: Get a -> ByteString -> Either DecodeExn a doGet :: Get a -> ByteString -> Either DecodeExn a
doGet m bs = doGet m bs =
unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do
-- traceM ("cue size: " <> show (length bs `div` 10))
let endPtr = ptr `plusPtr` len let endPtr = ptr `plusPtr` len
tbl <- H.new -- Sized 1000000 tbl <- H.newSized (length bs `div` 10)
GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0) GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
pure r pure r

View File

@ -6,7 +6,8 @@
{-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-} {-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-}
module Noun.Fat ( FatNoun(..) module Noun.Fat ( FatNoun(..)
, fatHash, fatCell, fatAtom , fatSize, fatHash
, fatCell, fatAtom
, toFatNoun, fromFatNoun , toFatNoun, fromFatNoun
) where ) where
@ -26,6 +27,7 @@ import Noun (Noun(Atom, Cell))
data FatNoun data FatNoun
= FatCell {-# UNPACK #-} !Int = FatCell {-# UNPACK #-} !Int
!Word
!FatNoun !FatNoun
!FatNoun !FatNoun
| FatWord {-# UNPACK #-} !Word | FatWord {-# UNPACK #-} !Word
@ -50,8 +52,8 @@ instance Eq FatNoun where
w1==w2 w1==w2
(FatAtom x1 a1, FatAtom x2 a2 ) -> (FatAtom x1 a1, FatAtom x2 a2 ) ->
x1==x2 && a1==a2 x1==x2 && a1==a2
(FatCell x1 h1 t1, FatCell x2 h2 t2) -> (FatCell x1 s1 h1 t1, FatCell x2 s2 h2 t2) ->
x1==x2 && h1==h2 && t1==t2 s1==s2 && x1==x2 && h1==h2 && t1==t2
(_, _ ) -> (_, _ ) ->
False False
{-# INLINE (==) #-} {-# INLINE (==) #-}
@ -59,12 +61,17 @@ instance Eq FatNoun where
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
fatSize :: FatNoun -> Word
fatSize = \case
FatCell _ s _ _ -> s
_ -> 1
{-# INLINE fatHash #-} {-# INLINE fatHash #-}
fatHash :: FatNoun -> Int fatHash :: FatNoun -> Int
fatHash = \case fatHash = \case
FatCell h _ _ -> h FatCell h _ _ _ -> h
FatAtom h _ -> h FatAtom h _ -> h
FatWord w -> hash w FatWord w -> hash w
{-# INLINE fatAtom #-} {-# INLINE fatAtom #-}
fatAtom :: Atom -> FatNoun fatAtom :: Atom -> FatNoun
@ -74,8 +81,9 @@ fatAtom = \case
{-# INLINE fatCell #-} {-# INLINE fatCell #-}
fatCell :: FatNoun -> FatNoun -> FatNoun fatCell :: FatNoun -> FatNoun -> FatNoun
fatCell h t = FatCell has h t fatCell h t = FatCell has siz h t
where where
siz = fatSize h + fatSize t
has = fatHash h `combine` fatHash t has = fatHash h `combine` fatHash t
{-# INLINE toFatNoun #-} {-# INLINE toFatNoun #-}
@ -89,9 +97,9 @@ toFatNoun = go
fromFatNoun :: FatNoun -> Noun fromFatNoun :: FatNoun -> Noun
fromFatNoun = go fromFatNoun = go
where go = \case where go = \case
FatAtom _ a -> Atom (MkAtom $ NatJ# a) FatAtom _ a -> Atom (MkAtom $ NatJ# a)
FatCell _ h t -> Cell (go h) (go t) FatCell _ _ h t -> Cell (go h) (go t)
FatWord w -> Atom (fromIntegral w) FatWord w -> Atom (fromIntegral w)
-- Stolen from Hashable Library ------------------------------------------------ -- Stolen from Hashable Library ------------------------------------------------

View File

@ -269,9 +269,9 @@ writeNoun :: FatNoun -> Put ()
writeNoun n = writeNoun n =
getRef >>= \case getRef >>= \case
Just bk -> writeBackRef bk Just bk -> writeBackRef bk
Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n) Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n)
FatWord (W# w) -> writeAtom (MkAtom $ NatS# w) FatWord (W# w) -> writeAtom (MkAtom $ NatS# w)
FatCell _ h t -> writeCell h t FatCell _ _ h t -> writeCell h t
{-# INLINE writeMat #-} {-# INLINE writeMat #-}
writeMat :: Atom -> Put () writeMat :: Atom -> Put ()
@ -340,18 +340,24 @@ jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
compress :: FatNoun -> IO (Word, H.CuckooHashTable Word Word) compress :: FatNoun -> IO (Word, H.CuckooHashTable Word Word)
compress top = do compress top = do
-- traceM "<compress>" -- traceM "<compress>"
nodes :: H.BasicHashTable FatNoun Word <- H.new -- Sized 1000000 let sz = 10 ^ (floor $ logBase 600 (fromIntegral $ fatSize top))
backs :: H.CuckooHashTable Word Word <- H.new -- Sized 1000000
-- traceM ("inp(" <> show (fatSize top) <> ")")
-- traceM ("sz(" <> show sz <> ")")
nodes :: H.BasicHashTable FatNoun Word <- H.newSized sz
backs :: H.CuckooHashTable Word Word <- H.newSized sz
let proc :: Word -> FatNoun -> IO Word let proc :: Word -> FatNoun -> IO Word
proc pos = \case proc pos = \case
n@(FatAtom _ a) -> pure $ atomSz (MkAtom (NatJ# a)) n@(FatAtom _ a) -> pure $ atomSz (MkAtom (NatJ# a))
FatWord w -> pure (jamWordSz w) FatWord w -> pure (jamWordSz w)
FatCell _ h t -> do FatCell _ _ h t -> do
!hSz <- go (pos+2) h !hSz <- go (pos+2) h
!tSz <- go (pos+2+hSz) t !tSz <- go (pos+2+hSz) t
pure (2+hSz+tSz) pure (2+hSz+tSz)
go :: Word -> FatNoun -> IO Word go :: Word -> FatNoun -> IO Word
go p inp = do go p inp = do
H.lookup nodes inp >>= \case H.lookup nodes inp >>= \case
@ -363,7 +369,7 @@ compress top = do
doRef = H.insert backs p bak $> rs doRef = H.insert backs p bak $> rs
noRef = proc p inp noRef = proc p inp
case inp of case inp of
FatCell _ _ _ -> doRef FatCell _ _ _ _ -> doRef
FatWord w | rs < atomSz (fromIntegral w) -> doRef FatWord w | rs < atomSz (fromIntegral w) -> doRef
FatAtom _ a | rs < atomSz (MkAtom (NatJ# a)) -> doRef FatAtom _ a | rs < atomSz (MkAtom (NatJ# a)) -> doRef
_ -> noRef _ -> noRef

View File

@ -38,21 +38,17 @@ dumpJam fp = writeFile fp . view (re _CueFatBytes)
tryCuePill :: PillFile -> IO () tryCuePill :: PillFile -> IO ()
tryCuePill pill = tryCuePill pill =
loadNoun (show pill) >>= \case Nothing -> print "nil" loadNoun (show pill) >>= \case Nothing -> print "nil"
Just (FatAtom _ _) -> print "atom" Just (FatAtom _ _) -> print "atom"
Just (FatWord _) -> print "word" Just (FatWord _) -> print "word"
_ -> print "cell" _ -> print "cell"
tryCueJamPill :: PillFile -> IO () tryCueJamPill :: PillFile -> IO ()
tryCueJamPill pill = do tryCueJamPill pill = do
n <- loadNoun (show pill) >>= \case n <- loadNoun (show pill) >>= \case
Nothing -> do print "failure" Nothing -> print "failure" >> pure (FatWord 0)
pure (FatWord 0) Just n@(FatAtom _ _) -> print "atom" >> pure n
Just n@(FatAtom _ _) -> do print "atom" Just n@(FatCell _ _ _ _) -> print "cell" >> pure n
pure n
Just n@(FatCell _ _ _) -> do print "cell"
pure n
bs <- evaluate (force (jamFatBS n)) bs <- evaluate (force (jamFatBS n))