mirror of
https://github.com/urbit/shrub.git
synced 2024-12-25 13:04:17 +03:00
Faster Jam/Cue by using heuristics to predict hashtable size.
This commit is contained in:
parent
54dd7a93f5
commit
a7bbc9364c
@ -106,8 +106,9 @@ type Bits = Vector Bool
|
|||||||
doGet :: Get a -> ByteString -> Either DecodeExn a
|
doGet :: Get a -> ByteString -> Either DecodeExn a
|
||||||
doGet m bs =
|
doGet m bs =
|
||||||
unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do
|
unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do
|
||||||
|
-- traceM ("cue size: " <> show (length bs `div` 10))
|
||||||
let endPtr = ptr `plusPtr` len
|
let endPtr = ptr `plusPtr` len
|
||||||
tbl <- H.new -- Sized 1000000
|
tbl <- H.newSized (length bs `div` 10)
|
||||||
GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
|
GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
|
||||||
pure r
|
pure r
|
||||||
|
|
||||||
|
@ -6,7 +6,8 @@
|
|||||||
{-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-}
|
{-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-}
|
||||||
|
|
||||||
module Noun.Fat ( FatNoun(..)
|
module Noun.Fat ( FatNoun(..)
|
||||||
, fatHash, fatCell, fatAtom
|
, fatSize, fatHash
|
||||||
|
, fatCell, fatAtom
|
||||||
, toFatNoun, fromFatNoun
|
, toFatNoun, fromFatNoun
|
||||||
) where
|
) where
|
||||||
|
|
||||||
@ -26,6 +27,7 @@ import Noun (Noun(Atom, Cell))
|
|||||||
|
|
||||||
data FatNoun
|
data FatNoun
|
||||||
= FatCell {-# UNPACK #-} !Int
|
= FatCell {-# UNPACK #-} !Int
|
||||||
|
!Word
|
||||||
!FatNoun
|
!FatNoun
|
||||||
!FatNoun
|
!FatNoun
|
||||||
| FatWord {-# UNPACK #-} !Word
|
| FatWord {-# UNPACK #-} !Word
|
||||||
@ -50,8 +52,8 @@ instance Eq FatNoun where
|
|||||||
w1==w2
|
w1==w2
|
||||||
(FatAtom x1 a1, FatAtom x2 a2 ) ->
|
(FatAtom x1 a1, FatAtom x2 a2 ) ->
|
||||||
x1==x2 && a1==a2
|
x1==x2 && a1==a2
|
||||||
(FatCell x1 h1 t1, FatCell x2 h2 t2) ->
|
(FatCell x1 s1 h1 t1, FatCell x2 s2 h2 t2) ->
|
||||||
x1==x2 && h1==h2 && t1==t2
|
s1==s2 && x1==x2 && h1==h2 && t1==t2
|
||||||
(_, _ ) ->
|
(_, _ ) ->
|
||||||
False
|
False
|
||||||
{-# INLINE (==) #-}
|
{-# INLINE (==) #-}
|
||||||
@ -59,10 +61,15 @@ instance Eq FatNoun where
|
|||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
fatSize :: FatNoun -> Word
|
||||||
|
fatSize = \case
|
||||||
|
FatCell _ s _ _ -> s
|
||||||
|
_ -> 1
|
||||||
|
|
||||||
{-# INLINE fatHash #-}
|
{-# INLINE fatHash #-}
|
||||||
fatHash :: FatNoun -> Int
|
fatHash :: FatNoun -> Int
|
||||||
fatHash = \case
|
fatHash = \case
|
||||||
FatCell h _ _ -> h
|
FatCell h _ _ _ -> h
|
||||||
FatAtom h _ -> h
|
FatAtom h _ -> h
|
||||||
FatWord w -> hash w
|
FatWord w -> hash w
|
||||||
|
|
||||||
@ -74,8 +81,9 @@ fatAtom = \case
|
|||||||
|
|
||||||
{-# INLINE fatCell #-}
|
{-# INLINE fatCell #-}
|
||||||
fatCell :: FatNoun -> FatNoun -> FatNoun
|
fatCell :: FatNoun -> FatNoun -> FatNoun
|
||||||
fatCell h t = FatCell has h t
|
fatCell h t = FatCell has siz h t
|
||||||
where
|
where
|
||||||
|
siz = fatSize h + fatSize t
|
||||||
has = fatHash h `combine` fatHash t
|
has = fatHash h `combine` fatHash t
|
||||||
|
|
||||||
{-# INLINE toFatNoun #-}
|
{-# INLINE toFatNoun #-}
|
||||||
@ -90,7 +98,7 @@ fromFatNoun :: FatNoun -> Noun
|
|||||||
fromFatNoun = go
|
fromFatNoun = go
|
||||||
where go = \case
|
where go = \case
|
||||||
FatAtom _ a -> Atom (MkAtom $ NatJ# a)
|
FatAtom _ a -> Atom (MkAtom $ NatJ# a)
|
||||||
FatCell _ h t -> Cell (go h) (go t)
|
FatCell _ _ h t -> Cell (go h) (go t)
|
||||||
FatWord w -> Atom (fromIntegral w)
|
FatWord w -> Atom (fromIntegral w)
|
||||||
|
|
||||||
|
|
||||||
|
@ -271,7 +271,7 @@ writeNoun n =
|
|||||||
Just bk -> writeBackRef bk
|
Just bk -> writeBackRef bk
|
||||||
Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n)
|
Nothing -> case n of FatAtom _ n -> writeAtom (MkAtom $ NatJ# n)
|
||||||
FatWord (W# w) -> writeAtom (MkAtom $ NatS# w)
|
FatWord (W# w) -> writeAtom (MkAtom $ NatS# w)
|
||||||
FatCell _ h t -> writeCell h t
|
FatCell _ _ h t -> writeCell h t
|
||||||
|
|
||||||
{-# INLINE writeMat #-}
|
{-# INLINE writeMat #-}
|
||||||
writeMat :: Atom -> Put ()
|
writeMat :: Atom -> Put ()
|
||||||
@ -340,18 +340,24 @@ jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
|
|||||||
compress :: FatNoun -> IO (Word, H.CuckooHashTable Word Word)
|
compress :: FatNoun -> IO (Word, H.CuckooHashTable Word Word)
|
||||||
compress top = do
|
compress top = do
|
||||||
-- traceM "<compress>"
|
-- traceM "<compress>"
|
||||||
nodes :: H.BasicHashTable FatNoun Word <- H.new -- Sized 1000000
|
let sz = 10 ^ (floor $ logBase 600 (fromIntegral $ fatSize top))
|
||||||
backs :: H.CuckooHashTable Word Word <- H.new -- Sized 1000000
|
|
||||||
|
-- traceM ("inp(" <> show (fatSize top) <> ")")
|
||||||
|
-- traceM ("sz(" <> show sz <> ")")
|
||||||
|
|
||||||
|
nodes :: H.BasicHashTable FatNoun Word <- H.newSized sz
|
||||||
|
backs :: H.CuckooHashTable Word Word <- H.newSized sz
|
||||||
|
|
||||||
let proc :: Word -> FatNoun -> IO Word
|
let proc :: Word -> FatNoun -> IO Word
|
||||||
proc pos = \case
|
proc pos = \case
|
||||||
n@(FatAtom _ a) -> pure $ atomSz (MkAtom (NatJ# a))
|
n@(FatAtom _ a) -> pure $ atomSz (MkAtom (NatJ# a))
|
||||||
FatWord w -> pure (jamWordSz w)
|
FatWord w -> pure (jamWordSz w)
|
||||||
FatCell _ h t -> do
|
FatCell _ _ h t -> do
|
||||||
!hSz <- go (pos+2) h
|
!hSz <- go (pos+2) h
|
||||||
!tSz <- go (pos+2+hSz) t
|
!tSz <- go (pos+2+hSz) t
|
||||||
pure (2+hSz+tSz)
|
pure (2+hSz+tSz)
|
||||||
|
|
||||||
|
|
||||||
go :: Word -> FatNoun -> IO Word
|
go :: Word -> FatNoun -> IO Word
|
||||||
go p inp = do
|
go p inp = do
|
||||||
H.lookup nodes inp >>= \case
|
H.lookup nodes inp >>= \case
|
||||||
@ -363,7 +369,7 @@ compress top = do
|
|||||||
doRef = H.insert backs p bak $> rs
|
doRef = H.insert backs p bak $> rs
|
||||||
noRef = proc p inp
|
noRef = proc p inp
|
||||||
case inp of
|
case inp of
|
||||||
FatCell _ _ _ -> doRef
|
FatCell _ _ _ _ -> doRef
|
||||||
FatWord w | rs < atomSz (fromIntegral w) -> doRef
|
FatWord w | rs < atomSz (fromIntegral w) -> doRef
|
||||||
FatAtom _ a | rs < atomSz (MkAtom (NatJ# a)) -> doRef
|
FatAtom _ a | rs < atomSz (MkAtom (NatJ# a)) -> doRef
|
||||||
_ -> noRef
|
_ -> noRef
|
||||||
|
@ -45,14 +45,10 @@ tryCuePill pill =
|
|||||||
|
|
||||||
tryCueJamPill :: PillFile -> IO ()
|
tryCueJamPill :: PillFile -> IO ()
|
||||||
tryCueJamPill pill = do
|
tryCueJamPill pill = do
|
||||||
|
|
||||||
n <- loadNoun (show pill) >>= \case
|
n <- loadNoun (show pill) >>= \case
|
||||||
Nothing -> do print "failure"
|
Nothing -> print "failure" >> pure (FatWord 0)
|
||||||
pure (FatWord 0)
|
Just n@(FatAtom _ _) -> print "atom" >> pure n
|
||||||
Just n@(FatAtom _ _) -> do print "atom"
|
Just n@(FatCell _ _ _ _) -> print "cell" >> pure n
|
||||||
pure n
|
|
||||||
Just n@(FatCell _ _ _) -> do print "cell"
|
|
||||||
pure n
|
|
||||||
|
|
||||||
bs <- evaluate (force (jamFatBS n))
|
bs <- evaluate (force (jamFatBS n))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user