mirror of
https://github.com/urbit/shrub.git
synced 2024-12-20 17:32:11 +03:00
Jam: much smarter preJam pass.
This commit is contained in:
parent
d445c1cbb1
commit
89b2cccae7
@ -7,8 +7,9 @@ import Data.Bits
|
||||
import Control.Lens
|
||||
import Text.Printf
|
||||
|
||||
import Data.Map (Map)
|
||||
import Control.Monad (guard)
|
||||
import Data.Map (Map)
|
||||
import Text.Printf (printf)
|
||||
|
||||
import Test.Tasty
|
||||
import Test.Tasty.TH
|
||||
@ -169,11 +170,11 @@ cue buf = view _2 <$> go mempty 0
|
||||
case (bitIdx i buf, bitIdx (i+1) buf) of
|
||||
(False, _ ) -> do Buf wid at <- rub (Cursor (i+1) buf)
|
||||
let r = Atom at
|
||||
pure (1+wid, r, insertMap i r tbl)
|
||||
pure (1+wid, r, trace (show ('c', i, r)) $ insertMap i r tbl)
|
||||
(True, False ) -> do (lSz,lef,tbl) <- go tbl (i+2)
|
||||
(rSz,rit,tbl) <- go tbl (i+2+fromIntegral lSz)
|
||||
let r = Cell lef rit
|
||||
pure (2+lSz+rSz, r, insertMap i r tbl)
|
||||
pure (2+lSz+rSz, r, trace (show ('c', i, r)) $ insertMap i r tbl)
|
||||
(True, True ) -> do Buf wid at <- rub (Cursor (i+2) buf)
|
||||
r <- lookup (fromIntegral at) tbl & \case
|
||||
Nothing -> error ("bad-ref-" <> show at)
|
||||
@ -203,38 +204,45 @@ pills = [ 0x2, 0xc, 0x48, 0x29, 0xc9, 0x299
|
||||
, 0x3170_c7c1, 0x93_c7c1, 0xa_72e0, 0x1bd5_b7dd_e080
|
||||
]
|
||||
|
||||
cueTest :: Maybe [Noun]
|
||||
cueTest = traverse cue pills
|
||||
-- cueTest :: Maybe [Noun]
|
||||
-- cueTest = traverse cue pills
|
||||
|
||||
jamTest :: Maybe [Atom]
|
||||
jamTest = fmap jam <$> cueTest
|
||||
-- jamTest :: Maybe [Atom]
|
||||
-- jamTest = fmap jam <$> cueTest
|
||||
|
||||
prop_fastMatSlow :: Atom -> Bool
|
||||
prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a)
|
||||
-- prop_fastMatSlow :: Atom -> Bool
|
||||
-- prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a)
|
||||
|
||||
prop_fastJamSlow :: Noun -> Bool
|
||||
prop_fastJamSlow n = jam n == Fast.jam n
|
||||
-- prop_fastJamSlow :: Noun -> Bool
|
||||
-- prop_fastJamSlow n = jam n == Fast.jam n
|
||||
|
||||
prop_fastJam :: Noun -> Bool
|
||||
prop_fastJam n = Just n == cue (Fast.jam n)
|
||||
|
||||
prop_jamCue :: Noun -> Bool
|
||||
prop_jamCue n = Just n == cue (jam n)
|
||||
-- prop_jamCue :: Noun -> Bool
|
||||
-- prop_jamCue n = Just n == cue (jam n)
|
||||
|
||||
prop_matRub :: Atom -> Bool
|
||||
prop_matRub atm = matSz==rubSz && rubRes==atm
|
||||
where
|
||||
Buf matSz matBuf = mat atm
|
||||
Buf rubSz rubRes = fromMaybe mempty (rub $ Cursor 0 matBuf)
|
||||
-- prop_matRub :: Atom -> Bool
|
||||
-- prop_matRub atm = matSz==rubSz && rubRes==atm
|
||||
-- where
|
||||
-- Buf matSz matBuf = mat atm
|
||||
-- Buf rubSz rubRes = fromMaybe mempty (rub $ Cursor 0 matBuf)
|
||||
|
||||
prop_jamCue' :: Noun -> Bool
|
||||
prop_jamCue' n = Just n == cue' (jam' n)
|
||||
-- prop_jamCue' :: Noun -> Bool
|
||||
-- prop_jamCue' n = Just n == cue' (jam' n)
|
||||
|
||||
prop_matRub' :: Atom -> Bool
|
||||
prop_matRub' atm = matSz==rubSz && rubRes==atm
|
||||
where
|
||||
Buf matSz matBuf = mat' atm
|
||||
Buf rubSz rubRes = fromMaybe mempty (rub' $ Cursor 0 matBuf)
|
||||
-- prop_matRub' :: Atom -> Bool
|
||||
-- prop_matRub' atm = matSz==rubSz && rubRes==atm
|
||||
-- where
|
||||
-- Buf matSz matBuf = mat' atm
|
||||
-- Buf rubSz rubRes = fromMaybe mempty (rub' $ Cursor 0 matBuf)
|
||||
|
||||
main :: IO ()
|
||||
main = $(defaultMainGenerator)
|
||||
|
||||
matSz' :: Atom -> Int
|
||||
matSz' a = length s - 1
|
||||
where
|
||||
s :: String
|
||||
s = printf "%b" $ fromIntegral @Atom @Integer $ jam $ Atom a
|
||||
|
||||
|
@ -9,7 +9,7 @@ import GHC.Integer.GMP.Internals
|
||||
|
||||
import Control.Lens (view, to, from, (&))
|
||||
import Control.Monad (guard)
|
||||
import Data.Bits (shiftL, shiftR, setBit, clearBit, (.|.), (.&.))
|
||||
import Data.Bits (shiftL, shiftR, setBit, clearBit, xor, (.|.), (.&.))
|
||||
import Data.Map (Map)
|
||||
import Data.Noun.Atom ( Atom(MkAtom), wordBitWidth, wordBitWidth#
|
||||
, atomBitWidth#, takeBitsWord )
|
||||
@ -24,6 +24,8 @@ import GHC.Int (Int(I#))
|
||||
import GHC.Word (Word(W#))
|
||||
import System.IO.Unsafe (unsafePerformIO)
|
||||
|
||||
import qualified Data.Hashable as Hash
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString.Unsafe as BS
|
||||
import qualified Data.HashTable.IO as H
|
||||
import qualified Data.Vector.Primitive as VP
|
||||
@ -57,13 +59,15 @@ newtype Put a = Put
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
{-# INLINE insRef #-}
|
||||
insRef :: Noun -> Word -> Put ()
|
||||
insRef n w = Put \tbl s -> PutResult s <$> H.insert tbl n w
|
||||
|
||||
{-# INLINE getRef #-}
|
||||
getRef :: Noun -> Put (Maybe Word)
|
||||
getRef n = Put \tbl s -> PutResult s <$> H.lookup tbl n
|
||||
getRef n = Put \tbl s -> do
|
||||
pos <- pure (pos s)
|
||||
traceM ("getRef: " <> show n <> " @" <> show pos)
|
||||
res <- H.lookup tbl n
|
||||
pure $ PutResult s $ case res of
|
||||
Just w | w<pos -> Just w
|
||||
_ -> Nothing
|
||||
|
||||
{-
|
||||
1. Write the register to the output, and increment the output pointer.
|
||||
@ -236,10 +240,10 @@ instance Monad Put where
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
doPut :: Word -> Put () -> ByteString
|
||||
doPut sz m =
|
||||
doPut :: Map Noun Word -> Word -> Put () -> ByteString
|
||||
doPut tbl sz m =
|
||||
unsafePerformIO $ do
|
||||
tbl <- H.new
|
||||
tbl <- H.fromListWithSizeHint (M.size tbl) (mapToList tbl)
|
||||
buf <- callocBytes (fromIntegral $ 4 * wordSz*8)
|
||||
_ <- runPut (m >> mbFlush) tbl (S buf 0 0 0)
|
||||
BS.unsafePackCStringFinalizer (castPtr buf) (2*byteSz) (free buf)
|
||||
@ -263,7 +267,6 @@ writeNoun :: Noun -> Put ()
|
||||
writeNoun n = do
|
||||
-- traceM "writeNoun"
|
||||
|
||||
p <- pos <$> getS
|
||||
mRef <- getRef n
|
||||
|
||||
case (mRef, n) of
|
||||
@ -272,9 +275,6 @@ writeNoun n = do
|
||||
(Just bk, Atom a) | bitWidth a <= wordBitWidth bk -> writeAtom a
|
||||
(Just bk, _) -> writeBackRef bk
|
||||
|
||||
when (mRef == Nothing) $
|
||||
insRef n p
|
||||
|
||||
{-# INLINE writeMat #-}
|
||||
writeMat :: Atom -> Put ()
|
||||
writeMat 0 = do
|
||||
@ -308,7 +308,8 @@ writeAtom a = do
|
||||
{-# INLINE writeBackRef #-}
|
||||
writeBackRef :: Word -> Put ()
|
||||
writeBackRef a = do
|
||||
-- traceM ("writeBackRef: " <> show a)
|
||||
p <- pos <$> getS
|
||||
traceM ("writeBackRef: " <> show a <> " @" <> show p)
|
||||
writeBit True
|
||||
writeBit True
|
||||
writeMat (toAtom a)
|
||||
@ -316,13 +317,31 @@ writeBackRef a = do
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
jamBS :: Noun -> ByteString
|
||||
jamBS n = doPut (fst $ preJam n) (writeNoun n)
|
||||
jamBS n = trace (show $ sort $ swap <$> mapToList tbl)
|
||||
$ doPut tbl sz (writeNoun n)
|
||||
where (sz, tbl) = preJam n
|
||||
|
||||
jam :: Noun -> Atom
|
||||
jam = view (to jamBS . from atomBS)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
{-# INLINE matSz #-}
|
||||
matSz :: Atom -> Word
|
||||
matSz a = W# (matSz# a)
|
||||
|
||||
{-# INLINE matSz# #-}
|
||||
matSz# :: Atom -> Word#
|
||||
matSz# 0 = 1##
|
||||
matSz# a = preW `plusWord#` preW `plusWord#` atmW
|
||||
where
|
||||
atmW = atomBitWidth# a
|
||||
preW = wordBitWidth# atmW
|
||||
|
||||
{-# INLINE refSz# #-}
|
||||
refSz# :: Word# -> Word#
|
||||
refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w)))
|
||||
|
||||
preJam :: Noun -> (Word, Map Noun Word)
|
||||
preJam = go 0 mempty
|
||||
where
|
||||
@ -332,12 +351,12 @@ preJam = go 0 mempty
|
||||
|
||||
go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word)
|
||||
go off oldTbl noun =
|
||||
let tbl = insertNoun noun off oldTbl in
|
||||
case lookup noun oldTbl of
|
||||
Nothing ->
|
||||
let tbl = insertNoun noun off oldTbl in
|
||||
case noun of
|
||||
Atom atm ->
|
||||
(1 + W# (matSz# atm), tbl)
|
||||
(1 + matSz atm, tbl)
|
||||
Cell l r ->
|
||||
let (lSz, tbl') = go (2+off) tbl l in
|
||||
let (rSz, tbl'') = go (2+off+lSz) tbl' r in
|
||||
@ -346,23 +365,157 @@ preJam = go 0 mempty
|
||||
let refSz = W# (wordBitWidth# ref) in
|
||||
case noun of
|
||||
Atom atm ->
|
||||
let worSz = W# (matSz# atm) in
|
||||
let worSz = matSz atm in
|
||||
if worSz > refSz
|
||||
then (2 + refSz, oldTbl)
|
||||
else (1 + worSz, tbl)
|
||||
else (1 + worSz, oldTbl)
|
||||
Cell _ _ ->
|
||||
(2 + refSz, oldTbl)
|
||||
|
||||
matSz# :: Atom -> Word#
|
||||
matSz# 0 = 1##
|
||||
matSz# a = preW `plusWord#` preW `plusWord#` atmW
|
||||
where
|
||||
atmW = atomBitWidth# a
|
||||
preW = wordBitWidth# atmW
|
||||
|
||||
refSz# :: Word# -> Word#
|
||||
refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w)))
|
||||
-- Fast Pre-Jam ----------------------------------------------------------------
|
||||
|
||||
nounSz# :: Noun -> Word#
|
||||
nounSz# (Atom a) = 1## `plusWord#` (matSz# a)
|
||||
nounSz# (Cell l r) = 2## `plusWord#` (nounSz# l) `plusWord#` (nounSz# r)
|
||||
{-
|
||||
An `SHN` is a noun and some pre-computed information.
|
||||
|
||||
- `size` is the serialized size without backreferences, we use this
|
||||
for fast equality checks.
|
||||
- `jmSz` is the serialized size, we use this to allocate a buffer
|
||||
at the end.
|
||||
- `hash` is a precomputed noun hash. We use this to get better,
|
||||
cheaper hashes for our hashtable.
|
||||
- `noun` is the actual noun.
|
||||
-}
|
||||
data SHN = SHN
|
||||
{ size :: {-# UNPACK #-} !Word
|
||||
, jmSz :: {-# UNPACK #-} !Word
|
||||
, hash :: {-# UNPACK #-} !Int
|
||||
, noun :: {-# UNPACK #-} !Noun
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
instance Hashable SHN where
|
||||
hash (SHN _ _ h _) = h
|
||||
{-# INLINE hash #-}
|
||||
hashWithSalt = defaultHashWithSalt
|
||||
{-# INLINE hashWithSalt #-}
|
||||
|
||||
instance Eq SHN where
|
||||
x == y = (size x == size y) && (noun x == noun y)
|
||||
|
||||
preJam' :: Noun -> IO (SHN, H.LinearHashTable Word Word)
|
||||
preJam' top = do
|
||||
nodes :: H.LinearHashTable SHN Word <- H.new
|
||||
backs :: H.LinearHashTable Word Word <- H.new
|
||||
|
||||
let goAtom :: Word -> Atom -> IO SHN
|
||||
goAtom pos a@(MkAtom nat) = do
|
||||
let atmSz = matSz a
|
||||
let res = SHN (1+atmSz) (1+atmSz) (Hash.hash nat) (Atom a)
|
||||
H.lookup nodes res >>= \case
|
||||
Nothing -> do
|
||||
H.insert nodes res pos
|
||||
pure (traceShowId res)
|
||||
Just bak -> do
|
||||
let refSz = matSz (toAtom bak)
|
||||
if refSz < atmSz
|
||||
then do H.insert backs pos bak
|
||||
pure (traceShowId (res{jmSz=2+refSz}))
|
||||
else pure (traceShowId res)
|
||||
|
||||
goCell :: Word -> Noun -> Noun -> IO SHN
|
||||
goCell pos h t = do
|
||||
SHN hSz hJmSz hHash _ <- go (pos+2) h
|
||||
SHN tSz tJmSz tHash _ <- go (pos+2+hSz) t
|
||||
let sz = 2+hSz+tSz
|
||||
let jmSz = 2+hJmSz+tJmSz
|
||||
let res = SHN sz jmSz (combine hHash tHash) (Cell h t)
|
||||
H.lookup nodes res >>= \case
|
||||
Nothing -> do
|
||||
H.insert nodes res pos
|
||||
pure (traceShowId res)
|
||||
Just bak -> do
|
||||
let refSz = matSz (toAtom bak)
|
||||
H.insert backs pos bak
|
||||
pure (traceShowId (res{jmSz=2+refSz}))
|
||||
|
||||
go :: Word -> Noun -> IO SHN
|
||||
go p (Atom a) = goAtom p a
|
||||
go p (Cell h t) = goCell p h t
|
||||
|
||||
res <- go 0 top
|
||||
pure (res, backs)
|
||||
|
||||
-- Stolen from Hashable Library ------------------------------------------------
|
||||
|
||||
{-# INLINE combine #-}
|
||||
combine :: Int -> Int -> Int
|
||||
combine h1 h2 = (h1 * 16777619) `xor` h2
|
||||
|
||||
{-# INLINE defaultHashWithSalt #-}
|
||||
defaultHashWithSalt :: Hashable a => Int -> a -> Int
|
||||
defaultHashWithSalt salt x = salt `combine` Hash.hash x
|
||||
|
||||
|
||||
{-
|
||||
I suspect that hashing these big atoms recursively is going to be the bottleneck:
|
||||
Unless you have a good hashing system.
|
||||
Which we totally do in the nock runtime.
|
||||
Checking the hash for the top-level node precomputes the hashes for
|
||||
everything else, recursively.
|
||||
This is really smart.
|
||||
Maybe I could implement this as well?
|
||||
But hashing traverses the whole structure.
|
||||
So, now we have
|
||||
1. precompute hashes.
|
||||
2. precompute size and backref table.
|
||||
3. serialize
|
||||
This seems excessive.
|
||||
We insert into the backref table right away, but actually:
|
||||
Backreferences can't exist until the whole node is processed.
|
||||
|
||||
Which implies a smarter algorithm:
|
||||
- Setup a atom dup table
|
||||
atoms :: Hashtable BigNum Word
|
||||
- Setup a cell dup table
|
||||
cells :: Hashtable (Noun, Noun) Word
|
||||
- Setup a backref table (map from dup. pos to orig. pos)
|
||||
backs :: Hashtable Word Word
|
||||
- go :: Noun -> ST s (Hash, Word)
|
||||
- If atom,
|
||||
- Compute size and hash
|
||||
- Check atom table for backref
|
||||
- If atom in `atoms` table:
|
||||
- If backref smaller than atom
|
||||
- Insert (pos, bak) into `backs` table.
|
||||
- Return (backref size, atom hash)
|
||||
- If backref not smaller than atom
|
||||
- Return (atom size, atom hash)
|
||||
- Otherwise:
|
||||
- Insert atom into `atoms` table.
|
||||
- Return (atom size, atom hash)
|
||||
- If cell
|
||||
- process head
|
||||
- process tail
|
||||
- produce size+hash from results
|
||||
- Check cell table for backref
|
||||
- If backref exists
|
||||
- Insert `(pos, bak)` into `backs` table
|
||||
- Return (backref size, cell hash)
|
||||
- Else
|
||||
- Return (cell size, cell hash)
|
||||
|
||||
Then, to serialize:
|
||||
- Allocate a buffer of `size` bits
|
||||
- If current pos in `backs` table:
|
||||
- Write `11`
|
||||
- Write backref (mat)
|
||||
- Otherwise:
|
||||
- If Atom:
|
||||
- Write `0`
|
||||
- Write atom (mat)
|
||||
- If Cell
|
||||
- Write `10`
|
||||
- Write head
|
||||
- Write tail
|
||||
-}
|
||||
|
@ -21,6 +21,7 @@ dependencies:
|
||||
- flat
|
||||
- ghc-prim
|
||||
- hashable
|
||||
- hashable
|
||||
- hashtables
|
||||
- http-client
|
||||
- http-types
|
||||
|
Loading…
Reference in New Issue
Block a user