Jam: much smarter preJam pass.

This commit is contained in:
Benjamin Summers 2019-06-30 19:30:23 -07:00
parent d445c1cbb1
commit 89b2cccae7
3 changed files with 218 additions and 56 deletions

View File

@ -7,8 +7,9 @@ import Data.Bits
import Control.Lens import Control.Lens
import Text.Printf import Text.Printf
import Data.Map (Map)
import Control.Monad (guard) import Control.Monad (guard)
import Data.Map (Map)
import Text.Printf (printf)
import Test.Tasty import Test.Tasty
import Test.Tasty.TH import Test.Tasty.TH
@ -169,11 +170,11 @@ cue buf = view _2 <$> go mempty 0
case (bitIdx i buf, bitIdx (i+1) buf) of case (bitIdx i buf, bitIdx (i+1) buf) of
(False, _ ) -> do Buf wid at <- rub (Cursor (i+1) buf) (False, _ ) -> do Buf wid at <- rub (Cursor (i+1) buf)
let r = Atom at let r = Atom at
pure (1+wid, r, insertMap i r tbl) pure (1+wid, r, trace (show ('c', i, r)) $ insertMap i r tbl)
(True, False ) -> do (lSz,lef,tbl) <- go tbl (i+2) (True, False ) -> do (lSz,lef,tbl) <- go tbl (i+2)
(rSz,rit,tbl) <- go tbl (i+2+fromIntegral lSz) (rSz,rit,tbl) <- go tbl (i+2+fromIntegral lSz)
let r = Cell lef rit let r = Cell lef rit
pure (2+lSz+rSz, r, insertMap i r tbl) pure (2+lSz+rSz, r, trace (show ('c', i, r)) $ insertMap i r tbl)
(True, True ) -> do Buf wid at <- rub (Cursor (i+2) buf) (True, True ) -> do Buf wid at <- rub (Cursor (i+2) buf)
r <- lookup (fromIntegral at) tbl & \case r <- lookup (fromIntegral at) tbl & \case
Nothing -> error ("bad-ref-" <> show at) Nothing -> error ("bad-ref-" <> show at)
@ -203,38 +204,45 @@ pills = [ 0x2, 0xc, 0x48, 0x29, 0xc9, 0x299
, 0x3170_c7c1, 0x93_c7c1, 0xa_72e0, 0x1bd5_b7dd_e080 , 0x3170_c7c1, 0x93_c7c1, 0xa_72e0, 0x1bd5_b7dd_e080
] ]
cueTest :: Maybe [Noun] -- cueTest :: Maybe [Noun]
cueTest = traverse cue pills -- cueTest = traverse cue pills
jamTest :: Maybe [Atom] -- jamTest :: Maybe [Atom]
jamTest = fmap jam <$> cueTest -- jamTest = fmap jam <$> cueTest
prop_fastMatSlow :: Atom -> Bool -- prop_fastMatSlow :: Atom -> Bool
prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a) -- prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a)
prop_fastJamSlow :: Noun -> Bool -- prop_fastJamSlow :: Noun -> Bool
prop_fastJamSlow n = jam n == Fast.jam n -- prop_fastJamSlow n = jam n == Fast.jam n
prop_fastJam :: Noun -> Bool prop_fastJam :: Noun -> Bool
prop_fastJam n = Just n == cue (Fast.jam n) prop_fastJam n = Just n == cue (Fast.jam n)
prop_jamCue :: Noun -> Bool -- prop_jamCue :: Noun -> Bool
prop_jamCue n = Just n == cue (jam n) -- prop_jamCue n = Just n == cue (jam n)
prop_matRub :: Atom -> Bool -- prop_matRub :: Atom -> Bool
prop_matRub atm = matSz==rubSz && rubRes==atm -- prop_matRub atm = matSz==rubSz && rubRes==atm
where -- where
Buf matSz matBuf = mat atm -- Buf matSz matBuf = mat atm
Buf rubSz rubRes = fromMaybe mempty (rub $ Cursor 0 matBuf) -- Buf rubSz rubRes = fromMaybe mempty (rub $ Cursor 0 matBuf)
prop_jamCue' :: Noun -> Bool -- prop_jamCue' :: Noun -> Bool
prop_jamCue' n = Just n == cue' (jam' n) -- prop_jamCue' n = Just n == cue' (jam' n)
prop_matRub' :: Atom -> Bool -- prop_matRub' :: Atom -> Bool
prop_matRub' atm = matSz==rubSz && rubRes==atm -- prop_matRub' atm = matSz==rubSz && rubRes==atm
where -- where
Buf matSz matBuf = mat' atm -- Buf matSz matBuf = mat' atm
Buf rubSz rubRes = fromMaybe mempty (rub' $ Cursor 0 matBuf) -- Buf rubSz rubRes = fromMaybe mempty (rub' $ Cursor 0 matBuf)
main :: IO () main :: IO ()
main = $(defaultMainGenerator) main = $(defaultMainGenerator)
matSz' :: Atom -> Int
matSz' a = length s - 1
where
s :: String
s = printf "%b" $ fromIntegral @Atom @Integer $ jam $ Atom a

View File

@ -9,7 +9,7 @@ import GHC.Integer.GMP.Internals
import Control.Lens (view, to, from, (&)) import Control.Lens (view, to, from, (&))
import Control.Monad (guard) import Control.Monad (guard)
import Data.Bits (shiftL, shiftR, setBit, clearBit, (.|.), (.&.)) import Data.Bits (shiftL, shiftR, setBit, clearBit, xor, (.|.), (.&.))
import Data.Map (Map) import Data.Map (Map)
import Data.Noun.Atom ( Atom(MkAtom), wordBitWidth, wordBitWidth# import Data.Noun.Atom ( Atom(MkAtom), wordBitWidth, wordBitWidth#
, atomBitWidth#, takeBitsWord ) , atomBitWidth#, takeBitsWord )
@ -24,6 +24,8 @@ import GHC.Int (Int(I#))
import GHC.Word (Word(W#)) import GHC.Word (Word(W#))
import System.IO.Unsafe (unsafePerformIO) import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Hashable as Hash
import qualified Data.Map as M
import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Unsafe as BS
import qualified Data.HashTable.IO as H import qualified Data.HashTable.IO as H
import qualified Data.Vector.Primitive as VP import qualified Data.Vector.Primitive as VP
@ -57,13 +59,15 @@ newtype Put a = Put
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
{-# INLINE insRef #-}
insRef :: Noun -> Word -> Put ()
insRef n w = Put \tbl s -> PutResult s <$> H.insert tbl n w
{-# INLINE getRef #-} {-# INLINE getRef #-}
getRef :: Noun -> Put (Maybe Word) getRef :: Noun -> Put (Maybe Word)
getRef n = Put \tbl s -> PutResult s <$> H.lookup tbl n getRef n = Put \tbl s -> do
pos <- pure (pos s)
traceM ("getRef: " <> show n <> " @" <> show pos)
res <- H.lookup tbl n
pure $ PutResult s $ case res of
Just w | w<pos -> Just w
_ -> Nothing
{- {-
1. Write the register to the output, and increment the output pointer. 1. Write the register to the output, and increment the output pointer.
@ -236,10 +240,10 @@ instance Monad Put where
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
doPut :: Word -> Put () -> ByteString doPut :: Map Noun Word -> Word -> Put () -> ByteString
doPut sz m = doPut tbl sz m =
unsafePerformIO $ do unsafePerformIO $ do
tbl <- H.new tbl <- H.fromListWithSizeHint (M.size tbl) (mapToList tbl)
buf <- callocBytes (fromIntegral $ 4 * wordSz*8) buf <- callocBytes (fromIntegral $ 4 * wordSz*8)
_ <- runPut (m >> mbFlush) tbl (S buf 0 0 0) _ <- runPut (m >> mbFlush) tbl (S buf 0 0 0)
BS.unsafePackCStringFinalizer (castPtr buf) (2*byteSz) (free buf) BS.unsafePackCStringFinalizer (castPtr buf) (2*byteSz) (free buf)
@ -263,7 +267,6 @@ writeNoun :: Noun -> Put ()
writeNoun n = do writeNoun n = do
-- traceM "writeNoun" -- traceM "writeNoun"
p <- pos <$> getS
mRef <- getRef n mRef <- getRef n
case (mRef, n) of case (mRef, n) of
@ -272,9 +275,6 @@ writeNoun n = do
(Just bk, Atom a) | bitWidth a <= wordBitWidth bk -> writeAtom a (Just bk, Atom a) | bitWidth a <= wordBitWidth bk -> writeAtom a
(Just bk, _) -> writeBackRef bk (Just bk, _) -> writeBackRef bk
when (mRef == Nothing) $
insRef n p
{-# INLINE writeMat #-} {-# INLINE writeMat #-}
writeMat :: Atom -> Put () writeMat :: Atom -> Put ()
writeMat 0 = do writeMat 0 = do
@ -308,7 +308,8 @@ writeAtom a = do
{-# INLINE writeBackRef #-} {-# INLINE writeBackRef #-}
writeBackRef :: Word -> Put () writeBackRef :: Word -> Put ()
writeBackRef a = do writeBackRef a = do
-- traceM ("writeBackRef: " <> show a) p <- pos <$> getS
traceM ("writeBackRef: " <> show a <> " @" <> show p)
writeBit True writeBit True
writeBit True writeBit True
writeMat (toAtom a) writeMat (toAtom a)
@ -316,13 +317,31 @@ writeBackRef a = do
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
jamBS :: Noun -> ByteString jamBS :: Noun -> ByteString
jamBS n = doPut (fst $ preJam n) (writeNoun n) jamBS n = trace (show $ sort $ swap <$> mapToList tbl)
$ doPut tbl sz (writeNoun n)
where (sz, tbl) = preJam n
jam :: Noun -> Atom jam :: Noun -> Atom
jam = view (to jamBS . from atomBS) jam = view (to jamBS . from atomBS)
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
{-# INLINE matSz #-}
matSz :: Atom -> Word
matSz a = W# (matSz# a)
{-# INLINE matSz# #-}
matSz# :: Atom -> Word#
matSz# 0 = 1##
matSz# a = preW `plusWord#` preW `plusWord#` atmW
where
atmW = atomBitWidth# a
preW = wordBitWidth# atmW
{-# INLINE refSz# #-}
refSz# :: Word# -> Word#
refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w)))
preJam :: Noun -> (Word, Map Noun Word) preJam :: Noun -> (Word, Map Noun Word)
preJam = go 0 mempty preJam = go 0 mempty
where where
@ -332,37 +351,171 @@ preJam = go 0 mempty
go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word) go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word)
go off oldTbl noun = go off oldTbl noun =
let tbl = insertNoun noun off oldTbl in
case lookup noun oldTbl of case lookup noun oldTbl of
Nothing -> Nothing ->
let tbl = insertNoun noun off oldTbl in
case noun of case noun of
Atom atm -> Atom atm ->
(1 + W# (matSz# atm), tbl) (1 + matSz atm, tbl)
Cell l r -> Cell l r ->
let (lSz, tbl') = go (2+off) tbl l in let (lSz, tbl') = go (2+off) tbl l in
let (rSz, tbl'') = go (2+off+lSz) tbl' r in let (rSz, tbl'') = go (2+off+lSz) tbl' r in
(2 + lSz + rSz, tbl'') (2 + lSz + rSz, tbl'')
Just (W# ref) -> Just (W# ref) ->
let refSz = W# (wordBitWidth# ref) in let refSz = W# (wordBitWidth# ref) in
case noun of case noun of
Atom atm -> Atom atm ->
let worSz = W# (matSz# atm) in let worSz = matSz atm in
if worSz > refSz if worSz > refSz
then (2 + refSz, oldTbl) then (2 + refSz, oldTbl)
else (1 + worSz, tbl) else (1 + worSz, oldTbl)
Cell _ _ -> Cell _ _ ->
(2 + refSz, oldTbl) (2 + refSz, oldTbl)
matSz# :: Atom -> Word#
matSz# 0 = 1##
matSz# a = preW `plusWord#` preW `plusWord#` atmW
where
atmW = atomBitWidth# a
preW = wordBitWidth# atmW
refSz# :: Word# -> Word# -- Fast Pre-Jam ----------------------------------------------------------------
refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w)))
nounSz# :: Noun -> Word# {-
nounSz# (Atom a) = 1## `plusWord#` (matSz# a) An `SHN` is a noun and some pre-computed information.
nounSz# (Cell l r) = 2## `plusWord#` (nounSz# l) `plusWord#` (nounSz# r)
- `size` is the serialized size without backreferences, we use this
for fast equality checks.
- `jmSz` is the serialized size, we use this to allocate a buffer
at the end.
- `hash` is a precomputed noun hash. We use this to get better,
cheaper hashes for our hashtable.
- `noun` is the actual noun.
-}
data SHN = SHN
{ size :: {-# UNPACK #-} !Word
, jmSz :: {-# UNPACK #-} !Word
, hash :: {-# UNPACK #-} !Int
, noun :: {-# UNPACK #-} !Noun
}
deriving (Show)
instance Hashable SHN where
hash (SHN _ _ h _) = h
{-# INLINE hash #-}
hashWithSalt = defaultHashWithSalt
{-# INLINE hashWithSalt #-}
instance Eq SHN where
x == y = (size x == size y) && (noun x == noun y)
preJam' :: Noun -> IO (SHN, H.LinearHashTable Word Word)
preJam' top = do
nodes :: H.LinearHashTable SHN Word <- H.new
backs :: H.LinearHashTable Word Word <- H.new
let goAtom :: Word -> Atom -> IO SHN
goAtom pos a@(MkAtom nat) = do
let atmSz = matSz a
let res = SHN (1+atmSz) (1+atmSz) (Hash.hash nat) (Atom a)
H.lookup nodes res >>= \case
Nothing -> do
H.insert nodes res pos
pure (traceShowId res)
Just bak -> do
let refSz = matSz (toAtom bak)
if refSz < atmSz
then do H.insert backs pos bak
pure (traceShowId (res{jmSz=2+refSz}))
else pure (traceShowId res)
goCell :: Word -> Noun -> Noun -> IO SHN
goCell pos h t = do
SHN hSz hJmSz hHash _ <- go (pos+2) h
SHN tSz tJmSz tHash _ <- go (pos+2+hSz) t
let sz = 2+hSz+tSz
let jmSz = 2+hJmSz+tJmSz
let res = SHN sz jmSz (combine hHash tHash) (Cell h t)
H.lookup nodes res >>= \case
Nothing -> do
H.insert nodes res pos
pure (traceShowId res)
Just bak -> do
let refSz = matSz (toAtom bak)
H.insert backs pos bak
pure (traceShowId (res{jmSz=2+refSz}))
go :: Word -> Noun -> IO SHN
go p (Atom a) = goAtom p a
go p (Cell h t) = goCell p h t
res <- go 0 top
pure (res, backs)
-- Stolen from Hashable Library ------------------------------------------------
{-# INLINE combine #-}
combine :: Int -> Int -> Int
combine h1 h2 = (h1 * 16777619) `xor` h2
{-# INLINE defaultHashWithSalt #-}
defaultHashWithSalt :: Hashable a => Int -> a -> Int
defaultHashWithSalt salt x = salt `combine` Hash.hash x
{-
I suspect that hashing these big atoms recursively is going to be the bottleneck:
Unless you have a good hashing system.
Which we totally do in the nock runtime.
Checking the hash for the top-level node precomputes the hashes for
everything else, recursively.
This is really smart.
Maybe I could implement this as well?
But hashing traverses the whole structure.
So, now we have
1. precompute hashes.
2. precompute size and backref table.
3. serialize
This seems excessive.
We insert into the backref table right away, but actually:
Backreferences can't exist until the whole node is processed.
Which implies a smarter algorithm:
- Setup a atom dup table
atoms :: Hashtable BigNum Word
- Setup a cell dup table
cells :: Hashtable (Noun, Noun) Word
- Setup a backref table (map from dup. pos to orig. pos)
backs :: Hashtable Word Word
- go :: Noun -> ST s (Hash, Word)
- If atom,
- Compute size and hash
- Check atom table for backref
- If atom in `atoms` table:
- If backref smaller than atom
- Insert (pos, bak) into `backs` table.
- Return (backref size, atom hash)
- If backref not smaller than atom
- Return (atom size, atom hash)
- Otherwise:
- Insert atom into `atoms` table.
- Return (atom size, atom hash)
- If cell
- process head
- process tail
- produce size+hash from results
- Check cell table for backref
- If backref exists
- Insert `(pos, bak)` into `backs` table
- Return (backref size, cell hash)
- Else
- Return (cell size, cell hash)
Then, to serialize:
- Allocate a buffer of `size` bits
- If current pos in `backs` table:
- Write `11`
- Write backref (mat)
- Otherwise:
- If Atom:
- Write `0`
- Write atom (mat)
- If Cell
- Write `10`
- Write head
- Write tail
-}

View File

@ -21,6 +21,7 @@ dependencies:
- flat - flat
- ghc-prim - ghc-prim
- hashable - hashable
- hashable
- hashtables - hashtables
- http-client - http-client
- http-types - http-types