Jam: much smarter preJam pass.

This commit is contained in:
Benjamin Summers 2019-06-30 19:30:23 -07:00
parent d445c1cbb1
commit 89b2cccae7
3 changed files with 218 additions and 56 deletions

View File

@ -7,8 +7,9 @@ import Data.Bits
import Control.Lens
import Text.Printf
import Data.Map (Map)
import Control.Monad (guard)
import Data.Map (Map)
import Text.Printf (printf)
import Test.Tasty
import Test.Tasty.TH
@ -169,11 +170,11 @@ cue buf = view _2 <$> go mempty 0
case (bitIdx i buf, bitIdx (i+1) buf) of
(False, _ ) -> do Buf wid at <- rub (Cursor (i+1) buf)
let r = Atom at
pure (1+wid, r, insertMap i r tbl)
pure (1+wid, r, trace (show ('c', i, r)) $ insertMap i r tbl)
(True, False ) -> do (lSz,lef,tbl) <- go tbl (i+2)
(rSz,rit,tbl) <- go tbl (i+2+fromIntegral lSz)
let r = Cell lef rit
pure (2+lSz+rSz, r, insertMap i r tbl)
pure (2+lSz+rSz, r, trace (show ('c', i, r)) $ insertMap i r tbl)
(True, True ) -> do Buf wid at <- rub (Cursor (i+2) buf)
r <- lookup (fromIntegral at) tbl & \case
Nothing -> error ("bad-ref-" <> show at)
@ -203,38 +204,45 @@ pills = [ 0x2, 0xc, 0x48, 0x29, 0xc9, 0x299
, 0x3170_c7c1, 0x93_c7c1, 0xa_72e0, 0x1bd5_b7dd_e080
]
cueTest :: Maybe [Noun]
cueTest = traverse cue pills
-- cueTest :: Maybe [Noun]
-- cueTest = traverse cue pills
jamTest :: Maybe [Atom]
jamTest = fmap jam <$> cueTest
-- jamTest :: Maybe [Atom]
-- jamTest = fmap jam <$> cueTest
prop_fastMatSlow :: Atom -> Bool
prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a)
-- prop_fastMatSlow :: Atom -> Bool
-- prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a)
prop_fastJamSlow :: Noun -> Bool
prop_fastJamSlow n = jam n == Fast.jam n
-- prop_fastJamSlow :: Noun -> Bool
-- prop_fastJamSlow n = jam n == Fast.jam n
prop_fastJam :: Noun -> Bool
prop_fastJam n = Just n == cue (Fast.jam n)
prop_jamCue :: Noun -> Bool
prop_jamCue n = Just n == cue (jam n)
-- prop_jamCue :: Noun -> Bool
-- prop_jamCue n = Just n == cue (jam n)
prop_matRub :: Atom -> Bool
prop_matRub atm = matSz==rubSz && rubRes==atm
where
Buf matSz matBuf = mat atm
Buf rubSz rubRes = fromMaybe mempty (rub $ Cursor 0 matBuf)
-- prop_matRub :: Atom -> Bool
-- prop_matRub atm = matSz==rubSz && rubRes==atm
-- where
-- Buf matSz matBuf = mat atm
-- Buf rubSz rubRes = fromMaybe mempty (rub $ Cursor 0 matBuf)
prop_jamCue' :: Noun -> Bool
prop_jamCue' n = Just n == cue' (jam' n)
-- prop_jamCue' :: Noun -> Bool
-- prop_jamCue' n = Just n == cue' (jam' n)
prop_matRub' :: Atom -> Bool
prop_matRub' atm = matSz==rubSz && rubRes==atm
where
Buf matSz matBuf = mat' atm
Buf rubSz rubRes = fromMaybe mempty (rub' $ Cursor 0 matBuf)
-- prop_matRub' :: Atom -> Bool
-- prop_matRub' atm = matSz==rubSz && rubRes==atm
-- where
-- Buf matSz matBuf = mat' atm
-- Buf rubSz rubRes = fromMaybe mempty (rub' $ Cursor 0 matBuf)
main :: IO ()
main = $(defaultMainGenerator)
matSz' :: Atom -> Int
matSz' a = length s - 1
where
s :: String
s = printf "%b" $ fromIntegral @Atom @Integer $ jam $ Atom a

View File

@ -9,7 +9,7 @@ import GHC.Integer.GMP.Internals
import Control.Lens (view, to, from, (&))
import Control.Monad (guard)
import Data.Bits (shiftL, shiftR, setBit, clearBit, (.|.), (.&.))
import Data.Bits (shiftL, shiftR, setBit, clearBit, xor, (.|.), (.&.))
import Data.Map (Map)
import Data.Noun.Atom ( Atom(MkAtom), wordBitWidth, wordBitWidth#
, atomBitWidth#, takeBitsWord )
@ -24,6 +24,8 @@ import GHC.Int (Int(I#))
import GHC.Word (Word(W#))
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Hashable as Hash
import qualified Data.Map as M
import qualified Data.ByteString.Unsafe as BS
import qualified Data.HashTable.IO as H
import qualified Data.Vector.Primitive as VP
@ -57,13 +59,15 @@ newtype Put a = Put
--------------------------------------------------------------------------------
{-# INLINE insRef #-}
insRef :: Noun -> Word -> Put ()
insRef n w = Put \tbl s -> PutResult s <$> H.insert tbl n w
{-# INLINE getRef #-}
getRef :: Noun -> Put (Maybe Word)
getRef n = Put \tbl s -> PutResult s <$> H.lookup tbl n
getRef n = Put \tbl s -> do
pos <- pure (pos s)
traceM ("getRef: " <> show n <> " @" <> show pos)
res <- H.lookup tbl n
pure $ PutResult s $ case res of
Just w | w<pos -> Just w
_ -> Nothing
{-
1. Write the register to the output, and increment the output pointer.
@ -236,10 +240,10 @@ instance Monad Put where
--------------------------------------------------------------------------------
doPut :: Word -> Put () -> ByteString
doPut sz m =
doPut :: Map Noun Word -> Word -> Put () -> ByteString
doPut tbl sz m =
unsafePerformIO $ do
tbl <- H.new
tbl <- H.fromListWithSizeHint (M.size tbl) (mapToList tbl)
buf <- callocBytes (fromIntegral $ 4 * wordSz*8)
_ <- runPut (m >> mbFlush) tbl (S buf 0 0 0)
BS.unsafePackCStringFinalizer (castPtr buf) (2*byteSz) (free buf)
@ -263,7 +267,6 @@ writeNoun :: Noun -> Put ()
writeNoun n = do
-- traceM "writeNoun"
p <- pos <$> getS
mRef <- getRef n
case (mRef, n) of
@ -272,9 +275,6 @@ writeNoun n = do
(Just bk, Atom a) | bitWidth a <= wordBitWidth bk -> writeAtom a
(Just bk, _) -> writeBackRef bk
when (mRef == Nothing) $
insRef n p
{-# INLINE writeMat #-}
writeMat :: Atom -> Put ()
writeMat 0 = do
@ -308,7 +308,8 @@ writeAtom a = do
{-# INLINE writeBackRef #-}
writeBackRef :: Word -> Put ()
writeBackRef a = do
-- traceM ("writeBackRef: " <> show a)
p <- pos <$> getS
traceM ("writeBackRef: " <> show a <> " @" <> show p)
writeBit True
writeBit True
writeMat (toAtom a)
@ -316,13 +317,31 @@ writeBackRef a = do
--------------------------------------------------------------------------------
jamBS :: Noun -> ByteString
jamBS n = doPut (fst $ preJam n) (writeNoun n)
jamBS n = trace (show $ sort $ swap <$> mapToList tbl)
$ doPut tbl sz (writeNoun n)
where (sz, tbl) = preJam n
jam :: Noun -> Atom
jam = view (to jamBS . from atomBS)
--------------------------------------------------------------------------------
{-# INLINE matSz #-}
matSz :: Atom -> Word
matSz a = W# (matSz# a)
{-# INLINE matSz# #-}
matSz# :: Atom -> Word#
matSz# 0 = 1##
matSz# a = preW `plusWord#` preW `plusWord#` atmW
where
atmW = atomBitWidth# a
preW = wordBitWidth# atmW
{-# INLINE refSz# #-}
refSz# :: Word# -> Word#
refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w)))
preJam :: Noun -> (Word, Map Noun Word)
preJam = go 0 mempty
where
@ -332,12 +351,12 @@ preJam = go 0 mempty
go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word)
go off oldTbl noun =
let tbl = insertNoun noun off oldTbl in
case lookup noun oldTbl of
Nothing ->
let tbl = insertNoun noun off oldTbl in
case noun of
Atom atm ->
(1 + W# (matSz# atm), tbl)
(1 + matSz atm, tbl)
Cell l r ->
let (lSz, tbl') = go (2+off) tbl l in
let (rSz, tbl'') = go (2+off+lSz) tbl' r in
@ -346,23 +365,157 @@ preJam = go 0 mempty
let refSz = W# (wordBitWidth# ref) in
case noun of
Atom atm ->
let worSz = W# (matSz# atm) in
let worSz = matSz atm in
if worSz > refSz
then (2 + refSz, oldTbl)
else (1 + worSz, tbl)
else (1 + worSz, oldTbl)
Cell _ _ ->
(2 + refSz, oldTbl)
matSz# :: Atom -> Word#
matSz# 0 = 1##
matSz# a = preW `plusWord#` preW `plusWord#` atmW
where
atmW = atomBitWidth# a
preW = wordBitWidth# atmW
refSz# :: Word# -> Word#
refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w)))
-- Fast Pre-Jam ----------------------------------------------------------------
nounSz# :: Noun -> Word#
nounSz# (Atom a) = 1## `plusWord#` (matSz# a)
nounSz# (Cell l r) = 2## `plusWord#` (nounSz# l) `plusWord#` (nounSz# r)
{-
An `SHN` is a noun and some pre-computed information.
- `size` is the serialized size without backreferences, we use this
for fast equality checks.
- `jmSz` is the serialized size, we use this to allocate a buffer
at the end.
- `hash` is a precomputed noun hash. We use this to get better,
cheaper hashes for our hashtable.
- `noun` is the actual noun.
-}
data SHN = SHN
{ size :: {-# UNPACK #-} !Word
, jmSz :: {-# UNPACK #-} !Word
, hash :: {-# UNPACK #-} !Int
, noun :: {-# UNPACK #-} !Noun
}
deriving (Show)
instance Hashable SHN where
hash (SHN _ _ h _) = h
{-# INLINE hash #-}
hashWithSalt = defaultHashWithSalt
{-# INLINE hashWithSalt #-}
instance Eq SHN where
x == y = (size x == size y) && (noun x == noun y)
preJam' :: Noun -> IO (SHN, H.LinearHashTable Word Word)
preJam' top = do
nodes :: H.LinearHashTable SHN Word <- H.new
backs :: H.LinearHashTable Word Word <- H.new
let goAtom :: Word -> Atom -> IO SHN
goAtom pos a@(MkAtom nat) = do
let atmSz = matSz a
let res = SHN (1+atmSz) (1+atmSz) (Hash.hash nat) (Atom a)
H.lookup nodes res >>= \case
Nothing -> do
H.insert nodes res pos
pure (traceShowId res)
Just bak -> do
let refSz = matSz (toAtom bak)
if refSz < atmSz
then do H.insert backs pos bak
pure (traceShowId (res{jmSz=2+refSz}))
else pure (traceShowId res)
goCell :: Word -> Noun -> Noun -> IO SHN
goCell pos h t = do
SHN hSz hJmSz hHash _ <- go (pos+2) h
SHN tSz tJmSz tHash _ <- go (pos+2+hSz) t
let sz = 2+hSz+tSz
let jmSz = 2+hJmSz+tJmSz
let res = SHN sz jmSz (combine hHash tHash) (Cell h t)
H.lookup nodes res >>= \case
Nothing -> do
H.insert nodes res pos
pure (traceShowId res)
Just bak -> do
let refSz = matSz (toAtom bak)
H.insert backs pos bak
pure (traceShowId (res{jmSz=2+refSz}))
go :: Word -> Noun -> IO SHN
go p (Atom a) = goAtom p a
go p (Cell h t) = goCell p h t
res <- go 0 top
pure (res, backs)
-- Stolen from Hashable Library ------------------------------------------------
{-# INLINE combine #-}
combine :: Int -> Int -> Int
combine h1 h2 = (h1 * 16777619) `xor` h2
{-# INLINE defaultHashWithSalt #-}
defaultHashWithSalt :: Hashable a => Int -> a -> Int
defaultHashWithSalt salt x = salt `combine` Hash.hash x
{-
I suspect that hashing these big atoms recursively is going to be the bottleneck:
Unless you have a good hashing system.
Which we totally do in the nock runtime.
Checking the hash for the top-level node precomputes the hashes for
everything else, recursively.
This is really smart.
Maybe I could implement this as well?
But hashing traverses the whole structure.
So, now we have
1. precompute hashes.
2. precompute size and backref table.
3. serialize
This seems excessive.
We insert into the backref table right away, but actually:
Backreferences can't exist until the whole node is processed.
Which implies a smarter algorithm:
- Setup a atom dup table
atoms :: Hashtable BigNum Word
- Setup a cell dup table
cells :: Hashtable (Noun, Noun) Word
- Setup a backref table (map from dup. pos to orig. pos)
backs :: Hashtable Word Word
- go :: Noun -> ST s (Hash, Word)
- If atom,
- Compute size and hash
- Check atom table for backref
- If atom in `atoms` table:
- If backref smaller than atom
- Insert (pos, bak) into `backs` table.
- Return (backref size, atom hash)
- If backref not smaller than atom
- Return (atom size, atom hash)
- Otherwise:
- Insert atom into `atoms` table.
- Return (atom size, atom hash)
- If cell
- process head
- process tail
- produce size+hash from results
- Check cell table for backref
- If backref exists
- Insert `(pos, bak)` into `backs` table
- Return (backref size, cell hash)
- Else
- Return (cell size, cell hash)
Then, to serialize:
- Allocate a buffer of `size` bits
- If current pos in `backs` table:
- Write `11`
- Write backref (mat)
- Otherwise:
- If Atom:
- Write `0`
- Write atom (mat)
- If Cell
- Write `10`
- Write head
- Write tail
-}

View File

@ -21,6 +21,7 @@ dependencies:
- flat
- ghc-prim
- hashable
- hashable
- hashtables
- http-client
- http-types