From 2d25c21528699c8aca923d5b184a655fc294a724 Mon Sep 17 00:00:00 2001 From: Benjamin Summers Date: Mon, 1 Jul 2019 19:43:10 -0700 Subject: [PATCH] Jam: Cleanup --- .../lib/Data/Noun/{Jam/Get.hs => Cue/Fast.hs} | 82 ++- pkg/hs-urbit/lib/Data/Noun/Jam.hs | 4 +- pkg/hs-urbit/lib/Data/Noun/Jam/Fast.hs | 499 +++++++++++---- pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs | 585 ------------------ pkg/hs-urbit/lib/Data/Noun/Lens.hs | 2 +- pkg/hs-urbit/lib/Vere/Serf.hs | 2 +- 6 files changed, 452 insertions(+), 722 deletions(-) rename pkg/hs-urbit/lib/Data/Noun/{Jam/Get.hs => Cue/Fast.hs} (78%) delete mode 100644 pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs diff --git a/pkg/hs-urbit/lib/Data/Noun/Jam/Get.hs b/pkg/hs-urbit/lib/Data/Noun/Cue/Fast.hs similarity index 78% rename from pkg/hs-urbit/lib/Data/Noun/Jam/Get.hs rename to pkg/hs-urbit/lib/Data/Noun/Cue/Fast.hs index c53874ba16..31e9e39cbf 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Jam/Get.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Cue/Fast.hs @@ -1,7 +1,21 @@ -module Data.Noun.Jam.Get where +{-# LANGUAGE MagicHash #-} + +module Data.Noun.Cue.Fast where import ClassyPrelude +import ClassyPrelude +import Data.Noun +import Data.Noun.Atom +import Data.Noun.Poet +import Data.Bits hiding (Bits) +import Control.Lens +import Text.Printf +import GHC.Prim +import GHC.Word +import GHC.Natural +import Foreign.Ptr +import Foreign.Storable (peek) import Data.Noun (Noun) import Data.Bits (shiftR, (.|.), (.&.)) import Foreign.Ptr (Ptr, plusPtr, ptrToWordPtr) @@ -11,6 +25,11 @@ import Control.Monad (guard) import qualified Data.HashTable.IO as H +import Test.Tasty +import Test.Tasty.TH +import qualified Test.Tasty.QuickCheck as QC +import Test.QuickCheck hiding ((.&.)) + -- Types ----------------------------------------------------------------------- @@ -218,3 +237,64 @@ dWordBits n = do w <- peekWord advance n pure (takeLowBits n w) + + +-- Fast Cue -------------------------------------------------------------------- + +{- + Get the exponent-prefix of an atom: + + - Peek at the next word. + - Calculate the number of least-significant bits in that word (there's + a primitive for this). + - Advance by that number of bits. + - Return the number of bits +-} +dExp :: Get Word +dExp = do + W# w <- peekWord + let res = W# (ctz# w) + advance res + pure res + +dAtomLen :: Get Word +dAtomLen = do + e <- dExp + p <- dWordBits (e-1) + pure (2^e .|. p) + +dRef :: Get Word +dRef = dAtomLen >>= dWordBits + +dAtom :: Get Atom +dAtom = do + n <- dAtomLen + b <- dBits n + pure (bitsToAtom b) + +bitsToAtom :: Bits -> Atom +bitsToAtom = undefined + +dCell :: Get Noun +dCell = Cell <$> dNoun <*> dNoun + +{-| + Get a Noun. + + - Get a bit + - If it's zero, get an atom. + - Otherwise, get another bit. + - If it's zero, get a cell. + - If it's one, get an atom. +-} +dNoun :: Get Noun +dNoun = do + p <- getPos + + let yield r = insRef p r >> pure r + + dBit >>= \case + False -> (Atom <$> dAtom) >>= yield + True -> dBit >>= \case + False -> dCell >>= yield + True -> dRef >>= getRef diff --git a/pkg/hs-urbit/lib/Data/Noun/Jam.hs b/pkg/hs-urbit/lib/Data/Noun/Jam.hs index 06f397c437..7f1004f0de 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Jam.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Jam.hs @@ -16,8 +16,8 @@ import Test.Tasty.TH import Test.Tasty.QuickCheck as QC import Test.QuickCheck -import qualified Data.Noun.Jam.Put as Fast -import qualified Data.Noun.Pill as Pill +import qualified Data.Noun.Jam.Fast as Fast +import qualified Data.Noun.Pill as Pill -- Length-Encoded Atoms -------------------------------------------------------- diff --git a/pkg/hs-urbit/lib/Data/Noun/Jam/Fast.hs b/pkg/hs-urbit/lib/Data/Noun/Jam/Fast.hs index fa71373a3f..a5a339d9cb 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Jam/Fast.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Jam/Fast.hs @@ -1,160 +1,395 @@ {-# LANGUAGE MagicHash #-} +{-# OPTIONS_GHC -fwarn-unused-binds -fwarn-unused-imports #-} -module Data.Noun.Jam.Fast where +module Data.Noun.Jam.Fast (jam, jamBS) where -import ClassyPrelude -import Data.Noun -import Data.Noun.Atom -import Data.Noun.Poet -import Data.Bits hiding (Bits) -import Control.Lens -import Text.Printf -import GHC.Prim -import GHC.Word -import GHC.Natural -import Foreign.Ptr -import Foreign.Storable (peek) -import Data.Noun.Jam.Get +import ClassyPrelude hiding (hash) -import Data.Map (Map) -import Control.Monad (guard) +import Control.Lens (view, to, from) +import Data.Bits (shiftL, shiftR, setBit, clearBit, xor, (.|.)) +import Data.Noun.Atom (Atom(MkAtom), toAtom, bitWidth, takeBitsWord) +import Data.Noun.Atom (wordBitWidth, wordBitWidth# , atomBitWidth#) +import Data.Noun (Noun(Atom, Cell)) +import Data.Noun.Pill (bigNatWords, atomBS) +import Data.Vector.Primitive ((!)) +import Foreign.Marshal.Alloc (callocBytes, free) +import Foreign.Ptr (Ptr, castPtr, plusPtr) +import Foreign.Storable (poke) +import GHC.Integer.GMP.Internals (BigNat) +import GHC.Int (Int(I#)) +import GHC.Natural (Natural(NatS#, NatJ#)) +import GHC.Prim (Word#, plusWord#, word2Int#) +import GHC.Word (Word(W#)) +import System.IO.Unsafe (unsafePerformIO) -import Test.Tasty -import Test.Tasty.TH -import qualified Test.Tasty.QuickCheck as QC -import Test.QuickCheck hiding ((.&.)) - -import qualified Data.HashTable.IO as H +import qualified Data.ByteString.Unsafe as BS +import qualified Data.Hashable as Hash +import qualified Data.HashTable.IO as H +import qualified Data.Vector.Primitive as VP --- Pre-compute the bit-width of a jammed noun. --------------------------------- +-- Exports --------------------------------------------------------------------- -jamSz :: Noun -> Word -jamSz = fst . go 0 mempty +jamBS :: Noun -> ByteString +jamBS n = doPut bt sz (writeNoun n) where - insertNoun :: Noun -> Word -> Map Noun Word -> Map Noun Word - insertNoun n i tbl = lookup n tbl - & maybe tbl (const $ insertMap n i tbl) + (sz, bt) = unsafePerformIO (compress $ toBigNoun n) - go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word) - go off oldTbl noun = - let tbl = insertNoun noun off oldTbl in - case lookup noun oldTbl of - Nothing -> - case noun of - Atom atm -> - (1 + W# (matSz# atm), tbl) - Cell l r -> - let (lSz, tbl) = go (2+off) tbl l in - let (rSz, tbl) = go (2+off+lSz) tbl r in - (2 + lSz + rSz, tbl) - Just (W# ref) -> - let refSz = W# (wordBitWidth# ref) in - case noun of - Atom atm -> - let worSz = W# (matSz# atm) in - if worSz > refSz - then (refSz, oldTbl) - else (1 + worSz, tbl) - Cell _ _ -> - (refSz, oldTbl) +jam :: Noun -> Atom +jam = view (to jamBS . from atomBS) - matSz# :: Atom -> Word# - matSz# 0 = 1## - matSz# a = preW `plusWord#` preW `plusWord#` atmW - where - atmW = atomBitWidth# a - preW = wordBitWidth# atmW - refSz# :: Word# -> Word# - refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w))) +-- Types ----------------------------------------------------------------------- - nounSz# :: Noun -> Word# - nounSz# (Atom a) = 1## `plusWord#` (matSz# a) - nounSz# (Cell l r) = 2## `plusWord#` (nounSz# l) `plusWord#` (nounSz# r) +{-| + The encoder state. + + - ptr: Pointer into the output buffer. + - reg: Next 64 bits of output, partially written. + - off: Number of bits already written into `reg` + - pos: Total number of bits written. +-} +data S = S + { ptr :: {-# UNPACK #-} !(Ptr Word) + , reg :: {-# UNPACK #-} !Word + , off :: {-# UNPACK #-} !Int + , pos :: {-# UNPACK #-} !Word + } deriving (Show,Eq,Ord) + +data PutResult a = PutResult {-# UNPACK #-} !S !a + deriving Functor + +newtype Put a = Put + { runPut :: H.LinearHashTable Word Word + -> S + -> IO (PutResult a) + } -------------------------------------------------------------------------------- -jamFast :: Noun -> Vector Word64 -jamFast n = undefined - -bitsToAtom :: Bits -> Atom -bitsToAtom = undefined - - --- Fast Cue -------------------------------------------------------------------- +{-# INLINE getRef #-} +getRef :: Put (Maybe Word) +getRef = Put \tbl s -> PutResult s <$> H.lookup tbl (pos s) {- - Get the exponent-prefix of an atom: - - - Peek at the next word. - - Calculate the number of least-significant bits in that word (there's - a primitive for this). - - Advance by that number of bits. - - Return the number of bits + 1. Write the register to the output, and increment the output pointer. -} -dExp :: Get Word -dExp = do - W# w <- peekWord - let res = W# (ctz# w) - advance res - pure res +{-# INLINE flush #-} +flush :: Put () +flush = Put $ \tbl s@S{..} -> do + poke ptr reg + pure $ PutResult (s { ptr = ptr `plusPtr` 8 }) () -dAtomLen :: Get Word -dAtomLen = do - e <- dExp - p <- dWordBits (e-1) - pure (2^e .|. p) +{-# INLINE update #-} +update :: (S -> S) -> Put () +update f = Put \tbl s@S{..} -> pure (PutResult (f s) ()) -dRef :: Get Word -dRef = dAtomLen >>= dWordBits +{-# INLINE setRegOff #-} +setRegOff :: Word -> Int -> Put () +setRegOff r o = update \s@S{..} -> (s {reg=r, off=o}) -dAtom :: Get Atom -dAtom = do - n <- dAtomLen - b <- dBits n - pure (bitsToAtom b) +{-# INLINE setReg #-} +setReg :: Word -> Put () +setReg r = update \s@S{..} -> (s { reg=r }) -dCell :: Get Noun -dCell = Cell <$> dNoun <*> dNoun +{-# INLINE getS #-} +getS :: Put S +getS = Put $ \tbl s -> pure (PutResult s s) -{-| - Get a Noun. - - - Get a bit - - If it's zero, get an atom. - - Otherwise, get another bit. - - If it's zero, get a cell. - - If it's one, get an atom. --} -dNoun :: Get Noun -dNoun = do - p <- getPos - - let yield r = insRef p r >> pure r - - dBit >>= \case - False -> (Atom <$> dAtom) >>= yield - True -> dBit >>= \case - False -> dCell >>= yield - True -> dRef >>= getRef +{-# INLINE putS #-} +putS :: S -> Put () +putS s = Put $ \tbl _ -> pure (PutResult s ()) {- - TODO Count leading zero bits. + To write a bit: - Read a 64 bit word from the buffer and get the number of leading - zeros in that word. This works as long as no atom is larger than - 2 zettabytes. - - - TODO Need to handle the edge-case where there are less than 64 bits - remaining in the buffer. Those extra bytes need to be zeros. One way - to handle this might be to add a zero word to the end of the buffer, - but that would require a re-alloc. Probably the right way is to - write new `peek` primitives that handle this case. - - - TODO Error out if we hit the end *and* the word is all zeros. - - Alright, let's pseudo-code this out: - - Grab the next 64 bits. Pill files are always LSB-first + | reg |= 1 << off + | off <- (off + 1) % 64 + | if (!off): + | buf[w++] <- reg + | reg <- 0 -} +{-# INLINE writeBit #-} +writeBit :: Bool -> Put () +writeBit b = Put $ \tbl s@S{..} -> do + let s' = s { reg = (if b then setBit else clearBit) reg off + , off = (off + 1) `mod` 64 + , pos = pos + 1 + } + + if off == 63 + then runPut (flush >> setRegOff 0 0) tbl s' + else pure $ PutResult s' () + +{- + To write a 64bit word: + + | reg |= w << off + | buf[bufI++] = reg + | reg = w >> (64 - off) +-} +{-# INLINE writeWord #-} +writeWord :: Word -> Put () +writeWord wor = do + S{..} <- getS + setReg (reg .|. shiftL wor off) + flush + update \s -> s { pos = 64 + pos + , reg = shiftR wor (64 - off) + } + +{- + To write some bits (< 64) from a word: + + | wor = takeBits(wid, wor) + | reg = reg .|. (wor << off) + | off = (off + wid) % 64 + | + | if (off + wid >= 64) + | buf[w] = x + | reg = wor >> (wid - off) +-} +{-# INLINE writeBitsFromWord #-} +writeBitsFromWord :: Int -> Word -> Put () +writeBitsFromWord wid wor = do + wor <- pure (takeBitsWord wid wor) + + oldSt <- getS + + let newSt = oldSt { reg = reg oldSt .|. shiftL wor (off oldSt) + , off = (off oldSt + wid) `mod` 64 + , pos = fromIntegral wid + pos oldSt + } + + putS newSt + + when (wid + off oldSt >= 64) $ do + flush + setReg (shiftR wor (wid - off newSt)) +{- + Write all of the the signficant bits of a direct atom. +-} +{-# INLINE writeAtomWord# #-} +writeAtomWord# :: Word# -> Put () +writeAtomWord# w = do + writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w) + +{-# INLINE writeAtomWord #-} +writeAtomWord :: Word -> Put () +writeAtomWord (W# w) = writeAtomWord# w + +{- + Write all of the the signficant bits of an indirect atom. + + TODO Use memcpy when the bit-offset of the output is divisible by 8. +-} +{-# INLINE writeAtomBigNat #-} +writeAtomBigNat :: BigNat -> Put () +writeAtomBigNat (view bigNatWords -> words) = do + let lastIdx = VP.length words - 1 + for_ [0..(lastIdx-1)] \i -> + writeWord (words ! i) + writeAtomWord (words ! lastIdx) + +{-# INLINE writeAtomBits #-} +writeAtomBits :: Atom -> Put () +writeAtomBits = \case MkAtom (NatS# wd) -> writeAtomWord# wd + MkAtom (NatJ# bn) -> writeAtomBigNat bn + + +-- Put Instances --------------------------------------------------------------- + +instance Functor Put where + fmap f g = Put $ \tbl s -> do + PutResult s' a <- runPut g tbl s + pure $ PutResult s' (f a) + {-# INLINE fmap #-} + +instance Applicative Put where + pure x = Put (\_ s -> return $ PutResult s x) + {-# INLINE pure #-} + + Put f <*> Put g = Put $ \tbl s1 -> do + PutResult s2 f' <- f tbl s1 + PutResult s3 g' <- g tbl s2 + return $ PutResult s3 (f' g') + {-# INLINE (<*>) #-} + + Put f *> Put g = Put $ \tbl s1 -> do + PutResult s2 _ <- f tbl s1 + g tbl s2 + {-# INLINE (*>) #-} + +instance Monad Put where + return = pure + {-# INLINE return #-} + + (>>) = (*>) + {-# INLINE (>>) #-} + + Put x >>= f = Put $ \tbl s -> do + PutResult s' x' <- x tbl s + runPut (f x') tbl s' + {-# INLINE (>>=) #-} + + +-------------------------------------------------------------------------------- + +doPut :: H.LinearHashTable Word Word -> Word -> Put () -> ByteString +doPut tbl sz m = + unsafePerformIO $ do + buf <- callocBytes (fromIntegral (wordSz*8)) + _ <- runPut (m >> mbFlush) tbl (S buf 0 0 0) + BS.unsafePackCStringFinalizer (castPtr buf) byteSz (free buf) + where + wordSz = fromIntegral (sz `divUp` 64) + byteSz = fromIntegral (sz `divUp` 8) + divUp = \x y -> (x `div` y) + (if x `mod` y == 0 then 0 else 1) + + mbFlush :: Put () + mbFlush = do + shouldFlush <- (/= 0) . off <$> getS + when shouldFlush flush + + +-------------------------------------------------------------------------------- + +{- + TODO Handle back references +-} +writeNoun :: Noun -> Put () +writeNoun n = + getRef >>= \case + Just bk -> writeBackRef bk + Nothing -> case n of Atom a -> writeAtom a + Cell h t -> writeCell h t + +{-# INLINE writeMat #-} +writeMat :: Atom -> Put () +writeMat 0 = writeBit True +writeMat atm = do + writeBitsFromWord (preWid+1) (shiftL 1 preWid) + writeBitsFromWord (preWid-1) atmWid + writeAtomBits atm + where + atmWid = bitWidth atm + preWid = fromIntegral (wordBitWidth atmWid) + +{-# INLINE writeCell #-} +writeCell :: Noun -> Noun -> Put () +writeCell h t = do + writeBit True + writeBit False + writeNoun h + writeNoun t + +{-# INLINE writeAtom #-} +writeAtom :: Atom -> Put () +writeAtom a = do + writeBit False + writeMat a + +{-# INLINE writeBackRef #-} +writeBackRef :: Word -> Put () +writeBackRef a = do + p <- pos <$> getS + writeBit True + writeBit True + writeMat (toAtom a) + + +-- Compute Hashes and Jam Size (with no backrefs) ------------------------------ + +data BigNoun + = BigCell { bSize :: {-# UNPACK #-} !Word + , bHash :: {-# UNPACK #-} !Int + , bHead :: BigNoun + , bTail :: BigNoun + } + | BigAtom { bSize :: {-# UNPACK #-} !Word + , bHash :: {-# UNPACK #-} !Int + , bAtom :: {-# UNPACK #-} !Atom + } + deriving (Show) + +instance Hashable BigNoun where + hash = bHash + {-# INLINE hash #-} + hashWithSalt = defaultHashWithSalt + {-# INLINE hashWithSalt #-} + +instance Eq BigNoun where + BigAtom s1 _ a1 == BigAtom s2 _ a2 = s1==s2 && a1==a2 + BigCell s1 _ h1 t1 == BigCell s2 _ h2 t2 = s1==s2 && h1==h2 && t1==t2 + _ == _ = False + {-# INLINE (==) #-} + +{-# INLINE toBigNoun #-} +toBigNoun :: Noun -> BigNoun +toBigNoun = go + where + go (Atom a) = BigAtom (1 + matSz a) (Hash.hash a) a + go (Cell h t) = BigCell siz has hed tel + where + hed = toBigNoun h + tel = toBigNoun t + siz = 2 + bSize hed + bSize tel + has = fromIntegral siz `combine` bHash hed `combine` bHash tel + + +-- Calculate Jam Size and Backrefs --------------------------------------------- + +{-# INLINE matSz #-} +matSz :: Atom -> Word +matSz a = W# (matSz# a) + +{-# INLINE matSz# #-} +matSz# :: Atom -> Word# +matSz# 0 = 1## +matSz# a = preW `plusWord#` preW `plusWord#` atmW + where + atmW = atomBitWidth# a + preW = wordBitWidth# atmW + +{-# INLINE refSz# #-} +refSz# :: Word# -> Word# +refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w))) + +compress :: BigNoun -> IO (Word, H.LinearHashTable Word Word) +compress top = do + nodes :: H.LinearHashTable BigNoun Word <- H.new + backs :: H.LinearHashTable Word Word <- H.new + + let proc :: Word -> BigNoun -> IO Word + proc pos = \case + BigAtom _ _ a -> pure (1 + matSz a) + BigCell _ _ h t -> do + hSz <- go (pos+2) h + tSz <- go (pos+2+hSz) t + pure (2+hSz+tSz) + + go :: Word -> BigNoun -> IO Word + go p inp = do + H.lookup nodes inp >>= \case + Nothing -> do + H.insert nodes inp p + proc p inp + Just bak@(W# bakRaw) -> do + let refSz = W# (refSz# bakRaw) + if (refSz < bSize inp) + then H.insert backs p bak $> refSz + else proc p inp + + res <- go 0 top + pure (res, backs) + + +-- Stolen from Hashable Library ------------------------------------------------ + +{-# INLINE combine #-} +combine :: Int -> Int -> Int +combine h1 h2 = (h1 * 16777619) `xor` h2 + +{-# INLINE defaultHashWithSalt #-} +defaultHashWithSalt :: Hashable a => Int -> a -> Int +defaultHashWithSalt salt x = salt `combine` Hash.hash x diff --git a/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs b/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs deleted file mode 100644 index e6db84918a..0000000000 --- a/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs +++ /dev/null @@ -1,585 +0,0 @@ -{-# LANGUAGE MagicHash #-} - -module Data.Noun.Jam.Put where - -import ClassyPrelude hiding (hash) -import GHC.Prim -import GHC.Natural -import GHC.Integer.GMP.Internals - -import Control.Lens (view, to, from, (&)) -import Control.Monad (guard) -import Data.Bits (shiftL, shiftR, setBit, clearBit, xor, (.|.), (.&.)) -import Data.Map (Map) -import Data.Noun.Atom ( Atom(MkAtom), wordBitWidth, wordBitWidth# - , atomBitWidth#, takeBitsWord ) -import Data.Noun.Atom (toAtom, takeBits, bitWidth) -import Data.Noun (Noun(Atom, Cell)) -import Data.Noun.Pill (bigNatWords, atomBS) -import Data.Vector.Primitive ((!)) -import Foreign.Marshal.Alloc (callocBytes, free) -import Foreign.Ptr (Ptr, castPtr, plusPtr, ptrToWordPtr) -import Foreign.Storable (peek, poke) -import GHC.Int (Int(I#)) -import GHC.Word (Word(W#)) -import System.IO.Unsafe (unsafePerformIO) - -import qualified Data.Hashable as Hash -import qualified Data.Map as M -import qualified Data.ByteString.Unsafe as BS -import qualified Data.HashTable.IO as H -import qualified Data.Vector.Primitive as VP - - --- Types ----------------------------------------------------------------------- - -{-| - The encoder state. - - - ptr: Pointer into the output buffer. - - reg: Next 64 bits of output, partially written. - - off: Number of bits already written into `reg` - - pos: Total number of bits written. --} -data S = S - { ptr :: {-# UNPACK #-} !(Ptr Word) - , reg :: {-# UNPACK #-} !Word - , off :: {-# UNPACK #-} !Int - , pos :: {-# UNPACK #-} !Word - } deriving (Show,Eq,Ord) - -data PutResult a = PutResult {-# UNPACK #-} !S !a - deriving Functor - -newtype Put a = Put - { runPut :: H.LinearHashTable Word Word - -> S - -> IO (PutResult a) - } - --------------------------------------------------------------------------------- - -{-# INLINE getRef #-} -getRef :: Put (Maybe Word) -getRef = Put \tbl s -> PutResult s <$> H.lookup tbl (pos s) - -{- - 1. Write the register to the output, and increment the output pointer. --} -{-# INLINE flush #-} -flush :: Put () -flush = Put $ \tbl s@S{..} -> do - poke ptr reg - pure $ PutResult (s { ptr = ptr `plusPtr` 8 }) () - -{-# INLINE update #-} -update :: (S -> S) -> Put () -update f = Put \tbl s@S{..} -> pure (PutResult (f s) ()) - -{-# INLINE setRegOff #-} -setRegOff :: Word -> Int -> Put () -setRegOff r o = update \s@S{..} -> (s {reg=r, off=o}) - -{-# INLINE setReg #-} -setReg :: Word -> Put () -setReg r = update \s@S{..} -> (s { reg=r }) - -{-# INLINE getS #-} -getS :: Put S -getS = Put $ \tbl s -> pure (PutResult s s) - -{-# INLINE putS #-} -putS :: S -> Put () -putS s = Put $ \tbl _ -> pure (PutResult s ()) - -{- - To write a bit: - - | reg |= 1 << off - | off <- (off + 1) % 64 - | if (!off): - | buf[w++] <- reg - | reg <- 0 --} -{-# INLINE writeBit #-} -writeBit :: Bool -> Put () -writeBit b = Put $ \tbl s@S{..} -> do - -- traceM ("writeBit: " <> show b) - let s' = s { reg = (if b then setBit else clearBit) reg off - , off = (off + 1) `mod` 64 - , pos = pos + 1 - } - - if off == 63 - then runPut (flush >> setRegOff 0 0) tbl s' - else pure $ PutResult s' () - -{- - To write a 64bit word: - - | reg |= w << off - | buf[bufI++] = reg - | reg = w >> (64 - off) --} -{-# INLINE writeWord #-} -writeWord :: Word -> Put () -writeWord wor = do - -- traceM ("writeWord: " <> show wor) - S{..} <- getS - setReg (reg .|. shiftL wor off) - flush - update \s -> s { pos = 64 + pos - , reg = shiftR wor (64 - off) - } - -{- - To write some bits (< 64) from a word: - - | wor = takeBits(wid, wor) - | reg = reg .|. (wor << off) - | off = (off + wid) % 64 - | - | if (off + wid >= 64) - | buf[w] = x - | reg = wor >> (wid - off) --} - -{-# INLINE writeBitsFromWord #-} -writeBitsFromWord :: Int -> Word -> Put () -writeBitsFromWord wid wor = do - wor <- pure (takeBitsWord wid wor) - - -- traceM ("writeBitsFromWord: " <> show wid <> ", " <> show wor) - - oldSt <- getS - - let newSt = oldSt { reg = reg oldSt .|. shiftL wor (off oldSt) - , off = (off oldSt + wid) `mod` 64 - , pos = fromIntegral wid + pos oldSt - } - - putS newSt - - when (wid + off oldSt >= 64) $ do - flush - setReg (shiftR wor (wid - off newSt)) -{- - Write all of the the signficant bits of a direct atom. --} -{-# INLINE writeAtomWord# #-} -writeAtomWord# :: Word# -> Put () -writeAtomWord# w = do - -- traceM "writeAtomWord" - writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w) - -{-# INLINE writeAtomWord #-} -writeAtomWord :: Word -> Put () -writeAtomWord (W# w) = writeAtomWord# w - -{- - Write all of the the signficant bits of an indirect atom. - - TODO Use memcpy when the bit-offset of the output is divisible by 8. --} -{-# INLINE writeAtomBigNat #-} -writeAtomBigNat :: BigNat -> Put () -writeAtomBigNat (view bigNatWords -> words) = do - -- traceM "writeAtomBigNat" - let lastIdx = VP.length words - 1 - for_ [0..(lastIdx-1)] \i -> - writeWord (words ! i) - writeAtomWord (words ! lastIdx) - -{-# INLINE writeAtomBits #-} -writeAtomBits :: Atom -> Put () -writeAtomBits = \case MkAtom (NatS# wd) -> writeAtomWord# wd - MkAtom (NatJ# bn) -> writeAtomBigNat bn - - --- Put Instances --------------------------------------------------------------- - -instance Functor Put where - fmap f g = Put $ \tbl s -> do - PutResult s' a <- runPut g tbl s - pure $ PutResult s' (f a) - {-# INLINE fmap #-} - -instance Applicative Put where - pure x = Put (\_ s -> return $ PutResult s x) - {-# INLINE pure #-} - - Put f <*> Put g = Put $ \tbl s1 -> do - PutResult s2 f' <- f tbl s1 - PutResult s3 g' <- g tbl s2 - return $ PutResult s3 (f' g') - {-# INLINE (<*>) #-} - - Put f *> Put g = Put $ \tbl s1 -> do - PutResult s2 _ <- f tbl s1 - g tbl s2 - {-# INLINE (*>) #-} - -instance Monad Put where - return = pure - {-# INLINE return #-} - - (>>) = (*>) - {-# INLINE (>>) #-} - - Put x >>= f = Put $ \tbl s -> do - PutResult s' x' <- x tbl s - runPut (f x') tbl s' - {-# INLINE (>>=) #-} - - --------------------------------------------------------------------------------- - -doPut :: H.LinearHashTable Word Word -> Word -> Put () -> ByteString -doPut tbl sz m = - unsafePerformIO $ do - -- traceM "" - -- H.toList tbl >>= traceM . show . sort - -- traceM "" - buf <- callocBytes (fromIntegral $ 4 * wordSz*8) - _ <- runPut (m >> mbFlush) tbl (S buf 0 0 0) - BS.unsafePackCStringFinalizer (castPtr buf) (2*byteSz) (free buf) - where - wordSz = fromIntegral (sz `divUp` 64) - byteSz = fromIntegral (sz `divUp` 8) - divUp x y = (x `div` y) + (if x `mod` y == 0 then 0 else 1) - - mbFlush :: Put () - mbFlush = do - shouldFlush <- (/= 0) . off <$> getS - when shouldFlush flush - - --------------------------------------------------------------------------------- - -{- - TODO Handle back references --} -writeNoun :: Noun -> Put () -writeNoun n = do - getRef >>= \case - Just bk -> writeBackRef bk - Nothing -> case n of Atom a -> writeAtom a - Cell h t -> writeCell h t - -{-# INLINE writeMat #-} -writeMat :: Atom -> Put () -writeMat 0 = do - -- traceM "writeMat: 0" - writeBit True -writeMat atm = do - -- traceM ("writeMat: " <> show atm) - writeBitsFromWord (preWid+1) (shiftL 1 preWid) - writeBitsFromWord (preWid-1) atmWid - writeAtomBits atm - where - atmWid = bitWidth atm - preWid = fromIntegral (wordBitWidth atmWid) - -{-# INLINE writeCell #-} -writeCell :: Noun -> Noun -> Put () -writeCell h t = do - -- traceM "writeCell" - writeBit True - writeBit False - writeNoun h - writeNoun t - -{-# INLINE writeAtom #-} -writeAtom :: Atom -> Put () -writeAtom a = do - -- traceM "writeAtom" - writeBit False - writeMat a - -{-# INLINE writeBackRef #-} -writeBackRef :: Word -> Put () -writeBackRef a = do - p <- pos <$> getS - -- traceM ("writeBackRef: " <> show a <> " @" <> show p) - writeBit True - writeBit True - writeMat (toAtom a) - --------------------------------------------------------------------------------- - -jamBS :: Noun -> ByteString -jamBS n = -- trace (show $ sort $ swap <$> mapToList tbl) --- $ trace (show $ sort $ swap <$> unsafePerformIO (H.toList ht)) - doPut bt sz' (writeNoun n) - where (sz, tbl) = preJam n - (shn, ht) = unsafePerformIO (preJam' n) - (sz', bt) = unsafePerformIO (compress $ toBigNoun n) - -jam :: Noun -> Atom -jam = view (to jamBS . from atomBS) - --------------------------------------------------------------------------------- - -{-# INLINE matSz #-} -matSz :: Atom -> Word -matSz a = W# (matSz# a) - -{-# INLINE matSz# #-} -matSz# :: Atom -> Word# -matSz# 0 = 1## -matSz# a = preW `plusWord#` preW `plusWord#` atmW - where - atmW = atomBitWidth# a - preW = wordBitWidth# atmW - -{-# INLINE refSz# #-} -refSz# :: Word# -> Word# -refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w))) - -preJam :: Noun -> (Word, Map Noun Word) -preJam = go 0 mempty - where - insertNoun :: Noun -> Word -> Map Noun Word -> Map Noun Word - insertNoun n i tbl = lookup n tbl - & maybe (insertMap n i tbl) (const tbl) - - go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word) - go off oldTbl noun = - case lookup noun oldTbl of - Nothing -> - let tbl = insertNoun noun off oldTbl in - case noun of - Atom atm -> - (1 + matSz atm, tbl) - Cell l r -> - let (lSz, tbl') = go (2+off) tbl l in - let (rSz, tbl'') = go (2+off+lSz) tbl' r in - (2 + lSz + rSz, tbl'') - Just (W# ref) -> - let refSz = W# (wordBitWidth# ref) in - case noun of - Atom atm -> - let worSz = matSz atm in - if worSz > refSz - then (2 + refSz, oldTbl) - else (1 + worSz, oldTbl) - Cell _ _ -> - (2 + refSz, oldTbl) - - --- Nouns with pre-computed size and hash --------------------------------------- - -data BigNoun - = BigCell { bSize :: {-# UNPACK #-} !Word - , bHash :: {-# UNPACK #-} !Int - , bHead :: BigNoun - , bTail :: BigNoun - } - | BigAtom { bSize :: {-# UNPACK #-} !Word - , bHash :: {-# UNPACK #-} !Int - , bAtom :: {-# UNPACK #-} !Atom - } - deriving (Show) - -instance Hashable BigNoun where - hash = bHash - {-# INLINE hash #-} - hashWithSalt = defaultHashWithSalt - {-# INLINE hashWithSalt #-} - -instance Eq BigNoun where - BigAtom s1 _ a1 == BigAtom s2 _ a2 = s1==s2 && a1==a2 - BigCell s1 _ h1 t1 == BigCell s2 _ h2 t2 = s1==s2 && h1==h2 && t1==t2 - _ == _ = False - {-# INLINE (==) #-} - -{-# INLINE toBigNoun #-} -toBigNoun :: Noun -> BigNoun -toBigNoun = go - where - go (Atom a) = BigAtom (1 + matSz a) (Hash.hash a) a - go (Cell h t) = BigCell siz has hed tel - where - hed = toBigNoun h - tel = toBigNoun t - siz = 2 + bSize hed + bSize tel - has = fromIntegral siz `combine` bHash hed `combine` bHash tel - - --- Yet Another Fast Pre Jam ---------------------------------------------------- - -{-# INLINE compress #-} -compress :: BigNoun -> IO (Word, H.LinearHashTable Word Word) -compress top = do - nodes :: H.LinearHashTable BigNoun Word <- H.new - backs :: H.LinearHashTable Word Word <- H.new - - let proc :: Word -> BigNoun -> IO Word - proc pos = \case - BigAtom _ _ a -> pure (1 + matSz a) - BigCell _ _ h t -> do - hSz <- go (pos+2) h - tSz <- go (pos+2+hSz) t - pure (2+hSz+tSz) - - go :: Word -> BigNoun -> IO Word - go p inp = do - H.lookup nodes inp >>= \case - Nothing -> do - -- traceM ("inserting " <> show inp) - H.insert nodes inp p - proc p inp - Just bak -> do - -- traceM ("found backref for " <> show inp) - let refSz = 2 + matSz (toAtom bak) - if (refSz < bSize inp) - then H.insert backs p bak $> refSz - else proc p inp - - res <- go 0 top - pure (res, backs) - --- Fast Pre-Jam ---------------------------------------------------------------- - -{- - An `SHN` is a noun and some pre-computed information. - - - `size` is the serialized size without backreferences, we use this - for fast equality checks. - - `jmSz` is the serialized size, we use this to allocate a buffer - at the end. - - `hash` is a precomputed noun hash. We use this to get better, - cheaper hashes for our hashtable. - - `noun` is the actual noun. --} -data SHN = SHN - { size :: {-# UNPACK #-} !Word - , jmSz :: {-# UNPACK #-} !Word - , hash :: {-# UNPACK #-} !Int - , noun :: {-# UNPACK #-} !Noun - } - deriving (Show) - -instance Hashable SHN where - hash (SHN _ _ h _) = h - {-# INLINE hash #-} - hashWithSalt = defaultHashWithSalt - {-# INLINE hashWithSalt #-} - -instance Eq SHN where - x == y = (size x == size y) && (noun x == noun y) - -{- - This is slightly different that the stock `jam`, since we use - backreferences if-and-only-if they save space. --} -preJam' :: Noun -> IO (SHN, H.LinearHashTable Word Word) -preJam' top = do - nodes :: H.LinearHashTable SHN Word <- H.new - backs :: H.LinearHashTable Word Word <- H.new - - let goAtom :: Word -> Atom -> IO SHN - goAtom pos a@(MkAtom nat) = do - let atmSz = 1 + matSz a - pure $ SHN atmSz atmSz (Hash.hash nat) (Atom a) - - goCell :: Word -> Noun -> Noun -> IO SHN - goCell pos h t = do - SHN hSz hJmSz hHash _ <- go (pos+2) h - SHN tSz tJmSz tHash _ <- go (pos+2+hJmSz) t - let sz = 2+hSz+tSz - let jmSz = 2+hJmSz+tJmSz - pure $ SHN sz jmSz (combine hHash tHash) (Cell h t) - - go :: Word -> Noun -> IO SHN - go p n = do - res <- case n of Atom a -> goAtom p a - Cell h t -> goCell p h t - - H.lookup nodes res >>= \case - Nothing -> do - H.insert nodes res p - pure res - Just bak -> do - let refSz = 2 + matSz (toAtom bak) - if (refSz < jmSz res) - then do H.insert backs p bak - pure (res { jmSz = refSz }) - else pure res - - res <- go 0 top - pure (res, backs) - --- Stolen from Hashable Library ------------------------------------------------ - -{-# INLINE combine #-} -combine :: Int -> Int -> Int -combine h1 h2 = (h1 * 16777619) `xor` h2 - -{-# INLINE defaultHashWithSalt #-} -defaultHashWithSalt :: Hashable a => Int -> a -> Int -defaultHashWithSalt salt x = salt `combine` Hash.hash x - - -{- - I suspect that hashing these big atoms recursively is going to be the bottleneck: - Unless you have a good hashing system. - Which we totally do in the nock runtime. - Checking the hash for the top-level node precomputes the hashes for - everything else, recursively. - This is really smart. - Maybe I could implement this as well? - But hashing traverses the whole structure. - So, now we have - 1. precompute hashes. - 2. precompute size and backref table. - 3. serialize - This seems excessive. - We insert into the backref table right away, but actually: - Backreferences can't exist until the whole node is processed. - - Which implies a smarter algorithm: - - Setup a atom dup table - atoms :: Hashtable BigNum Word - - Setup a cell dup table - cells :: Hashtable (Noun, Noun) Word - - Setup a backref table (map from dup. pos to orig. pos) - backs :: Hashtable Word Word - - go :: Noun -> ST s (Hash, Word) - - If atom, - - Compute size and hash - - Check atom table for backref - - If atom in `atoms` table: - - If backref smaller than atom - - Insert (pos, bak) into `backs` table. - - Return (backref size, atom hash) - - If backref not smaller than atom - - Return (atom size, atom hash) - - Otherwise: - - Insert atom into `atoms` table. - - Return (atom size, atom hash) - - If cell - - process head - - process tail - - produce size+hash from results - - Check cell table for backref - - If backref exists - - Insert `(pos, bak)` into `backs` table - - Return (backref size, cell hash) - - Else - - Return (cell size, cell hash) - - Then, to serialize: - - Allocate a buffer of `size` bits - - If current pos in `backs` table: - - Write `11` - - Write backref (mat) - - Otherwise: - - If Atom: - - Write `0` - - Write atom (mat) - - If Cell - - Write `10` - - Write head - - Write tail --} diff --git a/pkg/hs-urbit/lib/Data/Noun/Lens.hs b/pkg/hs-urbit/lib/Data/Noun/Lens.hs index 12f7933b83..3f1fdfb422 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Lens.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Lens.hs @@ -7,7 +7,7 @@ import Data.Noun.Pill import Data.Noun import Data.Noun.Atom import Control.Lens -import Data.Noun.Jam.Put (jam, jamBS) +import Data.Noun.Jam.Fast (jam, jamBS) import Data.Noun.Jam (cue) -------------------------------------------------------------------------------- diff --git a/pkg/hs-urbit/lib/Vere/Serf.hs b/pkg/hs-urbit/lib/Vere/Serf.hs index 2f60ca5310..24f9e35a31 100644 --- a/pkg/hs-urbit/lib/Vere/Serf.hs +++ b/pkg/hs-urbit/lib/Vere/Serf.hs @@ -7,7 +7,7 @@ import Data.Void import Data.Noun import Data.Noun.Atom import Data.Noun.Jam hiding (jam) -import Data.Noun.Jam.Put (jam, jamBS) +import Data.Noun.Jam.Fast (jam, jamBS) import Data.Noun.Poet import Data.Noun.Pill import Vere.Pier.Types