From d445c1cbb14ce180a951fe5569d0e3ea677aab4e Mon Sep 17 00:00:00 2001 From: Benjamin Summers Date: Sun, 30 Jun 2019 16:17:59 -0700 Subject: [PATCH] Fixed a back-ref bug in my old jam, and got my fast jam to pass tests. --- pkg/hs-urbit/lib/Data/Noun/Atom.hs | 21 +++- pkg/hs-urbit/lib/Data/Noun/Jam.hs | 47 ++++++-- pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs | 167 +++++++++++++++++++------- pkg/hs-urbit/lib/Data/Noun/Pill.hs | 31 ++--- 4 files changed, 192 insertions(+), 74 deletions(-) diff --git a/pkg/hs-urbit/lib/Data/Noun/Atom.hs b/pkg/hs-urbit/lib/Data/Noun/Atom.hs index f80937d8df..a78e76f89c 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Atom.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Atom.hs @@ -21,7 +21,7 @@ import Data.Hashable (Hashable) -------------------------------------------------------------------------------- newtype Atom = MkAtom { unAtom :: Natural } - deriving newtype (Eq, Ord, Num, Bits, Enum, Real, Integral, Flat, Hashable) + deriving newtype (Eq, Ord, Num, Bits, Enum, Real, Integral, Flat, Hashable, NFData) instance Show Atom where show (MkAtom a) = show a @@ -109,6 +109,9 @@ instance IsAtom Integer where wordBitWidth# :: Word# -> Word# wordBitWidth# w = minusWord# 64## (clz# w) +wordBitWidth :: Word -> Word +wordBitWidth (W# w) = W# (wordBitWidth# w) + bigNatBitWidth# :: BigNat -> Word# bigNatBitWidth# nat = lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` 64##) @@ -141,24 +144,30 @@ instance IsAtom Cursor where -------------------------------------------------------------------------------- +{-# INLINE slice #-} slice :: (Atom, Atom) -> Atom -> Atom slice (offset, size) buf = fromSlice (Slice (fromAtom offset) (fromAtom size) buf) +{-# INLINE fromSlice #-} fromSlice :: Slice -> Atom -fromSlice (Slice off wid buf) = mask .&. (shiftR buf off) - where mask = shiftL (MkAtom 1) wid - 1 - +fromSlice (Slice off wid buf) = takeBits wid (shiftR buf off) -------------------------------------------------------------------------------- +{-# INLINE takeBits #-} takeBits :: Int -> Atom -> Atom -takeBits wid buf = mask .&. buf - where mask = shiftL (MkAtom 1) wid - 1 +takeBits wid buf = buf .&. (shiftL (MkAtom 1) wid - 1) +{-# INLINE takeBitsWord #-} +takeBitsWord :: Int -> Word -> Word +takeBitsWord wid wor = wor .&. (shiftL 1 wid - 1) + +{-# INLINE bitIdx #-} bitIdx :: Int -> Atom -> Bool bitIdx idx buf = testBit buf idx +{-# INLINE bitConcat #-} bitConcat :: Atom -> Atom -> Atom bitConcat x y = x .|. shiftL y (bitWidth x) diff --git a/pkg/hs-urbit/lib/Data/Noun/Jam.hs b/pkg/hs-urbit/lib/Data/Noun/Jam.hs index adf6ffda7e..692099504b 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Jam.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Jam.hs @@ -15,6 +15,9 @@ import Test.Tasty.TH import Test.Tasty.QuickCheck as QC import Test.QuickCheck +import qualified Data.Noun.Jam.Put as Fast +import qualified Data.Noun.Pill as Pill + -- Length-Encoded Atoms -------------------------------------------------------- @@ -51,7 +54,7 @@ jam' = toAtom . fst . go 0 mempty where insertNoun :: Noun -> Int -> Map Noun Int -> Map Noun Int insertNoun n i tbl = lookup n tbl - & maybe tbl (const $ insertMap n i tbl) + & maybe (insertMap n i tbl) (const tbl) go :: Int -> Map Noun Int -> Noun -> (Buf, Map Noun Int) go off oldTbl noun = @@ -68,8 +71,8 @@ jam' = toAtom . fst . go 0 mempty where Buf sz res = mat' atm (Nothing, Cell lef rit) -> (Buf (2+lSz+rSz) (xor 1 (shiftL (lRes .|. shiftL rRes lSz) 2)), rTbl) - where (Buf lSz lRes, lTbl) = go (off+2) tbl lef - (Buf rSz rRes, rTbl) = go (off+lSz) lTbl rit + where (Buf lSz lRes, lTbl) = go (off+2) tbl lef + (Buf rSz rRes, rTbl) = go (off+2+lSz) lTbl rit cue' :: Atom -> Maybe Noun cue' buf = view _2 <$> go mempty 0 @@ -98,7 +101,7 @@ mat atm = Buf bufWid buffer where atmWid = bitWidth atm preWid = bitWidth (toAtom atmWid) - bufWid = preWid + preWid + atmWid + bufWid = 2*preWid + atmWid prefix = shiftL 1 preWid extras = takeBits (preWid-1) (toAtom atmWid) suffix = xor extras (shiftL atm (preWid-1)) @@ -114,9 +117,9 @@ rub slc@(Cursor idx buf) = prefix -> pure (Buf sz val) where widIdx = idx + 1 + prefix - width = fromSlice (Slice widIdx (prefix - 1) buf) + extra = fromSlice (Slice widIdx (prefix - 1) buf) datIdx = widIdx + (prefix-1) - datWid = fromIntegral $ 2^(prefix-1) + width + datWid = fromIntegral $ extra + 2^(prefix-1) sz = datWid + (2*prefix) val = fromSlice (Slice datIdx datWid buf) @@ -130,7 +133,7 @@ jam = toAtom . fst . go 0 mempty where insertNoun :: Noun -> Int -> Map Noun Int -> Map Noun Int insertNoun n i tbl = lookup n tbl - & maybe tbl (const $ insertMap n i tbl) + & maybe (insertMap n i tbl) (const tbl) go :: Int -> Map Noun Int -> Noun -> (Buf, Map Noun Int) go off oldTbl noun = @@ -148,7 +151,7 @@ jam = toAtom . fst . go 0 mempty (Nothing, Cell lef rit) -> (Buf (2+lSz+rSz) (xor 1 (shiftL (bitConcat lRes rRes) 2)), rTbl) where (Buf lSz lRes, lTbl) = go (off+2) tbl lef - (Buf rSz rRes, rTbl) = go (off+lSz) lTbl rit + (Buf rSz rRes, rTbl) = go (off+2+lSz) lTbl rit leadingZeros :: Cursor -> Maybe Int leadingZeros (Cursor idx buf) = go 0 @@ -163,17 +166,15 @@ cue buf = view _2 <$> go mempty 0 where go :: Map Int Noun -> Int -> Maybe (Int, Noun, Map Int Noun) go tbl i = - -- trace ("go-" <> show i) case (bitIdx i buf, bitIdx (i+1) buf) of (False, _ ) -> do Buf wid at <- rub (Cursor (i+1) buf) let r = Atom at - pure (wid+1, r, insertMap i r tbl) + pure (1+wid, r, insertMap i r tbl) (True, False ) -> do (lSz,lef,tbl) <- go tbl (i+2) (rSz,rit,tbl) <- go tbl (i+2+fromIntegral lSz) let r = Cell lef rit pure (2+lSz+rSz, r, insertMap i r tbl) (True, True ) -> do Buf wid at <- rub (Cursor (i+2) buf) - -- traceM ("ref-" <> show at) r <- lookup (fromIntegral at) tbl & \case Nothing -> error ("bad-ref-" <> show at) Just ix -> Just ix @@ -182,6 +183,21 @@ cue buf = view _2 <$> go mempty 0 -- Tests ----------------------------------------------------------------------- +a12 = Atom 12 +a36 = Atom 36 +a9 = Atom 9 + +d12 = Cell a12 a12 +q12 = Cell d12 d12 + +midleEx = Cell a36 $ Cell a9 $ Cell q12 q12 + +smallEx = Cell (Cell (Atom 14) (Atom 8)) + $ Cell (Atom 15) (Atom 15) + +smallEx2 = Cell (Cell (Atom 0) (Atom 0)) + $ Cell (Atom 10) (Atom 10) + pills :: [Atom] pills = [ 0x2, 0xc, 0x48, 0x29, 0xc9, 0x299 , 0x3170_c7c1, 0x93_c7c1, 0xa_72e0, 0x1bd5_b7dd_e080 @@ -193,6 +209,15 @@ cueTest = traverse cue pills jamTest :: Maybe [Atom] jamTest = fmap jam <$> cueTest +prop_fastMatSlow :: Atom -> Bool +prop_fastMatSlow a = jam (Atom a) == Fast.jam (Atom a) + +prop_fastJamSlow :: Noun -> Bool +prop_fastJamSlow n = jam n == Fast.jam n + +prop_fastJam :: Noun -> Bool +prop_fastJam n = Just n == cue (Fast.jam n) + prop_jamCue :: Noun -> Bool prop_jamCue n = Just n == cue (jam n) diff --git a/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs b/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs index 83ee00863f..4ed24265ce 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Jam/Put.hs @@ -7,16 +7,17 @@ import GHC.Prim import GHC.Natural import GHC.Integer.GMP.Internals -import Control.Lens (view) +import Control.Lens (view, to, from, (&)) import Control.Monad (guard) import Data.Bits (shiftL, shiftR, setBit, clearBit, (.|.), (.&.)) import Data.Map (Map) -import Data.Noun.Atom (Atom(MkAtom), wordBitWidth#) +import Data.Noun.Atom ( Atom(MkAtom), wordBitWidth, wordBitWidth# + , atomBitWidth#, takeBitsWord ) import Data.Noun.Atom (toAtom, takeBits, bitWidth) import Data.Noun (Noun(Atom, Cell)) -import Data.Noun.Pill (bigNatWords) +import Data.Noun.Pill (bigNatWords, atomBS) import Data.Vector.Primitive ((!)) -import Foreign.Marshal.Alloc (mallocBytes, free) +import Foreign.Marshal.Alloc (callocBytes, free) import Foreign.Ptr (Ptr, castPtr, plusPtr, ptrToWordPtr) import Foreign.Storable (peek, poke) import GHC.Int (Int(I#)) @@ -96,15 +97,16 @@ putS s = Put $ \tbl _ -> pure (PutResult s ()) {- To write a bit: - | reg |= 1 << regI - | regI <- (regI + 1) % 64 - | if (!regI): + | reg |= 1 << off + | off <- (off + 1) % 64 + | if (!off): | buf[w++] <- reg | reg <- 0 -} {-# INLINE writeBit #-} writeBit :: Bool -> Put () writeBit b = Put $ \tbl s@S{..} -> do + -- traceM ("writeBit: " <> show b) let s' = s { reg = (if b then setBit else clearBit) reg off , off = (off + 1) `mod` 64 , pos = pos + 1 @@ -117,49 +119,60 @@ writeBit b = Put $ \tbl s@S{..} -> do {- To write a 64bit word: - | reg |= w << regI + | reg |= w << off | buf[bufI++] = reg - | reg = w >> (64 - regI) + | reg = w >> (64 - off) -} {-# INLINE writeWord #-} writeWord :: Word -> Put () writeWord wor = do + -- traceM ("writeWord: " <> show wor) S{..} <- getS setReg (reg .|. shiftL wor off) flush - setReg (shiftR wor (64 - off)) + update \s -> s { pos = 64 + pos + , reg = shiftR wor (64 - off) + } {- To write some bits (< 64) from a word: - | reg |= wor << regI - | regI += wid + | wor = takeBits(wid, wor) + | reg = reg .|. (wor << off) + | off = (off + wid) % 64 | - | if (regI >= 64) - | regI -= 64 + | if (off + wid >= 64) | buf[w] = x - | reg = wor >> (wid - regI) + | reg = wor >> (wid - off) -} + {-# INLINE writeBitsFromWord #-} writeBitsFromWord :: Int -> Word -> Put () writeBitsFromWord wid wor = do - s <- getS + wor <- pure (takeBitsWord wid wor) - let s' = s { reg = reg s .|. shiftL wor (off s) - , off = off s + wid - } + -- traceM ("writeBitsFromWord: " <> show wid <> ", " <> show wor) - if (off s' < 64) - then do putS s' - else do update (\s -> s { off = off s - 64 }) - flush - setReg (shiftR wor (wid - off s')) + oldSt <- getS + + let newSt = oldSt { reg = reg oldSt .|. shiftL wor (off oldSt) + , off = (off oldSt + wid) `mod` 64 + , pos = fromIntegral wid + pos oldSt + } + + putS newSt + + when (wid + off oldSt >= 64) $ do + flush + setReg (shiftR wor (wid - off newSt)) {- Write all of the the signficant bits of a direct atom. -} {-# INLINE writeAtomWord# #-} writeAtomWord# :: Word# -> Put () -writeAtomWord# w = writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w) +writeAtomWord# w = do + -- traceM "writeAtomWord" + writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w) {-# INLINE writeAtomWord #-} writeAtomWord :: Word -> Put () @@ -173,6 +186,7 @@ writeAtomWord (W# w) = writeAtomWord# w {-# INLINE writeAtomBigNat #-} writeAtomBigNat :: BigNat -> Put () writeAtomBigNat (view bigNatWords -> words) = do + -- traceM "writeAtomBigNat" let lastIdx = VP.length words - 1 for_ [0..(lastIdx-1)] \i -> writeWord (words ! i) @@ -222,18 +236,23 @@ instance Monad Put where -------------------------------------------------------------------------------- -doPut :: Word64 -> Put () -> ByteString +doPut :: Word -> Put () -> ByteString doPut sz m = unsafePerformIO $ do tbl <- H.new - buf <- mallocBytes (fromIntegral $ wordSz*8) - _ <- runPut m tbl (S buf 0 0 0) - BS.unsafePackCStringFinalizer (castPtr buf) byteSz (free buf) + buf <- callocBytes (fromIntegral $ 4 * wordSz*8) + _ <- runPut (m >> mbFlush) tbl (S buf 0 0 0) + BS.unsafePackCStringFinalizer (castPtr buf) (2*byteSz) (free buf) where wordSz = fromIntegral (sz `divUp` 64) byteSz = fromIntegral (sz `divUp` 8) divUp x y = (x `div` y) + (if x `mod` y == 0 then 0 else 1) + mbFlush :: Put () + mbFlush = do + shouldFlush <- (/= 0) . off <$> getS + when shouldFlush flush + -------------------------------------------------------------------------------- @@ -242,32 +261,38 @@ doPut sz m = -} writeNoun :: Noun -> Put () writeNoun n = do - p <- pos <$> getS - mRef <- getRef n + -- traceM "writeNoun" - case (mRef, n) of - (Nothing, Atom a) -> writeAtom a - (Nothing, Cell h t) -> writeCell h t - (Just bk, Atom a) | a < toAtom bk -> writeAtom a - (Just bk, _) -> writeBackRef bk + p <- pos <$> getS + mRef <- getRef n - insRef n p + case (mRef, n) of + (Nothing, Atom a) -> writeAtom a + (Nothing, Cell h t) -> writeCell h t + (Just bk, Atom a) | bitWidth a <= wordBitWidth bk -> writeAtom a + (Just bk, _) -> writeBackRef bk + + when (mRef == Nothing) $ + insRef n p {-# INLINE writeMat #-} writeMat :: Atom -> Put () +writeMat 0 = do + -- traceM "writeMat: 0" + writeBit True writeMat atm = do - writeBitsFromWord (preWid+1) (shiftL (1 :: Word) preWid) - writeAtomBits extras + -- traceM ("writeMat: " <> show atm) + writeBitsFromWord (preWid+1) (shiftL 1 preWid) + writeBitsFromWord (preWid-1) atmWid writeAtomBits atm where - atmWid = bitWidth atm :: Atom - preWid = bitWidth atmWid :: Int - prefix = shiftL (1 :: Word) (fromIntegral preWid) - extras = takeBits (preWid-1) (toAtom atmWid) + atmWid = bitWidth atm + preWid = fromIntegral (wordBitWidth atmWid) {-# INLINE writeCell #-} writeCell :: Noun -> Noun -> Put () writeCell h t = do + -- traceM "writeCell" writeBit True writeBit False writeNoun h @@ -276,12 +301,68 @@ writeCell h t = do {-# INLINE writeAtom #-} writeAtom :: Atom -> Put () writeAtom a = do + -- traceM "writeAtom" writeBit False writeMat a {-# INLINE writeBackRef #-} writeBackRef :: Word -> Put () writeBackRef a = do + -- traceM ("writeBackRef: " <> show a) writeBit True writeBit True writeMat (toAtom a) + +-------------------------------------------------------------------------------- + +jamBS :: Noun -> ByteString +jamBS n = doPut (fst $ preJam n) (writeNoun n) + +jam :: Noun -> Atom +jam = view (to jamBS . from atomBS) + +-------------------------------------------------------------------------------- + +preJam :: Noun -> (Word, Map Noun Word) +preJam = go 0 mempty + where + insertNoun :: Noun -> Word -> Map Noun Word -> Map Noun Word + insertNoun n i tbl = lookup n tbl + & maybe (insertMap n i tbl) (const tbl) + + go :: Word -> Map Noun Word -> Noun -> (Word, Map Noun Word) + go off oldTbl noun = + let tbl = insertNoun noun off oldTbl in + case lookup noun oldTbl of + Nothing -> + case noun of + Atom atm -> + (1 + W# (matSz# atm), tbl) + Cell l r -> + let (lSz, tbl') = go (2+off) tbl l in + let (rSz, tbl'') = go (2+off+lSz) tbl' r in + (2 + lSz + rSz, tbl'') + Just (W# ref) -> + let refSz = W# (wordBitWidth# ref) in + case noun of + Atom atm -> + let worSz = W# (matSz# atm) in + if worSz > refSz + then (2 + refSz, oldTbl) + else (1 + worSz, tbl) + Cell _ _ -> + (2 + refSz, oldTbl) + + matSz# :: Atom -> Word# + matSz# 0 = 1## + matSz# a = preW `plusWord#` preW `plusWord#` atmW + where + atmW = atomBitWidth# a + preW = wordBitWidth# atmW + + refSz# :: Word# -> Word# + refSz# w = 2## `plusWord#` (matSz# (MkAtom (NatS# w))) + + nounSz# :: Noun -> Word# + nounSz# (Atom a) = 1## `plusWord#` (matSz# a) + nounSz# (Cell l r) = 2## `plusWord#` (nounSz# l) `plusWord#` (nounSz# r) diff --git a/pkg/hs-urbit/lib/Data/Noun/Pill.hs b/pkg/hs-urbit/lib/Data/Noun/Pill.hs index 902da5f12d..6532899f3f 100644 --- a/pkg/hs-urbit/lib/Data/Noun/Pill.hs +++ b/pkg/hs-urbit/lib/Data/Noun/Pill.hs @@ -25,7 +25,7 @@ module Data.Noun.Pill where import ClassyPrelude import Data.Noun hiding (toList, fromList) import Data.Noun.Atom -import Data.Noun.Jam hiding (main) +-- import Data.Noun.Jam hiding (main) import Data.Flat hiding (from, to) import Control.Monad.Except import Control.Lens hiding (index, Index) @@ -178,8 +178,8 @@ pillWords = iso toVec fromVec toVec = view (pillBS . to bsToWords) fromVec = view (to wordsToBytes . bytesBS . from pillBS) -_CueBytes :: Prism' ByteString Noun -_CueBytes = from pillBS . from pill . _Cue +-- _CueBytes :: Prism' ByteString Noun +-- _CueBytes = from pillBS . from pill . _Cue -------------------------------------------------------------------------------- @@ -201,10 +201,13 @@ pill = iso toAtom fromPill toAtom = view (atomNat . natWords . from pillWords) fromPill = view (pillBS . to bsToWords . from natWords . from atomNat) +atomBS :: Iso' Atom ByteString +atomBS = pill . pillBS + -------------------------------------------------------------------------------- -_Cue :: Prism' Atom Noun -_Cue = prism' jam cue +-- _Cue :: Prism' Atom Noun +-- _Cue = prism' jam cue _Tall :: Flat a => Prism' ByteString a _Tall = prism' flat (eitherToMaybe . unflat) @@ -221,8 +224,8 @@ loadPill = fmap Pill . readFile loadAtom :: FilePath -> IO Atom loadAtom = fmap (view $ from pillBS . from pill) . readFile -loadNoun :: FilePath -> IO (Maybe Noun) -loadNoun = fmap (preview $ from pillBS . from pill . _Cue) . readFile +-- loadNoun :: FilePath -> IO (Maybe Noun) +-- loadNoun = fmap (preview $ from pillBS . from pill . _Cue) . readFile loadFlat :: Flat a => FilePath -> IO (Either Text a) loadFlat = fmap (mapLeft tshow . unflat) . readFile @@ -235,8 +238,8 @@ dumpPill fp = writeFile fp . view pillBS dumpAtom :: FilePath -> Atom -> IO () dumpAtom fp = writeFile fp . view (pill . pillBS) -dumpJam :: FilePath -> Noun -> IO () -dumpJam fp = writeFile fp . view (re _Cue . pill . pillBS) +-- dumpJam :: FilePath -> Noun -> IO () +-- dumpJam fp = writeFile fp . view (re _Cue . pill . pillBS) dumpFlat :: Flat a => FilePath -> a -> IO () dumpFlat fp = writeFile fp . flat @@ -265,11 +268,11 @@ tryPackPill pf = do atm <- tryLoadPill pf print $ length (atm ^. pill . pillBS) -tryCuePill :: PillFile -> IO () -tryCuePill pill = - loadNoun (show pill) >>= \case Nothing -> print "nil" - Just (Atom _) -> print "atom" - _ -> print "cell" +-- tryCuePill :: PillFile -> IO () +-- tryCuePill pill = + -- loadNoun (show pill) >>= \case Nothing -> print "nil" + -- Just (Atom _) -> print "atom" + -- _ -> print "cell" -- Tests -----------------------------------------------------------------------