mirror of
https://github.com/urbit/shrub.git
synced 2024-12-01 06:35:32 +03:00
zipFlat is now typically smaller and faster than jam.
This commit is contained in:
parent
2374ed3ce8
commit
18098beaac
@ -43,7 +43,7 @@ instance Show Noun where
|
||||
fmtCell xs = "[" <> intercalate " " xs <> "]"
|
||||
|
||||
instance Arbitrary Noun where
|
||||
arbitrary = resize 12 genNoun
|
||||
arbitrary = resize 120 genNoun
|
||||
where
|
||||
genNoun = do
|
||||
sz <- getSize
|
||||
@ -51,7 +51,7 @@ instance Arbitrary Noun where
|
||||
case (sz, bit) of
|
||||
( 0, _ ) -> Atom <$> arbitrary
|
||||
( _, False ) -> Atom <$> arbitrary
|
||||
( _, True ) -> scale pred (Cell <$> genNoun <*> genNoun)
|
||||
( _, True ) -> scale (\x -> x-10) (Cell <$> genNoun <*> genNoun)
|
||||
|
||||
|
||||
-- Predicates ------------------------------------------------------------------
|
||||
|
@ -14,11 +14,12 @@ import Data.Bits
|
||||
import Test.QuickCheck.Arbitrary
|
||||
import Test.QuickCheck.Gen
|
||||
import Text.Printf
|
||||
import Data.Flat
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
newtype Atom = MkAtom Natural
|
||||
deriving newtype (Eq, Ord, Num, Bits, Enum, Real, Integral)
|
||||
deriving newtype (Eq, Ord, Num, Bits, Enum, Real, Integral, Flat)
|
||||
|
||||
instance Show Atom where
|
||||
show (MkAtom a) = show a
|
||||
@ -49,7 +50,11 @@ instance Arbitrary Natural where
|
||||
arbitrary = fromInteger . abs <$> arbitrary
|
||||
|
||||
instance Arbitrary Atom where
|
||||
arbitrary = MkAtom <$> arbitrary
|
||||
arbitrary = do
|
||||
arbitrary >>= \case
|
||||
False -> MkAtom <$> arbitrary
|
||||
True -> do n <- MkAtom <$> arbitrary
|
||||
pure (n + 2 ^ (n `mod` 64))
|
||||
|
||||
|
||||
-- Conversion ------------------------------------------------------------------
|
||||
@ -58,10 +63,30 @@ class IsAtom a where
|
||||
toAtom :: a -> Atom
|
||||
fromAtom :: Atom -> a
|
||||
|
||||
instance IsAtom Atom where
|
||||
toAtom = id
|
||||
fromAtom = id
|
||||
|
||||
instance IsAtom Natural where
|
||||
toAtom = MkAtom
|
||||
fromAtom (MkAtom a) = a
|
||||
|
||||
instance IsAtom Word8 where
|
||||
toAtom = fromIntegral
|
||||
fromAtom = fromIntegral
|
||||
|
||||
instance IsAtom Word16 where
|
||||
toAtom = fromIntegral
|
||||
fromAtom = fromIntegral
|
||||
|
||||
instance IsAtom Word32 where
|
||||
toAtom = fromIntegral
|
||||
fromAtom = fromIntegral
|
||||
|
||||
instance IsAtom Word64 where
|
||||
toAtom = fromIntegral
|
||||
fromAtom = fromIntegral
|
||||
|
||||
instance IsAtom Word where
|
||||
toAtom = fromIntegral
|
||||
fromAtom = fromIntegral
|
||||
|
@ -16,15 +16,19 @@ import Data.Bits
|
||||
import GHC.Generics
|
||||
import Test.QuickCheck.Arbitrary
|
||||
import Test.QuickCheck.Gen
|
||||
import Data.Flat
|
||||
import Data.Flat.Bits
|
||||
|
||||
import Data.List (intercalate)
|
||||
import Data.Typeable (Typeable)
|
||||
import Data.Word
|
||||
|
||||
import Control.Monad.State.Strict hiding (forM_)
|
||||
import Control.Monad.Trans.Maybe
|
||||
|
||||
import qualified Data.Vector as V
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Vector as V
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Vector.Unboxed as UV
|
||||
|
||||
import Test.Tasty
|
||||
import Test.Tasty.TH
|
||||
@ -34,72 +38,98 @@ import Test.QuickCheck
|
||||
|
||||
-- External Types --------------------------------------------------------------
|
||||
|
||||
newtype Zip = Zip (Vector ZipNode)
|
||||
deriving newtype (Eq, Ord, Show)
|
||||
|
||||
|
||||
-- Internal Types --------------------------------------------------------------
|
||||
|
||||
data ZipNode
|
||||
= ZipAtom !Atom
|
||||
| ZipCell !Word !Word
|
||||
deriving (Eq, Ord, Show)
|
||||
| ZipCell !ZipRef !ZipRef
|
||||
deriving stock (Eq, Ord, Show, Generic)
|
||||
deriving anyclass Flat
|
||||
|
||||
data ZipRef
|
||||
= ZRInline !ZipNode
|
||||
| ZRIndex !Word
|
||||
deriving stock (Eq, Ord, Show, Generic)
|
||||
deriving anyclass Flat
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
tag :: Bool -> Buf -> Buf
|
||||
tag bit buf = (if bit then Buf 1 1 else Buf 1 0) <> buf
|
||||
|
||||
jamZipNode :: ZipNode -> Buf
|
||||
jamZipNode (ZipAtom a) = tag False (mat a)
|
||||
jamZipNode (ZipCell l r) = tag True (mat (toAtom l) <> mat (toAtom r))
|
||||
|
||||
jamZip :: Zip -> Buf
|
||||
jamZip (Zip vec) = fold (length : nodes)
|
||||
where
|
||||
length = mat (toAtom (V.length vec))
|
||||
nodes = jamZipNode <$> V.toList vec
|
||||
|
||||
cueZip :: Buf -> Maybe Zip
|
||||
cueZip = undefined
|
||||
|
||||
-- TODO NonEmpty
|
||||
newtype Zip = Zip [ZipNode]
|
||||
deriving stock Generic
|
||||
deriving newtype (Eq, Ord, Show, Flat)
|
||||
|
||||
-- Zip -------------------------------------------------------------------------
|
||||
|
||||
type ZipM a = State ([ZipNode], Word, Map Noun Word) a
|
||||
|
||||
zip :: Noun -> Zip
|
||||
zip = \n -> evalState (go n >> end) ([], 0, mempty)
|
||||
findDups :: Noun -> Set Noun
|
||||
findDups = done . go mempty
|
||||
where
|
||||
done :: Map Noun Word -> Set Noun
|
||||
done = keysSet . filterMap (> 1)
|
||||
|
||||
ins :: Noun -> Map Noun Word -> Map Noun Word
|
||||
ins = alterMap $ \case Nothing -> Just 1
|
||||
Just n -> Just (n+1)
|
||||
|
||||
go :: Map Noun Word -> Noun -> Map Noun Word
|
||||
go acc a@(Atom _) = ins a acc
|
||||
go acc c@(Cell l r) = go (go (ins c acc) l) r
|
||||
|
||||
zzip :: Noun -> Zip
|
||||
zzip = zip
|
||||
|
||||
zip :: Noun -> Zip
|
||||
zip top = evalState (go top >> end) ([], 0, mempty)
|
||||
where
|
||||
dups :: Set Noun
|
||||
dups = findDups top
|
||||
|
||||
end :: ZipM Zip
|
||||
end = do
|
||||
(acc, _, _) <- get
|
||||
pure (Zip $ V.fromList $ reverse acc)
|
||||
pure (Zip $ reverse acc)
|
||||
|
||||
ins :: Noun -> ZipNode -> ZipM Word
|
||||
ins :: Noun -> ZipNode -> ZipM ZipRef
|
||||
ins noun node = do
|
||||
(acc, nex, tbl) <- get
|
||||
put (node:acc, nex+1, insertMap noun nex tbl)
|
||||
pure nex
|
||||
pure (ZRIndex nex)
|
||||
|
||||
go :: Noun -> ZipM Word
|
||||
go noun = do
|
||||
doAtom :: Atom -> ZipM ZipRef
|
||||
doAtom a = do
|
||||
if a >= 128 && member (Atom a) dups
|
||||
then ins (Atom a) (ZipAtom a)
|
||||
else pure (ZRInline (ZipAtom a))
|
||||
|
||||
doCell :: (Noun, Noun) -> ZipM ZipRef
|
||||
doCell (l,r) = do
|
||||
lRef <- loop l
|
||||
rRef <- loop r
|
||||
let res = ZipCell lRef rRef
|
||||
if member (Cell l r) dups
|
||||
then ins (Cell l r) res
|
||||
else pure (ZRInline res)
|
||||
|
||||
loop :: Noun -> ZipM ZipRef
|
||||
loop noun = do
|
||||
(acc, nex, tbl) <- get
|
||||
case (lookup noun tbl, noun) of
|
||||
(Just w, _) -> pure w
|
||||
(Nothing, Atom atm) -> ins noun (ZipAtom atm)
|
||||
(Nothing, Cell l r) -> (ZipCell <$> go l <*> go r) >>= ins noun
|
||||
(Just w, _) -> pure (ZRIndex w)
|
||||
(Nothing, Atom atm) -> doAtom atm
|
||||
(Nothing, Cell l r) -> doCell (l,r)
|
||||
|
||||
go :: Noun -> ZipM ZipRef
|
||||
go noun = do
|
||||
loop noun >>= \case
|
||||
ZRInline x -> ins noun x
|
||||
ZRIndex _ -> error "Impossible -- duplicate top-level node"
|
||||
|
||||
-- Unzip -----------------------------------------------------------------------
|
||||
|
||||
type UnZipM a = MaybeT (State (Word, Map Word Noun)) a
|
||||
|
||||
unzip :: Zip -> Maybe Noun
|
||||
unzip (Zip vec) | V.length vec == 0 = Nothing
|
||||
unzip (Zip []) = Nothing
|
||||
unzip (Zip vec) =
|
||||
L.last <$> cvt (V.toList vec)
|
||||
L.last <$> cvt vec
|
||||
where
|
||||
cvt :: [ZipNode] -> Maybe [Noun]
|
||||
cvt nodes = evalState (runMaybeT $ go nodes) (0, mempty)
|
||||
@ -109,12 +139,13 @@ unzip (Zip vec) =
|
||||
modify $ \(nex, tbl) -> (nex+1, insertMap nex noun tbl)
|
||||
pure noun
|
||||
|
||||
find :: Word -> UnZipM Noun
|
||||
find idx = do
|
||||
(nex, tbl) <- get
|
||||
lookup idx tbl & \case
|
||||
Nothing -> error "bad zip"
|
||||
Just res -> pure res
|
||||
find :: ZipRef -> UnZipM Noun
|
||||
find (ZRInline (ZipAtom a)) = pure (Atom a)
|
||||
find (ZRInline (ZipCell l r)) = Cell <$> find l <*> find r
|
||||
find (ZRIndex idx) = do (nex, tbl) <- get
|
||||
lookup idx tbl & \case
|
||||
Nothing -> error "bad zip"
|
||||
Just res -> pure res
|
||||
|
||||
go :: [ZipNode] -> UnZipM [Noun]
|
||||
go = mapM $ \case ZipAtom a -> ins (Atom a)
|
||||
@ -123,14 +154,23 @@ unzip (Zip vec) =
|
||||
|
||||
-- Tests -----------------------------------------------------------------------
|
||||
|
||||
compareSize :: Noun -> (Int, Int)
|
||||
compareSize n = (jamSz, zipSz)
|
||||
compareSize :: Noun -> Int
|
||||
compareSize n = flatSz - jamSz
|
||||
where
|
||||
Buf jamSz _ = fromAtom (jam n)
|
||||
Buf zipSz _ = jamZip (zip n)
|
||||
Buf jamSz _ = fromAtom (jam n)
|
||||
flatSz = UV.length (bits (zip n))
|
||||
|
||||
prop_zipUnzip :: Noun -> Bool
|
||||
prop_zipUnzip n = Just n == unzip (zip n)
|
||||
|
||||
main :: IO ()
|
||||
main = $(defaultMainGenerator)
|
||||
|
||||
dub :: Noun -> Noun
|
||||
dub x = Cell x x
|
||||
|
||||
testSizes :: IO ()
|
||||
testSizes = do
|
||||
nouns <- sample' (arbitrary :: Gen Noun)
|
||||
traverse_ (print . compareSize) nouns
|
||||
-- traverse_ print nouns
|
||||
|
@ -31,6 +31,7 @@ dependencies:
|
||||
- classy-prelude
|
||||
- containers
|
||||
- data-fix
|
||||
- flat
|
||||
- ghc-prim
|
||||
- http-client
|
||||
- integer-gmp
|
||||
|
@ -8,3 +8,4 @@ ghc-options:
|
||||
|
||||
extra-deps:
|
||||
- para-1.1@sha256:a90eebb063ad70271e6e2a7f00a93e8e8f8b77273f100f39852fbf8301926f81
|
||||
- flat-0.3.4@sha256:002a0e0ae656ea8cc02a772d0bcb6ea7dbd7f2e79070959cc748ad1e7138eb38
|
||||
|
Loading…
Reference in New Issue
Block a user