urbit/pkg/hs-urbit/lib/Noun/Jam.hs

361 lines
9.4 KiB
Haskell
Raw Normal View History

2019-07-12 04:16:40 +03:00
module Noun.Jam (jam, jamBS) where
2019-07-02 05:43:10 +03:00
import ClassyPrelude hiding (hash)
2019-07-12 04:16:40 +03:00
import Noun.Atom
2019-07-12 22:24:44 +03:00
import Noun.Core
2019-07-12 04:16:40 +03:00
2019-07-12 22:24:44 +03:00
import Control.Lens (from, view)
import Data.Bits (clearBit, setBit, shiftL, shiftR, (.|.))
2019-07-02 05:43:10 +03:00
import Data.Vector.Primitive ((!))
import Foreign.Marshal.Alloc (callocBytes, free)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (poke)
import GHC.Int (Int(I#))
2019-07-12 22:24:44 +03:00
import GHC.Integer.GMP.Internals (BigNat)
import GHC.Natural (Natural(NatJ#, NatS#))
2019-07-02 05:43:10 +03:00
import GHC.Prim (Word#, plusWord#, word2Int#)
import GHC.Word (Word(W#))
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.ByteString.Unsafe as BS
import qualified Data.HashTable.IO as H
import qualified Data.Vector.Primitive as VP
-- Exports ---------------------------------------------------------------------
jamBS :: Noun -> ByteString
jamBS n = doPut bt sz (writeNoun n)
2019-07-02 05:43:10 +03:00
where
(sz, bt) = unsafePerformIO (compress n)
2019-07-02 05:43:10 +03:00
jam :: Noun -> Atom
2019-07-12 04:16:40 +03:00
jam = view (from atomBytes) . jamBS
2019-07-02 05:43:10 +03:00
-- Types -----------------------------------------------------------------------
2019-07-02 05:43:10 +03:00
{-|
The encoder state.
2019-07-02 05:43:10 +03:00
- ptr: Pointer into the output buffer.
- reg: Next 64 bits of output, partially written.
- off: Number of bits already written into `reg`
- pos: Total number of bits written.
-}
data S = S
{ ptr :: {-# UNPACK #-} !(Ptr Word)
, reg :: {-# UNPACK #-} !Word
, off :: {-# UNPACK #-} !Int
, pos :: {-# UNPACK #-} !Word
} deriving (Show,Eq,Ord)
data PutResult a = PutResult {-# UNPACK #-} !S !a
deriving Functor
newtype Put a = Put
{ runPut :: H.CuckooHashTable Word Word
2019-07-02 05:43:10 +03:00
-> S
-> IO (PutResult a)
}
2019-07-02 05:43:10 +03:00
--------------------------------------------------------------------------------
2019-07-02 05:43:10 +03:00
{-# INLINE getRef #-}
getRef :: Put (Maybe Word)
2019-07-12 22:24:44 +03:00
getRef = Put $ \tbl s -> PutResult s <$> H.lookup tbl (pos s)
2019-07-02 05:43:10 +03:00
{-
1. Write the register to the output, and increment the output pointer.
-}
{-# INLINE flush #-}
flush :: Put ()
flush = Put $ \tbl s@S{..} -> do
poke ptr reg
pure $ PutResult (s { ptr = ptr `plusPtr` 8 }) ()
2019-07-02 05:43:10 +03:00
{-# INLINE update #-}
update :: (S -> S) -> Put ()
2019-07-12 22:24:44 +03:00
update f = Put $ \tbl s@S{..} -> pure (PutResult (f s) ())
2019-07-02 05:43:10 +03:00
{-# INLINE setRegOff #-}
setRegOff :: Word -> Int -> Put ()
2019-07-12 22:24:44 +03:00
setRegOff r o = update $ \s@S{..} -> (s {reg=r, off=o})
2019-07-02 05:43:10 +03:00
{-# INLINE setReg #-}
setReg :: Word -> Put ()
2019-07-12 22:24:44 +03:00
setReg r = update $ \s@S{..} -> (s { reg=r })
2019-07-02 05:43:10 +03:00
{-# INLINE getS #-}
getS :: Put S
getS = Put $ \tbl s -> pure (PutResult s s)
2019-07-02 05:43:10 +03:00
{-# INLINE putS #-}
putS :: S -> Put ()
putS s = Put $ \tbl _ -> pure (PutResult s ())
2019-07-02 05:43:10 +03:00
{-
To write a bit:
2019-07-02 05:43:10 +03:00
| reg |= 1 << off
| off <- (off + 1) % 64
| if (!off):
| buf[w++] <- reg
| reg <- 0
-}
{-# INLINE writeBit #-}
writeBit :: Bool -> Put ()
writeBit b = Put $ \tbl s@S{..} -> do
let s' = s { reg = (if b then setBit else clearBit) reg off
, off = (off + 1) `mod` 64
, pos = pos + 1
}
if off == 63
then runPut (flush >> setRegOff 0 0) tbl s'
else pure $ PutResult s' ()
{-
2019-07-02 05:43:10 +03:00
To write a 64bit word:
2019-07-02 05:43:10 +03:00
| reg |= w << off
| buf[bufI++] = reg
| reg = w >> (64 - off)
-}
2019-07-02 05:43:10 +03:00
{-# INLINE writeWord #-}
writeWord :: Word -> Put ()
writeWord wor = do
S{..} <- getS
setReg (reg .|. shiftL wor off)
flush
2019-07-12 22:24:44 +03:00
update $ \s -> s { pos = 64 + pos
, reg = shiftR wor (64 - off)
}
2019-07-02 05:43:10 +03:00
{-
To write some bits (< 64) from a word:
| wor = takeBits(wid, wor)
| reg = reg .|. (wor << off)
| off = (off + wid) % 64
|
| if (off + wid >= 64)
| buf[w] = x
| reg = wor >> (wid - off)
-}
2019-07-02 05:43:10 +03:00
{-# INLINE writeBitsFromWord #-}
writeBitsFromWord :: Int -> Word -> Put ()
writeBitsFromWord wid wor = do
wor <- pure (takeBitsWord wid wor)
oldSt <- getS
2019-07-02 05:43:10 +03:00
let newSt = oldSt { reg = reg oldSt .|. shiftL wor (off oldSt)
, off = (off oldSt + wid) `mod` 64
, pos = fromIntegral wid + pos oldSt
}
2019-07-02 05:43:10 +03:00
putS newSt
2019-07-02 05:43:10 +03:00
when (wid + off oldSt >= 64) $ do
flush
setReg (shiftR wor (wid - off newSt))
{-
2019-07-02 05:43:10 +03:00
Write all of the the signficant bits of a direct atom.
-}
{-# INLINE writeAtomWord# #-}
writeAtomWord# :: Word# -> Put ()
writeAtomWord# w = do
writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w)
{-# INLINE writeAtomWord #-}
writeAtomWord :: Word -> Put ()
writeAtomWord (W# w) = writeAtomWord# w
{-
Write all of the the signficant bits of an indirect atom.
TODO Use memcpy when the bit-offset of the output is divisible by 8.
-}
{-# INLINE writeAtomBigNat #-}
writeAtomBigNat :: BigNat -> Put ()
writeAtomBigNat !(view bigNatWords -> words) = do
2019-07-02 05:43:10 +03:00
let lastIdx = VP.length words - 1
2019-07-12 22:24:44 +03:00
for_ [0..(lastIdx-1)] $ \i ->
2019-07-02 05:43:10 +03:00
writeWord (words ! i)
writeAtomWord (words ! lastIdx)
{-# INLINE writeAtomBits #-}
writeAtomBits :: Atom -> Put ()
2019-07-12 04:16:40 +03:00
writeAtomBits = \case NatS# wd -> writeAtomWord# wd
NatJ# bn -> writeAtomBigNat bn
2019-07-02 05:43:10 +03:00
2019-07-02 05:43:10 +03:00
-- Put Instances ---------------------------------------------------------------
2019-07-02 05:43:10 +03:00
instance Functor Put where
fmap f g = Put $ \tbl s -> do
PutResult s' a <- runPut g tbl s
pure $ PutResult s' (f a)
{-# INLINE fmap #-}
2019-07-02 05:43:10 +03:00
instance Applicative Put where
pure x = Put (\_ s -> return $ PutResult s x)
{-# INLINE pure #-}
2019-07-02 05:43:10 +03:00
Put f <*> Put g = Put $ \tbl s1 -> do
PutResult s2 f' <- f tbl s1
PutResult s3 g' <- g tbl s2
return $ PutResult s3 (f' g')
{-# INLINE (<*>) #-}
2019-07-02 05:43:10 +03:00
Put f *> Put g = Put $ \tbl s1 -> do
PutResult s2 _ <- f tbl s1
g tbl s2
{-# INLINE (*>) #-}
instance Monad Put where
return = pure
{-# INLINE return #-}
(>>) = (*>)
{-# INLINE (>>) #-}
Put x >>= f = Put $ \tbl s -> do
PutResult s' x' <- x tbl s
runPut (f x') tbl s'
{-# INLINE (>>=) #-}
--------------------------------------------------------------------------------
doPut :: H.CuckooHashTable Word Word -> Word -> Put () -> ByteString
doPut !tbl !sz m =
2019-07-02 05:43:10 +03:00
unsafePerformIO $ do
2019-07-04 07:01:40 +03:00
-- traceM "doPut"
2019-07-02 05:43:10 +03:00
buf <- callocBytes (fromIntegral (wordSz*8))
_ <- runPut (m >> mbFlush) tbl (S buf 0 0 0)
BS.unsafePackCStringFinalizer (castPtr buf) byteSz (free buf)
where
!wordSz = fromIntegral (sz `divUp` 64)
!byteSz = fromIntegral (sz `divUp` 8)
!divUp = \x y -> (x `div` y) + (if x `mod` y == 0 then 0 else 1)
2019-07-02 05:43:10 +03:00
mbFlush :: Put ()
mbFlush = do
shouldFlush <- (/= 0) . off <$> getS
when shouldFlush flush
--------------------------------------------------------------------------------
{-
TODO Handle back references
-}
writeNoun :: Noun -> Put ()
writeNoun !n =
2019-07-02 05:43:10 +03:00
getRef >>= \case
Just bk -> writeBackRef bk
Nothing -> case n of Atom a -> writeAtom a
Cell h t -> writeCell h t
2019-07-02 05:43:10 +03:00
{-# INLINE writeMat #-}
writeMat :: Atom -> Put ()
writeMat 0 = writeBit True
writeMat atm = do
writeBitsFromWord (preWid+1) (shiftL 1 preWid)
writeBitsFromWord (preWid-1) atmWid
writeAtomBits atm
where
atmWid = bitWidth atm
preWid = fromIntegral (wordBitWidth atmWid)
{-# INLINE writeCell #-}
writeCell :: Noun -> Noun -> Put ()
writeCell !h !t = do
2019-07-02 05:43:10 +03:00
writeBit True
writeBit False
writeNoun h
writeNoun t
{-# INLINE writeAtom #-}
writeAtom :: Atom -> Put ()
writeAtom !a = do
2019-07-02 05:43:10 +03:00
writeBit False
writeMat a
{-# INLINE writeBackRef #-}
writeBackRef :: Word -> Put ()
writeBackRef !a = do
2019-07-02 05:43:10 +03:00
p <- pos <$> getS
writeBit True
writeBit True
2019-07-12 04:16:40 +03:00
writeMat (fromIntegral a)
2019-07-02 05:43:10 +03:00
-- Calculate Jam Size and Backrefs ---------------------------------------------
{-# INLINE matSz #-}
matSz :: Atom -> Word
matSz !a = W# (matSz# a)
2019-07-02 05:43:10 +03:00
{-# INLINE matSz# #-}
matSz# :: Atom -> Word#
matSz# 0 = 1##
matSz# a = preW `plusWord#` preW `plusWord#` atmW
where
atmW = atomBitWidth# a
preW = wordBitWidth# atmW
2019-07-04 07:01:40 +03:00
{-# INLINE atomSz #-}
atomSz :: Atom -> Word
atomSz !w = 1 + matSz w
2019-07-04 07:01:40 +03:00
{-# INLINE refSz #-}
refSz :: Word -> Word
refSz !w = 1 + jamWordSz w
2019-07-04 07:01:40 +03:00
{-# INLINE jamWordSz #-}
jamWordSz :: Word -> Word
jamWordSz 0 = 2
jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
where
atmW = wordBitWidth# w
preW = wordBitWidth# atmW
2019-07-02 05:43:10 +03:00
compress :: Noun -> IO (Word, H.CuckooHashTable Word Word)
compress !top = do
2019-07-05 01:40:36 +03:00
let sz = max 50
$ min 10_000_000
$ (2*) $ (10^) $ floor $ logBase 600 $ fromIntegral $ nounSize top
nodes :: H.BasicHashTable Noun Word <- H.newSized sz
backs :: H.CuckooHashTable Word Word <- H.newSized sz
2019-07-02 05:43:10 +03:00
let proc :: Word -> Noun -> IO Word
proc !pos = \case
Atom a -> pure (atomSz a)
Cell h t -> do !hSz <- go (pos+2) h
!tSz <- go (pos+2+hSz) t
pure (2+hSz+tSz)
go :: Word -> Noun -> IO Word
go !p !inp = do
2019-07-02 05:43:10 +03:00
H.lookup nodes inp >>= \case
Nothing -> do
H.insert nodes inp p
proc p inp
Just bak -> do
2019-07-04 07:01:40 +03:00
let rs = refSz bak
doRef = H.insert backs p bak $> rs
noRef = proc p inp
case inp of
Cell _ _ -> doRef
Atom a | rs < atomSz (fromIntegral a) -> doRef
_ -> noRef
2019-07-02 05:43:10 +03:00
res <- go 0 top
2019-07-05 01:40:36 +03:00
2019-07-02 05:43:10 +03:00
pure (res, backs)