mirror of
https://github.com/urbit/shrub.git
synced 2024-12-30 07:35:19 +03:00
393 lines
9.9 KiB
Haskell
393 lines
9.9 KiB
Haskell
{-# OPTIONS_GHC -O2 #-}
|
|
|
|
{-|
|
|
    Fast implementation of `cue :: Atom -> Either DecodeErr Noun`.
|
|
|
|
Implementation is based on the approach used in `flat`.
|
|
-}
|
|
module Urbit.Noun.Cue (cue, cueExn, cueBS, cueBSExn, DecodeErr) where
|
|
|
|
import ClassyPrelude
|
|
|
|
import Urbit.Atom
|
|
import Urbit.Noun.Core
|
|
|
|
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
|
|
import Data.Function ((&))
|
|
import Foreign.Ptr (Ptr, castPtr, plusPtr, ptrToWordPtr)
|
|
import Foreign.Storable (peek)
|
|
import GHC.Prim (ctz#)
|
|
import GHC.Word (Word(..))
|
|
import System.IO.Unsafe (unsafePerformIO)
|
|
import Text.Printf (printf)
|
|
|
|
import qualified Data.ByteString.Unsafe as BS
|
|
import qualified Data.HashTable.IO as H
|
|
import qualified Data.Vector.Primitive as VP
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- | Decode a jammed noun from raw bytes, reporting failures as a `DecodeErr`.
cueBS :: ByteString -> Either DecodeErr Noun
cueBS bytes = doGet dNoun bytes
|
|
|
|
-- | Like `cueBS`, but a decoding failure is thrown as an exception in @m@
-- instead of being returned.
cueBSExn :: MonadIO m => ByteString -> m Noun
cueBSExn = either throwIO pure . cueBS
|
|
|
|
-- | Decode a jammed noun from an atom by decoding the atom's bytes.
cue :: Atom -> Either DecodeErr Noun
cue atom = cueBS (atomBytes atom)
|
|
|
|
-- | Like `cue`, but a decoding failure is thrown as an exception in @m@.
cueExn :: MonadIO m => Atom -> m Noun
cueExn atom = cueBSExn (atomBytes atom)
|
|
|
|
|
|
-- Debugging -------------------------------------------------------------------
|
|
|
|
-- | Tracing hook. Currently a no-op: the message is discarded.
debugM :: Monad m => String -> m ()
debugM _msg = pure ()
{-# INLINE debugM #-}
|
|
|
|
-- | Tracing wrapper around an action. Currently a no-op: the label is
-- discarded and the action is returned unchanged.
debugMId :: (Monad m, Show a) => String -> m a -> m a
debugMId _label action = action
{-# INLINE debugMId #-}
|
|
|
|
|
|
-- Types -----------------------------------------------------------------------
|
|
|
|
{-|
    The decoder's cursor.

    The end-of-buffer pointer is not part of this record; it is passed
    separately to every `Get` action.
-}
data S = S
  { currPtr  :: {-# UNPACK #-} !(Ptr Word)
    -- ^ The word of the input that is currently being read.
  , usedBits :: {-# UNPACK #-} !Word
    -- ^ Bit-offset into that word; `advance` keeps it reduced modulo 64.
  , pos      :: {-# UNPACK #-} !Word
    -- ^ Running total of bits passed to `advance`; `dNoun` uses it as the
    --   key under which a decoded noun is stored for back-references.
  } deriving (Show,Eq,Ord)
|
|
|
|
-- | The end-of-buffer pointer paired with the cursor state; this is the
-- context carried inside every `DecodeErr`.
type Env = (Ptr Word, S)
|
|
|
|
-- | Ways in which decoding can fail. Each constructor records the
-- end-of-buffer pointer and the cursor state at the point of failure.
data DecodeErr
  = InfiniteCue Env
    -- ^ Raised by `guardInfinite` once the read pointer is 16 or more bytes
    --   beyond the end of the input.
  | BadEncoding Env String
    -- ^ Raised by `badEncoding` (and hence by `fail` in `Get`) with a
    --   description of the problem.
  deriving (Show, Eq, Ord)
|
|
|
|
-- | What a `Get` action produces: the updated cursor together with the
-- decoded value. Both fields are strict.
data GetResult a
  = GetResult {-# UNPACK #-} !S !a
  deriving (Show, Functor)
|
|
|
|
{-|
    A decoder action. Given the end-of-buffer pointer, the table of nouns
    decoded so far (keyed by the bit position at which each one started) and
    the current cursor, it runs in `IO` and yields a new cursor plus a value.
-}
newtype Get a = Get
  { runGet
      :: Ptr Word                    -- end of the input buffer
      -> H.BasicHashTable Word Noun  -- back-reference table
      -> S                           -- cursor on entry
      -> IO (GetResult a)
  }
|
|
|
|
{-|
    Run a decoder over the bytes of a `ByteString`.

    Any `DecodeErr` thrown while decoding is caught and returned as `Left`.

    The decoder reads its input one 8-byte `Word` at a time, and only checks
    that the *start* of each word lies before the end pointer. If the input
    length is not a multiple of eight, the final read would therefore cover
    bytes beyond the end of the `ByteString`. To keep every read in bounds
    (and keep this function deterministic for malformed input), the input is
    first padded with zero bytes up to the next multiple of eight. Inputs
    whose length is already a multiple of eight are used as-is, without a
    copy.

    `unsafePerformIO` is used because the decoder only reads from the buffer
    and mutates a hash table that is created here and never escapes.
-}
doGet :: Get a -> ByteString -> Either DecodeErr a
doGet m bs =
  unsafePerformIO $ try $ BS.unsafeUseAsCStringLen padded $ \(ptr, len) -> do
    let endPtr = ptr `plusPtr` len
    -- Initial size of the back-reference table: a guess derived from the
    -- input size, clamped to the range [50, 10_000_000].
    let sz = max 50
           $ min 10_000_000
           $ length bs `div` 6
    tbl <- H.newSized sz
    GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
    pure r
  where
    -- Number of zero bytes needed to reach the next multiple of eight.
    slack = negate (length bs) `mod` 8

    -- The input, extended so that it consists of whole 8-byte words.
    padded
      | slack == 0 = bs
      | otherwise  = bs <> replicate slack 0
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- | Allows `DecodeErr` to be thrown with `throwIO` and caught with `try`.
instance Exception DecodeErr
|
|
|
|
-- | Map over the decoded value; the cursor produced by the wrapped action is
-- passed through untouched.
instance Functor Get where
  {-# INLINE fmap #-}
  fmap convert (Get step) = Get $ \endPtr refs st0 ->
    step endPtr refs st0 >>= \(GetResult st1 val) ->
      pure (GetResult st1 (convert val))
|
|
|
|
-- | Sequencing of decoders: the left action runs first and the cursor it
-- returns is the one the right action starts from.
instance Applicative Get where
  {-# INLINE pure #-}
  pure val = Get $ \_endPtr _refs st -> pure (GetResult st val)

  {-# INLINE (<*>) #-}
  Get runFn <*> Get runArg = Get $ \endPtr refs st0 ->
    runFn endPtr refs st0 >>= \(GetResult st1 fn) ->
      runArg endPtr refs st1 >>= \(GetResult st2 arg) ->
        pure (GetResult st2 (fn arg))

  {-# INLINE (*>) #-}
  Get runFirst *> Get runSecond = Get $ \endPtr refs st0 ->
    runFirst endPtr refs st0 >>= \(GetResult st1 _) ->
      runSecond endPtr refs st1
|
|
|
|
-- | Monadic sequencing of decoders. `fail` aborts decoding by throwing a
-- `BadEncoding` error that carries the given message.
--
-- NOTE(review): `fail` is defined here as a method of `Monad`, which only
-- compiles against a @base@ in which `Monad` still has that method; newer
-- compilers would need it in a `MonadFail` instance instead.
instance Monad Get where
  {-# INLINE return #-}
  return = pure

  {-# INLINE (>>) #-}
  (>>) = (*>)

  {-# INLINE (>>=) #-}
  Get run >>= next = Get $ \endPtr refs st0 ->
    run endPtr refs st0 >>= \(GetResult st1 val) ->
      runGet (next val) endPtr refs st1

  {-# INLINE fail #-}
  fail msg = Get $ \endPtr _refs st -> badEncoding endPtr st msg
|
|
|
|
-- | Run an arbitrary `IO` action inside a decoder, leaving the cursor as it
-- was.
instance MonadIO Get where
  {-# INLINE liftIO #-}
  liftIO action = Get $ \_endPtr _refs st ->
    action >>= \out -> pure (GetResult st out)
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- | Abort with a `BadEncoding` error that records the end pointer, the
-- cursor state and a message.
badEncoding :: Ptr Word -> S -> String -> IO a
badEncoding !endPtr s msg = throwIO (BadEncoding env msg)
  where
    env = (endPtr, s)
{-# INLINE badEncoding #-}
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- | The current value of the cursor's `pos` field. Does not move the cursor.
getPos :: Get Word
getPos = Get $ \_endPtr _refs st ->
  return $ GetResult st (pos st)
{-# INLINE getPos #-}
|
|
|
|
-- | Record a decoded noun in the back-reference table under the given key.
-- Does not move the cursor.
insRef :: Word -> Noun -> Get ()
insRef !key !noun = Get $ \_endPtr refs st ->
  H.insert refs key noun >> pure (GetResult st ())
{-# INLINE insRef #-}
|
|
|
|
-- | Look a key up in the back-reference table. A missing key aborts decoding
-- through `fail`, with a message naming the key.
getRef :: Word -> Get Noun
getRef !ref = Get $ \endPtr refs st -> do
  found <- H.lookup refs ref
  case found of
    Just noun -> pure (GetResult st noun)
    Nothing   -> runGet (fail ("Invalid Reference: " <> show ref)) endPtr refs st
{-# INLINE getRef #-}
|
|
|
|
-- | Move the cursor forward by a number of bits.
--
-- `pos` grows by the requested amount, the bit-offset is reduced modulo 64,
-- and the word pointer moves forward eight bytes for every whole word that
-- was crossed. Advancing by zero leaves the cursor as it is.
advance :: Word -> Get ()
advance 0 = debugM "advance: 0" >> pure ()
advance !n = Get $ \_endPtr _refs st -> do
  debugM ("advance: " <> show n)
  let (wordsCrossed, bitsLeft) = (n + usedBits st) `quotRem` 64
      byteStep = 8 * fromIntegral wordsCrossed
  pure $ GetResult
    S { currPtr  = currPtr st `plusPtr` byteStep
      , usedBits = bitsLeft
      , pos      = pos st + n
      }
    ()
{-# INLINE advance #-}
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- | Throw `InfiniteCue` when the pointer about to be read is 16 or more bytes
-- beyond the end of the input; otherwise do nothing.
guardInfinite :: Ptr Word -> Ptr Word -> S -> IO ()
guardInfinite end cur s
  | cur < limit = pure ()
  | otherwise   = throwIO (InfiniteCue (end, s))
  where
    limit = end `plusPtr` 16
{-# INLINE guardInfinite #-}
|
|
|
|
-- | Read the word under the cursor without moving the cursor.
--
-- A pointer at or beyond the end of the input reads as zero; a pointer far
-- enough beyond it makes `guardInfinite` throw.
--
-- TODO Should the bounds test be (>= end) or (> end)?
peekCurWord :: Get Word
peekCurWord = Get $ \end _refs st ->
  let here     = currPtr st
      inBounds = ptrToWordPtr here < ptrToWordPtr end
  in  debugMId "peekCurWord" $ do
        guardInfinite end here st
        word <- if inBounds then peek here else pure 0
        pure (GetResult st word)
{-# INLINE peekCurWord #-}
|
|
|
|
-- | Read the word eight bytes after the cursor's current word, without
-- moving the cursor.
--
-- A target at or beyond the end of the input reads as zero; a target far
-- enough beyond it makes `guardInfinite` throw.
--
-- TODO Same question as for `peekCurWord`: (>= end) or (> end)?
peekNextWord :: Get Word
peekNextWord = Get $ \end _refs st ->
  let target   = currPtr st `plusPtr` 8
      inBounds = ptrToWordPtr target < ptrToWordPtr end
  in  debugMId "peekNextWord" $ do
        guardInfinite end target st
        word <- if inBounds then peek target else pure 0
        pure (GetResult st word)
{-# INLINE peekNextWord #-}
|
|
|
|
-- | The cursor's bit-offset into the current word. Does not move the cursor.
peekUsedBits :: Get Word
peekUsedBits =
  debugMId "peekUsedBits" $
    Get $ \_endPtr _refs st ->
      return $ GetResult st (usedBits st)
{-# INLINE peekUsedBits #-}
|
|
|
|
{-|
    Read a single bit.

    - Peek the current word and the bit-offset.
    - Advance the cursor by one bit.
    - Shift the word right by the offset and test its lowest bit.
-}
dBit :: Get Bool
dBit =
  debugMId "dBit" $ do
    word   <- peekCurWord
    bitOff <- fromIntegral <$> peekUsedBits
    advance 1
    let lowest = (word `shiftR` bitOff) .&. 1
    pure (lowest /= 0)
{-# INLINE dBit #-}
|
|
|
|
-- | Read the next 64 bits as a word and advance past them.
dWord :: Get Word
dWord = debugMId "dWord" (peekWord <* advance 64)
{-# INLINE dWord #-}
|
|
|
|
{-|
    Read an atom that occupies the next n bits.

    - Split n into a count of full 64-bit words and a count of leftover bits.
    - Read each full word with `dWord`.
    - If there are leftover bits, read them as one final, partial word with
      `dWordBits`.
    - Build the atom from the resulting vector of words.
-}
dAtomBits :: Word -> Get Atom
dAtomBits !len =
    debugMId ("dAtomBits(" <> show bits <> ")") $
      wordsAtom <$> VP.generateM wordCount readWord
  where
    bits :: Int
    bits = fromIntegral len

    (fullWords, tailBits) = bits `divMod` 64

    -- One extra word is needed exactly when there are leftover bits.
    wordCount = fullWords + min 1 tailBits
    finalIdx  = wordCount - 1

    readWord ix = do
      debugM (show ix)
      if ix == finalIdx && tailBits /= 0
        then dWordBits (fromIntegral tailBits)
        else dWord
{-# INLINE dAtomBits #-}
|
|
|
|
{-|
    Peek at the next 64 bits of input without moving the cursor.

    - Read the bit-offset, the current word and the following word
      (each of the two words reads as zero when it lies past the end of the
      input).
    - Combine them with `swiz`: the current word shifted right by the offset,
      or-ed with the following word shifted left by (64 - offset).
-}
peekWord :: Get Word
peekWord =
  debugMId "peekWord" $ do
    bitOff <- peekUsedBits
    lo     <- peekCurWord
    hi     <- peekNextWord
    let merged = swiz bitOff (lo, hi)
    debugM ("\t" <> (take 10 $ reverse $ printf "%b" (fromIntegral merged :: Integer)) <> "..")
    pure merged
{-# INLINE peekWord #-}
|
|
|
|
-- | Extract 64 bits that start at a bit-offset inside the low word of a
-- (low, high) pair: the low word shifted right by the offset, or-ed with the
-- high word shifted left by (64 - offset).
swiz :: Word -> (Word, Word) -> Word
swiz !bitOff (!lo, !hi) = fromLow .|. fromHigh
  where
    n :: Int
    n = fromIntegral bitOff

    fromLow  = lo `shiftR` n
    fromHigh = hi `shiftL` (64 - n)
{-# INLINE swiz #-}
|
|
|
|
-- | Keep the given number of low bits of a word and clear the rest. A width
-- of 64 returns the word unchanged.
takeLowBits :: Word -> Word -> Word
takeLowBits !wid !wor
  | wid == 64 = wor
  | otherwise = wor .&. (2 ^ wid - 1)
{-# INLINE takeLowBits #-}
|
|
|
|
{-|
    Read the next n bits as a word (intended for n <= 64).

    - Peek at the next 64 bits.
    - Advance the cursor by n bits.
    - Return the n lowest bits of what was peeked.
-}
dWordBits :: Word -> Get Word
dWordBits !n =
  debugMId ("dWordBits(" <> show n <> ")") $ do
    window <- peekWord
    advance n
    let out = takeLowBits n window
    debugM ("dWordBits: " <> show out)
    pure out
{-# INLINE dWordBits #-}
|
|
|
|
|
|
-- Fast Cue --------------------------------------------------------------------
|
|
|
|
{-|
    Read the exponent-prefix of an atom:

    - Peek at the next 64 bits.
    - Count the trailing zero bits of that word (using the `ctz#` primitive).
    - Advance by that count plus one.
    - Return the count.
-}
dExp :: Get Word
dExp =
  debugMId "dExp" $
    peekWord >>= \(W# raw) ->
      let zeros = W# (ctz# raw)
      in  zeros <$ advance (zeros + 1)
{-# INLINE dExp #-}
|
|
|
|
-- | Read a length prefix.
--
-- An exponent of zero means length zero. Otherwise the exponent @e@ is
-- followed by @e - 1@ bits holding the low bits of the length, and the
-- result is those bits with bit @e - 1@ set on top.
dAtomLen :: Get Word
dAtomLen =
  debugMId "dAtomLen" $ do
    expo <- dExp
    if expo == 0
      then pure 0
      else do
        let lowWidth = expo - 1
        lowBits <- dWordBits lowWidth
        pure (2 ^ lowWidth .|. lowBits)
{-# INLINE dAtomLen #-}
|
|
|
|
-- | Read a back-reference key: a length prefix followed by that many bits,
-- returned as a word.
dRef :: Get Word
dRef =
  debugMId "dRef" $ do
    width <- dAtomLen
    dWordBits width
{-# INLINE dRef #-}
|
|
|
|
-- | Read an atom: a length prefix followed by that many bits of payload.
-- A length of zero yields the atom 0 without reading any payload.
dAtom :: Get Atom
dAtom =
  debugMId "dAtom" $ do
    len <- dAtomLen
    if len == 0
      then pure 0
      else dAtomBits len
{-# INLINE dAtom #-}
|
|
|
|
-- | Read a cell: two nouns in sequence, head first.
dCell :: Get Noun
dCell = do
  hed <- dNoun
  tal <- dNoun
  pure (Cell hed tal)
{-# INLINE dCell #-}
|
|
|
|
{-|
    Read a noun.

    - Remember the bit position at which the noun starts.
    - Read a bit.
        - If it's zero, read an atom.
        - Otherwise, read another bit.
            - If it's zero, read a cell.
            - If it's one, read a back-reference and look up the noun it
              names.
    - Atoms and cells are stored in the back-reference table under their
      starting position before being returned.
-}
dNoun :: Get Noun
dNoun = do
    start  <- getPos
    tagged <- dBit
    if not tagged
      then do
        debugM "It's an atom"
        atom <- dAtom
        remember start (Atom atom)
      else do
        isRef <- dBit
        if isRef
          then debugM "It's a backref" >> dRef >>= getRef
          else debugM "It's a cell" >> dCell >>= remember start
  where
    -- Store a freshly decoded noun for later back-references, then return it.
    remember key noun = insRef key noun >> pure noun
|