shrub/pkg/hs/king/lib/Ur/Noun/Cue.hs

393 lines
9.9 KiB
Haskell
Raw Normal View History

2019-08-22 02:49:08 +03:00
{-# OPTIONS_GHC -O2 #-}
2020-01-23 07:16:09 +03:00
{-|
Fast implementation of `cue :: Atom -> Maybe Noun`.
Implementation is based on the approach used in `flat`.
-}
2020-01-23 05:58:22 +03:00
module Ur.Noun.Cue (cue, cueExn, cueBS, cueBSExn, DecodeErr) where
2019-07-02 05:43:10 +03:00
import ClassyPrelude
2020-01-23 05:58:22 +03:00
import Ur.Noun.Atom
import Ur.Noun.Core
2019-07-12 04:16:40 +03:00
import Control.Lens (from, view, (&), (^.))
2019-07-12 22:24:44 +03:00
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Foreign.Ptr (Ptr, castPtr, plusPtr, ptrToWordPtr)
import Foreign.Storable (peek)
import GHC.Prim (ctz#)
import GHC.Word (Word(..))
import System.IO.Unsafe (unsafePerformIO)
import Text.Printf (printf)
import qualified Data.ByteString.Unsafe as BS
import qualified Data.HashTable.IO as H
import qualified Data.Vector.Primitive as VP
--------------------------------------------------------------------------------
2019-07-16 03:01:45 +03:00
cueBS :: ByteString -> Either DecodeErr Noun
cueBS = doGet dNoun
2019-08-28 14:45:49 +03:00
cueBSExn :: MonadIO m => ByteString -> m Noun
cueBSExn bs =
cueBS bs & \case
Left e -> throwIO e
Right x -> pure x
2019-07-16 03:01:45 +03:00
cue :: Atom -> Either DecodeErr Noun
2019-07-12 04:16:40 +03:00
cue = cueBS . view atomBytes
2019-08-28 14:45:49 +03:00
cueExn :: MonadIO m => Atom -> m Noun
cueExn atm = cueBSExn (atm ^. atomBytes)
2019-07-03 08:14:39 +03:00
-- Debugging -------------------------------------------------------------------
{-# INLINE debugM #-}
debugM :: Monad m => String -> m ()
debugM _ = pure ()
{-# INLINE debugMId #-}
debugMId :: (Monad m, Show a) => String -> m a -> m a
debugMId _ a = a
-- Types -----------------------------------------------------------------------
{-|
The decoder state.
- An array of words (internal structure of our atoms).
- A pointer to the word *after* the last word in the array.
- A pointer into the current word of that array.
- A bit-offset into that word.
-}
data S = S
{ currPtr :: {-# UNPACK #-} !(Ptr Word)
, usedBits :: {-# UNPACK #-} !Word
, pos :: {-# UNPACK #-} !Word
} deriving (Show,Eq,Ord)
type Env = (Ptr Word, S)
2019-07-16 03:01:45 +03:00
data DecodeErr
= InfiniteCue Env
| BadEncoding Env String
deriving (Show, Eq, Ord)
data GetResult a = GetResult {-# UNPACK #-} !S !a
2019-07-03 08:14:39 +03:00
deriving (Show, Functor)
newtype Get a = Get
{ runGet :: Ptr Word
-> H.BasicHashTable Word Noun
-> S
-> IO (GetResult a)
}
2019-07-16 03:01:45 +03:00
doGet :: Get a -> ByteString -> Either DecodeErr a
doGet m bs =
2019-07-12 22:24:44 +03:00
unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
let endPtr = ptr `plusPtr` len
2019-07-05 01:40:36 +03:00
let sz = max 50
$ min 10_000_000
$ length bs `div` 6
tbl <- H.newSized sz
GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0)
pure r
--------------------------------------------------------------------------------
2019-07-16 03:01:45 +03:00
instance Exception DecodeErr
instance Functor Get where
fmap f g = Get $ \end tbl s -> do
GetResult s' a <- runGet g end tbl s
return $ GetResult s' (f a)
{-# INLINE fmap #-}
instance Applicative Get where
pure x = Get (\_ _ s -> return $ GetResult s x)
{-# INLINE pure #-}
Get f <*> Get g = Get $ \end tbl s1 -> do
GetResult s2 f' <- f end tbl s1
GetResult s3 g' <- g end tbl s2
return $ GetResult s3 (f' g')
{-# INLINE (<*>) #-}
Get f *> Get g = Get $ \end tbl s1 -> do
GetResult s2 _ <- f end tbl s1
g end tbl s2
{-# INLINE (*>) #-}
instance Monad Get where
return = pure
{-# INLINE return #-}
(>>) = (*>)
{-# INLINE (>>) #-}
Get x >>= f = Get $ \end tbl s -> do
GetResult s' x' <- x end tbl s
runGet (f x') end tbl s'
{-# INLINE (>>=) #-}
2019-07-03 08:14:39 +03:00
fail msg = Get $ \end tbl s -> do
badEncoding end s msg
{-# INLINE fail #-}
instance MonadIO Get where
liftIO io = Get $ \end tbl s -> GetResult s <$> io
{-# INLINE liftIO #-}
--------------------------------------------------------------------------------
{-# INLINE badEncoding #-}
badEncoding :: Ptr Word -> S -> String -> IO a
badEncoding !endPtr s msg = throwIO $ BadEncoding (endPtr,s) msg
--------------------------------------------------------------------------------
{-# INLINE getPos #-}
getPos :: Get Word
getPos = Get $ \_ _ s ->
pure (GetResult s (pos s))
{-# INLINE insRef #-}
insRef :: Word -> Noun -> Get ()
2019-07-12 22:24:44 +03:00
insRef !pos !now = Get $ \_ tbl s -> do
H.insert tbl pos now
pure $ GetResult s ()
{-# INLINE getRef #-}
getRef :: Word -> Get Noun
2019-07-12 22:24:44 +03:00
getRef !ref = Get $ \x tbl s -> do
H.lookup tbl ref >>= \case
2019-07-03 08:14:39 +03:00
Nothing -> runGet (fail ("Invalid Reference: " <> show ref)) x tbl s
Just no -> pure (GetResult s no)
{-# INLINE advance #-}
advance :: Word -> Get ()
2019-07-03 08:14:39 +03:00
advance 0 = debugM "advance: 0" >> pure ()
2019-07-12 22:24:44 +03:00
advance !n = Get $ \_ _ s -> do
2019-07-03 08:14:39 +03:00
debugM ("advance: " <> show n)
let newUsed = n + usedBits s
newS = s { pos = pos s + n
, usedBits = newUsed `mod` 64
, currPtr = plusPtr (currPtr s)
2019-07-03 08:14:39 +03:00
(8 * (fromIntegral (newUsed `div` 64)))
}
pure (GetResult newS ())
--------------------------------------------------------------------------------
{-# INLINE guardInfinite #-}
guardInfinite :: Ptr Word -> Ptr Word -> S -> IO ()
guardInfinite end cur s =
when (cur >= (end `plusPtr` 16)) $ do
throwIO (InfiniteCue (end, s))
-- TODO Should this be (>= end) or (> end)?
{-# INLINE peekCurWord #-}
peekCurWord :: Get Word
2019-07-12 22:24:44 +03:00
peekCurWord = Get $ \end _ s -> do
2019-07-03 08:14:39 +03:00
debugMId "peekCurWord" $ do
guardInfinite end (currPtr s) s
if ptrToWordPtr (currPtr s) >= ptrToWordPtr end
then pure (GetResult s 0)
else GetResult s <$> peek (currPtr s)
-- TODO Same question as above.
{-# INLINE peekNextWord #-}
peekNextWord :: Get Word
2019-07-12 22:24:44 +03:00
peekNextWord = Get $ \end _ s -> do
2019-07-03 08:14:39 +03:00
debugMId "peekNextWord" $ do
let pTarget = currPtr s `plusPtr` 8
guardInfinite end pTarget s
2019-07-03 08:14:39 +03:00
if ptrToWordPtr pTarget >= ptrToWordPtr end
then pure (GetResult s 0)
2019-07-03 08:14:39 +03:00
else GetResult s <$> peek pTarget
{-# INLINE peekUsedBits #-}
peekUsedBits :: Get Word
2019-07-03 08:14:39 +03:00
peekUsedBits =
debugMId "peekUsedBits" $ do
2019-07-12 22:24:44 +03:00
Get $ \_ _ s -> pure (GetResult s (usedBits s))
{-|
Get a bit.
- Peek the current word.
- Right-shift by the bit-offset.
- Mask the high bits.
-}
{-# INLINE dBit #-}
dBit :: Get Bool
dBit = do
2019-07-03 08:14:39 +03:00
debugMId "dBit" $ do
wor <- peekCurWord
use <- fromIntegral <$> peekUsedBits
advance 1
pure (0 /= shiftR wor use .&. 1)
{-# INLINE dWord #-}
dWord :: Get Word
dWord = do
2019-07-03 08:14:39 +03:00
debugMId "dWord" $ do
res <- peekWord
advance 64
pure res
{-|
Get n bits, where n > 64:
- Get (n/64) words.
- Advance by n bits.
- Calculate an offset (equal to the current bit-offset)
- Calculate the length (equal to n)
- Construct a bit-vector using the buffer*length*offset.
-}
{-# INLINE dAtomBits #-}
dAtomBits :: Word -> Get Atom
dAtomBits !(fromIntegral -> bits) = do
2019-07-03 08:14:39 +03:00
debugMId ("dAtomBits(" <> show bits <> ")") $ do
fmap (view $ from atomWords) $
2019-07-12 22:24:44 +03:00
VP.generateM bufSize $ \i -> do
2019-07-03 08:14:39 +03:00
debugM (show i)
if (i == lastIdx && numExtraBits /= 0)
then dWordBits (fromIntegral numExtraBits)
else dWord
where
bufSize = numFullWords + min 1 numExtraBits
lastIdx = bufSize - 1
numFullWords = bits `div` 64
numExtraBits = bits `mod` 64
{-|
In order to peek at the next Word64:
- If we are past the end of the buffer:
- Return zero.
- If the bit-offset is zero:
- Just peek.
- If we are pointing to the last word:
- Peek and right-shift by the bit offset.
- Otherwise,
- Peek the current word *and* the next word.
- Right-shift the current word by the bit-offset.
- Left-shift the next word by the bit-offset.
- Binary or the resulting two words.
-}
{-# INLINE peekWord #-}
peekWord :: Get Word
peekWord = do
2019-07-03 08:14:39 +03:00
debugMId "peekWord" $ do
off <- peekUsedBits
cur <- peekCurWord
2019-07-03 08:14:39 +03:00
nex <- peekNextWord
let res = swiz off (cur, nex)
debugM ("\t" <> (take 10 $ reverse $ printf "%b" (fromIntegral res :: Integer)) <> "..")
pure res
{-# INLINE swiz #-}
2019-07-03 08:14:39 +03:00
swiz :: Word -> (Word, Word) -> Word
swiz !(fromIntegral -> off) (!low, !hig) =
2019-07-03 08:14:39 +03:00
(.|.) (shiftR low off) (shiftL hig (64-off))
{-# INLINE takeLowBits #-}
takeLowBits :: Word -> Word -> Word
takeLowBits 64 !wor = wor
takeLowBits !wid !wor = (2^wid - 1) .&. wor
{-|
Make a word from the next n bits (where n <= 64).
- Peek at the next word.
- Mask the n lowest bits from the word.
- Advance by that number of bits.
- Return the word.
-}
{-# INLINE dWordBits #-}
dWordBits :: Word -> Get Word
dWordBits !n = do
2019-07-03 08:14:39 +03:00
debugMId ("dWordBits(" <> show n <> ")") $ do
w <- peekWord
advance n
2019-07-03 08:14:39 +03:00
debugM ("dWordBits: " <> show (takeLowBits n w))
pure (takeLowBits n w)
2019-07-02 05:43:10 +03:00
-- Fast Cue --------------------------------------------------------------------
2020-01-23 07:16:09 +03:00
{-|
2019-07-02 05:43:10 +03:00
Get the exponent-prefix of an atom:
- Peek at the next word.
- Calculate the number of least-significant bits in that word (there's
a primitive for this).
- Advance by that number of bits.
- Return the number of bits
-}
{-# INLINE dExp #-}
2019-07-02 05:43:10 +03:00
dExp :: Get Word
dExp = do
2019-07-03 08:14:39 +03:00
debugMId "dExp" $ do
2019-07-02 05:43:10 +03:00
W# w <- peekWord
let res = W# (ctz# w)
2019-07-03 08:14:39 +03:00
advance (res+1)
2019-07-02 05:43:10 +03:00
pure res
{-# INLINE dAtomLen #-}
2019-07-02 05:43:10 +03:00
dAtomLen :: Get Word
dAtomLen = do
2019-07-03 08:14:39 +03:00
debugMId "dAtomLen" $ do
dExp >>= \case
0 -> pure 0
e -> do p <- dWordBits (e-1)
pure (2^(e-1) .|. p)
2019-07-02 05:43:10 +03:00
{-# INLINE dRef #-}
2019-07-02 05:43:10 +03:00
dRef :: Get Word
2019-07-03 08:14:39 +03:00
dRef = debugMId "dRef" (dAtomLen >>= dWordBits)
2019-07-02 05:43:10 +03:00
{-# INLINE dAtom #-}
2019-07-02 05:43:10 +03:00
dAtom :: Get Atom
dAtom = do
2019-07-03 08:14:39 +03:00
debugMId "dAtom" $ do
dAtomLen >>= \case
0 -> pure 0
n -> dAtomBits n
2019-07-02 05:43:10 +03:00
{-# INLINE dCell #-}
dCell :: Get Noun
dCell = Cell <$> dNoun <*> dNoun
2019-07-02 05:43:10 +03:00
{-|
Get a Noun.
- Get a bit
- If it's zero, get an atom.
- Otherwise, get another bit.
- If it's zero, get a cell.
- If it's one, get an atom.
-}
dNoun :: Get Noun
2019-07-02 05:43:10 +03:00
dNoun = do
p <- getPos
let yield r = insRef p r >> pure r
dBit >>= \case
2019-07-03 08:14:39 +03:00
False -> do debugM "It's an atom"
(Atom <$> dAtom) >>= yield
2019-07-02 05:43:10 +03:00
True -> dBit >>= \case
2019-07-03 08:14:39 +03:00
False -> do debugM "It's a cell"
dCell >>= yield
True -> do debugM "It's a backref"
dRef >>= getRef