module Noun.Cue (cue, cueBS) where import ClassyPrelude import Noun.Core import Noun.Atom import Control.Lens (view, from) import Data.Bits (shiftL, shiftR, (.|.), (.&.)) import Foreign.Ptr (Ptr, plusPtr, castPtr, ptrToWordPtr) import Foreign.Storable (peek) import GHC.Prim (ctz#) import GHC.Word (Word(..)) import System.IO.Unsafe (unsafePerformIO) import Text.Printf (printf) import qualified Data.ByteString.Unsafe as BS import qualified Data.HashTable.IO as H import qualified Data.Vector.Primitive as VP -------------------------------------------------------------------------------- cueBS :: ByteString -> Either DecodeExn Noun cueBS = doGet dNoun cue :: Atom -> Either DecodeExn Noun cue = cueBS . view atomBytes -- Debugging ------------------------------------------------------------------- {-# INLINE debugM #-} debugM :: Monad m => String -> m () debugM _ = pure () {-# INLINE debugMId #-} debugMId :: (Monad m, Show a) => String -> m a -> m a debugMId _ a = a -- Types ----------------------------------------------------------------------- {-| The decoder state. - An array of words (internal structure of our atoms). - A pointer to the word *after* the last word in the array. - A pointer into the current word of that array. - A bit-offset into that word. -} data S = S { currPtr :: {-# UNPACK #-} !(Ptr Word) , usedBits :: {-# UNPACK #-} !Word , pos :: {-# UNPACK #-} !Word } deriving (Show,Eq,Ord) type Env = (Ptr Word, S) data DecodeExn = NotEnoughSpace Env | TooMuchSpace Env | BadEncoding Env String deriving (Show, Eq, Ord) data GetResult a = GetResult {-# UNPACK #-} !S !a deriving (Show, Functor) newtype Get a = Get { runGet :: Ptr Word -> H.BasicHashTable Word Noun -> S -> IO (GetResult a) } doGet :: Get a -> ByteString -> Either DecodeExn a doGet m bs = unsafePerformIO $ try $ BS.unsafeUseAsCStringLen bs \(ptr, len) -> do let endPtr = ptr `plusPtr` len let sz = max 50 $ min 10_000_000 $ length bs `div` 6 tbl <- H.newSized sz GetResult _ r <- runGet m endPtr tbl (S (castPtr ptr) 0 0) pure r -------------------------------------------------------------------------------- instance Exception DecodeExn instance Functor Get where fmap f g = Get $ \end tbl s -> do GetResult s' a <- runGet g end tbl s return $ GetResult s' (f a) {-# INLINE fmap #-} instance Applicative Get where pure x = Get (\_ _ s -> return $ GetResult s x) {-# INLINE pure #-} Get f <*> Get g = Get $ \end tbl s1 -> do GetResult s2 f' <- f end tbl s1 GetResult s3 g' <- g end tbl s2 return $ GetResult s3 (f' g') {-# INLINE (<*>) #-} Get f *> Get g = Get $ \end tbl s1 -> do GetResult s2 _ <- f end tbl s1 g end tbl s2 {-# INLINE (*>) #-} instance Monad Get where return = pure {-# INLINE return #-} (>>) = (*>) {-# INLINE (>>) #-} Get x >>= f = Get $ \end tbl s -> do GetResult s' x' <- x end tbl s runGet (f x') end tbl s' {-# INLINE (>>=) #-} fail msg = Get $ \end tbl s -> do badEncoding end s msg {-# INLINE fail #-} -------------------------------------------------------------------------------- badEncoding :: Ptr Word -> S -> String -> IO a badEncoding !endPtr s msg = throwIO $ BadEncoding (endPtr,s) msg -------------------------------------------------------------------------------- getPos :: Get Word getPos = Get $ \_ _ s -> pure (GetResult s (pos s)) insRef :: Word -> Noun -> Get () insRef !pos !now = Get \_ tbl s -> do H.insert tbl pos now pure $ GetResult s () getRef :: Word -> Get Noun getRef !ref = Get \x tbl s -> do H.lookup tbl ref >>= \case Nothing -> runGet (fail ("Invalid Reference: " <> show ref)) x tbl s Just no -> pure (GetResult s no) advance :: Word -> Get () advance 0 = debugM "advance: 0" >> pure () advance !n = Get \_ _ s -> do debugM ("advance: " <> show n) let newUsed = n + usedBits s newS = s { pos = pos s + n , usedBits = newUsed `mod` 64 , currPtr = plusPtr (currPtr s) (8 * (fromIntegral (newUsed `div` 64))) } pure (GetResult newS ()) -------------------------------------------------------------------------------- -- TODO Should this be (>= end) or (> end)? peekCurWord :: Get Word peekCurWord = Get \end _ s -> do debugMId "peekCurWord" $ do if ptrToWordPtr (currPtr s) >= ptrToWordPtr end then pure (GetResult s 0) else GetResult s <$> peek (currPtr s) -- TODO Same question as above. peekNextWord :: Get Word peekNextWord = Get \end _ s -> do debugMId "peekNextWord" $ do let pTarget = currPtr s `plusPtr` 8 if ptrToWordPtr pTarget >= ptrToWordPtr end then pure (GetResult s 0) else GetResult s <$> peek pTarget peekUsedBits :: Get Word peekUsedBits = debugMId "peekUsedBits" $ do Get \_ _ s -> pure (GetResult s (usedBits s)) {-| Get a bit. - Peek the current word. - Right-shift by the bit-offset. - Mask the high bits. -} dBit :: Get Bool dBit = do debugMId "dBit" $ do wor <- peekCurWord use <- fromIntegral <$> peekUsedBits advance 1 pure (0 /= shiftR wor use .&. 1) dWord :: Get Word dWord = do debugMId "dWord" $ do res <- peekWord advance 64 pure res {-| Get n bits, where n > 64: - Get (n/64) words. - Advance by n bits. - Calculate an offset (equal to the current bit-offset) - Calculate the length (equal to n) - Construct a bit-vector using the buffer*length*offset. -} dAtomBits :: Word -> Get Atom dAtomBits !(fromIntegral -> bits) = do debugMId ("dAtomBits(" <> show bits <> ")") $ do fmap (view $ from atomWords) $ VP.generateM bufSize \i -> do debugM (show i) if (i == lastIdx && numExtraBits /= 0) then dWordBits (fromIntegral numExtraBits) else dWord where bufSize = numFullWords + min 1 numExtraBits lastIdx = bufSize - 1 numFullWords = bits `div` 64 numExtraBits = bits `mod` 64 {-| In order to peek at the next Word64: - If we are past the end of the buffer: - Return zero. - If the bit-offset is zero: - Just peek. - If we are pointing to the last word: - Peek and right-shift by the bit offset. - Otherwise, - Peek the current word *and* the next word. - Right-shift the current word by the bit-offset. - Left-shift the next word by the bit-offset. - Binary or the resulting two words. -} peekWord :: Get Word peekWord = do debugMId "peekWord" $ do off <- peekUsedBits cur <- peekCurWord nex <- peekNextWord let res = swiz off (cur, nex) debugM ("\t" <> (take 10 $ reverse $ printf "%b" (fromIntegral res :: Integer)) <> "..") pure res swiz :: Word -> (Word, Word) -> Word swiz !(fromIntegral -> off) (!low, !hig) = (.|.) (shiftR low off) (shiftL hig (64-off)) takeLowBits :: Word -> Word -> Word takeLowBits 64 !wor = wor takeLowBits !wid !wor = (2^wid - 1) .&. wor {-| Make a word from the next n bits (where n <= 64). - Peek at the next word. - Mask the n lowest bits from the word. - Advance by that number of bits. - Return the word. -} dWordBits :: Word -> Get Word dWordBits !n = do debugMId ("dWordBits(" <> show n <> ")") $ do w <- peekWord advance n debugM ("dWordBits: " <> show (takeLowBits n w)) pure (takeLowBits n w) -- Fast Cue -------------------------------------------------------------------- {- Get the exponent-prefix of an atom: - Peek at the next word. - Calculate the number of least-significant bits in that word (there's a primitive for this). - Advance by that number of bits. - Return the number of bits -} dExp :: Get Word dExp = do debugMId "dExp" $ do W# w <- peekWord let res = W# (ctz# w) advance (res+1) pure res dAtomLen :: Get Word dAtomLen = do debugMId "dAtomLen" $ do dExp >>= \case 0 -> pure 0 e -> do p <- dWordBits (e-1) pure (2^(e-1) .|. p) dRef :: Get Word dRef = debugMId "dRef" (dAtomLen >>= dWordBits) dAtom :: Get Atom dAtom = do debugMId "dAtom" $ do dAtomLen >>= \case 0 -> pure 0 n -> dAtomBits n dCell :: Get Noun dCell = Cell <$> dNoun <*> dNoun {-| Get a Noun. - Get a bit - If it's zero, get an atom. - Otherwise, get another bit. - If it's zero, get a cell. - If it's one, get an atom. -} dNoun :: Get Noun dNoun = do p <- getPos let yield r = insRef p r >> pure r dBit >>= \case False -> do debugM "It's an atom" (Atom <$> dAtom) >>= yield True -> dBit >>= \case False -> do debugM "It's a cell" dCell >>= yield True -> do debugM "It's a backref" dRef >>= getRef