mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-12-01 20:04:09 +03:00
Wrote most of the code for a high-perf cue.
This commit is contained in:
parent
a66aeb398a
commit
3fa12dcec4
@ -29,14 +29,14 @@ instance Show Atom where
|
||||
-}
|
||||
data Cursor = Cursor
|
||||
{ _cOffset :: {-# UNPACK #-} !Int
|
||||
, _cBuffer :: {-# UNPACK #-} !Atom
|
||||
, _cBuffer :: !Atom
|
||||
}
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data Slice = Slice
|
||||
{ _sOffset :: {-# UNPACK #-} !Int
|
||||
, _sWidth :: {-# UNPACK #-} !Int
|
||||
, _sBuffer :: {-# UNPACK #-} !Atom
|
||||
, _sBuffer :: !Atom
|
||||
}
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
@ -6,22 +6,26 @@ import ClassyPrelude
|
||||
import Data.Noun
|
||||
import Data.Noun.Atom
|
||||
import Data.Noun.Poet
|
||||
import Data.Bits
|
||||
import Data.Bits hiding (Bits)
|
||||
import Control.Lens
|
||||
import Text.Printf
|
||||
import GHC.Prim
|
||||
import GHC.Word
|
||||
import GHC.Natural
|
||||
import Foreign.Ptr
|
||||
import Foreign.Storable (peek)
|
||||
|
||||
import Data.Map (Map)
|
||||
import Control.Monad (guard)
|
||||
|
||||
import Test.Tasty
|
||||
import Test.Tasty.TH
|
||||
import Test.Tasty.QuickCheck as QC
|
||||
import Test.QuickCheck
|
||||
import qualified Test.Tasty.QuickCheck as QC
|
||||
import Test.QuickCheck hiding ((.&.))
|
||||
|
||||
-- High-Performance Jam --------------------------------------------------------
|
||||
import qualified Data.HashTable.IO as H
|
||||
|
||||
-- Pre-Calculate the bit-width of `jam` ----------------------------------------
|
||||
|
||||
matSz# :: Atom -> Word#
|
||||
matSz# 0 = 1##
|
||||
@ -66,3 +70,290 @@ jamSz = fst . go 0 mempty
|
||||
else (1 + worSz, tbl)
|
||||
Cell _ _ ->
|
||||
(refSz, oldTbl)
|
||||
|
||||
-- How to write a faster `cue`? ------------------------------------------------
|
||||
|
||||
{-|
|
||||
The decoder state.
|
||||
|
||||
- An array of words (internal structure of our atoms).
|
||||
- A pointer to the word *after* the last word in the array.
|
||||
- A pointer into the current word of that array.
|
||||
- A bit-offset into that word.
|
||||
-}
|
||||
data S = S
|
||||
{ currPtr :: {-# UNPACK #-} !(Ptr Word)
|
||||
, usedBits :: {-# UNPACK #-} !Word
|
||||
, pos :: {-# UNPACK #-} !Word
|
||||
} deriving (Show,Eq,Ord)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
type Env = (Ptr Word, S)
|
||||
|
||||
data DecodeException = NotEnoughSpace Env
|
||||
| TooMuchSpace Env
|
||||
| BadEncoding Env String
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
instance Exception DecodeException
|
||||
|
||||
badEncoding :: Ptr Word -> S -> String -> IO a
|
||||
badEncoding endPtr s msg = throwIO $ BadEncoding (endPtr,s) msg
|
||||
|
||||
-- The Get Monad ---------------------------------------------------------------
|
||||
|
||||
data GetResult a = GetResult {-# UNPACK #-} !S !a
|
||||
deriving Functor
|
||||
|
||||
newtype Get a = Get
|
||||
{ runGet :: Ptr Word
|
||||
-> H.LinearHashTable Word Noun
|
||||
-> S
|
||||
-> IO (GetResult a)
|
||||
}
|
||||
|
||||
instance Functor Get where
|
||||
fmap f g = Get $ \end tbl s -> do
|
||||
GetResult s' a <- runGet g end tbl s
|
||||
return $ GetResult s' (f a)
|
||||
{-# INLINE fmap #-}
|
||||
|
||||
instance Applicative Get where
|
||||
pure x = Get (\_ _ s -> return $ GetResult s x)
|
||||
{-# INLINE pure #-}
|
||||
|
||||
Get f <*> Get g = Get $ \end tbl s1 -> do
|
||||
GetResult s2 f' <- f end tbl s1
|
||||
GetResult s3 g' <- g end tbl s2
|
||||
return $ GetResult s3 (f' g')
|
||||
{-# INLINE (<*>) #-}
|
||||
|
||||
Get f *> Get g = Get $ \end tbl s1 -> do
|
||||
GetResult s2 _ <- f end tbl s1
|
||||
g end tbl s2
|
||||
{-# INLINE (*>) #-}
|
||||
|
||||
instance Monad Get where
|
||||
return = pure
|
||||
{-# INLINE return #-}
|
||||
|
||||
(>>) = (*>)
|
||||
{-# INLINE (>>) #-}
|
||||
|
||||
Get x >>= f = Get $ \end tbl s -> do
|
||||
GetResult s' x' <- x end tbl s
|
||||
runGet (f x') end tbl s'
|
||||
{-# INLINE (>>=) #-}
|
||||
|
||||
fail msg = Get $ \end tbl s ->
|
||||
badEncoding end s msg
|
||||
{-# INLINE fail #-}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
type Bits = Vector Bool
|
||||
|
||||
getPos :: Get Word
|
||||
getPos = Get $ \_ _ s ->
|
||||
pure (GetResult s (pos s))
|
||||
|
||||
insRef :: Word -> Noun -> Get ()
|
||||
insRef pos now = Get \_ tbl s -> do
|
||||
H.insert tbl pos now
|
||||
pure $ GetResult s ()
|
||||
|
||||
getRef :: Word -> Get Noun
|
||||
getRef ref = Get \_ tbl s -> do
|
||||
H.lookup tbl ref >>= \case
|
||||
Nothing -> fail "Invalid Reference"
|
||||
Just no -> pure (GetResult s no)
|
||||
|
||||
advance :: Word -> Get ()
|
||||
advance n = Get \_ _ s -> do
|
||||
let newUsed = n + usedBits s
|
||||
newS = s { pos = pos s + n
|
||||
, usedBits = newUsed `mod` 64
|
||||
, currPtr = plusPtr (currPtr s)
|
||||
(fromIntegral $ newUsed `div` 64)
|
||||
}
|
||||
|
||||
pure (GetResult newS ())
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- TODO Should this be (>= end) or (> end)?
|
||||
peekCurWord :: Get Word
|
||||
peekCurWord = Get \end _ s ->
|
||||
if ptrToWordPtr (currPtr s) >= ptrToWordPtr end
|
||||
then pure (GetResult s 0)
|
||||
else GetResult s <$> peek (currPtr s)
|
||||
|
||||
-- TODO Same question as above.
|
||||
peekNextWord :: Get Word
|
||||
peekNextWord = Get \end _ s ->
|
||||
if ptrToWordPtr (currPtr s) > ptrToWordPtr end
|
||||
then pure (GetResult s 0)
|
||||
else GetResult s <$> peek (currPtr s `plusPtr` 1)
|
||||
|
||||
peekUsedBits :: Get Word
|
||||
peekUsedBits = Get \_ _ s -> pure (GetResult s (usedBits s))
|
||||
|
||||
{-|
|
||||
Get a bit.
|
||||
|
||||
- Peek the current word.
|
||||
- Right-shift by the bit-offset.
|
||||
- Mask the high bits.
|
||||
-}
|
||||
dBit :: Get Bool
|
||||
dBit = do
|
||||
wor <- peekCurWord
|
||||
use <- fromIntegral <$> peekUsedBits
|
||||
advance 1
|
||||
pure (0 /= shiftR wor use .&. 1)
|
||||
|
||||
{-|
|
||||
Get n bits, where n > 64:
|
||||
|
||||
- Get (n/64) words.
|
||||
- Advance by n bits.
|
||||
- Calculate an offset (equal to the current bit-offset)
|
||||
- Calculate the length (equal to n)
|
||||
- Construct a bit-vector using the buffer*length*offset.
|
||||
-}
|
||||
dBits :: Word -> Get Bits
|
||||
dBits = undefined
|
||||
|
||||
{-|
|
||||
In order to peek at the next Word64:
|
||||
|
||||
- If we are past the end of the buffer:
|
||||
- Return zero.
|
||||
- If the bit-offset is zero:
|
||||
- Just peek.
|
||||
- If we are pointing to the last word:
|
||||
- Peek and right-shift by the bit offset.
|
||||
- Otherwise,
|
||||
- Peek the current word *and* the next word.
|
||||
- Right-shift the current word by the bit-offset.
|
||||
- Left-shift the next word by the bit-offset.
|
||||
- Binary or the resulting two words.
|
||||
-}
|
||||
peekWord :: Get Word
|
||||
peekWord = do
|
||||
off <- peekUsedBits
|
||||
cur <- peekCurWord
|
||||
if off == 0 then pure cur else do
|
||||
nex <- peekNextWord
|
||||
advance 64
|
||||
pure (dropLowBits off cur .|. dropHighBits off nex)
|
||||
|
||||
dropLowBits :: Word -> Word -> Word
|
||||
dropLowBits bits wor = shiftR wor (fromIntegral bits :: Int)
|
||||
|
||||
takeLowBits :: Word -> Word -> Word
|
||||
takeLowBits 64 wor = wor
|
||||
takeLowBits wid wor = (2^wid - 1) .&. wor
|
||||
|
||||
takeHighBits :: Word -> Word -> Word
|
||||
takeHighBits off wor = dropLowBits (64-off) wor
|
||||
|
||||
dropHighBits :: Word -> Word -> Word
|
||||
dropHighBits off wor = takeLowBits (64-off) wor
|
||||
|
||||
{-|
|
||||
Make a word from the next n bits (where n <= 64).
|
||||
|
||||
- Peek at the next word.
|
||||
- Mask the n lowest bits from the word.
|
||||
- Advance by that number of bits.
|
||||
- Return the word.
|
||||
-}
|
||||
dWordBits :: Word -> Get Word
|
||||
dWordBits n = do
|
||||
w <- peekWord
|
||||
advance n
|
||||
pure (takeLowBits n w)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
bitsToAtom :: Bits -> Atom
|
||||
bitsToAtom = undefined
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
{-
|
||||
Get the exponent-prefix of an atom:
|
||||
|
||||
- Peek at the next word.
|
||||
- Calculate the number of least-significant bits in that word (there's
|
||||
a primitive for this).
|
||||
- Advance by that number of bits.
|
||||
- Return the number of bits
|
||||
-}
|
||||
dExp :: Get Word
|
||||
dExp = do
|
||||
W# w <- peekWord
|
||||
let res = W# (ctz# w)
|
||||
advance res
|
||||
pure res
|
||||
|
||||
dAtomLen :: Get Word
|
||||
dAtomLen = do
|
||||
e <- dExp
|
||||
p <- dWordBits (e-1)
|
||||
pure (2^e .|. p)
|
||||
|
||||
dRef :: Get Word
|
||||
dRef = dAtomLen >>= dWordBits
|
||||
|
||||
dAtom :: Get Atom
|
||||
dAtom = do
|
||||
n <- dAtomLen
|
||||
b <- dBits n
|
||||
pure (bitsToAtom b)
|
||||
|
||||
dCell :: Get Noun
|
||||
dCell = Cell <$> dNoun <*> dNoun
|
||||
|
||||
{-|
|
||||
Get a Noun.
|
||||
|
||||
- Get a bit
|
||||
- If it's zero, get an atom.
|
||||
- Otherwise, get another bit.
|
||||
- If it's zero, get a cell.
|
||||
- If it's one, get an atom.
|
||||
-}
|
||||
dNoun :: Get Noun
|
||||
dNoun = do
|
||||
p <- getPos
|
||||
|
||||
let yield r = insRef p r >> pure r
|
||||
|
||||
dBit >>= \case
|
||||
False -> (Atom <$> dAtom) >>= yield
|
||||
True -> dBit >>= \case
|
||||
False -> dCell >>= yield
|
||||
True -> dRef >>= getRef
|
||||
|
||||
{-
|
||||
Count leading zero bits.
|
||||
|
||||
Read a 64 bit word from the buffer and get the number of leading
|
||||
zeros in that word. This works as long as no atom is larger than
|
||||
2 zettabytes.
|
||||
|
||||
- TODO Need to handle the edge-case where there are less than 64 bits
|
||||
remaining in the buffer. Those extra bytes need to be zeros. One way
|
||||
to handle this might be to add a zero word to the end of the buffer,
|
||||
but that would require a re-alloc. Probably the right way is to
|
||||
write new `peek` primitives that handle this case.
|
||||
|
||||
- TODO Error out if we hit the end *and* the word is all zeros.
|
||||
|
||||
Alright, let's pseudo-code this out:
|
||||
|
||||
Grab the next 64 bits. Pill files are always LSB-first
|
||||
-}
|
||||
|
@ -1,6 +1,23 @@
|
||||
{-# LANGUAGE MagicHash #-}
|
||||
|
||||
-- TODO Handle 32-bit architectures
|
||||
{-
|
||||
TODO Handle 32-bit architectures
|
||||
TODO A faster version of this is possible:
|
||||
|
||||
- Get the byte-length of a file.
|
||||
- Round up to a multiple of 8 (or 4 if 32bit cpu)
|
||||
- Allocate a mutable vector of Word8 with that size.
|
||||
- Read the file into the array.
|
||||
- Manually cast to an array of Word.
|
||||
- On big endian, update each words with `System.Endian.fromLE64`.
|
||||
- If there are trailing 0 words, adjust the vector size to delete them.
|
||||
- unsafeFreeze the vector.
|
||||
- Run `byteArrayToBigNat#` on the underlying byte array.
|
||||
- Convert the BigNat to a Natural, to an Atom.
|
||||
- The whole thing becomes zero-copy for little endian machines, with
|
||||
one zero-copy transformation of the whole structure on big-endian
|
||||
machines.
|
||||
-}
|
||||
|
||||
module Data.Noun.Pill where
|
||||
|
||||
|
@ -67,6 +67,7 @@ dependencies:
|
||||
- transformers
|
||||
- unordered-containers
|
||||
- vector
|
||||
- hashtables
|
||||
|
||||
default-extensions:
|
||||
- ApplicativeDo
|
||||
|
@ -10,3 +10,8 @@ ghc-options:
|
||||
extra-deps:
|
||||
- para-1.1@sha256:a90eebb063ad70271e6e2a7f00a93e8e8f8b77273f100f39852fbf8301926f81
|
||||
- flat-0.3.4@sha256:002a0e0ae656ea8cc02a772d0bcb6ea7dbd7f2e79070959cc748ad1e7138eb38
|
||||
|
||||
build:
|
||||
library-profiling: true
|
||||
executable-profiling: true
|
||||
executable-stripping: false
|
||||
|
Loading…
Reference in New Issue
Block a user