shrub/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs

340 lines
9.7 KiB
Haskell
Raw Normal View History

{-# LANGUAGE CPP, UnliftedFFITypes #-}
2020-03-07 01:30:27 +03:00
{-|
Atom implementation with fast conversions between bytestrings
and atoms.
TODO Support Big Endian.
-}
module Urbit.Atom.Fast
( wordsAtom
, bytesAtom
, atomBytes
, atomWords
, exportBytes
, importBytes
2020-03-18 21:25:58 +03:00
, wordBitWidth#
, wordBitWidth
, atomBitWidth#
, atomBitWidth
, takeBitsWord
, bigNatWords
, wordsBigNat
, bit
, byt
)
where
import Prelude
import Control.Monad.Primitive (primitive_)
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Data.ByteString (ByteString)
import Data.Vector.Primitive (Vector(..))
import Data.Word (Word8)
import GHC.Exts (Ptr(Ptr), sizeofByteArray#)
2020-03-07 01:10:58 +03:00
import GHC.Exts (Int(..))
import GHC.Integer.GMP.Internals (BigNat(..))
import GHC.Natural (Natural(..))
2020-03-07 01:10:58 +03:00
import GHC.Prim (Int#, clz#, minusWord#, plusWord#)
2020-03-18 21:25:58 +03:00
import GHC.Prim (Word#, Addr#, int2Word#, timesWord#)
import GHC.Prim (copyByteArrayToAddr#)
import GHC.Word (Word(..))
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BS
2020-03-07 01:10:58 +03:00
import qualified Data.ByteString.Unsafe as BS
import qualified Data.Primitive.ByteArray as Prim
import qualified Data.Vector.Primitive as VP
import qualified Foreign.ForeignPtr as Ptr
import qualified Foreign.ForeignPtr.Unsafe as Ptr
import qualified GHC.Integer.GMP.Internals as G
2020-03-07 01:30:27 +03:00
-- Setup BIT and BYT macros. ---------------------------------------------------
#include <MachDeps.h>
#if WORD_SIZE_IN_BITS == 64
#define BIT 64
#define BYT 8
#elif WORD_SIZE_IN_BITS == 32
#define BIT 32
#define BYT 4
#else
#error WORD_SIZE_IN_BITS must be either 32 or 64
#endif
bit :: Word
bit = BIT
byt :: Word
byt = BYT
2020-03-07 01:30:27 +03:00
--------------------------------------------------------------------------------
wordBitWidth# :: Word# -> Word#
2020-03-07 01:30:27 +03:00
wordBitWidth# w = minusWord# BIT## (clz# w)
wordBitWidth :: Word -> Word
wordBitWidth (W# w) = W# (wordBitWidth# w)
bigNatBitWidth# :: BigNat -> Word#
bigNatBitWidth# nat =
2020-03-07 01:30:27 +03:00
lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` BIT##)
2020-03-07 01:10:58 +03:00
where
I# lastIdx = (I# (G.sizeofBigNat# nat)) - 1
lswBits = wordBitWidth# (G.indexBigNat# nat lastIdx)
atomBitWidth# :: Natural -> Word#
atomBitWidth# (NatS# gl) = wordBitWidth# gl
atomBitWidth# (NatJ# bn) = bigNatBitWidth# bn
2020-03-18 21:25:58 +03:00
atomBitWidth :: Num a => Natural -> a
atomBitWidth a = fromIntegral (W# (atomBitWidth# a))
--------------------------------------------------------------------------------
{-# INLINE takeBitsWord #-}
takeBitsWord :: Int -> Word -> Word
takeBitsWord wid wor = wor .&. (shiftL 1 wid - 1)
--------------------------------------------------------------------------------
{-
A `Pill` is a bytestring without trailing zeros.
-}
newtype Pill = Pill { unPill :: ByteString }
instance Eq Pill where
(==) x y = pillBytes x == pillBytes y
instance Show Pill where
show = show . pillBytes
2020-03-07 01:10:58 +03:00
--------------------------------------------------------------------------------
2020-03-07 01:10:58 +03:00
strip :: ByteString -> ByteString
strip buf = BS.take (len - go 0 (len - 1)) buf
2020-03-07 01:10:58 +03:00
where
len = BS.length buf
go n i | i < 0 = n
| 0 == BS.unsafeIndex buf i = go (n + 1) (i - 1)
| otherwise = n
pillBytes :: Pill -> ByteString
pillBytes = strip . unPill
bytesPill :: ByteString -> Pill
bytesPill = Pill . strip
--------------------------------------------------------------------------------
{-
Cast a BigNat to a vector without a copy.
-}
2020-03-07 01:10:58 +03:00
bigNatWords :: BigNat -> Vector Word
bigNatWords bn | G.isZeroBigNat bn = mempty
bigNatWords bn@(BN# bArr) =
2020-03-07 01:30:27 +03:00
Vector 0 (I# (sizeofByteArray# bArr) `div` BYT) (Prim.ByteArray bArr)
{-|
Cast a vector to a BigNat. This will not copy.
TODO Don't crash if given a slice.
-}
2020-03-07 01:10:58 +03:00
wordsBigNat :: Vector Word -> BigNat
wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
0 -> G.zeroBigNat
2020-03-07 01:10:58 +03:00
1 -> case VP.unsafeIndex v 0 of
W# w -> G.wordToBigNat w
n -> if off /= 0
then error "words2Nat: bad-vec"
else G.byteArrayToBigNat# buf len
{-|
More careful version of `wordsBigNat`, but not yet tested.
Cast a vector to a BigNat. This will not copy unless input is a slice.
Note that the length of the vector is in words, and the length passed
to `byteArrayToBigNat#` is also in words.
-}
2020-03-18 21:25:58 +03:00
_wordsBigNat :: Vector Word -> BigNat
_wordsBigNat v = case VP.length v of
0 -> G.zeroBigNat
1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
2020-03-07 01:10:58 +03:00
n -> if offset v == 0 then extract v else extract (VP.force v)
where
offset (Vector off _ _) = off
extract (Vector _ (I# len) (Prim.ByteArray buf)) =
G.byteArrayToBigNat# buf len
--------------------------------------------------------------------------------
-- | Cast a nat to a vector (no copy)
atomWords :: Natural -> Vector Word
atomWords = bigNatWords . natBigNat
-- | Cast a vector to a nat (no copy)
wordsAtom :: Vector Word -> Natural
wordsAtom = bigNatNat . wordsBigNat
-- | Cast a Nat to a BigNat (no copy).
2020-03-07 01:10:58 +03:00
natBigNat :: Natural -> BigNat
natBigNat (NatS# w ) = G.wordToBigNat w
natBigNat (NatJ# bn) = bn
-- | Cast a BigNat to a Nat (no copy).
2020-03-07 01:10:58 +03:00
bigNatNat :: BigNat -> Natural
bigNatNat bn = case G.sizeofBigNat# bn of
2020-03-07 01:10:58 +03:00
0# -> 0
1# -> NatS# (G.bigNatToWord bn)
2020-03-07 01:10:58 +03:00
_ -> NatJ# bn
--------------------------------------------------------------------------------
2020-03-18 21:25:58 +03:00
_wordBytes :: Word -> ByteString
_wordBytes wor = BS.reverse $ BS.pack $ go 0 []
2020-03-07 01:10:58 +03:00
where
2020-03-07 01:30:27 +03:00
go i acc | i >= BYT = acc
go i acc | otherwise = go (i + 1) (fromIntegral (shiftR wor (i * BYT)) : acc)
2020-03-07 01:10:58 +03:00
bytesFirstWord :: ByteString -> Word
bytesFirstWord buf = go 0 0
2020-03-07 01:10:58 +03:00
where
2020-03-07 01:30:27 +03:00
top = min BYT (BS.length buf)
2020-03-07 01:10:58 +03:00
i idx off = shiftL (fromIntegral $ BS.index buf idx) off
go acc idx =
2020-03-07 01:30:27 +03:00
if idx >= top then acc else go (acc .|. i idx (BYT * idx)) (idx + 1)
2020-03-07 01:10:58 +03:00
--------------------------------------------------------------------------------
2020-03-18 21:25:58 +03:00
_pillWords :: Pill -> Vector Word
_pillWords = bsToWords . pillBytes
2020-03-07 01:10:58 +03:00
wordsPill :: Vector Word -> Pill
wordsPill = bytesPill . vecBytes . wordsToBytes
2020-03-07 01:10:58 +03:00
--------------------------------------------------------------------------------
wordsToBytes :: Vector Word -> Vector Word8
2020-03-07 01:30:27 +03:00
wordsToBytes (Vector off sz buf) = Vector (off * BYT) (sz * BYT) buf
bsToWords :: ByteString -> Vector Word
2020-03-07 01:30:27 +03:00
bsToWords bs = VP.generate (1 + BS.length bs `div` BYT)
$ \i -> bytesFirstWord (BS.drop (i * BYT) bs)
2020-03-07 01:10:58 +03:00
--------------------------------------------------------------------------------
vecBytes :: Vector Word8 -> ByteString
vecBytes (Vector off sz buf) = unsafePerformIO $ do
fp <- BS.mallocByteString sz
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- Safe b/c returning fp
copyByteArrayToAddr a buf 0 sz
pure (BS.PS fp off sz)
where
unI# :: Int -> Int#
unI# (I# n#) = n#
-- Hack to get GHCJS build working, since it has an old version of the
-- `primitive` library.
copyByteArrayToAddr dst# (Prim.ByteArray src#) soff sz =
primitive_ (copyByteArrayToAddr# src# (unI# soff) dst# (unI# sz))
2020-03-18 21:25:58 +03:00
_bytesVec :: ByteString -> Vector Word8
_bytesVec bs = VP.generate (BS.length bs) (BS.index bs)
2020-03-07 01:10:58 +03:00
--------------------------------------------------------------------------------
2020-03-07 01:10:58 +03:00
natPill :: Natural -> Pill
natPill = wordsPill . atomWords
pillAtom :: Pill -> Natural
pillAtom = wordsAtom . bsToWords . pillBytes
-- | Dump an atom to a bytestring.
atomBytes :: Natural -> ByteString
atomBytes = pillBytes . natPill
-- | Load a bytestring into an atom.
bytesAtom :: ByteString -> Natural
bytesAtom = pillAtom . bytesPill
-- Try using GMPs `input/export` feature. --------------------------------------
-- sizeInBaseInteger i 256#
-- sz = sizeInBaseNatural wor 256#
-- exportBigNatToAddr :: BigNat -> Addr# -> Int# -> IO Word Source#
-- exportWordToAddr :: Word -> Addr# -> Int# -> IO Word
-- Use "sizeInBaseInteger i 256#" to compute the exact number
-- of bytes written in advance for i /= 0. In case of i == 0,
-- exportIntegerToMutableByteArray will write and report zero bytes written,
-- whereas sizeInBaseInteger report one byte.
sizeInBaseNatural :: Natural -> Int# -> Word#
{-# INLINE sizeInBaseNatural #-}
sizeInBaseNatural (NatS# w) base = G.sizeInBaseWord# w base
sizeInBaseNatural (NatJ# n) base = G.sizeInBaseBigNat n base
exportNaturalToAddr :: Natural -> Addr# -> Int# -> IO Word
exportNaturalToAddr (NatS# w) = G.exportWordToAddr (W# w)
exportNaturalToAddr (NatJ# n) = G.exportBigNatToAddr n
exportNaturalToByteString :: Natural -> Int -> ByteString
exportNaturalToByteString nat (I# i#) = unsafePerformIO $ do
let sz# = sizeInBaseNatural nat 256#
let szi = fromIntegral (W# sz#)
fp <- BS.mallocByteString szi
let Ptr a = Ptr.unsafeForeignPtrToPtr fp
exportNaturalToAddr nat a i#
pure (BS.PS fp 0 szi)
exportBytes :: Natural -> ByteString
exportBytes 0 = mempty
exportBytes n = exportNaturalToByteString n 0
bigNatNatural :: BigNat -> Natural
bigNatNatural big =
case G.sizeofBigNat# big of
0# -> 0
1# -> NatS# (G.bigNatToWord big)
_ -> NatJ# big
stripBytes :: ByteString -> ByteString
stripBytes buf = BS.take (len - go 0 (len - 1)) buf
where
len = BS.length buf
go n i | i < 0 = n
| 0 == BS.unsafeIndex buf i = go (n + 1) (i - 1)
| otherwise = n
importBytes :: ByteString -> Natural
2020-03-18 21:25:58 +03:00
importBytes = go . stripBytes
where
go (BS.PS fp 0 sz) = unsafePerformIO $ do
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
let W# sz# = fromIntegral sz
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
Ptr.touchForeignPtr fp
pure res
-- TODO Avoid this extra copy when given a slice. Should be able to
-- just offset the raw pointer.
go bs = importBytes (BS.copy bs)