urbit-atom: Use GMPs import/export feature.

This commit is contained in:
Benjamin Summers 2020-03-13 10:04:25 -07:00
parent 029262333b
commit 0b68ab8a01
3 changed files with 128 additions and 39 deletions

View File

@ -24,22 +24,16 @@ import Data.ByteString (ByteString)
import Data.Vector.Primitive (Vector)
import GHC.Natural (Natural)
#if defined(__GHCJS__)
import Urbit.Atom.Slow (atomBytes, bytesAtom)
#else
import Urbit.Atom.Fast (atomBytes, bytesAtom)
#endif
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
#if defined(__GHCJS__)
import qualified Urbit.Atom.Slow as A
#else
import qualified Urbit.Atom.Fast as A
import qualified Urbit.Atom.Slow as Slow
#endif
import qualified Urbit.Atom.Fast as A
--------------------------------------------------------------------------------
@ -49,24 +43,40 @@ type Atom = Natural
-- | Cast an atom to a vector. Does not copy.
atomWords :: Atom -> Vector Word
atomWords = A.atomWords
atomWords =
#if defined(__GHCJS__)
Slow.atomWords
#else
A.atomWords
#endif
-- | Cast a vector to an atom. Does not copy unless given a slice.
wordsAtom :: Vector Word -> Atom
wordsAtom = A.wordsAtom
wordsAtom =
#if defined(__GHCJS__)
Slow.wordsAtom
#else
A.wordsAtom
#endif
atomBytes :: Atom -> ByteString
atomBytes = A.exportBytes
bytesAtom :: ByteString -> Atom
bytesAtom = A.importBytes
-- | Encode a utf8-encoded atom from text.
utf8Atom :: T.Text -> Atom
utf8Atom = A.bytesAtom . T.encodeUtf8
utf8Atom = A.importBytes . T.encodeUtf8
-- | Interpret an atom as utf8 text.
atomUtf8 :: Atom -> Either T.UnicodeException T.Text
atomUtf8 = T.decodeUtf8' . atomBytes
atomUtf8 = T.decodeUtf8' . A.exportBytes
-- | Interpret an atom as utf8 text, throwing an exception on bad unicode.
atomUtf8Exn :: Atom -> T.Text
atomUtf8Exn = T.decodeUtf8 . atomBytes
atomUtf8Exn = T.decodeUtf8 . A.exportBytes
-- | Interpret an atom as utf8 text, replacing bad unicode characters.
atomUtf8Lenient :: Atom -> T.Text
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes

View File

@ -1,4 +1,4 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE CPP, UnliftedFFITypes #-}
{-|
Atom implementation with fast conversions between bytestrings
@ -12,6 +12,8 @@ module Urbit.Atom.Fast
, bytesAtom
, atomBytes
, atomWords
, exportBytes
, importBytes
, bit
, byt
)
@ -26,12 +28,10 @@ import Data.Vector.Primitive (Vector(..))
import Data.Word (Word8)
import GHC.Exts (Ptr(Ptr), sizeofByteArray#)
import GHC.Exts (Int(..))
import GHC.Integer.GMP.Internals (BigNat(..), bigNatToWord, sizeofBigNat#)
import GHC.Integer.GMP.Internals (isZeroBigNat, indexBigNat#)
import GHC.Integer.GMP.Internals (byteArrayToBigNat#, wordToBigNat, zeroBigNat)
import GHC.Integer.GMP.Internals (BigNat(..))
import GHC.Natural (Natural(..))
import GHC.Prim (Int#, clz#, minusWord#, plusWord#)
import GHC.Prim (Word#, int2Word#, subIntC#, timesWord#)
import GHC.Prim (Word#, Addr#, int2Word#, subIntC#, timesWord#)
import GHC.Prim (copyByteArrayToAddr#)
import GHC.Word (Word(..))
import System.IO.Unsafe (unsafePerformIO)
@ -41,7 +41,9 @@ import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
import qualified Data.Primitive.ByteArray as Prim
import qualified Data.Vector.Primitive as VP
import qualified Foreign.ForeignPtr as Ptr
import qualified Foreign.ForeignPtr.Unsafe as Ptr
import qualified GHC.Integer.GMP.Internals as G
-- Setup BIT and BYT macros. ---------------------------------------------------
@ -77,8 +79,8 @@ bigNatBitWidth# :: BigNat -> Word#
bigNatBitWidth# nat =
lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` BIT##)
where
I# lastIdx = (I# (sizeofBigNat# nat)) - 1
lswBits = wordBitWidth# (indexBigNat# nat lastIdx)
I# lastIdx = (I# (G.sizeofBigNat# nat)) - 1
lswBits = wordBitWidth# (G.indexBigNat# nat lastIdx)
atomBitWidth# :: Natural -> Word#
atomBitWidth# (NatS# gl) = wordBitWidth# gl
@ -131,7 +133,7 @@ bytesPill = Pill . strip
Cast a BigNat to a vector without a copy.
-}
bigNatWords :: BigNat -> Vector Word
bigNatWords bn | isZeroBigNat bn = mempty
bigNatWords bn | G.isZeroBigNat bn = mempty
bigNatWords bn@(BN# bArr) =
Vector 0 (I# (sizeofByteArray# bArr) `div` BYT) (Prim.ByteArray bArr)
@ -142,11 +144,12 @@ bigNatWords bn@(BN# bArr) =
-}
wordsBigNat :: Vector Word -> BigNat
wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
0 -> zeroBigNat
0 -> G.zeroBigNat
1 -> case VP.unsafeIndex v 0 of
W# w -> wordToBigNat w
n ->
if off /= 0 then error "words2Nat: bad-vec" else byteArrayToBigNat# buf len
W# w -> G.wordToBigNat w
n -> if off /= 0
then error "words2Nat: bad-vec"
else G.byteArrayToBigNat# buf len
{-|
More careful version of `wordsBigNat`, but not yet tested.
@ -158,13 +161,14 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
-}
wordsBigNat' :: Vector Word -> BigNat
wordsBigNat' v = case VP.length v of
0 -> zeroBigNat
1 -> wordToBigNat w where W# w = VP.unsafeIndex v 0
0 -> G.zeroBigNat
1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
n -> if offset v == 0 then extract v else extract (VP.force v)
where
offset (Vector off _ _) = off
extract (Vector _ (I# len) (Prim.ByteArray buf)) = byteArrayToBigNat# buf len
extract (Vector _ (I# len) (Prim.ByteArray buf)) =
G.byteArrayToBigNat# buf len
--------------------------------------------------------------------------------
@ -179,14 +183,14 @@ wordsAtom = bigNatNat . wordsBigNat
-- | Cast a Nat to a BigNat (no copy).
natBigNat :: Natural -> BigNat
natBigNat (NatS# w ) = wordToBigNat w
natBigNat (NatS# w ) = G.wordToBigNat w
natBigNat (NatJ# bn) = bn
-- | Cast a BigNat to a Nat (no copy).
bigNatNat :: BigNat -> Natural
bigNatNat bn = case sizeofBigNat# bn of
bigNatNat bn = case G.sizeofBigNat# bn of
0# -> 0
1# -> NatS# (bigNatToWord bn)
1# -> NatS# (G.bigNatToWord bn)
_ -> NatJ# bn
@ -263,3 +267,60 @@ atomBytes = pillBytes . natPill
bytesAtom :: ByteString -> Natural
bytesAtom = pillAtom . bytesPill
-- Try using GMPs `input/export` feature. --------------------------------------
-- sizeInBaseInteger i 256#
-- sz = sizeInBaseNatural wor 256#
-- exportBigNatToAddr :: BigNat -> Addr# -> Int# -> IO Word Source#
-- exportWordToAddr :: Word -> Addr# -> Int# -> IO Word
-- Use "sizeInBaseInteger i 256#" to compute the exact number
-- of bytes written in advance for i /= 0. In case of i == 0,
-- exportIntegerToMutableByteArray will write and report zero bytes written,
-- whereas sizeInBaseInteger report one byte.
sizeInBaseNatural :: Natural -> Int# -> Word#
{-# INLINE sizeInBaseNatural #-}
sizeInBaseNatural (NatS# w) base = G.sizeInBaseWord# w base
sizeInBaseNatural (NatJ# n) base = G.sizeInBaseBigNat n base
exportNaturalToAddr :: Natural -> Addr# -> Int# -> IO Word
exportNaturalToAddr (NatS# w) = G.exportWordToAddr (W# w)
exportNaturalToAddr (NatJ# n) = G.exportBigNatToAddr n
exportNaturalToByteString :: Natural -> Int -> ByteString
exportNaturalToByteString nat (I# i#) = unsafePerformIO $ do
let sz# = sizeInBaseNatural nat 256#
let szi = fromIntegral (W# sz#)
fp <- BS.mallocByteString szi
let Ptr a = Ptr.unsafeForeignPtrToPtr fp
exportNaturalToAddr nat a i#
pure (BS.PS fp 0 szi)
exportBytes :: Natural -> ByteString
exportBytes 0 = mempty
exportBytes n = exportNaturalToByteString n 0
bigNatNatural :: BigNat -> Natural
bigNatNatural big =
case G.sizeofBigNat# big of
0# -> 0
1# -> NatS# (G.bigNatToWord big)
_ -> NatJ# big
stripBytes :: ByteString -> ByteString
stripBytes buf = BS.take (len - go 0 (len - 1)) buf
where
len = BS.length buf
go n i | i < 0 = n
| 0 == BS.unsafeIndex buf i = go (n + 1) (i - 1)
| otherwise = n
importBytes :: ByteString -> Natural
importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
let W# sz# = fromIntegral sz
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
Ptr.touchForeignPtr fp
pure res

View File

@ -20,7 +20,7 @@ import Data.Vector.Primitive (Prim, Vector)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as BS
import qualified Data.Vector.Primitive as VP
import qualified Urbit.Atom as F
import qualified Urbit.Atom.Fast as F
import qualified Urbit.Atom.Slow as S
@ -102,16 +102,22 @@ prop_fast_atom_words_correct x = F.atomWords x == S.atomWords x
prop_fast_bytes_atom_correct :: ByteString -> Bool
prop_fast_bytes_atom_correct x = F.bytesAtom x == S.bytesAtom x
prop_fast_atom_import_correct :: ByteString -> Bool
prop_fast_atom_import_correct x = F.importBytes x == S.bytesAtom x
prop_fast_atom_bytes_correct :: Natural -> Bool
prop_fast_atom_bytes_correct x = F.atomBytes x == S.atomBytes x
prop_fast_atom_export_correct :: Natural -> Bool
prop_fast_atom_export_correct x = F.exportBytes x == S.atomBytes x
--------------------------------------------------------------------------------
failed :: IORef Int
failed = unsafePerformIO (newIORef 0)
checkProp :: (Show x, Arbitrary x) => String -> (x -> Bool) -> IO ()
checkProp :: Testable prop => String -> prop -> IO ()
checkProp nm chk = do
putStrLn nm
res <- quickCheckResult chk
@ -144,20 +150,32 @@ main = do
checkProp "Fast: Bytestring <-> Atom roundtrip"
prop_fast_bytes_atom_roundtrip
checkProp "Fast: Export->Import roundtrip" $ do
withMaxSuccess 100000 (dumpLoad F.exportBytes F.importBytes)
checkProp "Fast: Import->Export roundtrip" $ do
withMaxSuccess 10000 (loadDump F.importBytes F.exportBytes stripBytes)
checkProp "Fast: Vector Word <-> Atom roundtrip"
prop_fast_words_atom_roundtrip
checkProp "Fast matches reference: Vector Words -> Atom"
prop_fast_words_atom_correct
(withMaxSuccess 10000 prop_fast_words_atom_correct)
checkProp "Fast matches reference: Atom -> Vector Word"
prop_fast_atom_words_correct
(withMaxSuccess 10000 prop_fast_atom_words_correct)
checkProp "Fast matches reference: ByteString -> Atom"
prop_fast_bytes_atom_correct
(withMaxSuccess 10000 prop_fast_bytes_atom_correct)
checkProp "Fast matches reference: Atom -> ByteString"
prop_fast_atom_bytes_correct
(withMaxSuccess 10000 prop_fast_atom_bytes_correct)
checkProp "Fast matches reference: Atom Import"
(withMaxSuccess 10000 prop_fast_atom_import_correct)
checkProp "Fast matches reference: Atom Export"
(withMaxSuccess 10000 prop_fast_atom_export_correct)
res <- readIORef failed
when (res /= 0) $ do