urbit-atom: Use GMPs import/export feature.

This commit is contained in:
Benjamin Summers 2020-03-13 10:04:25 -07:00
parent 029262333b
commit 0b68ab8a01
3 changed files with 128 additions and 39 deletions

View File

@ -24,22 +24,16 @@ import Data.ByteString (ByteString)
import Data.Vector.Primitive (Vector) import Data.Vector.Primitive (Vector)
import GHC.Natural (Natural) import GHC.Natural (Natural)
#if defined(__GHCJS__)
import Urbit.Atom.Slow (atomBytes, bytesAtom)
#else
import Urbit.Atom.Fast (atomBytes, bytesAtom)
#endif
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T import qualified Data.Text.Encoding.Error as T
#if defined(__GHCJS__) #if defined(__GHCJS__)
import qualified Urbit.Atom.Slow as A import qualified Urbit.Atom.Slow as Slow
#else
import qualified Urbit.Atom.Fast as A
#endif #endif
import qualified Urbit.Atom.Fast as A
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@ -49,24 +43,40 @@ type Atom = Natural
-- | Cast an atom to a vector. Does not copy. -- | Cast an atom to a vector. Does not copy.
atomWords :: Atom -> Vector Word atomWords :: Atom -> Vector Word
atomWords = A.atomWords atomWords =
#if defined(__GHCJS__)
Slow.atomWords
#else
A.atomWords
#endif
-- | Cast a vector to an atom. Does not copy unless given a slice. -- | Cast a vector to an atom. Does not copy unless given a slice.
wordsAtom :: Vector Word -> Atom wordsAtom :: Vector Word -> Atom
wordsAtom = A.wordsAtom wordsAtom =
#if defined(__GHCJS__)
Slow.wordsAtom
#else
A.wordsAtom
#endif
atomBytes :: Atom -> ByteString
atomBytes = A.exportBytes
bytesAtom :: ByteString -> Atom
bytesAtom = A.importBytes
-- | Encode a utf8-encoded atom from text. -- | Encode a utf8-encoded atom from text.
utf8Atom :: T.Text -> Atom utf8Atom :: T.Text -> Atom
utf8Atom = A.bytesAtom . T.encodeUtf8 utf8Atom = A.importBytes . T.encodeUtf8
-- | Interpret an atom as utf8 text. -- | Interpret an atom as utf8 text.
atomUtf8 :: Atom -> Either T.UnicodeException T.Text atomUtf8 :: Atom -> Either T.UnicodeException T.Text
atomUtf8 = T.decodeUtf8' . atomBytes atomUtf8 = T.decodeUtf8' . A.exportBytes
-- | Interpret an atom as utf8 text, throwing an exception on bad unicode. -- | Interpret an atom as utf8 text, throwing an exception on bad unicode.
atomUtf8Exn :: Atom -> T.Text atomUtf8Exn :: Atom -> T.Text
atomUtf8Exn = T.decodeUtf8 . atomBytes atomUtf8Exn = T.decodeUtf8 . A.exportBytes
-- | Interpret an atom as utf8 text, replacing bad unicode characters. -- | Interpret an atom as utf8 text, replacing bad unicode characters.
atomUtf8Lenient :: Atom -> T.Text atomUtf8Lenient :: Atom -> T.Text
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes

View File

@ -1,4 +1,4 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP, UnliftedFFITypes #-}
{-| {-|
Atom implementation with fast conversions between bytestrings Atom implementation with fast conversions between bytestrings
@ -12,6 +12,8 @@ module Urbit.Atom.Fast
, bytesAtom , bytesAtom
, atomBytes , atomBytes
, atomWords , atomWords
, exportBytes
, importBytes
, bit , bit
, byt , byt
) )
@ -26,12 +28,10 @@ import Data.Vector.Primitive (Vector(..))
import Data.Word (Word8) import Data.Word (Word8)
import GHC.Exts (Ptr(Ptr), sizeofByteArray#) import GHC.Exts (Ptr(Ptr), sizeofByteArray#)
import GHC.Exts (Int(..)) import GHC.Exts (Int(..))
import GHC.Integer.GMP.Internals (BigNat(..), bigNatToWord, sizeofBigNat#) import GHC.Integer.GMP.Internals (BigNat(..))
import GHC.Integer.GMP.Internals (isZeroBigNat, indexBigNat#)
import GHC.Integer.GMP.Internals (byteArrayToBigNat#, wordToBigNat, zeroBigNat)
import GHC.Natural (Natural(..)) import GHC.Natural (Natural(..))
import GHC.Prim (Int#, clz#, minusWord#, plusWord#) import GHC.Prim (Int#, clz#, minusWord#, plusWord#)
import GHC.Prim (Word#, int2Word#, subIntC#, timesWord#) import GHC.Prim (Word#, Addr#, int2Word#, subIntC#, timesWord#)
import GHC.Prim (copyByteArrayToAddr#) import GHC.Prim (copyByteArrayToAddr#)
import GHC.Word (Word(..)) import GHC.Word (Word(..))
import System.IO.Unsafe (unsafePerformIO) import System.IO.Unsafe (unsafePerformIO)
@ -41,7 +41,9 @@ import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Unsafe as BS
import qualified Data.Primitive.ByteArray as Prim import qualified Data.Primitive.ByteArray as Prim
import qualified Data.Vector.Primitive as VP import qualified Data.Vector.Primitive as VP
import qualified Foreign.ForeignPtr as Ptr
import qualified Foreign.ForeignPtr.Unsafe as Ptr import qualified Foreign.ForeignPtr.Unsafe as Ptr
import qualified GHC.Integer.GMP.Internals as G
-- Setup BIT and BYT macros. --------------------------------------------------- -- Setup BIT and BYT macros. ---------------------------------------------------
@ -77,8 +79,8 @@ bigNatBitWidth# :: BigNat -> Word#
bigNatBitWidth# nat = bigNatBitWidth# nat =
lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` BIT##) lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` BIT##)
where where
I# lastIdx = (I# (sizeofBigNat# nat)) - 1 I# lastIdx = (I# (G.sizeofBigNat# nat)) - 1
lswBits = wordBitWidth# (indexBigNat# nat lastIdx) lswBits = wordBitWidth# (G.indexBigNat# nat lastIdx)
atomBitWidth# :: Natural -> Word# atomBitWidth# :: Natural -> Word#
atomBitWidth# (NatS# gl) = wordBitWidth# gl atomBitWidth# (NatS# gl) = wordBitWidth# gl
@ -131,7 +133,7 @@ bytesPill = Pill . strip
Cast a BigNat to a vector without a copy. Cast a BigNat to a vector without a copy.
-} -}
bigNatWords :: BigNat -> Vector Word bigNatWords :: BigNat -> Vector Word
bigNatWords bn | isZeroBigNat bn = mempty bigNatWords bn | G.isZeroBigNat bn = mempty
bigNatWords bn@(BN# bArr) = bigNatWords bn@(BN# bArr) =
Vector 0 (I# (sizeofByteArray# bArr) `div` BYT) (Prim.ByteArray bArr) Vector 0 (I# (sizeofByteArray# bArr) `div` BYT) (Prim.ByteArray bArr)
@ -142,11 +144,12 @@ bigNatWords bn@(BN# bArr) =
-} -}
wordsBigNat :: Vector Word -> BigNat wordsBigNat :: Vector Word -> BigNat
wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
0 -> zeroBigNat 0 -> G.zeroBigNat
1 -> case VP.unsafeIndex v 0 of 1 -> case VP.unsafeIndex v 0 of
W# w -> wordToBigNat w W# w -> G.wordToBigNat w
n -> n -> if off /= 0
if off /= 0 then error "words2Nat: bad-vec" else byteArrayToBigNat# buf len then error "words2Nat: bad-vec"
else G.byteArrayToBigNat# buf len
{-| {-|
More careful version of `wordsBigNat`, but not yet tested. More careful version of `wordsBigNat`, but not yet tested.
@ -158,13 +161,14 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
-} -}
wordsBigNat' :: Vector Word -> BigNat wordsBigNat' :: Vector Word -> BigNat
wordsBigNat' v = case VP.length v of wordsBigNat' v = case VP.length v of
0 -> zeroBigNat 0 -> G.zeroBigNat
1 -> wordToBigNat w where W# w = VP.unsafeIndex v 0 1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
n -> if offset v == 0 then extract v else extract (VP.force v) n -> if offset v == 0 then extract v else extract (VP.force v)
where where
offset (Vector off _ _) = off offset (Vector off _ _) = off
extract (Vector _ (I# len) (Prim.ByteArray buf)) = byteArrayToBigNat# buf len extract (Vector _ (I# len) (Prim.ByteArray buf)) =
G.byteArrayToBigNat# buf len
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@ -179,14 +183,14 @@ wordsAtom = bigNatNat . wordsBigNat
-- | Cast a Nat to a BigNat (no copy). -- | Cast a Nat to a BigNat (no copy).
natBigNat :: Natural -> BigNat natBigNat :: Natural -> BigNat
natBigNat (NatS# w ) = wordToBigNat w natBigNat (NatS# w ) = G.wordToBigNat w
natBigNat (NatJ# bn) = bn natBigNat (NatJ# bn) = bn
-- | Cast a BigNat to a Nat (no copy). -- | Cast a BigNat to a Nat (no copy).
bigNatNat :: BigNat -> Natural bigNatNat :: BigNat -> Natural
bigNatNat bn = case sizeofBigNat# bn of bigNatNat bn = case G.sizeofBigNat# bn of
0# -> 0 0# -> 0
1# -> NatS# (bigNatToWord bn) 1# -> NatS# (G.bigNatToWord bn)
_ -> NatJ# bn _ -> NatJ# bn
@ -263,3 +267,60 @@ atomBytes = pillBytes . natPill
bytesAtom :: ByteString -> Natural bytesAtom :: ByteString -> Natural
bytesAtom = pillAtom . bytesPill bytesAtom = pillAtom . bytesPill
-- Try using GMPs `input/export` feature. --------------------------------------
-- sizeInBaseInteger i 256#
-- sz = sizeInBaseNatural wor 256#
-- exportBigNatToAddr :: BigNat -> Addr# -> Int# -> IO Word Source#
-- exportWordToAddr :: Word -> Addr# -> Int# -> IO Word
-- Use "sizeInBaseInteger i 256#" to compute the exact number
-- of bytes written in advance for i /= 0. In case of i == 0,
-- exportIntegerToMutableByteArray will write and report zero bytes written,
-- whereas sizeInBaseInteger report one byte.
sizeInBaseNatural :: Natural -> Int# -> Word#
{-# INLINE sizeInBaseNatural #-}
sizeInBaseNatural (NatS# w) base = G.sizeInBaseWord# w base
sizeInBaseNatural (NatJ# n) base = G.sizeInBaseBigNat n base
exportNaturalToAddr :: Natural -> Addr# -> Int# -> IO Word
exportNaturalToAddr (NatS# w) = G.exportWordToAddr (W# w)
exportNaturalToAddr (NatJ# n) = G.exportBigNatToAddr n
exportNaturalToByteString :: Natural -> Int -> ByteString
exportNaturalToByteString nat (I# i#) = unsafePerformIO $ do
let sz# = sizeInBaseNatural nat 256#
let szi = fromIntegral (W# sz#)
fp <- BS.mallocByteString szi
let Ptr a = Ptr.unsafeForeignPtrToPtr fp
exportNaturalToAddr nat a i#
pure (BS.PS fp 0 szi)
exportBytes :: Natural -> ByteString
exportBytes 0 = mempty
exportBytes n = exportNaturalToByteString n 0
bigNatNatural :: BigNat -> Natural
bigNatNatural big =
case G.sizeofBigNat# big of
0# -> 0
1# -> NatS# (G.bigNatToWord big)
_ -> NatJ# big
stripBytes :: ByteString -> ByteString
stripBytes buf = BS.take (len - go 0 (len - 1)) buf
where
len = BS.length buf
go n i | i < 0 = n
| 0 == BS.unsafeIndex buf i = go (n + 1) (i - 1)
| otherwise = n
importBytes :: ByteString -> Natural
importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
let W# sz# = fromIntegral sz
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
Ptr.touchForeignPtr fp
pure res

View File

@ -20,7 +20,7 @@ import Data.Vector.Primitive (Prim, Vector)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Unsafe as BS
import qualified Data.Vector.Primitive as VP import qualified Data.Vector.Primitive as VP
import qualified Urbit.Atom as F import qualified Urbit.Atom.Fast as F
import qualified Urbit.Atom.Slow as S import qualified Urbit.Atom.Slow as S
@ -102,16 +102,22 @@ prop_fast_atom_words_correct x = F.atomWords x == S.atomWords x
prop_fast_bytes_atom_correct :: ByteString -> Bool prop_fast_bytes_atom_correct :: ByteString -> Bool
prop_fast_bytes_atom_correct x = F.bytesAtom x == S.bytesAtom x prop_fast_bytes_atom_correct x = F.bytesAtom x == S.bytesAtom x
prop_fast_atom_import_correct :: ByteString -> Bool
prop_fast_atom_import_correct x = F.importBytes x == S.bytesAtom x
prop_fast_atom_bytes_correct :: Natural -> Bool prop_fast_atom_bytes_correct :: Natural -> Bool
prop_fast_atom_bytes_correct x = F.atomBytes x == S.atomBytes x prop_fast_atom_bytes_correct x = F.atomBytes x == S.atomBytes x
prop_fast_atom_export_correct :: Natural -> Bool
prop_fast_atom_export_correct x = F.exportBytes x == S.atomBytes x
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
failed :: IORef Int failed :: IORef Int
failed = unsafePerformIO (newIORef 0) failed = unsafePerformIO (newIORef 0)
checkProp :: (Show x, Arbitrary x) => String -> (x -> Bool) -> IO () checkProp :: Testable prop => String -> prop -> IO ()
checkProp nm chk = do checkProp nm chk = do
putStrLn nm putStrLn nm
res <- quickCheckResult chk res <- quickCheckResult chk
@ -144,20 +150,32 @@ main = do
checkProp "Fast: Bytestring <-> Atom roundtrip" checkProp "Fast: Bytestring <-> Atom roundtrip"
prop_fast_bytes_atom_roundtrip prop_fast_bytes_atom_roundtrip
checkProp "Fast: Export->Import roundtrip" $ do
withMaxSuccess 100000 (dumpLoad F.exportBytes F.importBytes)
checkProp "Fast: Import->Export roundtrip" $ do
withMaxSuccess 10000 (loadDump F.importBytes F.exportBytes stripBytes)
checkProp "Fast: Vector Word <-> Atom roundtrip" checkProp "Fast: Vector Word <-> Atom roundtrip"
prop_fast_words_atom_roundtrip prop_fast_words_atom_roundtrip
checkProp "Fast matches reference: Vector Words -> Atom" checkProp "Fast matches reference: Vector Words -> Atom"
prop_fast_words_atom_correct (withMaxSuccess 10000 prop_fast_words_atom_correct)
checkProp "Fast matches reference: Atom -> Vector Word" checkProp "Fast matches reference: Atom -> Vector Word"
prop_fast_atom_words_correct (withMaxSuccess 10000 prop_fast_atom_words_correct)
checkProp "Fast matches reference: ByteString -> Atom" checkProp "Fast matches reference: ByteString -> Atom"
prop_fast_bytes_atom_correct (withMaxSuccess 10000 prop_fast_bytes_atom_correct)
checkProp "Fast matches reference: Atom -> ByteString" checkProp "Fast matches reference: Atom -> ByteString"
prop_fast_atom_bytes_correct (withMaxSuccess 10000 prop_fast_atom_bytes_correct)
checkProp "Fast matches reference: Atom Import"
(withMaxSuccess 10000 prop_fast_atom_import_correct)
checkProp "Fast matches reference: Atom Export"
(withMaxSuccess 10000 prop_fast_atom_export_correct)
res <- readIORef failed res <- readIORef failed
when (res /= 0) $ do when (res /= 0) $ do