From 0b68ab8a01d1799bf2e6b2a8aca644349bbec5d5 Mon Sep 17 00:00:00 2001 From: Benjamin Summers Date: Fri, 13 Mar 2020 10:04:25 -0700 Subject: [PATCH] urbit-atom: Use GMPs import/export feature. --- pkg/hs/urbit-atom/lib/Urbit/Atom.hs | 40 ++++++---- pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs | 97 +++++++++++++++++++----- pkg/hs/urbit-atom/test/Main.hs | 30 ++++++-- 3 files changed, 128 insertions(+), 39 deletions(-) diff --git a/pkg/hs/urbit-atom/lib/Urbit/Atom.hs b/pkg/hs/urbit-atom/lib/Urbit/Atom.hs index 4094c36b6..5da0d918b 100644 --- a/pkg/hs/urbit-atom/lib/Urbit/Atom.hs +++ b/pkg/hs/urbit-atom/lib/Urbit/Atom.hs @@ -24,22 +24,16 @@ import Data.ByteString (ByteString) import Data.Vector.Primitive (Vector) import GHC.Natural (Natural) -#if defined(__GHCJS__) -import Urbit.Atom.Slow (atomBytes, bytesAtom) -#else -import Urbit.Atom.Fast (atomBytes, bytesAtom) -#endif - import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T #if defined(__GHCJS__) -import qualified Urbit.Atom.Slow as A -#else -import qualified Urbit.Atom.Fast as A +import qualified Urbit.Atom.Slow as Slow #endif +import qualified Urbit.Atom.Fast as A + -------------------------------------------------------------------------------- @@ -49,24 +43,40 @@ type Atom = Natural -- | Cast an atom to a vector. Does not copy. atomWords :: Atom -> Vector Word -atomWords = A.atomWords +atomWords = +#if defined(__GHCJS__) + Slow.atomWords +#else + A.atomWords +#endif -- | Cast a vector to an atom. Does not copy unless given a slice. wordsAtom :: Vector Word -> Atom -wordsAtom = A.wordsAtom +wordsAtom = +#if defined(__GHCJS__) + Slow.wordsAtom +#else + A.wordsAtom +#endif + +atomBytes :: Atom -> ByteString +atomBytes = A.exportBytes + +bytesAtom :: ByteString -> Atom +bytesAtom = A.importBytes -- | Encode a utf8-encoded atom from text. utf8Atom :: T.Text -> Atom -utf8Atom = A.bytesAtom . T.encodeUtf8 +utf8Atom = A.importBytes . T.encodeUtf8 -- | Interpret an atom as utf8 text. atomUtf8 :: Atom -> Either T.UnicodeException T.Text -atomUtf8 = T.decodeUtf8' . atomBytes +atomUtf8 = T.decodeUtf8' . A.exportBytes -- | Interpret an atom as utf8 text, throwing an exception on bad unicode. atomUtf8Exn :: Atom -> T.Text -atomUtf8Exn = T.decodeUtf8 . atomBytes +atomUtf8Exn = T.decodeUtf8 . A.exportBytes -- | Interpret an atom as utf8 text, replacing bad unicode characters. atomUtf8Lenient :: Atom -> T.Text -atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes +atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes diff --git a/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs b/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs index 3cd36be5f..a7f584043 100644 --- a/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs +++ b/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP, UnliftedFFITypes #-} {-| Atom implementation with fast conversions between bytestrings @@ -12,6 +12,8 @@ module Urbit.Atom.Fast , bytesAtom , atomBytes , atomWords + , exportBytes + , importBytes , bit , byt ) @@ -26,12 +28,10 @@ import Data.Vector.Primitive (Vector(..)) import Data.Word (Word8) import GHC.Exts (Ptr(Ptr), sizeofByteArray#) import GHC.Exts (Int(..)) -import GHC.Integer.GMP.Internals (BigNat(..), bigNatToWord, sizeofBigNat#) -import GHC.Integer.GMP.Internals (isZeroBigNat, indexBigNat#) -import GHC.Integer.GMP.Internals (byteArrayToBigNat#, wordToBigNat, zeroBigNat) +import GHC.Integer.GMP.Internals (BigNat(..)) import GHC.Natural (Natural(..)) import GHC.Prim (Int#, clz#, minusWord#, plusWord#) -import GHC.Prim (Word#, int2Word#, subIntC#, timesWord#) +import GHC.Prim (Word#, Addr#, int2Word#, subIntC#, timesWord#) import GHC.Prim (copyByteArrayToAddr#) import GHC.Word (Word(..)) import System.IO.Unsafe (unsafePerformIO) @@ -41,7 +41,9 @@ import qualified Data.ByteString.Internal as BS import qualified Data.ByteString.Unsafe as BS import qualified Data.Primitive.ByteArray as Prim import qualified Data.Vector.Primitive as VP +import qualified Foreign.ForeignPtr as Ptr import qualified Foreign.ForeignPtr.Unsafe as Ptr +import qualified GHC.Integer.GMP.Internals as G -- Setup BIT and BYT macros. --------------------------------------------------- @@ -77,8 +79,8 @@ bigNatBitWidth# :: BigNat -> Word# bigNatBitWidth# nat = lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` BIT##) where - I# lastIdx = (I# (sizeofBigNat# nat)) - 1 - lswBits = wordBitWidth# (indexBigNat# nat lastIdx) + I# lastIdx = (I# (G.sizeofBigNat# nat)) - 1 + lswBits = wordBitWidth# (G.indexBigNat# nat lastIdx) atomBitWidth# :: Natural -> Word# atomBitWidth# (NatS# gl) = wordBitWidth# gl @@ -131,7 +133,7 @@ bytesPill = Pill . strip Cast a BigNat to a vector without a copy. -} bigNatWords :: BigNat -> Vector Word -bigNatWords bn | isZeroBigNat bn = mempty +bigNatWords bn | G.isZeroBigNat bn = mempty bigNatWords bn@(BN# bArr) = Vector 0 (I# (sizeofByteArray# bArr) `div` BYT) (Prim.ByteArray bArr) @@ -142,11 +144,12 @@ bigNatWords bn@(BN# bArr) = -} wordsBigNat :: Vector Word -> BigNat wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of - 0 -> zeroBigNat + 0 -> G.zeroBigNat 1 -> case VP.unsafeIndex v 0 of - W# w -> wordToBigNat w - n -> - if off /= 0 then error "words2Nat: bad-vec" else byteArrayToBigNat# buf len + W# w -> G.wordToBigNat w + n -> if off /= 0 + then error "words2Nat: bad-vec" + else G.byteArrayToBigNat# buf len {-| More careful version of `wordsBigNat`, but not yet tested. @@ -158,13 +161,14 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of -} wordsBigNat' :: Vector Word -> BigNat wordsBigNat' v = case VP.length v of - 0 -> zeroBigNat - 1 -> wordToBigNat w where W# w = VP.unsafeIndex v 0 + 0 -> G.zeroBigNat + 1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0 n -> if offset v == 0 then extract v else extract (VP.force v) where offset (Vector off _ _) = off - extract (Vector _ (I# len) (Prim.ByteArray buf)) = byteArrayToBigNat# buf len + extract (Vector _ (I# len) (Prim.ByteArray buf)) = + G.byteArrayToBigNat# buf len -------------------------------------------------------------------------------- @@ -179,14 +183,14 @@ wordsAtom = bigNatNat . wordsBigNat -- | Cast a Nat to a BigNat (no copy). natBigNat :: Natural -> BigNat -natBigNat (NatS# w ) = wordToBigNat w +natBigNat (NatS# w ) = G.wordToBigNat w natBigNat (NatJ# bn) = bn -- | Cast a BigNat to a Nat (no copy). bigNatNat :: BigNat -> Natural -bigNatNat bn = case sizeofBigNat# bn of +bigNatNat bn = case G.sizeofBigNat# bn of 0# -> 0 - 1# -> NatS# (bigNatToWord bn) + 1# -> NatS# (G.bigNatToWord bn) _ -> NatJ# bn @@ -263,3 +267,60 @@ atomBytes = pillBytes . natPill bytesAtom :: ByteString -> Natural bytesAtom = pillAtom . bytesPill +-- Try using GMPs `input/export` feature. -------------------------------------- + +-- sizeInBaseInteger i 256# +-- sz = sizeInBaseNatural wor 256# +-- exportBigNatToAddr :: BigNat -> Addr# -> Int# -> IO Word Source# +-- exportWordToAddr :: Word -> Addr# -> Int# -> IO Word + +-- Use "sizeInBaseInteger i 256#" to compute the exact number +-- of bytes written in advance for i /= 0. In case of i == 0, +-- exportIntegerToMutableByteArray will write and report zero bytes written, +-- whereas sizeInBaseInteger report one byte. + +sizeInBaseNatural :: Natural -> Int# -> Word# +{-# INLINE sizeInBaseNatural #-} +sizeInBaseNatural (NatS# w) base = G.sizeInBaseWord# w base +sizeInBaseNatural (NatJ# n) base = G.sizeInBaseBigNat n base + +exportNaturalToAddr :: Natural -> Addr# -> Int# -> IO Word +exportNaturalToAddr (NatS# w) = G.exportWordToAddr (W# w) +exportNaturalToAddr (NatJ# n) = G.exportBigNatToAddr n + +exportNaturalToByteString :: Natural -> Int -> ByteString +exportNaturalToByteString nat (I# i#) = unsafePerformIO $ do + let sz# = sizeInBaseNatural nat 256# + let szi = fromIntegral (W# sz#) + fp <- BS.mallocByteString szi + let Ptr a = Ptr.unsafeForeignPtrToPtr fp + exportNaturalToAddr nat a i# + pure (BS.PS fp 0 szi) + +exportBytes :: Natural -> ByteString +exportBytes 0 = mempty +exportBytes n = exportNaturalToByteString n 0 + +bigNatNatural :: BigNat -> Natural +bigNatNatural big = + case G.sizeofBigNat# big of + 0# -> 0 + 1# -> NatS# (G.bigNatToWord big) + _ -> NatJ# big + +stripBytes :: ByteString -> ByteString +stripBytes buf = BS.take (len - go 0 (len - 1)) buf + where + len = BS.length buf + go n i | i < 0 = n + | 0 == BS.unsafeIndex buf i = go (n + 1) (i - 1) + | otherwise = n + + +importBytes :: ByteString -> Natural +importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do + let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe! + let W# sz# = fromIntegral sz + res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0# + Ptr.touchForeignPtr fp + pure res diff --git a/pkg/hs/urbit-atom/test/Main.hs b/pkg/hs/urbit-atom/test/Main.hs index 647e038e3..0365a2e3c 100644 --- a/pkg/hs/urbit-atom/test/Main.hs +++ b/pkg/hs/urbit-atom/test/Main.hs @@ -20,7 +20,7 @@ import Data.Vector.Primitive (Prim, Vector) import qualified Data.ByteString as BS import qualified Data.ByteString.Unsafe as BS import qualified Data.Vector.Primitive as VP -import qualified Urbit.Atom as F +import qualified Urbit.Atom.Fast as F import qualified Urbit.Atom.Slow as S @@ -102,16 +102,22 @@ prop_fast_atom_words_correct x = F.atomWords x == S.atomWords x prop_fast_bytes_atom_correct :: ByteString -> Bool prop_fast_bytes_atom_correct x = F.bytesAtom x == S.bytesAtom x +prop_fast_atom_import_correct :: ByteString -> Bool +prop_fast_atom_import_correct x = F.importBytes x == S.bytesAtom x + prop_fast_atom_bytes_correct :: Natural -> Bool prop_fast_atom_bytes_correct x = F.atomBytes x == S.atomBytes x +prop_fast_atom_export_correct :: Natural -> Bool +prop_fast_atom_export_correct x = F.exportBytes x == S.atomBytes x + -------------------------------------------------------------------------------- failed :: IORef Int failed = unsafePerformIO (newIORef 0) -checkProp :: (Show x, Arbitrary x) => String -> (x -> Bool) -> IO () +checkProp :: Testable prop => String -> prop -> IO () checkProp nm chk = do putStrLn nm res <- quickCheckResult chk @@ -144,20 +150,32 @@ main = do checkProp "Fast: Bytestring <-> Atom roundtrip" prop_fast_bytes_atom_roundtrip + checkProp "Fast: Export->Import roundtrip" $ do + withMaxSuccess 100000 (dumpLoad F.exportBytes F.importBytes) + + checkProp "Fast: Import->Export roundtrip" $ do + withMaxSuccess 10000 (loadDump F.importBytes F.exportBytes stripBytes) + checkProp "Fast: Vector Word <-> Atom roundtrip" prop_fast_words_atom_roundtrip checkProp "Fast matches reference: Vector Words -> Atom" - prop_fast_words_atom_correct + (withMaxSuccess 10000 prop_fast_words_atom_correct) checkProp "Fast matches reference: Atom -> Vector Word" - prop_fast_atom_words_correct + (withMaxSuccess 10000 prop_fast_atom_words_correct) checkProp "Fast matches reference: ByteString -> Atom" - prop_fast_bytes_atom_correct + (withMaxSuccess 10000 prop_fast_bytes_atom_correct) checkProp "Fast matches reference: Atom -> ByteString" - prop_fast_atom_bytes_correct + (withMaxSuccess 10000 prop_fast_atom_bytes_correct) + + checkProp "Fast matches reference: Atom Import" + (withMaxSuccess 10000 prop_fast_atom_import_correct) + + checkProp "Fast matches reference: Atom Export" + (withMaxSuccess 10000 prop_fast_atom_export_correct) res <- readIORef failed when (res /= 0) $ do