urbit-atom: Use GMPs import/export feature.

2024-12-25 13:04:17 +03:00 · 2020-03-13 10:04:25 -07:00 · 2020-03-13 10:04:25 -07:00 · 0b68ab8a01
commit 0b68ab8a01
parent 029262333b
3 changed files with 128 additions and 39 deletions
--- a/pkg/hs/urbit-atom/lib/Urbit/Atom.hs
+++ b/pkg/hs/urbit-atom/lib/Urbit/Atom.hs
@ -24,22 +24,16 @@ import Data.ByteString       (ByteString)
 import Data.Vector.Primitive (Vector)
 import GHC.Natural           (Natural)

-#if defined(__GHCJS__)
-import Urbit.Atom.Slow       (atomBytes, bytesAtom)
-#else
-import Urbit.Atom.Fast       (atomBytes, bytesAtom)
-#endif
-
 import qualified Data.Text                as T
 import qualified Data.Text.Encoding       as T
 import qualified Data.Text.Encoding.Error as T

 #if defined(__GHCJS__)
-import qualified Urbit.Atom.Slow as A
-#else
-import qualified Urbit.Atom.Fast as A
+import qualified Urbit.Atom.Slow as Slow
 #endif

+import qualified Urbit.Atom.Fast as A
+

 --------------------------------------------------------------------------------

@ -49,24 +43,40 @@ type Atom = Natural

 -- | Cast an atom to a vector. Does not copy.
 atomWords :: Atom -> Vector Word
-atomWords = A.atomWords
+atomWords =
+#if defined(__GHCJS__)
+  Slow.atomWords
+#else
+  A.atomWords
+#endif

 -- | Cast a vector to an atom. Does not copy unless given a slice.
 wordsAtom :: Vector Word -> Atom
-wordsAtom = A.wordsAtom
+wordsAtom =
+#if defined(__GHCJS__)
+  Slow.wordsAtom
+#else
+  A.wordsAtom
+#endif
+
+atomBytes :: Atom -> ByteString
+atomBytes = A.exportBytes
+
+bytesAtom :: ByteString -> Atom
+bytesAtom = A.importBytes

 -- | Encode a utf8-encoded atom from text.
 utf8Atom :: T.Text -> Atom
-utf8Atom = A.bytesAtom . T.encodeUtf8
+utf8Atom = A.importBytes . T.encodeUtf8

 -- | Interpret an atom as utf8 text.
 atomUtf8 :: Atom -> Either T.UnicodeException T.Text
-atomUtf8 = T.decodeUtf8' . atomBytes
+atomUtf8 = T.decodeUtf8' . A.exportBytes

 -- | Interpret an atom as utf8 text, throwing an exception on bad unicode.
 atomUtf8Exn :: Atom -> T.Text
-atomUtf8Exn = T.decodeUtf8 . atomBytes
+atomUtf8Exn = T.decodeUtf8 . A.exportBytes

 -- | Interpret an atom as utf8 text, replacing bad unicode characters.
 atomUtf8Lenient :: Atom -> T.Text
-atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes
+atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes
--- a/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs
+++ b/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs
@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP, UnliftedFFITypes #-}

 {-|
    Atom implementation with fast conversions between bytestrings
@ -12,6 +12,8 @@ module Urbit.Atom.Fast
  , bytesAtom
  , atomBytes
  , atomWords
+  , exportBytes
+  , importBytes
  , bit
  , byt
  )
@ -26,12 +28,10 @@ import Data.Vector.Primitive     (Vector(..))
 import Data.Word                 (Word8)
 import GHC.Exts                  (Ptr(Ptr), sizeofByteArray#)
 import GHC.Exts                  (Int(..))
-import GHC.Integer.GMP.Internals (BigNat(..), bigNatToWord, sizeofBigNat#)
-import GHC.Integer.GMP.Internals (isZeroBigNat, indexBigNat#)
-import GHC.Integer.GMP.Internals (byteArrayToBigNat#, wordToBigNat, zeroBigNat)
+import GHC.Integer.GMP.Internals (BigNat(..))
 import GHC.Natural               (Natural(..))
 import GHC.Prim                  (Int#, clz#, minusWord#, plusWord#)
-import GHC.Prim                  (Word#, int2Word#, subIntC#, timesWord#)
+import GHC.Prim                  (Word#, Addr#, int2Word#, subIntC#, timesWord#)
 import GHC.Prim                  (copyByteArrayToAddr#)
 import GHC.Word                  (Word(..))
 import System.IO.Unsafe          (unsafePerformIO)
@ -41,7 +41,9 @@ import qualified Data.ByteString.Internal  as BS
 import qualified Data.ByteString.Unsafe    as BS
 import qualified Data.Primitive.ByteArray  as Prim
 import qualified Data.Vector.Primitive     as VP
+import qualified Foreign.ForeignPtr        as Ptr
 import qualified Foreign.ForeignPtr.Unsafe as Ptr
+import qualified GHC.Integer.GMP.Internals as G


 -- Setup BIT and BYT macros. ---------------------------------------------------
@ -77,8 +79,8 @@ bigNatBitWidth# :: BigNat -> Word#
 bigNatBitWidth# nat =
  lswBits `plusWord#` ((int2Word# lastIdx) `timesWord#` BIT##)
 where
-  I# lastIdx = (I# (sizeofBigNat# nat)) - 1
-  lswBits    = wordBitWidth# (indexBigNat# nat lastIdx)
+  I# lastIdx = (I# (G.sizeofBigNat# nat)) - 1
+  lswBits    = wordBitWidth# (G.indexBigNat# nat lastIdx)

 atomBitWidth# :: Natural -> Word#
 atomBitWidth# (NatS# gl) = wordBitWidth# gl
@ -131,7 +133,7 @@ bytesPill = Pill . strip
    Cast a BigNat to a vector without a copy.
 -}
 bigNatWords :: BigNat -> Vector Word
-bigNatWords bn | isZeroBigNat bn = mempty
+bigNatWords bn | G.isZeroBigNat bn = mempty
 bigNatWords bn@(BN# bArr) =
  Vector 0 (I# (sizeofByteArray# bArr) `div` BYT) (Prim.ByteArray bArr)

@ -142,11 +144,12 @@ bigNatWords bn@(BN# bArr) =
 -}
 wordsBigNat :: Vector Word -> BigNat
 wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
-  0 -> zeroBigNat
+  0 -> G.zeroBigNat
  1 -> case VP.unsafeIndex v 0 of
-    W# w -> wordToBigNat w
-  n ->
-    if off /= 0 then error "words2Nat: bad-vec" else byteArrayToBigNat# buf len
+    W# w -> G.wordToBigNat w
+  n -> if off /= 0
+    then error "words2Nat: bad-vec"
+    else G.byteArrayToBigNat# buf len

 {-|
    More careful version of `wordsBigNat`, but not yet tested.
@ -158,13 +161,14 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
 -}
 wordsBigNat' :: Vector Word -> BigNat
 wordsBigNat' v = case VP.length v of
-  0 -> zeroBigNat
-  1 -> wordToBigNat w where W# w = VP.unsafeIndex v 0
+  0 -> G.zeroBigNat
+  1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
  n -> if offset v == 0 then extract v else extract (VP.force v)
 where
  offset (Vector off _ _) = off

-  extract (Vector _ (I# len) (Prim.ByteArray buf)) = byteArrayToBigNat# buf len
+  extract (Vector _ (I# len) (Prim.ByteArray buf)) =
+    G.byteArrayToBigNat# buf len


 --------------------------------------------------------------------------------
@ -179,14 +183,14 @@ wordsAtom = bigNatNat . wordsBigNat

 -- | Cast a Nat to a BigNat (no copy).
 natBigNat :: Natural -> BigNat
-natBigNat (NatS# w ) = wordToBigNat w
+natBigNat (NatS# w ) = G.wordToBigNat w
 natBigNat (NatJ# bn) = bn

 -- | Cast a BigNat to a Nat (no copy).
 bigNatNat :: BigNat -> Natural
-bigNatNat bn = case sizeofBigNat# bn of
+bigNatNat bn = case G.sizeofBigNat# bn of
  0# -> 0
-  1# -> NatS# (bigNatToWord bn)
+  1# -> NatS# (G.bigNatToWord bn)
  _  -> NatJ# bn


@ -263,3 +267,60 @@ atomBytes = pillBytes . natPill
 bytesAtom :: ByteString -> Natural
 bytesAtom = pillAtom . bytesPill

+-- Try using GMPs `input/export` feature. --------------------------------------
+
+-- sizeInBaseInteger i 256#
+-- sz = sizeInBaseNatural wor 256#
+-- exportBigNatToAddr :: BigNat -> Addr# -> Int# -> IO Word Source#
+-- exportWordToAddr :: Word -> Addr# -> Int# -> IO Word
+
+-- Use "sizeInBaseInteger i 256#" to compute the exact number
+-- of bytes written in advance for i /= 0. In case of i == 0,
+-- exportIntegerToMutableByteArray will write and report zero bytes written,
+-- whereas sizeInBaseInteger report one byte.
+
+sizeInBaseNatural :: Natural -> Int# -> Word#
+{-# INLINE sizeInBaseNatural #-}
+sizeInBaseNatural (NatS# w) base = G.sizeInBaseWord# w base
+sizeInBaseNatural (NatJ# n) base = G.sizeInBaseBigNat n base
+
+exportNaturalToAddr :: Natural -> Addr# -> Int# -> IO Word
+exportNaturalToAddr (NatS# w) = G.exportWordToAddr (W# w)
+exportNaturalToAddr (NatJ# n) = G.exportBigNatToAddr n
+
+exportNaturalToByteString :: Natural -> Int -> ByteString
+exportNaturalToByteString nat (I# i#) = unsafePerformIO $ do
+  let sz# = sizeInBaseNatural nat 256#
+  let szi = fromIntegral (W# sz#)
+  fp <- BS.mallocByteString szi
+  let Ptr a = Ptr.unsafeForeignPtrToPtr fp
+  exportNaturalToAddr nat a i#
+  pure (BS.PS fp 0 szi)
+
+exportBytes :: Natural -> ByteString
+exportBytes 0 = mempty
+exportBytes n = exportNaturalToByteString n 0
+
+bigNatNatural :: BigNat -> Natural
+bigNatNatural big =
+  case G.sizeofBigNat# big of
+    0# -> 0
+    1# -> NatS# (G.bigNatToWord big)
+    _  -> NatJ# big
+
+stripBytes :: ByteString -> ByteString
+stripBytes buf = BS.take (len - go 0 (len - 1)) buf
+ where
+  len = BS.length buf
+  go n i | i < 0                     = n
+         | 0 == BS.unsafeIndex buf i = go (n + 1) (i - 1)
+         | otherwise                 = n
+
+
+importBytes :: ByteString -> Natural
+importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do
+  let Ptr a  = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
+  let W# sz# = fromIntegral sz
+  res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
+  Ptr.touchForeignPtr fp
+  pure res
--- a/pkg/hs/urbit-atom/test/Main.hs
+++ b/pkg/hs/urbit-atom/test/Main.hs
@ -20,7 +20,7 @@ import Data.Vector.Primitive (Prim, Vector)
 import qualified Data.ByteString        as BS
 import qualified Data.ByteString.Unsafe as BS
 import qualified Data.Vector.Primitive  as VP
-import qualified Urbit.Atom             as F
+import qualified Urbit.Atom.Fast        as F
 import qualified Urbit.Atom.Slow        as S


@ -102,16 +102,22 @@ prop_fast_atom_words_correct x = F.atomWords x == S.atomWords x
 prop_fast_bytes_atom_correct :: ByteString -> Bool
 prop_fast_bytes_atom_correct x = F.bytesAtom x == S.bytesAtom x

+prop_fast_atom_import_correct :: ByteString -> Bool
+prop_fast_atom_import_correct x = F.importBytes x == S.bytesAtom x
+
 prop_fast_atom_bytes_correct :: Natural -> Bool
 prop_fast_atom_bytes_correct x = F.atomBytes x == S.atomBytes x

+prop_fast_atom_export_correct :: Natural -> Bool
+prop_fast_atom_export_correct x = F.exportBytes x == S.atomBytes x
+

 --------------------------------------------------------------------------------

 failed :: IORef Int
 failed = unsafePerformIO (newIORef 0)

-checkProp :: (Show x, Arbitrary x) => String -> (x -> Bool) -> IO ()
+checkProp :: Testable prop => String -> prop -> IO ()
 checkProp nm chk = do
  putStrLn nm
  res <- quickCheckResult chk
@ -144,20 +150,32 @@ main = do
  checkProp "Fast: Bytestring <-> Atom roundtrip"
    prop_fast_bytes_atom_roundtrip

+  checkProp "Fast: Export->Import roundtrip" $ do
+    withMaxSuccess 100000 (dumpLoad F.exportBytes F.importBytes)
+
+  checkProp "Fast: Import->Export roundtrip" $ do
+    withMaxSuccess 10000 (loadDump F.importBytes F.exportBytes stripBytes)
+
  checkProp "Fast: Vector Word <-> Atom roundtrip"
    prop_fast_words_atom_roundtrip

  checkProp "Fast matches reference: Vector Words -> Atom"
-    prop_fast_words_atom_correct
+    (withMaxSuccess 10000 prop_fast_words_atom_correct)

  checkProp "Fast matches reference: Atom -> Vector Word"
-    prop_fast_atom_words_correct
+    (withMaxSuccess 10000 prop_fast_atom_words_correct)

  checkProp "Fast matches reference: ByteString -> Atom"
-    prop_fast_bytes_atom_correct
+    (withMaxSuccess 10000 prop_fast_bytes_atom_correct)

  checkProp "Fast matches reference: Atom -> ByteString"
-    prop_fast_atom_bytes_correct
+    (withMaxSuccess 10000 prop_fast_atom_bytes_correct)
+
+  checkProp "Fast matches reference: Atom Import"
+    (withMaxSuccess 10000 prop_fast_atom_import_correct)
+
+  checkProp "Fast matches reference: Atom Export"
+    (withMaxSuccess 10000 prop_fast_atom_export_correct)

  res <- readIORef failed
  when (res /= 0) $ do