urbit-atom: Cleanup

2024-11-24 04:58:08 +03:00 · 2020-03-18 11:25:58 -07:00 · 2020-03-18 11:25:58 -07:00 · 5bf82f46c2
commit 5bf82f46c2
parent 33ef966afc
6 changed files with 99 additions and 59 deletions
--- a/pkg/hs/urbit-atom/TODO.md
+++ b/pkg/hs/urbit-atom/TODO.md
@ -1,3 +1,5 @@
 - Support big-endian CPUs (CPP flag; use GMP import/export)
 - Support `integer-simple`. (Android)
+- `atomWords` and `wordsAtom` are unacceptably slow on GHCJS.
+  - This is only used in `jam`/`cue`, so it doesn't matter for now.
 - Understand why my hand rolled import is slower than GMP import.
--- a/pkg/hs/urbit-atom/lib/Urbit/Atom.hs
+++ b/pkg/hs/urbit-atom/lib/Urbit/Atom.hs
@ -39,28 +39,18 @@ import qualified Urbit.Atom.Fast as A

 type Atom = Natural

--------------------------------------------------------------------------------

-- | Cast an atom to a vector. Does not copy.
-atomWords :: Atom -> Vector Word
-atomWords =
-#if defined(__GHCJS__)
-  Slow.atomWords
-#else
-  A.atomWords
-#endif
+-- Choose Implementation Based on Platform -------------------------------------

-- | Cast a vector to an atom. Does not copy unless given a slice.
-wordsAtom :: Vector Word -> Atom
-wordsAtom =
-#if defined(__GHCJS__)
-  Slow.wordsAtom
-#else
-  A.wordsAtom
-#endif
+{- |
+  Convert an Atom to a bytestring. O(n), copies.

-- My export routine is faster but doesn't work in GHCJS.
-- TODO: It also doesn't work on big-endian machines.
+  My hand-rolled implementation is faster, but doesn't work on GHCJS. So,
+  on GHCJS use GMP's `export` routine.
+
+  TODO GMP's `export` routine also handles big endian machines, so use
+  in that case too.
+-}
 atomBytes :: Atom -> ByteString
 atomBytes =
 #if defined(__GHCJS__)
@ -69,23 +59,59 @@ atomBytes =
  A.atomBytes
 #endif

+{- |
+  Convert a bytestring to an Atom. O(n), copies.

-- GMP's import is always faster than my loading routine.
+  This always uses GMP's `export` routine, since it's portable and faster
+  than my hand-rolled implementation.
+-}
 bytesAtom :: ByteString -> Atom
 bytesAtom = A.importBytes

+{- |
+  Cast an atom to a vector. O(1), does not copy.
+
+  My fast implementation doesn't work on GHCJS, so fallback to the naive
+  implementation on that platform for now.
+-}
+atomWords :: Atom -> Vector Word
+atomWords =
+#if defined(__GHCJS__)
+  Slow.atomWords
+#else
+  A.atomWords
+#endif
+
+{- |
+  Cast a vector to an atom. O(1), does not copy unless given a slice,
+  then O(n).
+
+  My fast implementation doesn't work on GHCJS, so fallback to the naive
+  implementation on that platform for now.
+-}
+wordsAtom :: Vector Word -> Atom
+wordsAtom =
+#if defined(__GHCJS__)
+  Slow.wordsAtom
+#else
+  A.wordsAtom
+#endif
+
+
+-- String/Cord Conversion ------------------------------------------------------
+
 -- | Encode a utf8-encoded atom from text.
 utf8Atom :: T.Text -> Atom
-utf8Atom = A.importBytes . T.encodeUtf8
+utf8Atom = bytesAtom . T.encodeUtf8

 -- | Interpret an atom as utf8 text.
 atomUtf8 :: Atom -> Either T.UnicodeException T.Text
-atomUtf8 = T.decodeUtf8' . A.exportBytes
+atomUtf8 = T.decodeUtf8' . atomBytes

 -- | Interpret an atom as utf8 text, throwing an exception on bad unicode.
 atomUtf8Exn :: Atom -> T.Text
-atomUtf8Exn = T.decodeUtf8 . A.exportBytes
+atomUtf8Exn = T.decodeUtf8 . atomBytes

 -- | Interpret an atom as utf8 text, replacing bad unicode characters.
 atomUtf8Lenient :: Atom -> T.Text
-atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes
+atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes
--- a/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs
+++ b/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs
@ -14,6 +14,13 @@ module Urbit.Atom.Fast
  , atomWords
  , exportBytes
  , importBytes
+  , wordBitWidth#
+  , wordBitWidth
+  , atomBitWidth#
+  , atomBitWidth
+  , takeBitsWord
+  , bigNatWords
+  , wordsBigNat
  , bit
  , byt
  )
@ -31,7 +38,7 @@ import GHC.Exts                  (Int(..))
 import GHC.Integer.GMP.Internals (BigNat(..))
 import GHC.Natural               (Natural(..))
 import GHC.Prim                  (Int#, clz#, minusWord#, plusWord#)
-import GHC.Prim                  (Word#, Addr#, int2Word#, subIntC#, timesWord#)
+import GHC.Prim                  (Word#, Addr#, int2Word#, timesWord#)
 import GHC.Prim                  (copyByteArrayToAddr#)
 import GHC.Word                  (Word(..))
 import System.IO.Unsafe          (unsafePerformIO)
@ -86,8 +93,8 @@ atomBitWidth# :: Natural -> Word#
 atomBitWidth# (NatS# gl) = wordBitWidth# gl
 atomBitWidth# (NatJ# bn) = bigNatBitWidth# bn

-bitWidth :: Num a => Natural -> a
-bitWidth a = fromIntegral (W# (atomBitWidth# a))
+atomBitWidth :: Num a => Natural -> a
+atomBitWidth a = fromIntegral (W# (atomBitWidth# a))

 --------------------------------------------------------------------------------

@ -159,8 +166,8 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
    Note that the length of the vector is in words, and the length passed
    to `byteArrayToBigNat#` is also in words.
 -}
-wordsBigNat' :: Vector Word -> BigNat
-wordsBigNat' v = case VP.length v of
+_wordsBigNat :: Vector Word -> BigNat
+_wordsBigNat v = case VP.length v of
  0 -> G.zeroBigNat
  1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
  n -> if offset v == 0 then extract v else extract (VP.force v)
@ -196,8 +203,8 @@ bigNatNat bn = case G.sizeofBigNat# bn of

 --------------------------------------------------------------------------------

-wordBytes :: Word -> ByteString
-wordBytes wor = BS.reverse $ BS.pack $ go 0 []
+_wordBytes :: Word -> ByteString
+_wordBytes wor = BS.reverse $ BS.pack $ go 0 []
 where
  go i acc | i >= BYT  = acc
  go i acc | otherwise = go (i + 1) (fromIntegral (shiftR wor (i * BYT)) : acc)
@ -213,8 +220,8 @@ bytesFirstWord buf = go 0 0

 --------------------------------------------------------------------------------

-pillWords :: Pill -> Vector Word
-pillWords = bsToWords . pillBytes
+_pillWords :: Pill -> Vector Word
+_pillWords = bsToWords . pillBytes

 wordsPill :: Vector Word -> Pill
 wordsPill = bytesPill . vecBytes . wordsToBytes
@ -247,8 +254,8 @@ vecBytes (Vector off sz buf) = unsafePerformIO $ do
  copyByteArrayToAddr dst# (Prim.ByteArray src#) soff sz =
    primitive_ (copyByteArrayToAddr# src# (unI# soff) dst# (unI# sz))

-bytesVec :: ByteString -> Vector Word8
-bytesVec bs = VP.generate (BS.length bs) (BS.index bs)
+_bytesVec :: ByteString -> Vector Word8
+_bytesVec bs = VP.generate (BS.length bs) (BS.index bs)


 --------------------------------------------------------------------------------
@ -318,9 +325,15 @@ stripBytes buf = BS.take (len - go 0 (len - 1)) buf


 importBytes :: ByteString -> Natural
-importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do
-  let Ptr a  = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
-  let W# sz# = fromIntegral sz
-  res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
-  Ptr.touchForeignPtr fp
-  pure res
+importBytes = go . stripBytes
+ where
+  go (BS.PS fp 0 sz) = unsafePerformIO $ do
+    let Ptr a  = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
+    let W# sz# = fromIntegral sz
+    res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
+    Ptr.touchForeignPtr fp
+    pure res
+
+  -- TODO Avoid this extra copy when given a slice. Should be able to
+  -- just offset the raw pointer.
+  go bs = importBytes (BS.copy bs)
--- a/pkg/hs/urbit-atom/package.yaml
+++ b/pkg/hs/urbit-atom/package.yaml
@ -4,9 +4,9 @@ license: MIT
 license-file: LICENSE

 ghc-options:
-  # -fwarn-incomplete-patterns
-  # -fwarn-unused-binds
-  # -fwarn-unused-imports
+  - -fwarn-incomplete-patterns
+  - -fwarn-unused-binds
+  - -fwarn-unused-imports
  - -Werror
  - -O2

--- a/pkg/hs/urbit-atom/test/Main.hs
+++ b/pkg/hs/urbit-atom/test/Main.hs
@ -5,16 +5,15 @@ module Main (main) where

 --------------------------------------------------------------------------------

-import Prelude
-import Numeric.Natural
-import Test.QuickCheck
 import Data.IORef
-import System.IO.Unsafe
+import Numeric.Natural
+import Prelude
 import System.Exit
+import System.IO.Unsafe
+import Test.QuickCheck

 import Control.Monad         (when)
 import Data.ByteString       (ByteString)
-import Data.Text             (Text)
 import Data.Vector.Primitive (Prim, Vector)

 import qualified Data.ByteString        as BS
--- a/pkg/hs/urbit-king/lib/Urbit/Noun/Jam.hs
+++ b/pkg/hs/urbit-king/lib/Urbit/Noun/Jam.hs
@ -10,7 +10,6 @@ module Urbit.Noun.Jam (jam, jamBS) where
 import ClassyPrelude hiding (hash)

 import Urbit.Atom
-import Urbit.Atom.Fast
 import Urbit.Noun.Core

 import Data.Bits                 (clearBit, setBit, shiftL, shiftR, (.|.))
@ -25,6 +24,7 @@ import GHC.Prim                  (Word#, plusWord#, word2Int#)
 import GHC.Word                  (Word(W#))
 import System.IO.Unsafe          (unsafePerformIO)

+import qualified Urbit.Atom.Fast        as Atom
 import qualified Data.ByteString.Unsafe as BS
 import qualified Data.HashTable.IO      as H
 import qualified Data.Vector.Primitive  as VP
@ -154,7 +154,7 @@ writeWord wor = do
 {-# INLINE writeBitsFromWord #-}
 writeBitsFromWord :: Int -> Word -> Put ()
 writeBitsFromWord wid wor = do
-    wor <- pure (takeBitsWord wid wor)
+    wor <- pure (Atom.takeBitsWord wid wor)

    oldSt <- getS

@ -175,7 +175,7 @@ writeBitsFromWord wid wor = do
 {-# INLINE writeAtomWord# #-}
 writeAtomWord# :: Word# -> Put ()
 writeAtomWord# w = do
-    writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w)
+    writeBitsFromWord (I# (word2Int# (Atom.wordBitWidth# w))) (W# w)

 {-# INLINE writeAtomWord #-}
 writeAtomWord :: Word -> Put ()
@ -188,7 +188,7 @@ writeAtomWord (W# w) = writeAtomWord# w
 -}
 {-# INLINE writeAtomBigNat #-}
 writeAtomBigNat :: BigNat -> Put ()
-writeAtomBigNat !(bigNatWords -> words) = do
+writeAtomBigNat !(Atom.bigNatWords -> words) = do
  let lastIdx = VP.length words - 1
  for_ [0..(lastIdx-1)] $ \i ->
      writeWord (words ! i)
@ -276,8 +276,8 @@ writeMat atm = do
    writeBitsFromWord (preWid-1) atmWid
    writeAtomBits atm
  where
-    atmWid = bitWidth atm
-    preWid = fromIntegral (wordBitWidth atmWid)
+    atmWid = Atom.atomBitWidth atm
+    preWid = fromIntegral (Atom.wordBitWidth atmWid)

 {-# INLINE writeCell #-}
 writeCell :: Noun -> Noun -> Put ()
@ -313,8 +313,8 @@ matSz# :: Atom -> Word#
 matSz# 0 = 1##
 matSz# a = preW `plusWord#` preW `plusWord#` atmW
  where
-    atmW = atomBitWidth# a
-    preW = wordBitWidth# atmW
+    atmW = Atom.atomBitWidth# a
+    preW = Atom.wordBitWidth# atmW

 {-# INLINE atomSz #-}
 atomSz :: Atom -> Word
@ -329,8 +329,8 @@ jamWordSz :: Word -> Word
 jamWordSz 0      = 2
 jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
  where
-    atmW = wordBitWidth# w
-    preW = wordBitWidth# atmW
+    atmW = Atom.wordBitWidth# w
+    preW = Atom.wordBitWidth# atmW

 compress :: Noun -> IO (Word, H.CuckooHashTable Word Word)
 compress !top = do