diff --git a/pkg/hs/urbit-atom/TODO.md b/pkg/hs/urbit-atom/TODO.md index a749d53cfc..c0d8369c9d 100644 --- a/pkg/hs/urbit-atom/TODO.md +++ b/pkg/hs/urbit-atom/TODO.md @@ -1,3 +1,5 @@ - Support big-endian CPUs (CPP flag; use GMP import/export) - Support `integer-simple`. (Android) +- `atomWords` and `wordsAtom` are unacceptably slow on GHCJS. + - This is only used in `jam`/`cue`, so it doesn't matter for now. - Understand why my hand rolled import is slower than GMP import. diff --git a/pkg/hs/urbit-atom/lib/Urbit/Atom.hs b/pkg/hs/urbit-atom/lib/Urbit/Atom.hs index bed5cbc5db..d640c2d7c9 100644 --- a/pkg/hs/urbit-atom/lib/Urbit/Atom.hs +++ b/pkg/hs/urbit-atom/lib/Urbit/Atom.hs @@ -39,28 +39,18 @@ import qualified Urbit.Atom.Fast as A type Atom = Natural --------------------------------------------------------------------------------- --- | Cast an atom to a vector. Does not copy. -atomWords :: Atom -> Vector Word -atomWords = -#if defined(__GHCJS__) - Slow.atomWords -#else - A.atomWords -#endif +-- Choose Implementation Based on Platform ------------------------------------- --- | Cast a vector to an atom. Does not copy unless given a slice. -wordsAtom :: Vector Word -> Atom -wordsAtom = -#if defined(__GHCJS__) - Slow.wordsAtom -#else - A.wordsAtom -#endif +{- | + Convert an Atom to a bytestring. O(n), copies. --- My export routine is faster but doesn't work in GHCJS. --- TODO: It also doesn't work on big-endian machines. + My hand-rolled implementation is faster, but doesn't work on GHCJS. So, + on GHCJS use GMP's `export` routine. + + TODO GMP's `export` routine also handles big endian machines, so use + in that case too. +-} atomBytes :: Atom -> ByteString atomBytes = #if defined(__GHCJS__) @@ -69,23 +59,59 @@ atomBytes = A.atomBytes #endif +{- | + Convert a bytestring to an Atom. O(n), copies. --- GMP's import is always faster than my loading routine. + This always uses GMP's `export` routine, since it's portable and faster + than my hand-rolled implementation. +-} bytesAtom :: ByteString -> Atom bytesAtom = A.importBytes +{- | + Cast an atom to a vector. O(1), does not copy. + + My fast implementation doesn't work on GHCJS, so fallback to the naive + implementation on that platform for now. +-} +atomWords :: Atom -> Vector Word +atomWords = +#if defined(__GHCJS__) + Slow.atomWords +#else + A.atomWords +#endif + +{- | + Cast a vector to an atom. O(1), does not copy unless given a slice, + then O(n). + + My fast implementation doesn't work on GHCJS, so fallback to the naive + implementation on that platform for now. +-} +wordsAtom :: Vector Word -> Atom +wordsAtom = +#if defined(__GHCJS__) + Slow.wordsAtom +#else + A.wordsAtom +#endif + + +-- String/Cord Conversion ------------------------------------------------------ + -- | Encode a utf8-encoded atom from text. utf8Atom :: T.Text -> Atom -utf8Atom = A.importBytes . T.encodeUtf8 +utf8Atom = bytesAtom . T.encodeUtf8 -- | Interpret an atom as utf8 text. atomUtf8 :: Atom -> Either T.UnicodeException T.Text -atomUtf8 = T.decodeUtf8' . A.exportBytes +atomUtf8 = T.decodeUtf8' . atomBytes -- | Interpret an atom as utf8 text, throwing an exception on bad unicode. atomUtf8Exn :: Atom -> T.Text -atomUtf8Exn = T.decodeUtf8 . A.exportBytes +atomUtf8Exn = T.decodeUtf8 . atomBytes -- | Interpret an atom as utf8 text, replacing bad unicode characters. atomUtf8Lenient :: Atom -> T.Text -atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes +atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes diff --git a/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs b/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs index a7f5840435..45ce364799 100644 --- a/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs +++ b/pkg/hs/urbit-atom/lib/Urbit/Atom/Fast.hs @@ -14,6 +14,13 @@ module Urbit.Atom.Fast , atomWords , exportBytes , importBytes + , wordBitWidth# + , wordBitWidth + , atomBitWidth# + , atomBitWidth + , takeBitsWord + , bigNatWords + , wordsBigNat , bit , byt ) @@ -31,7 +38,7 @@ import GHC.Exts (Int(..)) import GHC.Integer.GMP.Internals (BigNat(..)) import GHC.Natural (Natural(..)) import GHC.Prim (Int#, clz#, minusWord#, plusWord#) -import GHC.Prim (Word#, Addr#, int2Word#, subIntC#, timesWord#) +import GHC.Prim (Word#, Addr#, int2Word#, timesWord#) import GHC.Prim (copyByteArrayToAddr#) import GHC.Word (Word(..)) import System.IO.Unsafe (unsafePerformIO) @@ -86,8 +93,8 @@ atomBitWidth# :: Natural -> Word# atomBitWidth# (NatS# gl) = wordBitWidth# gl atomBitWidth# (NatJ# bn) = bigNatBitWidth# bn -bitWidth :: Num a => Natural -> a -bitWidth a = fromIntegral (W# (atomBitWidth# a)) +atomBitWidth :: Num a => Natural -> a +atomBitWidth a = fromIntegral (W# (atomBitWidth# a)) -------------------------------------------------------------------------------- @@ -159,8 +166,8 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of Note that the length of the vector is in words, and the length passed to `byteArrayToBigNat#` is also in words. -} -wordsBigNat' :: Vector Word -> BigNat -wordsBigNat' v = case VP.length v of +_wordsBigNat :: Vector Word -> BigNat +_wordsBigNat v = case VP.length v of 0 -> G.zeroBigNat 1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0 n -> if offset v == 0 then extract v else extract (VP.force v) @@ -196,8 +203,8 @@ bigNatNat bn = case G.sizeofBigNat# bn of -------------------------------------------------------------------------------- -wordBytes :: Word -> ByteString -wordBytes wor = BS.reverse $ BS.pack $ go 0 [] +_wordBytes :: Word -> ByteString +_wordBytes wor = BS.reverse $ BS.pack $ go 0 [] where go i acc | i >= BYT = acc go i acc | otherwise = go (i + 1) (fromIntegral (shiftR wor (i * BYT)) : acc) @@ -213,8 +220,8 @@ bytesFirstWord buf = go 0 0 -------------------------------------------------------------------------------- -pillWords :: Pill -> Vector Word -pillWords = bsToWords . pillBytes +_pillWords :: Pill -> Vector Word +_pillWords = bsToWords . pillBytes wordsPill :: Vector Word -> Pill wordsPill = bytesPill . vecBytes . wordsToBytes @@ -247,8 +254,8 @@ vecBytes (Vector off sz buf) = unsafePerformIO $ do copyByteArrayToAddr dst# (Prim.ByteArray src#) soff sz = primitive_ (copyByteArrayToAddr# src# (unI# soff) dst# (unI# sz)) -bytesVec :: ByteString -> Vector Word8 -bytesVec bs = VP.generate (BS.length bs) (BS.index bs) +_bytesVec :: ByteString -> Vector Word8 +_bytesVec bs = VP.generate (BS.length bs) (BS.index bs) -------------------------------------------------------------------------------- @@ -318,9 +325,15 @@ stripBytes buf = BS.take (len - go 0 (len - 1)) buf importBytes :: ByteString -> Natural -importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do - let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe! - let W# sz# = fromIntegral sz - res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0# - Ptr.touchForeignPtr fp - pure res +importBytes = go . stripBytes + where + go (BS.PS fp 0 sz) = unsafePerformIO $ do + let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe! + let W# sz# = fromIntegral sz + res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0# + Ptr.touchForeignPtr fp + pure res + + -- TODO Avoid this extra copy when given a slice. Should be able to + -- just offset the raw pointer. + go bs = importBytes (BS.copy bs) diff --git a/pkg/hs/urbit-atom/package.yaml b/pkg/hs/urbit-atom/package.yaml index 630657d4b4..369c2a1aa0 100644 --- a/pkg/hs/urbit-atom/package.yaml +++ b/pkg/hs/urbit-atom/package.yaml @@ -4,9 +4,9 @@ license: MIT license-file: LICENSE ghc-options: - # -fwarn-incomplete-patterns - # -fwarn-unused-binds - # -fwarn-unused-imports + - -fwarn-incomplete-patterns + - -fwarn-unused-binds + - -fwarn-unused-imports - -Werror - -O2 diff --git a/pkg/hs/urbit-atom/test/Main.hs b/pkg/hs/urbit-atom/test/Main.hs index 0365a2e3c2..e41449b5d0 100644 --- a/pkg/hs/urbit-atom/test/Main.hs +++ b/pkg/hs/urbit-atom/test/Main.hs @@ -5,16 +5,15 @@ module Main (main) where -------------------------------------------------------------------------------- -import Prelude -import Numeric.Natural -import Test.QuickCheck import Data.IORef -import System.IO.Unsafe +import Numeric.Natural +import Prelude import System.Exit +import System.IO.Unsafe +import Test.QuickCheck import Control.Monad (when) import Data.ByteString (ByteString) -import Data.Text (Text) import Data.Vector.Primitive (Prim, Vector) import qualified Data.ByteString as BS diff --git a/pkg/hs/urbit-king/lib/Urbit/Noun/Jam.hs b/pkg/hs/urbit-king/lib/Urbit/Noun/Jam.hs index d76f1923a3..3b5d02f44f 100644 --- a/pkg/hs/urbit-king/lib/Urbit/Noun/Jam.hs +++ b/pkg/hs/urbit-king/lib/Urbit/Noun/Jam.hs @@ -10,7 +10,6 @@ module Urbit.Noun.Jam (jam, jamBS) where import ClassyPrelude hiding (hash) import Urbit.Atom -import Urbit.Atom.Fast import Urbit.Noun.Core import Data.Bits (clearBit, setBit, shiftL, shiftR, (.|.)) @@ -25,6 +24,7 @@ import GHC.Prim (Word#, plusWord#, word2Int#) import GHC.Word (Word(W#)) import System.IO.Unsafe (unsafePerformIO) +import qualified Urbit.Atom.Fast as Atom import qualified Data.ByteString.Unsafe as BS import qualified Data.HashTable.IO as H import qualified Data.Vector.Primitive as VP @@ -154,7 +154,7 @@ writeWord wor = do {-# INLINE writeBitsFromWord #-} writeBitsFromWord :: Int -> Word -> Put () writeBitsFromWord wid wor = do - wor <- pure (takeBitsWord wid wor) + wor <- pure (Atom.takeBitsWord wid wor) oldSt <- getS @@ -175,7 +175,7 @@ writeBitsFromWord wid wor = do {-# INLINE writeAtomWord# #-} writeAtomWord# :: Word# -> Put () writeAtomWord# w = do - writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w) + writeBitsFromWord (I# (word2Int# (Atom.wordBitWidth# w))) (W# w) {-# INLINE writeAtomWord #-} writeAtomWord :: Word -> Put () @@ -188,7 +188,7 @@ writeAtomWord (W# w) = writeAtomWord# w -} {-# INLINE writeAtomBigNat #-} writeAtomBigNat :: BigNat -> Put () -writeAtomBigNat !(bigNatWords -> words) = do +writeAtomBigNat !(Atom.bigNatWords -> words) = do let lastIdx = VP.length words - 1 for_ [0..(lastIdx-1)] $ \i -> writeWord (words ! i) @@ -276,8 +276,8 @@ writeMat atm = do writeBitsFromWord (preWid-1) atmWid writeAtomBits atm where - atmWid = bitWidth atm - preWid = fromIntegral (wordBitWidth atmWid) + atmWid = Atom.atomBitWidth atm + preWid = fromIntegral (Atom.wordBitWidth atmWid) {-# INLINE writeCell #-} writeCell :: Noun -> Noun -> Put () @@ -313,8 +313,8 @@ matSz# :: Atom -> Word# matSz# 0 = 1## matSz# a = preW `plusWord#` preW `plusWord#` atmW where - atmW = atomBitWidth# a - preW = wordBitWidth# atmW + atmW = Atom.atomBitWidth# a + preW = Atom.wordBitWidth# atmW {-# INLINE atomSz #-} atomSz :: Atom -> Word @@ -329,8 +329,8 @@ jamWordSz :: Word -> Word jamWordSz 0 = 2 jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW) where - atmW = wordBitWidth# w - preW = wordBitWidth# atmW + atmW = Atom.wordBitWidth# w + preW = Atom.wordBitWidth# atmW compress :: Noun -> IO (Word, H.CuckooHashTable Word Word) compress !top = do