urbit-atom: Cleanup

This commit is contained in:
Benjamin Summers 2020-03-18 11:25:58 -07:00
parent 33ef966afc
commit 5bf82f46c2
6 changed files with 99 additions and 59 deletions

View File

@ -1,3 +1,5 @@
- Support big-endian CPUs (CPP flag; use GMP import/export)
- Support `integer-simple`. (Android)
- `atomWords` and `wordsAtom` are unacceptably slow on GHCJS.
- This is only used in `jam`/`cue`, so it doesn't matter for now.
- Understand why my hand rolled import is slower than GMP import.

View File

@ -39,28 +39,18 @@ import qualified Urbit.Atom.Fast as A
type Atom = Natural
--------------------------------------------------------------------------------
-- | Cast an atom to a vector. Does not copy.
atomWords :: Atom -> Vector Word
atomWords =
#if defined(__GHCJS__)
Slow.atomWords
#else
A.atomWords
#endif
-- Choose Implementation Based on Platform -------------------------------------
-- | Cast a vector to an atom. Does not copy unless given a slice.
wordsAtom :: Vector Word -> Atom
wordsAtom =
#if defined(__GHCJS__)
Slow.wordsAtom
#else
A.wordsAtom
#endif
{- |
Convert an Atom to a bytestring. O(n), copies.
-- My export routine is faster but doesn't work in GHCJS.
-- TODO: It also doesn't work on big-endian machines.
My hand-rolled implementation is faster, but doesn't work on GHCJS. So,
on GHCJS use GMP's `export` routine.
TODO GMP's `export` routine also handles big endian machines, so use
in that case too.
-}
atomBytes :: Atom -> ByteString
atomBytes =
#if defined(__GHCJS__)
@ -69,23 +59,59 @@ atomBytes =
A.atomBytes
#endif
{- |
Convert a bytestring to an Atom. O(n), copies.
-- GMP's import is always faster than my loading routine.
This always uses GMP's `export` routine, since it's portable and faster
than my hand-rolled implementation.
-}
bytesAtom :: ByteString -> Atom
bytesAtom = A.importBytes
{- |
Cast an atom to a vector. O(1), does not copy.
My fast implementation doesn't work on GHCJS, so fallback to the naive
implementation on that platform for now.
-}
atomWords :: Atom -> Vector Word
atomWords =
#if defined(__GHCJS__)
Slow.atomWords
#else
A.atomWords
#endif
{- |
Cast a vector to an atom. O(1), does not copy unless given a slice,
then O(n).
My fast implementation doesn't work on GHCJS, so fallback to the naive
implementation on that platform for now.
-}
wordsAtom :: Vector Word -> Atom
wordsAtom =
#if defined(__GHCJS__)
Slow.wordsAtom
#else
A.wordsAtom
#endif
-- String/Cord Conversion ------------------------------------------------------
-- | Encode a utf8-encoded atom from text.
utf8Atom :: T.Text -> Atom
utf8Atom = A.importBytes . T.encodeUtf8
utf8Atom = bytesAtom . T.encodeUtf8
-- | Interpret an atom as utf8 text.
atomUtf8 :: Atom -> Either T.UnicodeException T.Text
atomUtf8 = T.decodeUtf8' . A.exportBytes
atomUtf8 = T.decodeUtf8' . atomBytes
-- | Interpret an atom as utf8 text, throwing an exception on bad unicode.
atomUtf8Exn :: Atom -> T.Text
atomUtf8Exn = T.decodeUtf8 . A.exportBytes
atomUtf8Exn = T.decodeUtf8 . atomBytes
-- | Interpret an atom as utf8 text, replacing bad unicode characters.
atomUtf8Lenient :: Atom -> T.Text
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes

View File

@ -14,6 +14,13 @@ module Urbit.Atom.Fast
, atomWords
, exportBytes
, importBytes
, wordBitWidth#
, wordBitWidth
, atomBitWidth#
, atomBitWidth
, takeBitsWord
, bigNatWords
, wordsBigNat
, bit
, byt
)
@ -31,7 +38,7 @@ import GHC.Exts (Int(..))
import GHC.Integer.GMP.Internals (BigNat(..))
import GHC.Natural (Natural(..))
import GHC.Prim (Int#, clz#, minusWord#, plusWord#)
import GHC.Prim (Word#, Addr#, int2Word#, subIntC#, timesWord#)
import GHC.Prim (Word#, Addr#, int2Word#, timesWord#)
import GHC.Prim (copyByteArrayToAddr#)
import GHC.Word (Word(..))
import System.IO.Unsafe (unsafePerformIO)
@ -86,8 +93,8 @@ atomBitWidth# :: Natural -> Word#
atomBitWidth# (NatS# gl) = wordBitWidth# gl
atomBitWidth# (NatJ# bn) = bigNatBitWidth# bn
bitWidth :: Num a => Natural -> a
bitWidth a = fromIntegral (W# (atomBitWidth# a))
atomBitWidth :: Num a => Natural -> a
atomBitWidth a = fromIntegral (W# (atomBitWidth# a))
--------------------------------------------------------------------------------
@ -159,8 +166,8 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
Note that the length of the vector is in words, and the length passed
to `byteArrayToBigNat#` is also in words.
-}
wordsBigNat' :: Vector Word -> BigNat
wordsBigNat' v = case VP.length v of
_wordsBigNat :: Vector Word -> BigNat
_wordsBigNat v = case VP.length v of
0 -> G.zeroBigNat
1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
n -> if offset v == 0 then extract v else extract (VP.force v)
@ -196,8 +203,8 @@ bigNatNat bn = case G.sizeofBigNat# bn of
--------------------------------------------------------------------------------
wordBytes :: Word -> ByteString
wordBytes wor = BS.reverse $ BS.pack $ go 0 []
_wordBytes :: Word -> ByteString
_wordBytes wor = BS.reverse $ BS.pack $ go 0 []
where
go i acc | i >= BYT = acc
go i acc | otherwise = go (i + 1) (fromIntegral (shiftR wor (i * BYT)) : acc)
@ -213,8 +220,8 @@ bytesFirstWord buf = go 0 0
--------------------------------------------------------------------------------
pillWords :: Pill -> Vector Word
pillWords = bsToWords . pillBytes
_pillWords :: Pill -> Vector Word
_pillWords = bsToWords . pillBytes
wordsPill :: Vector Word -> Pill
wordsPill = bytesPill . vecBytes . wordsToBytes
@ -247,8 +254,8 @@ vecBytes (Vector off sz buf) = unsafePerformIO $ do
copyByteArrayToAddr dst# (Prim.ByteArray src#) soff sz =
primitive_ (copyByteArrayToAddr# src# (unI# soff) dst# (unI# sz))
bytesVec :: ByteString -> Vector Word8
bytesVec bs = VP.generate (BS.length bs) (BS.index bs)
_bytesVec :: ByteString -> Vector Word8
_bytesVec bs = VP.generate (BS.length bs) (BS.index bs)
--------------------------------------------------------------------------------
@ -318,9 +325,15 @@ stripBytes buf = BS.take (len - go 0 (len - 1)) buf
importBytes :: ByteString -> Natural
importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
let W# sz# = fromIntegral sz
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
Ptr.touchForeignPtr fp
pure res
importBytes = go . stripBytes
where
go (BS.PS fp 0 sz) = unsafePerformIO $ do
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
let W# sz# = fromIntegral sz
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
Ptr.touchForeignPtr fp
pure res
-- TODO Avoid this extra copy when given a slice. Should be able to
-- just offset the raw pointer.
go bs = importBytes (BS.copy bs)

View File

@ -4,9 +4,9 @@ license: MIT
license-file: LICENSE
ghc-options:
# -fwarn-incomplete-patterns
# -fwarn-unused-binds
# -fwarn-unused-imports
- -fwarn-incomplete-patterns
- -fwarn-unused-binds
- -fwarn-unused-imports
- -Werror
- -O2

View File

@ -5,16 +5,15 @@ module Main (main) where
--------------------------------------------------------------------------------
import Prelude
import Numeric.Natural
import Test.QuickCheck
import Data.IORef
import System.IO.Unsafe
import Numeric.Natural
import Prelude
import System.Exit
import System.IO.Unsafe
import Test.QuickCheck
import Control.Monad (when)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Vector.Primitive (Prim, Vector)
import qualified Data.ByteString as BS

View File

@ -10,7 +10,6 @@ module Urbit.Noun.Jam (jam, jamBS) where
import ClassyPrelude hiding (hash)
import Urbit.Atom
import Urbit.Atom.Fast
import Urbit.Noun.Core
import Data.Bits (clearBit, setBit, shiftL, shiftR, (.|.))
@ -25,6 +24,7 @@ import GHC.Prim (Word#, plusWord#, word2Int#)
import GHC.Word (Word(W#))
import System.IO.Unsafe (unsafePerformIO)
import qualified Urbit.Atom.Fast as Atom
import qualified Data.ByteString.Unsafe as BS
import qualified Data.HashTable.IO as H
import qualified Data.Vector.Primitive as VP
@ -154,7 +154,7 @@ writeWord wor = do
{-# INLINE writeBitsFromWord #-}
writeBitsFromWord :: Int -> Word -> Put ()
writeBitsFromWord wid wor = do
wor <- pure (takeBitsWord wid wor)
wor <- pure (Atom.takeBitsWord wid wor)
oldSt <- getS
@ -175,7 +175,7 @@ writeBitsFromWord wid wor = do
{-# INLINE writeAtomWord# #-}
writeAtomWord# :: Word# -> Put ()
writeAtomWord# w = do
writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w)
writeBitsFromWord (I# (word2Int# (Atom.wordBitWidth# w))) (W# w)
{-# INLINE writeAtomWord #-}
writeAtomWord :: Word -> Put ()
@ -188,7 +188,7 @@ writeAtomWord (W# w) = writeAtomWord# w
-}
{-# INLINE writeAtomBigNat #-}
writeAtomBigNat :: BigNat -> Put ()
writeAtomBigNat !(bigNatWords -> words) = do
writeAtomBigNat !(Atom.bigNatWords -> words) = do
let lastIdx = VP.length words - 1
for_ [0..(lastIdx-1)] $ \i ->
writeWord (words ! i)
@ -276,8 +276,8 @@ writeMat atm = do
writeBitsFromWord (preWid-1) atmWid
writeAtomBits atm
where
atmWid = bitWidth atm
preWid = fromIntegral (wordBitWidth atmWid)
atmWid = Atom.atomBitWidth atm
preWid = fromIntegral (Atom.wordBitWidth atmWid)
{-# INLINE writeCell #-}
writeCell :: Noun -> Noun -> Put ()
@ -313,8 +313,8 @@ matSz# :: Atom -> Word#
matSz# 0 = 1##
matSz# a = preW `plusWord#` preW `plusWord#` atmW
where
atmW = atomBitWidth# a
preW = wordBitWidth# atmW
atmW = Atom.atomBitWidth# a
preW = Atom.wordBitWidth# atmW
{-# INLINE atomSz #-}
atomSz :: Atom -> Word
@ -329,8 +329,8 @@ jamWordSz :: Word -> Word
jamWordSz 0 = 2
jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
where
atmW = wordBitWidth# w
preW = wordBitWidth# atmW
atmW = Atom.wordBitWidth# w
preW = Atom.wordBitWidth# atmW
compress :: Noun -> IO (Word, H.CuckooHashTable Word Word)
compress !top = do