mirror of
https://github.com/urbit/shrub.git
synced 2024-11-24 04:58:08 +03:00
urbit-atom: Cleanup
This commit is contained in:
parent
33ef966afc
commit
5bf82f46c2
@ -1,3 +1,5 @@
|
||||
- Support big-endian CPUs (CPP flag; use GMP import/export)
|
||||
- Support `integer-simple`. (Android)
|
||||
- `atomWords` and `wordsAtom` are unacceptably slow on GHCJS.
|
||||
- This is only used in `jam`/`cue`, so it doesn't matter for now.
|
||||
- Understand why my hand rolled import is slower than GMP import.
|
||||
|
@ -39,28 +39,18 @@ import qualified Urbit.Atom.Fast as A
|
||||
|
||||
type Atom = Natural
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- | Cast an atom to a vector. Does not copy.
|
||||
atomWords :: Atom -> Vector Word
|
||||
atomWords =
|
||||
#if defined(__GHCJS__)
|
||||
Slow.atomWords
|
||||
#else
|
||||
A.atomWords
|
||||
#endif
|
||||
-- Choose Implementation Based on Platform -------------------------------------
|
||||
|
||||
-- | Cast a vector to an atom. Does not copy unless given a slice.
|
||||
wordsAtom :: Vector Word -> Atom
|
||||
wordsAtom =
|
||||
#if defined(__GHCJS__)
|
||||
Slow.wordsAtom
|
||||
#else
|
||||
A.wordsAtom
|
||||
#endif
|
||||
{- |
|
||||
Convert an Atom to a bytestring. O(n), copies.
|
||||
|
||||
-- My export routine is faster but doesn't work in GHCJS.
|
||||
-- TODO: It also doesn't work on big-endian machines.
|
||||
My hand-rolled implementation is faster, but doesn't work on GHCJS. So,
|
||||
on GHCJS use GMP's `export` routine.
|
||||
|
||||
TODO GMP's `export` routine also handles big endian machines, so use
|
||||
in that case too.
|
||||
-}
|
||||
atomBytes :: Atom -> ByteString
|
||||
atomBytes =
|
||||
#if defined(__GHCJS__)
|
||||
@ -69,23 +59,59 @@ atomBytes =
|
||||
A.atomBytes
|
||||
#endif
|
||||
|
||||
{- |
|
||||
Convert a bytestring to an Atom. O(n), copies.
|
||||
|
||||
-- GMP's import is always faster than my loading routine.
|
||||
This always uses GMP's `export` routine, since it's portable and faster
|
||||
than my hand-rolled implementation.
|
||||
-}
|
||||
bytesAtom :: ByteString -> Atom
|
||||
bytesAtom = A.importBytes
|
||||
|
||||
{- |
|
||||
Cast an atom to a vector. O(1), does not copy.
|
||||
|
||||
My fast implementation doesn't work on GHCJS, so fallback to the naive
|
||||
implementation on that platform for now.
|
||||
-}
|
||||
atomWords :: Atom -> Vector Word
|
||||
atomWords =
|
||||
#if defined(__GHCJS__)
|
||||
Slow.atomWords
|
||||
#else
|
||||
A.atomWords
|
||||
#endif
|
||||
|
||||
{- |
|
||||
Cast a vector to an atom. O(1), does not copy unless given a slice,
|
||||
then O(n).
|
||||
|
||||
My fast implementation doesn't work on GHCJS, so fallback to the naive
|
||||
implementation on that platform for now.
|
||||
-}
|
||||
wordsAtom :: Vector Word -> Atom
|
||||
wordsAtom =
|
||||
#if defined(__GHCJS__)
|
||||
Slow.wordsAtom
|
||||
#else
|
||||
A.wordsAtom
|
||||
#endif
|
||||
|
||||
|
||||
-- String/Cord Conversion ------------------------------------------------------
|
||||
|
||||
-- | Encode a utf8-encoded atom from text.
|
||||
utf8Atom :: T.Text -> Atom
|
||||
utf8Atom = A.importBytes . T.encodeUtf8
|
||||
utf8Atom = bytesAtom . T.encodeUtf8
|
||||
|
||||
-- | Interpret an atom as utf8 text.
|
||||
atomUtf8 :: Atom -> Either T.UnicodeException T.Text
|
||||
atomUtf8 = T.decodeUtf8' . A.exportBytes
|
||||
atomUtf8 = T.decodeUtf8' . atomBytes
|
||||
|
||||
-- | Interpret an atom as utf8 text, throwing an exception on bad unicode.
|
||||
atomUtf8Exn :: Atom -> T.Text
|
||||
atomUtf8Exn = T.decodeUtf8 . A.exportBytes
|
||||
atomUtf8Exn = T.decodeUtf8 . atomBytes
|
||||
|
||||
-- | Interpret an atom as utf8 text, replacing bad unicode characters.
|
||||
atomUtf8Lenient :: Atom -> T.Text
|
||||
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . A.exportBytes
|
||||
atomUtf8Lenient = T.decodeUtf8With T.lenientDecode . atomBytes
|
||||
|
@ -14,6 +14,13 @@ module Urbit.Atom.Fast
|
||||
, atomWords
|
||||
, exportBytes
|
||||
, importBytes
|
||||
, wordBitWidth#
|
||||
, wordBitWidth
|
||||
, atomBitWidth#
|
||||
, atomBitWidth
|
||||
, takeBitsWord
|
||||
, bigNatWords
|
||||
, wordsBigNat
|
||||
, bit
|
||||
, byt
|
||||
)
|
||||
@ -31,7 +38,7 @@ import GHC.Exts (Int(..))
|
||||
import GHC.Integer.GMP.Internals (BigNat(..))
|
||||
import GHC.Natural (Natural(..))
|
||||
import GHC.Prim (Int#, clz#, minusWord#, plusWord#)
|
||||
import GHC.Prim (Word#, Addr#, int2Word#, subIntC#, timesWord#)
|
||||
import GHC.Prim (Word#, Addr#, int2Word#, timesWord#)
|
||||
import GHC.Prim (copyByteArrayToAddr#)
|
||||
import GHC.Word (Word(..))
|
||||
import System.IO.Unsafe (unsafePerformIO)
|
||||
@ -86,8 +93,8 @@ atomBitWidth# :: Natural -> Word#
|
||||
atomBitWidth# (NatS# gl) = wordBitWidth# gl
|
||||
atomBitWidth# (NatJ# bn) = bigNatBitWidth# bn
|
||||
|
||||
bitWidth :: Num a => Natural -> a
|
||||
bitWidth a = fromIntegral (W# (atomBitWidth# a))
|
||||
atomBitWidth :: Num a => Natural -> a
|
||||
atomBitWidth a = fromIntegral (W# (atomBitWidth# a))
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
@ -159,8 +166,8 @@ wordsBigNat v@(Vector off (I# len) (Prim.ByteArray buf)) = case VP.length v of
|
||||
Note that the length of the vector is in words, and the length passed
|
||||
to `byteArrayToBigNat#` is also in words.
|
||||
-}
|
||||
wordsBigNat' :: Vector Word -> BigNat
|
||||
wordsBigNat' v = case VP.length v of
|
||||
_wordsBigNat :: Vector Word -> BigNat
|
||||
_wordsBigNat v = case VP.length v of
|
||||
0 -> G.zeroBigNat
|
||||
1 -> G.wordToBigNat w where W# w = VP.unsafeIndex v 0
|
||||
n -> if offset v == 0 then extract v else extract (VP.force v)
|
||||
@ -196,8 +203,8 @@ bigNatNat bn = case G.sizeofBigNat# bn of
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
wordBytes :: Word -> ByteString
|
||||
wordBytes wor = BS.reverse $ BS.pack $ go 0 []
|
||||
_wordBytes :: Word -> ByteString
|
||||
_wordBytes wor = BS.reverse $ BS.pack $ go 0 []
|
||||
where
|
||||
go i acc | i >= BYT = acc
|
||||
go i acc | otherwise = go (i + 1) (fromIntegral (shiftR wor (i * BYT)) : acc)
|
||||
@ -213,8 +220,8 @@ bytesFirstWord buf = go 0 0
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
pillWords :: Pill -> Vector Word
|
||||
pillWords = bsToWords . pillBytes
|
||||
_pillWords :: Pill -> Vector Word
|
||||
_pillWords = bsToWords . pillBytes
|
||||
|
||||
wordsPill :: Vector Word -> Pill
|
||||
wordsPill = bytesPill . vecBytes . wordsToBytes
|
||||
@ -247,8 +254,8 @@ vecBytes (Vector off sz buf) = unsafePerformIO $ do
|
||||
copyByteArrayToAddr dst# (Prim.ByteArray src#) soff sz =
|
||||
primitive_ (copyByteArrayToAddr# src# (unI# soff) dst# (unI# sz))
|
||||
|
||||
bytesVec :: ByteString -> Vector Word8
|
||||
bytesVec bs = VP.generate (BS.length bs) (BS.index bs)
|
||||
_bytesVec :: ByteString -> Vector Word8
|
||||
_bytesVec bs = VP.generate (BS.length bs) (BS.index bs)
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
@ -318,9 +325,15 @@ stripBytes buf = BS.take (len - go 0 (len - 1)) buf
|
||||
|
||||
|
||||
importBytes :: ByteString -> Natural
|
||||
importBytes (stripBytes -> BS.PS fp 0 sz) = unsafePerformIO $ do
|
||||
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
|
||||
let W# sz# = fromIntegral sz
|
||||
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
|
||||
Ptr.touchForeignPtr fp
|
||||
pure res
|
||||
importBytes = go . stripBytes
|
||||
where
|
||||
go (BS.PS fp 0 sz) = unsafePerformIO $ do
|
||||
let Ptr a = Ptr.unsafeForeignPtrToPtr fp -- TODO Not safe!
|
||||
let W# sz# = fromIntegral sz
|
||||
res <- bigNatNatural <$> G.importBigNatFromAddr a sz# 0#
|
||||
Ptr.touchForeignPtr fp
|
||||
pure res
|
||||
|
||||
-- TODO Avoid this extra copy when given a slice. Should be able to
|
||||
-- just offset the raw pointer.
|
||||
go bs = importBytes (BS.copy bs)
|
||||
|
@ -4,9 +4,9 @@ license: MIT
|
||||
license-file: LICENSE
|
||||
|
||||
ghc-options:
|
||||
# -fwarn-incomplete-patterns
|
||||
# -fwarn-unused-binds
|
||||
# -fwarn-unused-imports
|
||||
- -fwarn-incomplete-patterns
|
||||
- -fwarn-unused-binds
|
||||
- -fwarn-unused-imports
|
||||
- -Werror
|
||||
- -O2
|
||||
|
||||
|
@ -5,16 +5,15 @@ module Main (main) where
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
import Prelude
|
||||
import Numeric.Natural
|
||||
import Test.QuickCheck
|
||||
import Data.IORef
|
||||
import System.IO.Unsafe
|
||||
import Numeric.Natural
|
||||
import Prelude
|
||||
import System.Exit
|
||||
import System.IO.Unsafe
|
||||
import Test.QuickCheck
|
||||
|
||||
import Control.Monad (when)
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Text (Text)
|
||||
import Data.Vector.Primitive (Prim, Vector)
|
||||
|
||||
import qualified Data.ByteString as BS
|
||||
|
@ -10,7 +10,6 @@ module Urbit.Noun.Jam (jam, jamBS) where
|
||||
import ClassyPrelude hiding (hash)
|
||||
|
||||
import Urbit.Atom
|
||||
import Urbit.Atom.Fast
|
||||
import Urbit.Noun.Core
|
||||
|
||||
import Data.Bits (clearBit, setBit, shiftL, shiftR, (.|.))
|
||||
@ -25,6 +24,7 @@ import GHC.Prim (Word#, plusWord#, word2Int#)
|
||||
import GHC.Word (Word(W#))
|
||||
import System.IO.Unsafe (unsafePerformIO)
|
||||
|
||||
import qualified Urbit.Atom.Fast as Atom
|
||||
import qualified Data.ByteString.Unsafe as BS
|
||||
import qualified Data.HashTable.IO as H
|
||||
import qualified Data.Vector.Primitive as VP
|
||||
@ -154,7 +154,7 @@ writeWord wor = do
|
||||
{-# INLINE writeBitsFromWord #-}
|
||||
writeBitsFromWord :: Int -> Word -> Put ()
|
||||
writeBitsFromWord wid wor = do
|
||||
wor <- pure (takeBitsWord wid wor)
|
||||
wor <- pure (Atom.takeBitsWord wid wor)
|
||||
|
||||
oldSt <- getS
|
||||
|
||||
@ -175,7 +175,7 @@ writeBitsFromWord wid wor = do
|
||||
{-# INLINE writeAtomWord# #-}
|
||||
writeAtomWord# :: Word# -> Put ()
|
||||
writeAtomWord# w = do
|
||||
writeBitsFromWord (I# (word2Int# (wordBitWidth# w))) (W# w)
|
||||
writeBitsFromWord (I# (word2Int# (Atom.wordBitWidth# w))) (W# w)
|
||||
|
||||
{-# INLINE writeAtomWord #-}
|
||||
writeAtomWord :: Word -> Put ()
|
||||
@ -188,7 +188,7 @@ writeAtomWord (W# w) = writeAtomWord# w
|
||||
-}
|
||||
{-# INLINE writeAtomBigNat #-}
|
||||
writeAtomBigNat :: BigNat -> Put ()
|
||||
writeAtomBigNat !(bigNatWords -> words) = do
|
||||
writeAtomBigNat !(Atom.bigNatWords -> words) = do
|
||||
let lastIdx = VP.length words - 1
|
||||
for_ [0..(lastIdx-1)] $ \i ->
|
||||
writeWord (words ! i)
|
||||
@ -276,8 +276,8 @@ writeMat atm = do
|
||||
writeBitsFromWord (preWid-1) atmWid
|
||||
writeAtomBits atm
|
||||
where
|
||||
atmWid = bitWidth atm
|
||||
preWid = fromIntegral (wordBitWidth atmWid)
|
||||
atmWid = Atom.atomBitWidth atm
|
||||
preWid = fromIntegral (Atom.wordBitWidth atmWid)
|
||||
|
||||
{-# INLINE writeCell #-}
|
||||
writeCell :: Noun -> Noun -> Put ()
|
||||
@ -313,8 +313,8 @@ matSz# :: Atom -> Word#
|
||||
matSz# 0 = 1##
|
||||
matSz# a = preW `plusWord#` preW `plusWord#` atmW
|
||||
where
|
||||
atmW = atomBitWidth# a
|
||||
preW = wordBitWidth# atmW
|
||||
atmW = Atom.atomBitWidth# a
|
||||
preW = Atom.wordBitWidth# atmW
|
||||
|
||||
{-# INLINE atomSz #-}
|
||||
atomSz :: Atom -> Word
|
||||
@ -329,8 +329,8 @@ jamWordSz :: Word -> Word
|
||||
jamWordSz 0 = 2
|
||||
jamWordSz (W# w) = 1 + 2*(W# preW) + (W# atmW)
|
||||
where
|
||||
atmW = wordBitWidth# w
|
||||
preW = wordBitWidth# atmW
|
||||
atmW = Atom.wordBitWidth# w
|
||||
preW = Atom.wordBitWidth# atmW
|
||||
|
||||
compress :: Noun -> IO (Word, H.CuckooHashTable Word Word)
|
||||
compress !top = do
|
||||
|
Loading…
Reference in New Issue
Block a user