2020-03-12 23:02:45 +03:00
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
2020-01-23 12:22:30 +03:00
|
|
|
{-|
|
|
|
|
Atom implementation with fast conversions between bytestrings
|
|
|
|
and atoms.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Urbit.Atom
|
2020-03-07 01:10:58 +03:00
|
|
|
( Atom
|
|
|
|
, atomBytes
|
|
|
|
, bytesAtom
|
|
|
|
, atomWords
|
|
|
|
, wordsAtom
|
|
|
|
, utf8Atom
|
|
|
|
, atomUtf8
|
|
|
|
, atomUtf8Exn
|
|
|
|
, atomUtf8Lenient
|
|
|
|
)
|
|
|
|
where
|
2020-01-23 12:22:30 +03:00
|
|
|
|
|
|
|
import Prelude
|
|
|
|
|
|
|
|
import Data.ByteString (ByteString)
|
|
|
|
import Data.Vector.Primitive (Vector)
|
|
|
|
import GHC.Natural (Natural)
|
|
|
|
|
|
|
|
import qualified Data.Text as T
|
|
|
|
import qualified Data.Text.Encoding as T
|
|
|
|
import qualified Data.Text.Encoding.Error as T
|
2020-03-12 23:02:45 +03:00
|
|
|
|
|
|
|
#if defined(__GHCJS__)
|
2020-03-13 20:04:25 +03:00
|
|
|
import qualified Urbit.Atom.Slow as Slow
|
2020-03-12 23:02:45 +03:00
|
|
|
#endif
|
2020-01-23 12:22:30 +03:00
|
|
|
|
2020-03-13 20:04:25 +03:00
|
|
|
import qualified Urbit.Atom.Fast as A
|
|
|
|
|
2020-01-23 12:22:30 +03:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
-- | An atom is simply a 'Natural'. This is a plain type alias, not a
-- newtype, so the two are interchangeable everywhere.
type Atom = Natural
|
|
|
|
|
|
|
|
|
2020-03-18 21:25:58 +03:00
|
|
|
-- Choose Implementation Based on Platform -------------------------------------
|
|
|
|
|
|
|
|
{- |
    Serialize an 'Atom' into a 'ByteString'. O(n), copies.

    The hand-rolled implementation is faster, but it doesn't work on
    GHCJS. So, on GHCJS, GMP's `export` routine is used instead.

    TODO GMP's `export` routine also handles big-endian machines, so use
    it in that case too.
-}
atomBytes :: Atom -> ByteString
#if defined(__GHCJS__)
atomBytes = A.exportBytes
#else
atomBytes = A.atomBytes
#endif
|
|
|
|
|
|
|
|
{- |
    Build an 'Atom' from the contents of a 'ByteString'. O(n), copies.

    This always uses GMP's `import` routine (via 'A.importBytes'), since
    it's portable and faster than the hand-rolled implementation.
-}
bytesAtom :: ByteString -> Atom
bytesAtom bytes = A.importBytes bytes
|
|
|
|
|
|
|
|
{- |
    View an 'Atom' as a vector of machine words. O(1), does not copy.

    The fast implementation doesn't work on GHCJS, so fall back to the
    naive implementation ('Slow.atomWords') on that platform for now.
-}
atomWords :: Atom -> Vector Word
#if defined(__GHCJS__)
atomWords = Slow.atomWords
#else
atomWords = A.atomWords
#endif
|
2020-01-23 12:22:30 +03:00
|
|
|
|
2020-03-18 21:25:58 +03:00
|
|
|
{- |
    View a vector of machine words as an 'Atom'. O(1) and does not copy,
    unless the vector is a slice, in which case it is O(n).

    The fast implementation doesn't work on GHCJS, so fall back to the
    naive implementation ('Slow.wordsAtom') on that platform for now.
-}
wordsAtom :: Vector Word -> Atom
#if defined(__GHCJS__)
wordsAtom = Slow.wordsAtom
#else
wordsAtom = A.wordsAtom
#endif
|
|
|
|
|
|
|
|
|
2020-03-18 21:25:58 +03:00
|
|
|
-- String/Cord Conversion ------------------------------------------------------
|
2020-01-23 12:22:30 +03:00
|
|
|
|
|
|
|
-- | Produce the atom whose bytes are the UTF-8 encoding of the given text.
utf8Atom :: T.Text -> Atom
utf8Atom txt = bytesAtom (T.encodeUtf8 txt)
|
2020-01-23 12:22:30 +03:00
|
|
|
|
|
|
|
-- | Decode an atom's bytes as UTF-8 text, returning the decoding error in
-- 'Left' when the bytes are not valid UTF-8.
atomUtf8 :: Atom -> Either T.UnicodeException T.Text
atomUtf8 atom = T.decodeUtf8' (atomBytes atom)
|
2020-01-23 12:22:30 +03:00
|
|
|
|
|
|
|
-- | Decode an atom's bytes as UTF-8 text. Partial: throws an exception when
-- the bytes are not valid UTF-8. Prefer 'atomUtf8' when the input is not
-- known to be well-formed.
atomUtf8Exn :: Atom -> T.Text
atomUtf8Exn atom = T.decodeUtf8 (atomBytes atom)
|
2020-01-23 12:22:30 +03:00
|
|
|
|
|
|
|
-- | Decode an atom's bytes as UTF-8 text, substituting a replacement for any
-- invalid input (via 'T.lenientDecode') instead of failing.
atomUtf8Lenient :: Atom -> T.Text
atomUtf8Lenient atom = T.decodeUtf8With T.lenientDecode (atomBytes atom)
|