Add a typeclass for serializing variable length types

This commit is contained in:
Adithya Kumar 2023-06-13 14:43:44 +05:30
parent eea1e0f634
commit 58b86c5281
3 changed files with 184 additions and 0 deletions

View File

@ -0,0 +1,152 @@
module Streamly.Internal.Data.Serialize
( Size(..)
, Serialize(..)
) where
--------------------------------------------------------------------------------
-- Imports
--------------------------------------------------------------------------------
import Control.Monad (void)
import Data.List (foldl')
import Data.Proxy (Proxy (..))
import Streamly.Internal.Data.Unbox (MutableByteArray(..))
import GHC.Int (Int16(..), Int32(..), Int64(..), Int8(..))
import GHC.Word (Word16(..), Word32(..), Word64(..), Word8(..))
import GHC.Stable (StablePtr(..))
import qualified Streamly.Internal.Data.Unbox as Unbox
import GHC.Exts
--------------------------------------------------------------------------------
-- Types
--------------------------------------------------------------------------------
-- | Info about the length of a serializable type. Length can depend on the
-- value or can be independent.
data Size a
= VarSize (a -> Int)
| ConstSize !Int
-- | A type implementing the 'Serialize' interface supplies operations for
-- reading and writing the type from and to a mutable byte array (an unboxed
-- representation of the type) in memory. The read operation 'deserialize'
-- deserializes the boxed type from the mutable byte array. The write operation
-- 'serialize' serializes the boxed type to the mutable byte array.
--
-- 'Serialize' contains enough information to serialize and deserialize variable
-- length types.
--
-- >>> import Streamly.Internal.Data.Serialize (Serialize(..), Size(..))
--
-- >>> :{
-- data Object = Object
-- { _varLen :: [Int]
-- , _constLen :: Int
-- }
-- :}
--
-- >>> :{
-- instance Serialize Object where
-- size =
-- case (size :: Size [Int], size :: Size Int) of
-- (VarSize f, ConstSize g) ->
-- VarSize $ \obj ->
-- f (_varLen obj) + g
-- _ -> error "size is not defined properly"
-- deserialize i arr = do
-- (i1, x0) <- deserialize i arr
-- (i2, x1) <- deserialize i1 arr
-- pure (i2, Object x0 x1)
-- serialize i arr (Object x0 x1) = do
-- i1 <- serialize i arr x0
-- i2 <- serialize i1 arr x1
-- pure i2
-- :}
--
class Serialize a where
-- | Get the 'Size', in bytes, reqired to store the serialized
-- representation of the type. Size cannot be zero.
size :: Size a
-- We can implement the following functions without returning the `Int`
-- offset but that may require traversing the Haskell structure again to get
-- the size. Therefore, this is a performance optimization.
-- | Deserialize a value from the given byte-index in the array. Returns a
-- tuple of the next byte-index and the deserialized value.
deserialize :: Int -> MutableByteArray -> IO (Int, a)
-- | Write the serialized representation of the value in the array at the
-- given byte-index. Returns the next byte-index.
serialize :: Int -> MutableByteArray -> a -> IO Int
--------------------------------------------------------------------------------
-- Instances
--------------------------------------------------------------------------------
#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \
instance Serialize _type where \
; size = ConstSize $ Unbox.sizeOf (Proxy :: Proxy _type) \
; deserialize off arr = \
Unbox.peekByteIndex off arr >>= \
\val -> let sz = Unbox.sizeOf (Proxy :: Proxy _type) \
in pure (off + sz, val) \
; serialize off arr val = \
Unbox.pokeByteIndex off arr val \
>> let sz = Unbox.sizeOf (Proxy :: Proxy _type) \
in pure (off + sz)
DERIVE_SERIALIZE_FROM_UNBOX(Char)
DERIVE_SERIALIZE_FROM_UNBOX(Int8)
DERIVE_SERIALIZE_FROM_UNBOX(Int16)
DERIVE_SERIALIZE_FROM_UNBOX(Int32)
DERIVE_SERIALIZE_FROM_UNBOX(Int)
DERIVE_SERIALIZE_FROM_UNBOX(Int64)
DERIVE_SERIALIZE_FROM_UNBOX(Word)
DERIVE_SERIALIZE_FROM_UNBOX(Word8)
DERIVE_SERIALIZE_FROM_UNBOX(Word16)
DERIVE_SERIALIZE_FROM_UNBOX(Word32)
DERIVE_SERIALIZE_FROM_UNBOX(Word64)
DERIVE_SERIALIZE_FROM_UNBOX(Double)
DERIVE_SERIALIZE_FROM_UNBOX(Float)
DERIVE_SERIALIZE_FROM_UNBOX((StablePtr a))
DERIVE_SERIALIZE_FROM_UNBOX((Ptr a))
DERIVE_SERIALIZE_FROM_UNBOX((FunPtr a))
instance forall a. Serialize a => Serialize [a] where
{-# INLINE size #-}
size = VarSize $ \lst ->
case size :: Size a of
VarSize f ->
foldl'
(\acc x -> acc + f x)
(Unbox.sizeOf (Proxy :: Proxy Int))
lst
ConstSize sz ->
length lst
* sz
+ Unbox.sizeOf (Proxy :: Proxy Int)
{-# INLINE deserialize #-}
deserialize off arr = do
len <- Unbox.peekByteIndex off arr :: IO Int
let off1 = off + Unbox.sizeOf (Proxy :: Proxy Int)
let peekList buf o 0 = pure (o, reverse buf)
peekList buf o i = do
(o1, x) <- deserialize o arr
peekList (x:buf) o1 (i - 1)
peekList [] off1 len
{-# INLINE serialize #-}
serialize off arr val = do
void $ serialize off arr (length val)
let off1 = off + Unbox.sizeOf (Proxy :: Proxy Int)
let pokeList o [] = pure o
pokeList o (x:xs) = do
o1 <- serialize o arr x
pokeList o1 xs
pokeList off1 val

View File

@ -302,6 +302,8 @@ library
-- streamly-core-array-types
, Streamly.Internal.Data.Unbox
, Streamly.Internal.Data.Serialize
-- Unboxed IORef
, Streamly.Internal.Data.IORef.Unboxed
-- May depend on streamly-core-stream

View File

@ -40,6 +40,8 @@ import Streamly.Internal.Data.Unbox
import Test.Hspec as H
import qualified Streamly.Internal.Data.Serialize as Serialize
--------------------------------------------------------------------------------
-- Types
--------------------------------------------------------------------------------
@ -146,6 +148,27 @@ testGenericConsistency val = do
checkSizeOf :: forall a. Unbox a => Proxy a -> Int -> IO ()
checkSizeOf _ size = sizeOf (Proxy :: Proxy a) `shouldBe` size
testSerializeList
:: forall a. (Eq a, Show a, Serialize.Serialize a)
=> Int
-> a
-> IO ()
testSerializeList sizeOfA val = do
let sz =
case Serialize.size :: Serialize.Size a of
Serialize.VarSize f -> f val
Serialize.ConstSize csz -> csz
sz `shouldBe` sizeOfA
arr <- newUnpinnedBytes sz
off1 <- Serialize.serialize 0 arr val
(off2, val2) <- Serialize.deserialize 0 arr
val2 `shouldBe` val
off2 `shouldBe` off1
--------------------------------------------------------------------------------
-- CPP helpers
--------------------------------------------------------------------------------
@ -203,6 +226,13 @@ testCases = do
it "GenericConsistency (Identity Int)"
$ testGenericConsistency (Identity 56760 :: Identity Int)
it "Serialize [Int]"
$ testSerializeList (8 + 4 * 8) ([1, 2, 3, 4] :: [Int])
it "Serialize [[Int]]"
$ testSerializeList
(8 + 3 * 8 + 6 * 8)
([[1], [1, 2], [1, 2, 3]] :: [[Int]])
-- Fingerprint does not work for GHC 8.6.5
-- it "Fingerprint" $ testSerialization (Fingerprint 123456 876588)
-- it "GenericConsistency Fingerprint"