Improve "size" op performance in Serialize

By using tail-recursive traversal of the structures.
This commit is contained in:
Harendra Kumar 2023-08-15 17:50:29 +05:30 committed by Adithya Kumar
parent 8b18616dba
commit f7f42e0242
5 changed files with 66 additions and 70 deletions

View File

@ -208,8 +208,7 @@ getSize _ = sizeOf (Proxy :: Proxy a)
#else
getSize val =
case size :: Size a of
ConstSize x -> x
VarSize f -> f val
Size f -> f 0 val
#endif
#ifndef USE_UNBOX
@ -422,7 +421,7 @@ main = do
-- Approximately 100000 constructors
-- Assuming Leaf nodes constitute two constructors (Leaf, Int) and internal
-- nodes 1 level = log_2 (100001/3) + 1 = 16
!tInt <- force <$> mkBinTree 16
!(tInt :: BinTree Int) <- force <$> mkBinTree 16
-- Approximately 100000 constructors, assuming two constructors (Cons, Int)
-- per element.
@ -442,14 +441,14 @@ main = do
-- Enable one benchmark below, and run the benchmark
-- Check the .dump-simpl output
let value = 100000
-- print $ getSize (CDT1C3 4 2)
-- peekTimes ((CDT1C2 (5 :: Int)) :: CustomDT1) value
-- peekTimes (Sum2525) value
-- peekTimes (Product25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25) value
-- !(tInt :: BinTree Int) <- force <$> mkBinTree 16
-- print $ sizeOfOnce tInt
print $ sizeOfOnce lInt
-- peekTimes tInt 1
-- roundtrip ((CDT1C2 (5 :: Int)) :: CustomDT1) value
let !n = getSize lEnum
in peekTimes n lEnum 1
return ()
#endif

View File

@ -39,11 +39,10 @@ import GHC.Exts
-- Types
--------------------------------------------------------------------------------
-- | Info about the length of a serializable type. Length can depend on the
-- value or can be independent.
data Size a
= VarSize (a -> Int)
| ConstSize !Int
-- XXX Use (a -> Sum Int) instead, remove the Size type
-- | A left fold step to fold a generic structure to its serializable size.
newtype Size a = Size (Int -> a -> Int) -- a left fold or Sum monoid
-- | A type implementing the 'Serialize' interface supplies operations for
-- reading and writing the type from and to a mutable byte array (an unboxed
@ -63,8 +62,8 @@ data Size a
--
-- >>> :{
-- data Object = Object
-- { _varLen :: [Int]
-- , _constLen :: Int
-- { _obj1 :: [Int]
-- , _obj2 :: Int
-- }
-- :}
--
@ -72,10 +71,9 @@ data Size a
-- instance Serialize Object where
-- size =
-- case (size :: Size [Int], size :: Size Int) of
-- (VarSize f, ConstSize g) ->
-- VarSize $ \obj ->
-- f (_varLen obj) + g
-- _ -> error "size is not defined properly"
-- (Size f, Size g) ->
-- Size $ \acc obj ->
-- acc + f 0 (_obj1 obj) + g 0 (_obj2 obj)
-- deserialize i arr = do
-- (i1, x0) <- deserialize i arr
-- (i2, x1) <- deserialize i1 arr
@ -110,7 +108,7 @@ class Serialize a where
#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \
instance Serialize _type where \
; {-# INLINE size #-} \
; size = ConstSize $ Unbox.sizeOf (Proxy :: Proxy _type) \
; size = Size (\acc _ -> acc + Unbox.sizeOf (Proxy :: Proxy _type)) \
; {-# INLINE deserialize #-} \
; deserialize off arr = \
Unbox.peekByteIndex off arr >>= \
@ -142,18 +140,10 @@ DERIVE_SERIALIZE_FROM_UNBOX((FunPtr a))
instance forall a. Serialize a => Serialize [a] where
{-# INLINE size #-}
size = VarSize $ \lst ->
-- {-# INLINE size #-}
size = Size $ \acc xs ->
case size :: Size a of
VarSize f ->
foldl'
(\acc x -> acc + f x)
(Unbox.sizeOf (Proxy :: Proxy Int))
lst
ConstSize sz ->
length lst
* sz
+ Unbox.sizeOf (Proxy :: Proxy Int)
Size f -> foldl' f (acc + (Unbox.sizeOf (Proxy :: Proxy Int))) xs
{-# INLINE deserialize #-}
deserialize off arr = do
@ -185,8 +175,7 @@ encodeAs ps a =
unsafeInlineIO $ do
let len =
case size :: Size a of
ConstSize sz -> sz
VarSize f -> f a
Size f -> f 0 a
-- We encode the length of the encoding as a header hence the 8 extra
-- bytes to encode Int64
mbarr <- Unbox.newBytesAs ps (8 + len)

View File

@ -17,6 +17,7 @@ module Streamly.Internal.Data.Serialize.TH
-- Imports
--------------------------------------------------------------------------------
import Data.List (foldl')
import Data.Word (Word16, Word32, Word64, Word8)
import Language.Haskell.TH
@ -40,6 +41,9 @@ type Field = (Maybe Name, Type)
_x :: Name
_x = mkName "x"
_acc :: Name
_acc = mkName "acc"
_arr :: Name
_arr = mkName "arr"
@ -72,22 +76,10 @@ matchConstructor cname numFields exp0 =
(normalB exp0)
[]
exprGetSize :: Int -> Type -> Q Exp
exprGetSize i ty =
caseE
(sigE (varE 'size) (appT (conT ''Size) (pure ty)))
[ match
(conP 'VarSize [varP _f])
(normalB (appE (varE _f) (varE (mkFieldName i))))
[]
, match (conP 'ConstSize [varP _sz]) (normalB (varE _sz)) []
]
where
_f = mkName $ "f"
_sz = mkName $ "sz"
exprGetSize :: Q Exp -> (Int, Type) -> Q Exp
exprGetSize acc (i, ty) =
[|case size :: Size $(pure ty) of
Size f -> f $(acc) $(varE (mkFieldName i))|]
getTagSize :: Int -> Int
getTagSize numConstructors
@ -123,16 +115,23 @@ mkSizeOfExpr headTy constructors =
("Attempting to get size with no constructors (" ++
$(lift (pprint headTy)) ++ ")")|]
-- One constructor with no fields is a unit type. Size of a unit type is
-- 1.
-- 1. XXX Use isUnitType?
[constructor@(DataCon _ _ _ fields)] ->
case fields of
[] -> appE (conE 'ConstSize) (litE (IntegerL 1))
-- Unit type
[] ->
appE
(conE 'Size)
(lamE [varP _acc, wildP] [| $(varE _acc) + 1 |])
-- Product type
_ ->
appE
(conE 'VarSize)
(conE 'Size)
(lamE
[varP _x]
(caseE (varE _x) [matchCons constructor]))
[varP _acc, varP _x]
(caseE (varE _x)
[matchCons (varE _acc) constructor]))
-- Sum type
_ -> sizeOfHeadDt
where
@ -140,22 +139,30 @@ mkSizeOfExpr headTy constructors =
tagSizeExp =
litE (IntegerL (fromIntegral (getTagSize (length constructors))))
sizeOfField (i, (_, ty)) = exprGetSize i ty
-- XXX fields of the same type can be folded together, will reduce the code
-- size when there are many fields of the same type.
-- XXX const size fields can be calculated statically.
-- XXX This can result in large compilation times due to nesting when there
-- are many constructors. We can create a list and sum the list at run time
-- to avoid that depending on the number of constructors. Or using a let
-- statement for each case may help?
-- appE (varE 'sum) (listE (acc : map (exprGetSize (litE (IntegerL 0))) (zip [0..] fields)))
sizeOfFields acc fields =
foldl' exprGetSize acc $ zip [0..] fields
sizeOfFields fields =
appE (varE 'sum) (listE (map sizeOfField (zip [0..] fields)))
matchCons (DataCon cname _ _ fields) =
matchConstructor cname (length fields) (sizeOfFields fields)
matchCons acc (DataCon cname _ _ fields) =
let expr = sizeOfFields acc (map snd fields)
in matchConstructor cname (length fields) expr
-- XXX We fix VarSize for simplicity. Should be changed later.
sizeOfHeadDt =
appE
(conE 'VarSize)
let acc = [|$(varE _acc) + $(tagSizeExp)|]
f =
(lamE
[varP _x]
[|$(tagSizeExp)
+ $(caseE (varE _x) (fmap matchCons constructors))|])
[varP _acc, varP _x]
(caseE (varE _x) (fmap (matchCons acc) constructors))
)
in appE (conE 'Size) f
--------------------------------------------------------------------------------
-- Peek
@ -328,7 +335,7 @@ deriveSerializeInternal preds headTy cons = do
let methods =
-- INLINE on sizeOf actually worsens some benchmarks, and improves
-- none
[ -- PragmaD (InlineP 'size Inline FunLike AllPhases)
[ -- PragmaD (InlineP 'size Inlinable FunLike AllPhases)
FunD 'size [Clause [] (NormalB sizeOfMethod) []]
, PragmaD (InlineP 'deserialize Inline FunLike AllPhases)
, FunD

View File

@ -59,9 +59,12 @@ roundtrip val = do
let sz =
case Serialize.size :: Serialize.Size a of
Serialize.VarSize f -> f val
Serialize.ConstSize csz -> csz
Serialize.Size f -> f 0 val
-- putStrLn "----------------------------------------------------------------"
-- putStrLn $ show val
-- putStrLn $ "Size is: " ++ show sz
-- putStrLn "----------------------------------------------------------------"
arr <- newBytes sz
off1 <- Serialize.serialize 0 arr val
@ -78,8 +81,7 @@ testSerializeList sizeOfA val = do
let sz =
case Serialize.size :: Serialize.Size a of
Serialize.VarSize f -> f val
Serialize.ConstSize csz -> csz
Serialize.Size f -> f 0 val
sz `shouldBe` sizeOfA

View File

@ -181,8 +181,7 @@ variableSizeOf ::
-> Int
variableSizeOf val =
case size :: Size a of
ConstSize x -> x
VarSize f -> f val
Size f -> f 0 val
#endif
testSerialization ::