mirror of
https://github.com/ilyakooo0/streamly.git
synced 2024-10-26 09:59:48 +03:00
Improve "size" op performance in Serialize
By using tail-recursive traversal of the structures.
This commit is contained in:
parent
8b18616dba
commit
f7f42e0242
@ -208,8 +208,7 @@ getSize _ = sizeOf (Proxy :: Proxy a)
|
|||||||
#else
|
#else
|
||||||
getSize val =
|
getSize val =
|
||||||
case size :: Size a of
|
case size :: Size a of
|
||||||
ConstSize x -> x
|
Size f -> f 0 val
|
||||||
VarSize f -> f val
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef USE_UNBOX
|
#ifndef USE_UNBOX
|
||||||
@ -422,7 +421,7 @@ main = do
|
|||||||
-- Approximately 100000 constructors
|
-- Approximately 100000 constructors
|
||||||
-- Assuming Leaf nodes constitute two constructors (Leaf, Int) and internal
|
-- Assuming Leaf nodes constitute two constructors (Leaf, Int) and internal
|
||||||
-- nodes 1 level = log_2 (100001/3) + 1 = 16
|
-- nodes 1 level = log_2 (100001/3) + 1 = 16
|
||||||
!tInt <- force <$> mkBinTree 16
|
!(tInt :: BinTree Int) <- force <$> mkBinTree 16
|
||||||
|
|
||||||
-- Approximately 100000 constructors, assuming two constructors (Cons, Int)
|
-- Approximately 100000 constructors, assuming two constructors (Cons, Int)
|
||||||
-- per element.
|
-- per element.
|
||||||
@ -442,14 +441,14 @@ main = do
|
|||||||
-- Enable one benchmark below, and run the benchmark
|
-- Enable one benchmark below, and run the benchmark
|
||||||
-- Check the .dump-simpl output
|
-- Check the .dump-simpl output
|
||||||
let value = 100000
|
let value = 100000
|
||||||
|
-- print $ getSize (CDT1C3 4 2)
|
||||||
-- peekTimes ((CDT1C2 (5 :: Int)) :: CustomDT1) value
|
-- peekTimes ((CDT1C2 (5 :: Int)) :: CustomDT1) value
|
||||||
-- peekTimes (Sum2525) value
|
-- peekTimes (Sum2525) value
|
||||||
-- peekTimes (Product25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25) value
|
-- peekTimes (Product25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25) value
|
||||||
-- !(tInt :: BinTree Int) <- force <$> mkBinTree 16
|
-- !(tInt :: BinTree Int) <- force <$> mkBinTree 16
|
||||||
-- print $ sizeOfOnce tInt
|
-- print $ sizeOfOnce tInt
|
||||||
|
print $ sizeOfOnce lInt
|
||||||
-- peekTimes tInt 1
|
-- peekTimes tInt 1
|
||||||
-- roundtrip ((CDT1C2 (5 :: Int)) :: CustomDT1) value
|
-- roundtrip ((CDT1C2 (5 :: Int)) :: CustomDT1) value
|
||||||
let !n = getSize lEnum
|
|
||||||
in peekTimes n lEnum 1
|
|
||||||
return ()
|
return ()
|
||||||
#endif
|
#endif
|
||||||
|
@ -39,11 +39,10 @@ import GHC.Exts
|
|||||||
-- Types
|
-- Types
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
-- | Info about the length of a serializable type. Length can depend on the
|
-- XXX Use (a -> Sum Int) instead, remove the Size type
|
||||||
-- value or can be independent.
|
|
||||||
data Size a
|
-- | A left fold step to fold a generic structure to its serializable size.
|
||||||
= VarSize (a -> Int)
|
newtype Size a = Size (Int -> a -> Int) -- a left fold or Sum monoid
|
||||||
| ConstSize !Int
|
|
||||||
|
|
||||||
-- | A type implementing the 'Serialize' interface supplies operations for
|
-- | A type implementing the 'Serialize' interface supplies operations for
|
||||||
-- reading and writing the type from and to a mutable byte array (an unboxed
|
-- reading and writing the type from and to a mutable byte array (an unboxed
|
||||||
@ -63,8 +62,8 @@ data Size a
|
|||||||
--
|
--
|
||||||
-- >>> :{
|
-- >>> :{
|
||||||
-- data Object = Object
|
-- data Object = Object
|
||||||
-- { _varLen :: [Int]
|
-- { _obj1 :: [Int]
|
||||||
-- , _constLen :: Int
|
-- , _obj2 :: Int
|
||||||
-- }
|
-- }
|
||||||
-- :}
|
-- :}
|
||||||
--
|
--
|
||||||
@ -72,10 +71,9 @@ data Size a
|
|||||||
-- instance Serialize Object where
|
-- instance Serialize Object where
|
||||||
-- size =
|
-- size =
|
||||||
-- case (size :: Size [Int], size :: Size Int) of
|
-- case (size :: Size [Int], size :: Size Int) of
|
||||||
-- (VarSize f, ConstSize g) ->
|
-- (Size f, Size g) ->
|
||||||
-- VarSize $ \obj ->
|
-- Size $ \acc obj ->
|
||||||
-- f (_varLen obj) + g
|
-- acc + f 0 (_obj1 obj) + g 0 (_obj2 obj)
|
||||||
-- _ -> error "size is not defined properly"
|
|
||||||
-- deserialize i arr = do
|
-- deserialize i arr = do
|
||||||
-- (i1, x0) <- deserialize i arr
|
-- (i1, x0) <- deserialize i arr
|
||||||
-- (i2, x1) <- deserialize i1 arr
|
-- (i2, x1) <- deserialize i1 arr
|
||||||
@ -110,7 +108,7 @@ class Serialize a where
|
|||||||
#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \
|
#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \
|
||||||
instance Serialize _type where \
|
instance Serialize _type where \
|
||||||
; {-# INLINE size #-} \
|
; {-# INLINE size #-} \
|
||||||
; size = ConstSize $ Unbox.sizeOf (Proxy :: Proxy _type) \
|
; size = Size (\acc _ -> acc + Unbox.sizeOf (Proxy :: Proxy _type)) \
|
||||||
; {-# INLINE deserialize #-} \
|
; {-# INLINE deserialize #-} \
|
||||||
; deserialize off arr = \
|
; deserialize off arr = \
|
||||||
Unbox.peekByteIndex off arr >>= \
|
Unbox.peekByteIndex off arr >>= \
|
||||||
@ -142,18 +140,10 @@ DERIVE_SERIALIZE_FROM_UNBOX((FunPtr a))
|
|||||||
|
|
||||||
instance forall a. Serialize a => Serialize [a] where
|
instance forall a. Serialize a => Serialize [a] where
|
||||||
|
|
||||||
{-# INLINE size #-}
|
-- {-# INLINE size #-}
|
||||||
size = VarSize $ \lst ->
|
size = Size $ \acc xs ->
|
||||||
case size :: Size a of
|
case size :: Size a of
|
||||||
VarSize f ->
|
Size f -> foldl' f (acc + (Unbox.sizeOf (Proxy :: Proxy Int))) xs
|
||||||
foldl'
|
|
||||||
(\acc x -> acc + f x)
|
|
||||||
(Unbox.sizeOf (Proxy :: Proxy Int))
|
|
||||||
lst
|
|
||||||
ConstSize sz ->
|
|
||||||
length lst
|
|
||||||
* sz
|
|
||||||
+ Unbox.sizeOf (Proxy :: Proxy Int)
|
|
||||||
|
|
||||||
{-# INLINE deserialize #-}
|
{-# INLINE deserialize #-}
|
||||||
deserialize off arr = do
|
deserialize off arr = do
|
||||||
@ -185,8 +175,7 @@ encodeAs ps a =
|
|||||||
unsafeInlineIO $ do
|
unsafeInlineIO $ do
|
||||||
let len =
|
let len =
|
||||||
case size :: Size a of
|
case size :: Size a of
|
||||||
ConstSize sz -> sz
|
Size f -> f 0 a
|
||||||
VarSize f -> f a
|
|
||||||
-- We encode the length of the encoding as a header hence the 8 extra
|
-- We encode the length of the encoding as a header hence the 8 extra
|
||||||
-- bytes to encode Int64
|
-- bytes to encode Int64
|
||||||
mbarr <- Unbox.newBytesAs ps (8 + len)
|
mbarr <- Unbox.newBytesAs ps (8 + len)
|
||||||
|
@ -17,6 +17,7 @@ module Streamly.Internal.Data.Serialize.TH
|
|||||||
-- Imports
|
-- Imports
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import Data.List (foldl')
|
||||||
import Data.Word (Word16, Word32, Word64, Word8)
|
import Data.Word (Word16, Word32, Word64, Word8)
|
||||||
|
|
||||||
import Language.Haskell.TH
|
import Language.Haskell.TH
|
||||||
@ -40,6 +41,9 @@ type Field = (Maybe Name, Type)
|
|||||||
_x :: Name
|
_x :: Name
|
||||||
_x = mkName "x"
|
_x = mkName "x"
|
||||||
|
|
||||||
|
_acc :: Name
|
||||||
|
_acc = mkName "acc"
|
||||||
|
|
||||||
_arr :: Name
|
_arr :: Name
|
||||||
_arr = mkName "arr"
|
_arr = mkName "arr"
|
||||||
|
|
||||||
@ -72,22 +76,10 @@ matchConstructor cname numFields exp0 =
|
|||||||
(normalB exp0)
|
(normalB exp0)
|
||||||
[]
|
[]
|
||||||
|
|
||||||
exprGetSize :: Int -> Type -> Q Exp
|
exprGetSize :: Q Exp -> (Int, Type) -> Q Exp
|
||||||
exprGetSize i ty =
|
exprGetSize acc (i, ty) =
|
||||||
caseE
|
[|case size :: Size $(pure ty) of
|
||||||
(sigE (varE 'size) (appT (conT ''Size) (pure ty)))
|
Size f -> f $(acc) $(varE (mkFieldName i))|]
|
||||||
[ match
|
|
||||||
(conP 'VarSize [varP _f])
|
|
||||||
(normalB (appE (varE _f) (varE (mkFieldName i))))
|
|
||||||
[]
|
|
||||||
, match (conP 'ConstSize [varP _sz]) (normalB (varE _sz)) []
|
|
||||||
]
|
|
||||||
|
|
||||||
where
|
|
||||||
|
|
||||||
_f = mkName $ "f"
|
|
||||||
_sz = mkName $ "sz"
|
|
||||||
|
|
||||||
|
|
||||||
getTagSize :: Int -> Int
|
getTagSize :: Int -> Int
|
||||||
getTagSize numConstructors
|
getTagSize numConstructors
|
||||||
@ -123,16 +115,23 @@ mkSizeOfExpr headTy constructors =
|
|||||||
("Attempting to get size with no constructors (" ++
|
("Attempting to get size with no constructors (" ++
|
||||||
$(lift (pprint headTy)) ++ ")")|]
|
$(lift (pprint headTy)) ++ ")")|]
|
||||||
-- One constructor with no fields is a unit type. Size of a unit type is
|
-- One constructor with no fields is a unit type. Size of a unit type is
|
||||||
-- 1.
|
-- 1. XXX Use isUnitType?
|
||||||
[constructor@(DataCon _ _ _ fields)] ->
|
[constructor@(DataCon _ _ _ fields)] ->
|
||||||
case fields of
|
case fields of
|
||||||
[] -> appE (conE 'ConstSize) (litE (IntegerL 1))
|
-- Unit type
|
||||||
|
[] ->
|
||||||
|
appE
|
||||||
|
(conE 'Size)
|
||||||
|
(lamE [varP _acc, wildP] [| $(varE _acc) + 1 |])
|
||||||
|
-- Product type
|
||||||
_ ->
|
_ ->
|
||||||
appE
|
appE
|
||||||
(conE 'VarSize)
|
(conE 'Size)
|
||||||
(lamE
|
(lamE
|
||||||
[varP _x]
|
[varP _acc, varP _x]
|
||||||
(caseE (varE _x) [matchCons constructor]))
|
(caseE (varE _x)
|
||||||
|
[matchCons (varE _acc) constructor]))
|
||||||
|
-- Sum type
|
||||||
_ -> sizeOfHeadDt
|
_ -> sizeOfHeadDt
|
||||||
|
|
||||||
where
|
where
|
||||||
@ -140,22 +139,30 @@ mkSizeOfExpr headTy constructors =
|
|||||||
tagSizeExp =
|
tagSizeExp =
|
||||||
litE (IntegerL (fromIntegral (getTagSize (length constructors))))
|
litE (IntegerL (fromIntegral (getTagSize (length constructors))))
|
||||||
|
|
||||||
sizeOfField (i, (_, ty)) = exprGetSize i ty
|
-- XXX fields of the same type can be folded together, will reduce the code
|
||||||
|
-- size when there are many fields of the same type.
|
||||||
|
-- XXX const size fields can be calculated statically.
|
||||||
|
-- XXX This can result in large compilation times due to nesting when there
|
||||||
|
-- are many constructors. We can create a list and sum the list at run time
|
||||||
|
-- to avoid that depending on the number of constructors. Or using a let
|
||||||
|
-- statement for each case may help?
|
||||||
|
-- appE (varE 'sum) (listE (acc : map (exprGetSize (litE (IntegerL 0))) (zip [0..] fields)))
|
||||||
|
sizeOfFields acc fields =
|
||||||
|
foldl' exprGetSize acc $ zip [0..] fields
|
||||||
|
|
||||||
sizeOfFields fields =
|
matchCons acc (DataCon cname _ _ fields) =
|
||||||
appE (varE 'sum) (listE (map sizeOfField (zip [0..] fields)))
|
let expr = sizeOfFields acc (map snd fields)
|
||||||
|
in matchConstructor cname (length fields) expr
|
||||||
matchCons (DataCon cname _ _ fields) =
|
|
||||||
matchConstructor cname (length fields) (sizeOfFields fields)
|
|
||||||
|
|
||||||
-- XXX We fix VarSize for simplicity. Should be changed later.
|
-- XXX We fix VarSize for simplicity. Should be changed later.
|
||||||
sizeOfHeadDt =
|
sizeOfHeadDt =
|
||||||
appE
|
let acc = [|$(varE _acc) + $(tagSizeExp)|]
|
||||||
(conE 'VarSize)
|
f =
|
||||||
(lamE
|
(lamE
|
||||||
[varP _x]
|
[varP _acc, varP _x]
|
||||||
[|$(tagSizeExp)
|
(caseE (varE _x) (fmap (matchCons acc) constructors))
|
||||||
+ $(caseE (varE _x) (fmap matchCons constructors))|])
|
)
|
||||||
|
in appE (conE 'Size) f
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- Peek
|
-- Peek
|
||||||
@ -328,7 +335,7 @@ deriveSerializeInternal preds headTy cons = do
|
|||||||
let methods =
|
let methods =
|
||||||
-- INLINE on sizeOf actually worsens some benchmarks, and improves
|
-- INLINE on sizeOf actually worsens some benchmarks, and improves
|
||||||
-- none
|
-- none
|
||||||
[ -- PragmaD (InlineP 'size Inline FunLike AllPhases)
|
[ -- PragmaD (InlineP 'size Inlinable FunLike AllPhases)
|
||||||
FunD 'size [Clause [] (NormalB sizeOfMethod) []]
|
FunD 'size [Clause [] (NormalB sizeOfMethod) []]
|
||||||
, PragmaD (InlineP 'deserialize Inline FunLike AllPhases)
|
, PragmaD (InlineP 'deserialize Inline FunLike AllPhases)
|
||||||
, FunD
|
, FunD
|
||||||
|
@ -59,9 +59,12 @@ roundtrip val = do
|
|||||||
|
|
||||||
let sz =
|
let sz =
|
||||||
case Serialize.size :: Serialize.Size a of
|
case Serialize.size :: Serialize.Size a of
|
||||||
Serialize.VarSize f -> f val
|
Serialize.Size f -> f 0 val
|
||||||
Serialize.ConstSize csz -> csz
|
|
||||||
|
|
||||||
|
-- putStrLn "----------------------------------------------------------------"
|
||||||
|
-- putStrLn $ show val
|
||||||
|
-- putStrLn $ "Size is: " ++ show sz
|
||||||
|
-- putStrLn "----------------------------------------------------------------"
|
||||||
arr <- newBytes sz
|
arr <- newBytes sz
|
||||||
|
|
||||||
off1 <- Serialize.serialize 0 arr val
|
off1 <- Serialize.serialize 0 arr val
|
||||||
@ -78,8 +81,7 @@ testSerializeList sizeOfA val = do
|
|||||||
|
|
||||||
let sz =
|
let sz =
|
||||||
case Serialize.size :: Serialize.Size a of
|
case Serialize.size :: Serialize.Size a of
|
||||||
Serialize.VarSize f -> f val
|
Serialize.Size f -> f 0 val
|
||||||
Serialize.ConstSize csz -> csz
|
|
||||||
|
|
||||||
sz `shouldBe` sizeOfA
|
sz `shouldBe` sizeOfA
|
||||||
|
|
||||||
|
@ -181,8 +181,7 @@ variableSizeOf ::
|
|||||||
-> Int
|
-> Int
|
||||||
variableSizeOf val =
|
variableSizeOf val =
|
||||||
case size :: Size a of
|
case size :: Size a of
|
||||||
ConstSize x -> x
|
Size f -> f 0 val
|
||||||
VarSize f -> f val
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
testSerialization ::
|
testSerialization ::
|
||||||
|
Loading…
Reference in New Issue
Block a user