Add more literals to value serialization format.

This covers values that are stored as pseudo data types in the Haskell
runtime, which would otherwise need to be serialized using unboxed data
in the byte format.
This commit is contained in:
Dan Doel 2023-09-05 17:14:39 -04:00
parent bd51afea26
commit 42458974fd
4 changed files with 169 additions and 39 deletions

View File

@ -1513,6 +1513,10 @@ data BLit
| Quote Value
| Code (SuperGroup Symbol)
| BArr PA.ByteArray
| Pos Word64
| Neg Word64
| Char Word64
| Float Double
deriving (Show)
groupVars :: ANFM v (Set v)

View File

@ -82,6 +82,10 @@ data BLTag
| QuoteT
| CodeT
| BArrT
| PosT
| NegT
| CharT
| FloatT
data VaTag = PartialT | DataT | ContT | BLitT
@ -186,6 +190,10 @@ instance Tag BLTag where
QuoteT -> 5
CodeT -> 6
BArrT -> 7
PosT -> 8
NegT -> 9
CharT -> 10
FloatT -> 11
word2tag = \case
0 -> pure TextT
@ -196,6 +204,10 @@ instance Tag BLTag where
5 -> pure QuoteT
6 -> pure CodeT
7 -> pure BArrT
8 -> pure PosT
9 -> pure NegT
10 -> pure CharT
11 -> pure FloatT
t -> unknownTag "BLTag" t
instance Tag VaTag where
@ -643,6 +655,10 @@ putBLit (Bytes b) = putTag BytesT *> putBytes b
putBLit (Quote v) = putTag QuoteT *> putValue v
putBLit (Code g) = putTag CodeT *> putGroup mempty mempty g
putBLit (BArr a) = putTag BArrT *> putByteArray a
putBLit (Pos n) = putTag PosT *> putPositive n
putBLit (Neg n) = putTag NegT *> putPositive n
putBLit (Char n) = putTag CharT *> putWord64be n
putBLit (Float d) = putTag FloatT *> putFloat d
getBLit :: (MonadGet m) => Version -> m BLit
getBLit v =
@ -655,6 +671,10 @@ getBLit v =
QuoteT -> Quote <$> getValue v
CodeT -> Code <$> getGroup
BArrT -> BArr <$> getByteArray
PosT -> Pos <$> getPositive
NegT -> Neg <$> getPositive
CharT -> Char <$> getWord64be
FloatT -> Float <$> getFloat
putRefs :: (MonadPut m) => [Reference] -> m ()
putRefs rs = putFoldable putReference rs
@ -763,76 +783,106 @@ getGroupRef :: (MonadGet m) => m GroupRef
getGroupRef = GR <$> getReference <*> getWord64be
putValue :: (MonadPut m) => Value -> m ()
putValue (Partial gr ws vs) =
putValue (Partial gr [] vs) =
putTag PartialT
*> putGroupRef gr
*> putFoldable putWord64be ws
*> putFoldable putValue vs
putValue (Data r t ws vs) =
putValue Partial{} =
exn "putValue: Partial with unboxed values no longer supported"
putValue (Data r t [] vs) =
putTag DataT
*> putReference r
*> putWord64be t
*> putFoldable putWord64be ws
*> putFoldable putValue vs
putValue (Cont us bs k) =
putValue Data{} =
exn "putValue: Data with unboxed contents no longer supported"
putValue (Cont [] bs k) =
putTag ContT
*> putFoldable putWord64be us
*> putFoldable putValue bs
*> putCont k
putValue Cont{} =
exn "putValue: Cont with unboxed stack no longer supported"
putValue (BLit l) =
putTag BLitT *> putBLit l
getValue :: (MonadGet m) => Version -> m Value
getValue v =
getTag >>= \case
PartialT ->
Partial <$> getGroupRef <*> getList getWord64be <*> getList (getValue v)
DataT ->
Data
<$> getReference
<*> getWord64be
<*> getList getWord64be
<*> getList (getValue v)
ContT -> Cont <$> getList getWord64be <*> getList (getValue v) <*> getCont v
PartialT
| v < 4 ->
Partial <$> getGroupRef <*> getList getWord64be <*> getList (getValue v)
| otherwise ->
flip Partial [] <$> getGroupRef <*> getList (getValue v)
DataT
| v < 4 ->
Data
<$> getReference
<*> getWord64be
<*> getList getWord64be
<*> getList (getValue v)
| otherwise ->
(\r t -> Data r t [])
<$> getReference
<*> getWord64be
<*> getList (getValue v)
ContT
| v < 4 ->
Cont <$> getList getWord64be <*> getList (getValue v) <*> getCont v
| otherwise -> Cont [] <$> getList (getValue v) <*> getCont v
BLitT -> BLit <$> getBLit v
putCont :: (MonadPut m) => Cont -> m ()
putCont KE = putTag KET
putCont (Mark ua ba rs ds k) =
putCont (Mark 0 ba rs ds k) =
putTag MarkT
*> putWord64be ua
*> putWord64be ba
*> putFoldable putReference rs
*> putMap putReference putValue ds
*> putCont k
putCont (Push i j m n gr k) =
putCont Mark{} =
exn "putCont: Mark with unboxed args no longer supported"
putCont (Push 0 j 0 n gr k) =
putTag PushT
*> putWord64be i
*> putWord64be j
*> putWord64be m
*> putWord64be n
*> putGroupRef gr
*> putCont k
putCont Push{} =
exn "putCont: Push with unboxed information no longer supported"
getCont :: (MonadGet m) => Version -> m Cont
getCont v =
getTag >>= \case
KET -> pure KE
MarkT ->
Mark
<$> getWord64be
<*> getWord64be
<*> getList getReference
<*> getMap getReference (getValue v)
<*> getCont v
PushT ->
Push
<$> getWord64be
<*> getWord64be
<*> getWord64be
<*> getWord64be
<*> getGroupRef
<*> getCont v
MarkT
| v < 4 ->
Mark
<$> getWord64be
<*> getWord64be
<*> getList getReference
<*> getMap getReference (getValue v)
<*> getCont v
| otherwise ->
Mark 0
<$> getWord64be
<*> getList getReference
<*> getMap getReference (getValue v)
<*> getCont v
PushT
| v < 4 ->
Push
<$> getWord64be
<*> getWord64be
<*> getWord64be
<*> getWord64be
<*> getGroupRef
<*> getCont v
| otherwise ->
(\j n -> Push 0 j 0 n)
<$> getWord64be
<*> getWord64be
<*> getGroupRef
<*> getCont v
deserializeGroup :: (Var v) => ByteString -> Either String (SuperGroup v)
deserializeGroup bs = runGetS (getVersion *> getGroup) bs
@ -890,7 +940,7 @@ deserializeValue bs = runGetS (getVersion >>= getValue) bs
n
| n < 1 -> fail $ "deserializeValue: unknown version: " ++ show n
| n < 3 -> fail $ "deserializeValue: unsupported version: " ++ show n
| n == 3 -> pure n
| n <= 4 -> pure n
| otherwise -> fail $ "deserializeValue: unknown version: " ++ show n
serializeValue :: Value -> ByteString
@ -904,7 +954,7 @@ serializeValueLazy v = runPutLazy (putVersion *> putValue v)
putVersion = putWord32be valueVersion
valueVersion :: Word32
valueVersion = 3
valueVersion = 4
codeVersion :: Word32
codeVersion = 1

View File

@ -18,6 +18,7 @@ import Data.Ord (comparing)
import Data.Sequence qualified as Sq
import Data.Set qualified as S
import Data.Set qualified as Set
import Data.Primitive.ByteArray qualified as BA
import Data.Text qualified as DTx
import Data.Text.IO qualified as Tx
import Data.Traversable
@ -2079,6 +2080,7 @@ reflectValue rty = goV
goV (PApV cix ua ba) =
ANF.Partial (goIx cix) (fromIntegral <$> ua) <$> traverse goV ba
goV (DataC _ t [w] []) = ANF.BLit <$> reflectUData t w
goV (DataC r t us bs) =
ANF.Data r (maskTags t) (fromIntegral <$> us) <$> traverse goV bs
goV (CapV k _ _ us bs) =
@ -2120,6 +2122,15 @@ reflectValue rty = goV
pure (ANF.BArr a)
| otherwise = die $ err $ "foreign value: " <> (show f)
reflectUData :: Word64 -> Int -> IO ANF.BLit
reflectUData t v
| t == natTag = pure $ ANF.Pos (fromIntegral v)
| t == charTag = pure $ ANF.Char (fromIntegral v)
| t == intTag, v >= 0 = pure $ ANF.Pos (fromIntegral v)
| t == intTag, v < 0 = pure $ ANF.Neg (fromIntegral (- v))
| t == floatTag = pure $ ANF.Float (intToDouble v)
| otherwise = die . err $ "unboxed data: " <> show (t,v)
reifyValue :: CCache -> ANF.Value -> IO (Either [Reference] Closure)
reifyValue cc val = do
erc <-
@ -2193,6 +2204,19 @@ reifyValue0 (rty, rtm) = goV
goL (ANF.Quote v) = pure . Foreign $ Wrap Rf.valueRef v
goL (ANF.Code g) = pure . Foreign $ Wrap Rf.codeRef g
goL (ANF.BArr a) = pure . Foreign $ Wrap Rf.ibytearrayRef a
goL (ANF.Char w) = pure $ DataU1 Rf.charRef charTag (fromIntegral w)
goL (ANF.Pos w) =
pure $ DataU1 Rf.natRef natTag (fromIntegral w)
goL (ANF.Neg w) =
pure $ DataU1 Rf.intRef intTag (- fromIntegral w)
goL (ANF.Float d) =
pure $ DataU1 Rf.floatRef floatTag (doubleToInt d)
doubleToInt :: Double -> Int
doubleToInt d = indexByteArray (BA.byteArrayFromList [d]) 0
intToDouble :: Int -> Double
intToDouble w = indexByteArray (BA.byteArrayFromList [w]) 0
-- Universal comparison functions
@ -2211,6 +2235,8 @@ universalEq ::
universalEq frn = eqc
where
eql cm l r = length l == length r && and (zipWith cm l r)
eqc (DataC _ ct1 [w1] []) (DataC _ ct2 [w2] []) =
matchTags ct1 ct2 && w1 == w2
eqc (DataC _ ct1 us1 bs1) (DataC _ ct2 us2 bs2) =
ct1 == ct2
&& eql (==) us1 us2
@ -2235,6 +2261,13 @@ universalEq frn = eqc
| otherwise = frn fl fr
eqc c d = closureNum c == closureNum d
-- serialization doesn't necessarily preserve Int tags, so be
-- more accepting for those.
matchTags ct1 ct2 =
ct1 == ct2
|| (ct1 == intTag && ct2 == natTag)
|| (ct1 == natTag && ct2 == intTag)
arrayEq :: (Closure -> Closure -> Bool) -> PA.Array Closure -> PA.Array Closure -> Bool
arrayEq eqc l r
| PA.sizeofArray l /= PA.sizeofArray r = False
@ -2296,6 +2329,13 @@ natTag
packTags rt 0
| otherwise = error "internal error: natTag"
intTag :: Word64
intTag
| Just n <- M.lookup Rf.intRef builtinTypeNumbering,
rt <- toEnum (fromIntegral n) =
packTags rt 0
| otherwise = error "internal error: intTag"
charTag :: Word64
charTag
| Just n <- M.lookup Rf.charRef builtinTypeNumbering,
@ -2320,8 +2360,10 @@ universalCompare frn = cmpc False
cmpl cm l r =
compare (length l) (length r) <> fold (zipWith cm l r)
cmpc _ (DataC _ ct1 [i] []) (DataC _ ct2 [j] [])
| ct1 == floatTag && ct2 == floatTag = compareAsFloat i j
| ct1 == natTag && ct2 == natTag = compareAsNat i j
| ct1 == floatTag, ct2 == floatTag = compareAsFloat i j
| ct1 == natTag, ct2 == natTag = compareAsNat i j
| ct1 == intTag, ct2 == natTag = compare i j
| ct1 == natTag, ct2 == intTag = compare i j
cmpc tyEq (DataC rf1 ct1 us1 bs1) (DataC rf2 ct2 us2 bs2) =
(if tyEq && ct1 /= ct2 then compare rf1 rf2 else EQ)
<> compare (maskTags ct1) (maskTags ct2)

View File

@ -115,6 +115,40 @@ getLength ::
m n
getLength = unVarInt <$> deserialize
-- Checks for negatives, in case you put an Integer, which does not
-- behave properly for negative numbers.
putPositive ::
MonadPut m =>
Bits n =>
Bits (Unsigned n) =>
Integral n =>
Integral (Unsigned n) =>
n ->
m ()
putPositive n
| n < 0 = exn $ "putPositive: negative number: " ++ show (toInteger n)
| otherwise = serialize (VarInt n)
-- Reads as an Integer, then checks that the result will fit in the
-- result type.
getPositive ::
forall m n.
Bounded n =>
Integral n =>
MonadGet m =>
m n
getPositive = validate . unVarInt =<< deserialize
where
mx0 :: n
mx0 = maxBound
mx :: Integer
mx = fromIntegral mx0
validate :: Integer -> m n
validate n
| n <= mx = pure $ fromIntegral n
| otherwise = fail $ "getPositive: overflow: " ++ show n
putFoldable ::
(Foldable f, MonadPut m) => (a -> m ()) -> f a -> m ()
putFoldable putA as = do