Some serialization related tweaks

1. Allow for serializing `Code` with builtin foreign references to
   binary, using a mapping to the builtin name used. This is useful for
   parsing the binary and emitting scheme within unison.
2. Fixed an encoding error where variable indexing was wrong. The
   context maintained for serialization is stored backwards, but
   simultaneous bindings of multiple variables were being pushed on in
   order, which resulted in reversing the order of variables in some
   cases.
This commit is contained in:
Dan Doel 2022-03-21 15:26:22 -04:00
parent 9fc61ff292
commit 46e5c910ce
5 changed files with 100 additions and 45 deletions

View File

@ -16,6 +16,7 @@ import Data.Functor ((<&>))
import Data.Map as Map (Map, fromList, lookup)
import qualified Data.Sequence as Seq
import Data.Serialize.Put (runPutLazy)
import Data.Text (Text)
import Data.Word (Word16, Word64)
import GHC.Stack
import Unison.ABT.Normalized (Term (..))
@ -23,6 +24,7 @@ import Unison.Reference (Reference)
import Unison.Runtime.ANF as ANF hiding (Tag)
import Unison.Runtime.Exception
import Unison.Runtime.Serialize
import qualified Unison.Util.EnumContainers as EC
import qualified Unison.Util.Text as Util.Text
import Unison.Var (Type (ANFBlank), Var (..))
import Prelude hiding (getChar, putChar)
@ -47,6 +49,7 @@ data FnTag
| FConT
| FReqT
| FPrimT
| FForeignT
data MtTag
= MIntT
@ -106,6 +109,7 @@ instance Tag FnTag where
FConT -> 3
FReqT -> 4
FPrimT -> 5
FForeignT -> 6
word2tag = \case
0 -> pure FVarT
@ -114,6 +118,7 @@ instance Tag FnTag where
3 -> pure FConT
4 -> pure FReqT
5 -> pure FPrimT
6 -> pure FForeignT
n -> unknownTag "FnTag" n
instance Tag MtTag where
@ -244,9 +249,14 @@ getCCs =
1 -> BX
_ -> exn "getCCs: bad calling convention"
putGroup :: MonadPut m => Var v => SuperGroup v -> m ()
putGroup (Rec bs e) =
putLength n *> traverse_ (putComb ctx) cs *> putComb ctx e
putGroup ::
MonadPut m =>
Var v =>
EC.EnumMap FOp Text ->
SuperGroup v ->
m ()
putGroup fops (Rec bs e) =
putLength n *> traverse_ (putComb fops ctx) cs *> putComb fops ctx e
where
n = length ctx
(ctx, cs) = unzip bs
@ -259,9 +269,15 @@ getGroup = do
cs <- replicateM l (getComb vs n)
Rec (zip vs cs) <$> getComb vs n
putComb :: MonadPut m => Var v => [v] -> SuperNormal v -> m ()
putComb ctx (Lambda ccs (TAbss us e)) =
putCCs ccs *> putNormal (us ++ ctx) e
putComb ::
MonadPut m =>
Var v =>
EC.EnumMap FOp Text ->
[v] ->
SuperNormal v ->
m ()
putComb fops ctx (Lambda ccs (TAbss us e)) =
putCCs ccs *> putNormal fops (reverse us ++ ctx) e
getFresh :: Var v => Word64 -> v
getFresh n = freshenId n $ typed ANFBlank
@ -273,29 +289,35 @@ getComb ctx frsh0 = do
frsh = frsh0 + fromIntegral (length ccs)
Lambda ccs . TAbss us <$> getNormal (us ++ ctx) frsh
putNormal :: MonadPut m => Var v => [v] -> ANormal v -> m ()
putNormal ctx tm = case tm of
putNormal ::
MonadPut m =>
Var v =>
EC.EnumMap FOp Text ->
[v] ->
ANormal v ->
m ()
putNormal fops ctx tm = case tm of
TVar v -> putTag VarT *> putVar ctx v
TFrc v -> putTag ForceT *> putVar ctx v
TApp f as -> putTag AppT *> putFunc ctx f *> putArgs ctx as
TApp f as -> putTag AppT *> putFunc fops ctx f *> putArgs ctx as
THnd rs h e ->
putTag HandleT *> putRefs rs *> putVar ctx h *> putNormal ctx e
putTag HandleT *> putRefs rs *> putVar ctx h *> putNormal fops ctx e
TShift r v e ->
putTag ShiftT *> putReference r *> putNormal (v : ctx) e
TMatch v bs -> putTag MatchT *> putVar ctx v *> putBranches ctx bs
putTag ShiftT *> putReference r *> putNormal fops (v : ctx) e
TMatch v bs -> putTag MatchT *> putVar ctx v *> putBranches fops ctx bs
TLit l -> putTag LitT *> putLit l
TName v (Left r) as e ->
putTag NameRefT *> putReference r *> putArgs ctx as
*> putNormal (v : ctx) e
*> putNormal fops (v : ctx) e
TName v (Right u) as e ->
putTag NameVarT *> putVar ctx u *> putArgs ctx as
*> putNormal (v : ctx) e
*> putNormal fops (v : ctx) e
TLets Direct us ccs l e ->
putTag LetDirT *> putCCs ccs *> putNormal ctx l
*> putNormal (us ++ ctx) e
putTag LetDirT *> putCCs ccs *> putNormal fops ctx l
*> putNormal fops (reverse us ++ ctx) e
TLets (Indirect w) us ccs l e ->
putTag LetIndT *> putWord16be w *> putCCs ccs *> putNormal ctx l
*> putNormal (us ++ ctx) e
putTag LetIndT *> putWord16be w *> putCCs ccs *> putNormal fops ctx l
*> putNormal fops (reverse us ++ ctx) e
_ -> exn "putNormal: malformed term"
getNormal :: MonadGet m => Var v => [v] -> Word64 -> m (ANormal v)
@ -343,15 +365,24 @@ getNormal ctx frsh0 =
<$> getNormal ctx frsh0
<*> getNormal (us ++ ctx) frsh
putFunc :: MonadPut m => Var v => [v] -> Func v -> m ()
putFunc ctx f = case f of
putFunc ::
MonadPut m =>
Var v =>
EC.EnumMap FOp Text ->
[v] ->
Func v ->
m ()
putFunc fops ctx f = case f of
FVar v -> putTag FVarT *> putVar ctx v
FComb r -> putTag FCombT *> putReference r
FCont v -> putTag FContT *> putVar ctx v
FCon r c -> putTag FConT *> putReference r *> putCTag c
FReq r c -> putTag FReqT *> putReference r *> putCTag c
FPrim (Left p) -> putTag FPrimT *> putPOp p
FPrim _ -> exn "putFunc: can't serialize foreign func"
FPrim (Right f)
| Just nm <- EC.lookup f fops ->
putTag FForeignT *> putText nm
| otherwise -> exn $ "putFUnc: unknown FOp: " ++ show f
getFunc :: MonadGet m => Var v => [v] -> m (Func v)
getFunc ctx =
@ -362,6 +393,7 @@ getFunc ctx =
FConT -> FCon <$> getReference <*> getCTag
FReqT -> FReq <$> getReference <*> getCTag
FPrimT -> FPrim . Left <$> getPOp
FForeignT -> exn "getFunc: can't deserialize a foreign func"
putPOp :: MonadPut m => POp -> m ()
putPOp op
@ -535,31 +567,37 @@ putRefs rs = putFoldable putReference rs
getRefs :: MonadGet m => m [Reference]
getRefs = getList getReference
putBranches :: MonadPut m => Var v => [v] -> Branched (ANormal v) -> m ()
putBranches ctx bs = case bs of
putBranches ::
MonadPut m =>
Var v =>
EC.EnumMap FOp Text ->
[v] ->
Branched (ANormal v) ->
m ()
putBranches fops ctx bs = case bs of
MatchEmpty -> putTag MEmptyT
MatchIntegral m df -> do
putTag MIntT
putEnumMap putWord64be (putNormal ctx) m
putMaybe df $ putNormal ctx
putEnumMap putWord64be (putNormal fops ctx) m
putMaybe df $ putNormal fops ctx
MatchText m df -> do
putTag MTextT
putMap (putText . Util.Text.toText) (putNormal ctx) m
putMaybe df $ putNormal ctx
putMap (putText . Util.Text.toText) (putNormal fops ctx) m
putMaybe df $ putNormal fops ctx
MatchRequest m (TAbs v df) -> do
putTag MReqT
putMap putReference (putEnumMap putCTag (putCase ctx)) m
putNormal (v : ctx) df
putMap putReference (putEnumMap putCTag (putCase fops ctx)) m
putNormal fops (v : ctx) df
where
MatchData r m df -> do
putTag MDataT
putReference r
putEnumMap putCTag (putCase ctx) m
putMaybe df $ putNormal ctx
putEnumMap putCTag (putCase fops ctx) m
putMaybe df $ putNormal fops ctx
MatchSum m -> do
putTag MSumT
putEnumMap putWord64be (putCase ctx) m
putEnumMap putWord64be (putCase fops ctx) m
_ -> exn "putBranches: malformed intermediate term"
getBranches ::
@ -588,8 +626,15 @@ getBranches ctx frsh0 =
<*> getMaybe (getNormal ctx frsh0)
MSumT -> MatchSum <$> getEnumMap getWord64be (getCase ctx frsh0)
putCase :: MonadPut m => Var v => [v] -> ([Mem], ANormal v) -> m ()
putCase ctx (ccs, (TAbss us e)) = putCCs ccs *> putNormal (us ++ ctx) e
putCase ::
MonadPut m =>
Var v =>
EC.EnumMap FOp Text ->
[v] ->
([Mem], ANormal v) ->
m ()
putCase fops ctx (ccs, (TAbss us e)) =
putCCs ccs *> putNormal fops (us ++ ctx) e
getCase :: MonadGet m => Var v => [v] -> Word64 -> m ([Mem], ANormal v)
getCase ctx frsh0 = do
@ -684,8 +729,9 @@ deserializeGroup bs = runGetS (getVersion *> getGroup) bs
1 -> pure ()
n -> fail $ "deserializeGroup: unknown version: " ++ show n
serializeGroup :: Var v => SuperGroup v -> ByteString
serializeGroup sg = runPutS (putVersion *> putGroup sg)
serializeGroup ::
Var v => EC.EnumMap FOp Text -> SuperGroup v -> ByteString
serializeGroup fops sg = runPutS (putVersion *> putGroup fops sg)
where
putVersion = putWord32be 1

View File

@ -1683,7 +1683,7 @@ builtinLookup =
++ foreignWrappers
type FDecl v =
State (Word64, [(Data.Text.Text, (Sandbox, SuperNormal v))], EnumMap Word64 ForeignFunc)
State (Word64, [(Data.Text.Text, (Sandbox, SuperNormal v))], EnumMap Word64 (Data.Text.Text, ForeignFunc))
-- Data type to determine whether a builtin should be tracked for
-- sandboxing. Untracked means that it can be freely used, and Tracked
@ -1700,7 +1700,7 @@ declareForeign ::
FDecl Symbol ()
declareForeign sand name op func =
modify $ \(w, cs, fs) ->
(w + 1, (name, (sand, uncurry Lambda (op w))) : cs, mapInsert w func fs)
(w + 1, (name, (sand, uncurry Lambda (op w))) : cs, mapInsert w (name, func) fs)
mkForeignIOF ::
(ForeignConvention a, ForeignConvention r) =>
@ -2086,7 +2086,7 @@ declareForeigns = do
declareForeign Untracked "Code.serialize" boxDirect
. mkForeign
$ \(sg :: SuperGroup Symbol) ->
pure . Bytes.fromArray $ serializeGroup sg
pure . Bytes.fromArray $ serializeGroup builtinForeignNames sg
declareForeign Untracked "Code.deserialize" boxToEBoxBox
. mkForeign
$ pure . deserializeGroup @Symbol . Bytes.toArray
@ -2205,7 +2205,7 @@ typeReferences = zip rs [1 ..]
++ [DerivedId i | (_, i, _) <- Ty.builtinEffectDecls]
foreignDeclResults ::
(Word64, [(Data.Text.Text, (Sandbox, SuperNormal Symbol))], EnumMap Word64 ForeignFunc)
(Word64, [(Data.Text.Text, (Sandbox, SuperNormal Symbol))], EnumMap Word64 (Data.Text.Text, ForeignFunc))
foreignDeclResults = execState declareForeigns (0, [], mempty)
foreignWrappers :: [(Data.Text.Text, (Sandbox, SuperNormal Symbol))]
@ -2232,7 +2232,10 @@ builtinTypeBackref = mapFromList $ swap <$> typeReferences
swap (x, y) = (y, x)
builtinForeigns :: EnumMap Word64 ForeignFunc
builtinForeigns | (_, _, m) <- foreignDeclResults = m
builtinForeigns | (_, _, m) <- foreignDeclResults = snd <$> m
builtinForeignNames :: EnumMap Word64 Data.Text.Text
builtinForeignNames | (_, _, m) <- foreignDeclResults = fst <$> m
-- Bootstrapping for sandbox check. The eventual map will be one with
-- associations `r -> s` where `s` is all the 'sensitive' base

View File

@ -566,7 +566,7 @@ putStoredCache (SCache cs crs trs ftm fty int rtm rty sbs) = do
putEnumMap putNat putReference trs
putNat ftm
putNat fty
putMap putReference putGroup int
putMap putReference (putGroup mempty) int
putMap putReference putNat rtm
putMap putReference putNat rty
putMap putReference (putFoldable putReference) sbs

View File

@ -288,7 +288,13 @@ exec !env !denv !_activeThreads !ustk !bstk !k (BPrim1 LKUP i) = do
m <- readTVarIO (intermed env)
ustk <- bump ustk
bstk <- case M.lookup link m of
Nothing -> bstk <$ poke ustk 0
Nothing
| Just w <- M.lookup link builtinTermNumbering,
Just sn <- EC.lookup w numberedTermLookup -> do
poke ustk 1
bstk <- bump bstk
bstk <$ pokeBi bstk (ANF.Rec [] sn)
| otherwise -> bstk <$ poke ustk 0
Just sg -> do
poke ustk 1
bstk <- bump bstk

View File

@ -106,11 +106,11 @@ ex5 = crypto.hmac Sha2_256 mysecret f |> hex
25 | > ex4
"1e014deb2a1ef1dc3c8765a6f7ebf7184ccaeaecbc2b5428030befd7085139db"
"a52c81c976ff4fe9c809d9896d6dc32775c6272bb100555c507b72f20ace4b39"
26 | > ex5
"c729f5ed4b2a89dc33ae06cd0b925174c990328c736123bc220e6fe8b42d3d53"
"b9f05335381fc8eecba3bfa6e82a4dc23fdab95a04f24b97d14785f0f15f56b4"
```
And here's the full API: