From b76bfdb3953c9e20a9a0e88afa33ca491ef0858b Mon Sep 17 00:00:00 2001 From: Tristan Ravitch Date: Wed, 18 Dec 2019 17:01:49 -0800 Subject: [PATCH] Add a new entry point to macaw-symbolic This version constructs a Crucible CFG for a collection of blocks while preserving control flow between them. It allows the caller to specify blocks that are considered "terminal": those blocks return the current register state. Control flow to blocks no included in the "slice" are directed to synthetic blocks that assume False in order to stop the symbolic simulator from exploring those branches. --- symbolic/src/Data/Macaw/Symbolic.hs | 167 ++++++++++++++++++++ symbolic/src/Data/Macaw/Symbolic/CrucGen.hs | 2 + 2 files changed, 169 insertions(+) diff --git a/symbolic/src/Data/Macaw/Symbolic.hs b/symbolic/src/Data/Macaw/Symbolic.hs index fcd8bb72..9f3a70fd 100644 --- a/symbolic/src/Data/Macaw/Symbolic.hs +++ b/symbolic/src/Data/Macaw/Symbolic.hs @@ -64,6 +64,9 @@ module Data.Macaw.Symbolic -- ** Translating block paths , mkBlockPathRegCFG , mkBlockPathCFG + -- * Translating slices of CFGs + , mkBlockSliceRegCFG + , mkBlockSliceCFG -- ** Post-processing helpers , toCoreCFG -- ** Translation-related types @@ -119,6 +122,7 @@ import qualified Data.Parameterized.Context as Ctx import Data.Parameterized.Nonce ( NonceGenerator, newIONonceGenerator ) import Data.Parameterized.Some ( Some(Some) ) import qualified Data.Parameterized.TraversableFC as FC +import qualified Data.Set as S import qualified Data.Vector as V import qualified What4.FunctionName as C @@ -459,6 +463,169 @@ mkParsedBlockCFG :: forall arch ids mkParsedBlockCFG archFns halloc posFn b = toCoreCFG archFns <$> mkParsedBlockRegCFG archFns halloc posFn b +parsedTermTargets :: M.ParsedTermStmt arch ids -> [M.ArchSegmentOff arch] +parsedTermTargets t = + case t of + M.ParsedCall _ Nothing -> [] + M.ParsedCall _ (Just ret) -> [ret] + M.ParsedJump _ addr -> [addr] + M.ParsedBranch _ _ taddr faddr -> [taddr, faddr] + M.ParsedLookupTable _ _ addrs -> toList addrs + M.ParsedReturn {} -> [] + M.ParsedArchTermStmt _ _ Nothing -> [] + M.ParsedArchTermStmt _ _ (Just addr) -> [addr] + M.PLTStub {} -> [] + M.ParsedTranslateError {} -> [] + M.ClassifyFailure {} -> [] + +-- | See the documentation for 'mkBlockSliceCFG' +mkBlockSliceRegCFG :: forall arch ids + . MacawSymbolicArchFunctions arch + -> C.HandleAllocator + -> (M.ArchSegmentOff arch -> C.Position) + -> M.ParsedBlock arch ids + -- ^ Entry block + -> [M.ParsedBlock arch ids] + -- ^ Non-entry non-terminal blocks + -> [M.ParsedBlock arch ids] + -- ^ Terminal blocks + -> IO (CR.SomeCFG (MacawExt arch) (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch)) +mkBlockSliceRegCFG archFns halloc posFn entry body0 terms = crucGenArchConstraints archFns $ mkCrucRegCFG archFns halloc "" $ do + -- Build up some initial values needed to set up the entry point to the + -- function (including the initial value of all registers) + inputRegId <- mmFreshNonce + let inputReg = CR.Reg { CR.regPosition = entryPos + , CR.regId = inputRegId + , CR.typeOfReg = archRegTy + } + ng <- mmNonceGenerator + inputAtom <- mmExecST (Ctx.last <$> CR.mkInputAtoms ng entryPos (Empty :> archRegTy)) + + -- Allocate Crucible CFG labels for all of the blocks provided by the caller + labelMap0 <- mkBlockLabelMap allBlocks + + -- Add synthetic blocks for all jump targets mentioned by the input blocks, + -- but not included in the list of all blocks. The synthetic blocks simply + -- assume False to indicate to the symbolic execution engine that executions + -- reaching those missing blocks are not feasible paths. + (labelMap, syntheticBlocks) <- foldlM (makeSyntheticBlocks inputReg) (labelMap0, []) allBlocks + + -- Set up a fake entry block that initializes the register file and jumps + -- to the real entry point + entryLabel <- CR.Label <$> mmFreshNonce + (initCrucBlock, initExtraCrucBlocks) <- runCrucGen archFns (offPosFn entryAddr) entryLabel inputReg $ do + setMachineRegs inputAtom + addTermStmt $ CR.Jump (parsedBlockLabel labelMap entryAddr) + + -- Add each block in the slice + -- + -- For blocks marked as terminators, we rewrite their terminator statement + -- into a return. + crucBlocks <- forM allBlocks $ \block -> do + let blockAddr = M.pblockAddr block + let label = case Map.lookup blockAddr labelMap of + Just lbl -> lbl + Nothing -> error ("Missing block label for block at " ++ show blockAddr) + (mainCrucBlock, auxCrucBlocks) <- runCrucGen archFns (offPosFn blockAddr) label inputReg $ do + mapM_ (addMacawStmt blockAddr) (M.pblockStmts block) + case S.member blockAddr termAddrs of + True -> do + -- NOTE: If the entry block is also a terminator, we'll just + -- return at the end of the entry block and ignore all other + -- blocks. This is the intended behavior, but it is an + -- interesting consequence. + + -- Convert the existing terminator into a return. This function + -- preserves the existing register state, which is important when + -- generating the Crucible return. + let retTerm = termStmtToReturn (M.pblockTermStmt block) + addMacawParsedTermStmt labelMap blockAddr retTerm + False -> addMacawParsedTermStmt labelMap blockAddr (M.pblockTermStmt block) + return (reverse (mainCrucBlock : auxCrucBlocks)) + return (entryLabel, initCrucBlock : (initExtraCrucBlocks ++ concat crucBlocks ++ concat syntheticBlocks)) + where + entryAddr = M.pblockAddr entry + entryPos = posFn entryAddr + archRegTy = C.StructRepr (crucArchRegTypes archFns) + -- Addresses of blocks marked as terminators + termAddrs = S.fromList (fmap M.pblockAddr terms) + + -- Blocks are "body blocks" if they are not the entry or marked as + -- terminator blocks. We need this distinction because we modify terminator + -- blocks to end in a return (even if they don't naturally do so). + isBodyBlock :: M.ParsedBlock arch ids -> Bool + isBodyBlock pb = not (S.member (M.pblockAddr pb) termAddrs) && M.pblockAddr pb /= entryAddr + + -- Blocks that are not the entry or terminators + realBody = filter isBodyBlock body0 + -- The list of all blocks without duplicates + allBlocks = entry : (realBody ++ terms) + + offPosFn :: (M.MemWidth (M.ArchAddrWidth arch)) => M.ArchSegmentOff arch -> M.ArchAddrWord arch -> C.Position + offPosFn base = posFn . fromJust . M.incSegmentOff base . toInteger + + -- There may be blocks that are jumped to but not included in the list of + -- blocks provided in this slice. We need to add synthetic blocks to stand in + -- for them. The blocks are simple: they just assert False to indicate that + -- those branches are never taken. + makeSyntheticBlock :: forall s + . (M.MemWidth (M.ArchAddrWidth arch)) + => CR.Reg s (ArchRegStruct arch) + -> (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]]) + -> M.ArchSegmentOff arch + -> MacawMonad arch ids s (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]]) + makeSyntheticBlock inputReg (lm, blks) baddr = + case Map.lookup baddr lm of + Just _ -> return (lm, blks) + Nothing -> do + synLabel <- CR.Label <$> mmFreshNonce + (synBlock, extraSynBlocks) <- runCrucGen archFns (offPosFn baddr) synLabel inputReg $ do + falseAtom <- valueToCrucible (M.BoolValue False) + msg <- appAtom (C.StringLit (C.UnicodeLiteral "Elided block")) + addStmt (CR.Assume falseAtom msg) + errMsg <- crucibleValue (C.StringLit (C.UnicodeLiteral "Elided block")) + addTermStmt (CR.ErrorStmt errMsg) + return (Map.insert baddr synLabel lm, reverse (synBlock : extraSynBlocks) : blks) + + makeSyntheticBlocks :: forall s + . (M.MemWidth (M.ArchAddrWidth arch)) + => CR.Reg s (ArchRegStruct arch) + -> (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]]) + -> M.ParsedBlock arch ids + -> MacawMonad arch ids s (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]]) + makeSyntheticBlocks inputReg (lm, blks) blk = + foldlM (makeSyntheticBlock inputReg) (lm, blks) (parsedTermTargets (M.pblockTermStmt blk)) + + +-- | Construct a Crucible CFG from a (possibly incomplete) collection of macaw blocks +-- +-- The CFG starts with the provided entry block and returns from the terminal +-- block. Control flow between the remaining (body) blocks is preserved. If a +-- block ends in a branch to a block not included in the body, the translation +-- will generate a new block that simply asserts false (i.e., that execution +-- should never reach that block). The terminal block will have its term +-- statement translated into a return. +-- +-- The entry and terminal block can be the same, in which case the body is +-- expected to be empty (and will be ignored). +-- +-- The intended use of this function is to ask for models of registers after a +-- subset of code in a function has executed by examining the register state +-- after the fragment executes. +mkBlockSliceCFG :: forall arch ids + . MacawSymbolicArchFunctions arch + -> C.HandleAllocator + -> (M.ArchSegmentOff arch -> C.Position) + -> M.ParsedBlock arch ids + -- ^ Entry block + -> [M.ParsedBlock arch ids] + -- ^ Non-entry non-terminal blocks + -> [M.ParsedBlock arch ids] + -- ^ Terminal blocks + -> IO (C.SomeCFG (MacawExt arch) (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch)) +mkBlockSliceCFG archFns halloc posFn entry body terms = + toCoreCFG archFns <$> mkBlockSliceRegCFG archFns halloc posFn entry body terms + mkBlockPathRegCFG :: forall arch ids . MacawSymbolicArchFunctions arch diff --git a/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs b/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs index b8b63c58..04db32a5 100644 --- a/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs +++ b/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs @@ -75,6 +75,8 @@ module Data.Macaw.Symbolic.CrucGen , getRegValue , bvLit , archAddrWidth + , evalAtom + , crucibleValue ) where import Control.Lens hiding (Empty, (:>))