Add a new entry point to macaw-symbolic

This version constructs a Crucible CFG for a collection of blocks while
preserving control flow between them.  It allows the caller to specify blocks
that are considered "terminal": those blocks return the current register state.
Control flow to blocks no included in the "slice" are directed to synthetic
blocks that assume False in order to stop the symbolic simulator from exploring
those branches.
This commit is contained in:
Tristan Ravitch 2019-12-18 17:01:49 -08:00
parent a744ca7a95
commit b76bfdb395
2 changed files with 169 additions and 0 deletions

View File

@ -64,6 +64,9 @@ module Data.Macaw.Symbolic
-- ** Translating block paths
, mkBlockPathRegCFG
, mkBlockPathCFG
-- * Translating slices of CFGs
, mkBlockSliceRegCFG
, mkBlockSliceCFG
-- ** Post-processing helpers
, toCoreCFG
-- ** Translation-related types
@ -119,6 +122,7 @@ import qualified Data.Parameterized.Context as Ctx
import Data.Parameterized.Nonce ( NonceGenerator, newIONonceGenerator )
import Data.Parameterized.Some ( Some(Some) )
import qualified Data.Parameterized.TraversableFC as FC
import qualified Data.Set as S
import qualified Data.Vector as V
import qualified What4.FunctionName as C
@ -459,6 +463,169 @@ mkParsedBlockCFG :: forall arch ids
mkParsedBlockCFG archFns halloc posFn b =
toCoreCFG archFns <$> mkParsedBlockRegCFG archFns halloc posFn b
parsedTermTargets :: M.ParsedTermStmt arch ids -> [M.ArchSegmentOff arch]
parsedTermTargets t =
case t of
M.ParsedCall _ Nothing -> []
M.ParsedCall _ (Just ret) -> [ret]
M.ParsedJump _ addr -> [addr]
M.ParsedBranch _ _ taddr faddr -> [taddr, faddr]
M.ParsedLookupTable _ _ addrs -> toList addrs
M.ParsedReturn {} -> []
M.ParsedArchTermStmt _ _ Nothing -> []
M.ParsedArchTermStmt _ _ (Just addr) -> [addr]
M.PLTStub {} -> []
M.ParsedTranslateError {} -> []
M.ClassifyFailure {} -> []
-- | See the documentation for 'mkBlockSliceCFG'
mkBlockSliceRegCFG :: forall arch ids
. MacawSymbolicArchFunctions arch
-> C.HandleAllocator
-> (M.ArchSegmentOff arch -> C.Position)
-> M.ParsedBlock arch ids
-- ^ Entry block
-> [M.ParsedBlock arch ids]
-- ^ Non-entry non-terminal blocks
-> [M.ParsedBlock arch ids]
-- ^ Terminal blocks
-> IO (CR.SomeCFG (MacawExt arch) (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch))
mkBlockSliceRegCFG archFns halloc posFn entry body0 terms = crucGenArchConstraints archFns $ mkCrucRegCFG archFns halloc "" $ do
-- Build up some initial values needed to set up the entry point to the
-- function (including the initial value of all registers)
inputRegId <- mmFreshNonce
let inputReg = CR.Reg { CR.regPosition = entryPos
, CR.regId = inputRegId
, CR.typeOfReg = archRegTy
}
ng <- mmNonceGenerator
inputAtom <- mmExecST (Ctx.last <$> CR.mkInputAtoms ng entryPos (Empty :> archRegTy))
-- Allocate Crucible CFG labels for all of the blocks provided by the caller
labelMap0 <- mkBlockLabelMap allBlocks
-- Add synthetic blocks for all jump targets mentioned by the input blocks,
-- but not included in the list of all blocks. The synthetic blocks simply
-- assume False to indicate to the symbolic execution engine that executions
-- reaching those missing blocks are not feasible paths.
(labelMap, syntheticBlocks) <- foldlM (makeSyntheticBlocks inputReg) (labelMap0, []) allBlocks
-- Set up a fake entry block that initializes the register file and jumps
-- to the real entry point
entryLabel <- CR.Label <$> mmFreshNonce
(initCrucBlock, initExtraCrucBlocks) <- runCrucGen archFns (offPosFn entryAddr) entryLabel inputReg $ do
setMachineRegs inputAtom
addTermStmt $ CR.Jump (parsedBlockLabel labelMap entryAddr)
-- Add each block in the slice
--
-- For blocks marked as terminators, we rewrite their terminator statement
-- into a return.
crucBlocks <- forM allBlocks $ \block -> do
let blockAddr = M.pblockAddr block
let label = case Map.lookup blockAddr labelMap of
Just lbl -> lbl
Nothing -> error ("Missing block label for block at " ++ show blockAddr)
(mainCrucBlock, auxCrucBlocks) <- runCrucGen archFns (offPosFn blockAddr) label inputReg $ do
mapM_ (addMacawStmt blockAddr) (M.pblockStmts block)
case S.member blockAddr termAddrs of
True -> do
-- NOTE: If the entry block is also a terminator, we'll just
-- return at the end of the entry block and ignore all other
-- blocks. This is the intended behavior, but it is an
-- interesting consequence.
-- Convert the existing terminator into a return. This function
-- preserves the existing register state, which is important when
-- generating the Crucible return.
let retTerm = termStmtToReturn (M.pblockTermStmt block)
addMacawParsedTermStmt labelMap blockAddr retTerm
False -> addMacawParsedTermStmt labelMap blockAddr (M.pblockTermStmt block)
return (reverse (mainCrucBlock : auxCrucBlocks))
return (entryLabel, initCrucBlock : (initExtraCrucBlocks ++ concat crucBlocks ++ concat syntheticBlocks))
where
entryAddr = M.pblockAddr entry
entryPos = posFn entryAddr
archRegTy = C.StructRepr (crucArchRegTypes archFns)
-- Addresses of blocks marked as terminators
termAddrs = S.fromList (fmap M.pblockAddr terms)
-- Blocks are "body blocks" if they are not the entry or marked as
-- terminator blocks. We need this distinction because we modify terminator
-- blocks to end in a return (even if they don't naturally do so).
isBodyBlock :: M.ParsedBlock arch ids -> Bool
isBodyBlock pb = not (S.member (M.pblockAddr pb) termAddrs) && M.pblockAddr pb /= entryAddr
-- Blocks that are not the entry or terminators
realBody = filter isBodyBlock body0
-- The list of all blocks without duplicates
allBlocks = entry : (realBody ++ terms)
offPosFn :: (M.MemWidth (M.ArchAddrWidth arch)) => M.ArchSegmentOff arch -> M.ArchAddrWord arch -> C.Position
offPosFn base = posFn . fromJust . M.incSegmentOff base . toInteger
-- There may be blocks that are jumped to but not included in the list of
-- blocks provided in this slice. We need to add synthetic blocks to stand in
-- for them. The blocks are simple: they just assert False to indicate that
-- those branches are never taken.
makeSyntheticBlock :: forall s
. (M.MemWidth (M.ArchAddrWidth arch))
=> CR.Reg s (ArchRegStruct arch)
-> (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]])
-> M.ArchSegmentOff arch
-> MacawMonad arch ids s (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]])
makeSyntheticBlock inputReg (lm, blks) baddr =
case Map.lookup baddr lm of
Just _ -> return (lm, blks)
Nothing -> do
synLabel <- CR.Label <$> mmFreshNonce
(synBlock, extraSynBlocks) <- runCrucGen archFns (offPosFn baddr) synLabel inputReg $ do
falseAtom <- valueToCrucible (M.BoolValue False)
msg <- appAtom (C.StringLit (C.UnicodeLiteral "Elided block"))
addStmt (CR.Assume falseAtom msg)
errMsg <- crucibleValue (C.StringLit (C.UnicodeLiteral "Elided block"))
addTermStmt (CR.ErrorStmt errMsg)
return (Map.insert baddr synLabel lm, reverse (synBlock : extraSynBlocks) : blks)
makeSyntheticBlocks :: forall s
. (M.MemWidth (M.ArchAddrWidth arch))
=> CR.Reg s (ArchRegStruct arch)
-> (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]])
-> M.ParsedBlock arch ids
-> MacawMonad arch ids s (Map.Map (M.ArchSegmentOff arch) (CR.Label s), [[CR.Block (MacawExt arch) s (ArchRegStruct arch)]])
makeSyntheticBlocks inputReg (lm, blks) blk =
foldlM (makeSyntheticBlock inputReg) (lm, blks) (parsedTermTargets (M.pblockTermStmt blk))
-- | Construct a Crucible CFG from a (possibly incomplete) collection of macaw blocks
--
-- The CFG starts with the provided entry block and returns from the terminal
-- block. Control flow between the remaining (body) blocks is preserved. If a
-- block ends in a branch to a block not included in the body, the translation
-- will generate a new block that simply asserts false (i.e., that execution
-- should never reach that block). The terminal block will have its term
-- statement translated into a return.
--
-- The entry and terminal block can be the same, in which case the body is
-- expected to be empty (and will be ignored).
--
-- The intended use of this function is to ask for models of registers after a
-- subset of code in a function has executed by examining the register state
-- after the fragment executes.
mkBlockSliceCFG :: forall arch ids
. MacawSymbolicArchFunctions arch
-> C.HandleAllocator
-> (M.ArchSegmentOff arch -> C.Position)
-> M.ParsedBlock arch ids
-- ^ Entry block
-> [M.ParsedBlock arch ids]
-- ^ Non-entry non-terminal blocks
-> [M.ParsedBlock arch ids]
-- ^ Terminal blocks
-> IO (C.SomeCFG (MacawExt arch) (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch))
mkBlockSliceCFG archFns halloc posFn entry body terms =
toCoreCFG archFns <$> mkBlockSliceRegCFG archFns halloc posFn entry body terms
mkBlockPathRegCFG
:: forall arch ids
. MacawSymbolicArchFunctions arch

View File

@ -75,6 +75,8 @@ module Data.Macaw.Symbolic.CrucGen
, getRegValue
, bvLit
, archAddrWidth
, evalAtom
, crucibleValue
) where
import Control.Lens hiding (Empty, (:>))