diff --git a/base/src/Data/Macaw/Analysis/FunctionArgs.hs b/base/src/Data/Macaw/Analysis/FunctionArgs.hs index 4e85a9cf..1f4e7b76 100644 --- a/base/src/Data/Macaw/Analysis/FunctionArgs.hs +++ b/base/src/Data/Macaw/Analysis/FunctionArgs.hs @@ -531,20 +531,20 @@ summarizeBlock :: forall arch ids . ArchConstraints arch => Memory (ArchAddrWidth arch) -> DiscoveryFunInfo arch ids - -> ArchSegmentOff arch -- ^ Address of the code. - -> StatementList arch ids -- ^ Current block + -> ParsedBlock arch ids -- ^ Current block -> FunctionArgsM arch ids () -summarizeBlock mem interpState addr stmts = do - let lbl = GeneratedBlock addr (stmtsIdent stmts) +summarizeBlock mem interpState b = do + let addr = pblockAddr b + let lbl = GeneratedBlock addr 0 -- Add this label to block demand map with empty set. addBlockDemands lbl mempty ctx <- gets $ demandInfoCtx . archDemandInfo -- Add all values demanded by non-terminal statements in list. mapM_ (mapM_ (\(Some v) -> demandValue lbl v) . stmtDemandedValues ctx) - (stmtsNonterm stmts) + (pblockNonterm b) -- Add values demanded by terminal statements - case stmtsTerm stmts of + case pblockTerm b of ParsedCall finalRegs m_ret_addr -> do -- Record the demands based on the call, and add edges between -- this note and next nodes. @@ -592,14 +592,6 @@ summarizeBlock mem interpState addr stmts = do demands <- withAssignmentCache $ traverse regDemandSet retRegs addBlockDemands lbl $ Map.fromList $ zip demandTypes demands - - - ParsedIte c tblock fblock -> do - -- Demand condition then summarize recursive blocks. - demandValue lbl c - summarizeBlock mem interpState addr tblock - summarizeBlock mem interpState addr fblock - ParsedArchTermStmt tstmt finalRegs next_addr -> do -- Compute effects of terminal statement. ainfo <- gets $ archDemandInfo @@ -632,9 +624,9 @@ summarizeIter mem ist = do case fnFrontier of [] -> return () - reg : frontier' -> do + b : frontier' -> do blockFrontier .= frontier' - summarizeBlock mem ist (pblockAddr reg) (blockStatementList reg) + summarizeBlock mem ist b summarizeIter mem ist diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 0cc6f776..ffcec031 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -53,8 +53,9 @@ module Data.Macaw.Discovery , State.pblockAddr , State.blockSize , State.blockReason - , State.blockStatementList - , State.StatementList(..) + , State.pblockNonterm + , State.blockFinalAbsState + , State.pblockTerm , State.ParsedTermStmt(..) -- * Simplification , eliminateDeadStmts @@ -199,10 +200,10 @@ eliminateDeadStmts ainfo bs0 = elimDeadStmtsInBlock demandSet <$> bs0 -- | Add any assignments needed to evaluate statements with side -- effects and terminal statement to demand set. -addStatementListDemands :: StatementList arch ids -> DemandComp arch ids () -addStatementListDemands sl = do - mapM_ addStmtDemands (stmtsNonterm sl) - case stmtsTerm sl of +addParsedBlockDemands :: ParsedBlock arch ids -> DemandComp arch ids () +addParsedBlockDemands b = do + mapM_ addStmtDemands (pblockNonterm b) + case pblockTerm b of ParsedCall regs _ -> do traverseF_ addValueDemands regs PLTStub regs _ _ -> @@ -215,10 +216,6 @@ addStatementListDemands sl = do traverseF_ addValueDemands regs ParsedReturn regs -> do traverseF_ addValueDemands regs - ParsedIte b x y -> do - addValueDemands b - addStatementListDemands x - addStatementListDemands y ParsedArchTermStmt _ regs _ -> do traverseF_ addValueDemands regs ParsedTranslateError _ -> do @@ -226,43 +223,18 @@ addStatementListDemands sl = do ClassifyFailure regs -> do traverseF_ addValueDemands regs --- | Apply the given predicate to all statements in the list and only --- include statements that pass. --- --- Note. This may break the program if one is not careful. -filterStmtList :: (Stmt arch ids -> Bool) - -> StatementList arch ids - -> StatementList arch ids -filterStmtList stmtPred s = do - let term' = - case stmtsTerm s of - ParsedIte b x y -> - let x' = filterStmtList stmtPred x - y' = filterStmtList stmtPred y - in ParsedIte b x' y' - term -> term - in s { stmtsNonterm = filter stmtPred (stmtsNonterm s) - , stmtsTerm = term' - } - --- | Force each nonterminal statement in the list. -seqStmtList :: StatementList arch ids -> () -seqStmtList stmts = foldr seq () (stmtsNonterm stmts) - -- | Eliminate all dead statements in blocks dropUnusedCodeInParsedBlock :: ArchitectureInfo arch - -> ParsedBlock arch ids - -> ParsedBlock arch ids + -> ParsedBlock arch ids + -> ParsedBlock arch ids dropUnusedCodeInParsedBlock ainfo b = -- Important to force the result list here, since otherwise we -- hold onto the entire input list - seqStmtList stmtList' `seq` - b { blockStatementList = stmtList' } - where stmtList' = filterStmtList stmtPred l - l = blockStatementList b + foldr seq () stmts' `seq` b { pblockNonterm = stmts' } + where stmts' = filter stmtPred (pblockNonterm b) demandSet = runDemandComp (archDemandContext ainfo) $ do - addStatementListDemands l + addParsedBlockDemands b stmtPred = stmtNeeded demandSet ------------------------------------------------------------------------ @@ -926,11 +898,19 @@ containsAssignId droppedAssign = hasId v = Any (Set.member (Some droppedAssign) (refsInValue v)) in getAny . foldMapF hasId +-- | Stores the main block features that may changes from parsing a block. +data ParsedContents arch ids = + ParsedContents { parsedNonterm :: !([Stmt arch ids]) + -- ^ The non-terminal statements in the block + , parsedAbsState :: !(AbsProcessorState (ArchReg arch) ids) + -- ^ The abstract state of the block just before terminal + , parsedTerm :: !(ParsedTermStmt arch ids) + -- ^ The terminal statement in the block. + } + -- | This parses a block that ended with a fetch and execute instruction. parseFetchAndExecute :: forall arch ids . ParseContext arch ids - -> State.StatementLabel - -- ^ Index label of this block -> RegState (ArchReg arch) (Value arch ids) -- ^ Initial register values -> Seq (Stmt arch ids) @@ -938,13 +918,10 @@ parseFetchAndExecute :: forall arch ids -- ^ Abstract state of registers prior to blocks being executed. -> RegState (ArchReg arch) (Value arch ids) -- ^ Final register values - -> State (ParseState arch ids) (StatementList arch ids, StatementLabel) - -- ^ Returns the StatementList constructed from - -- the FetchAndExecute parsing, along with the - -- next StatementLabel to assign (StatementLists - -- can be a recursive tree, e.g. with a - -- 'ParsedIte' in 'ParsedTermStatement'). -parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do + -> State (ParseState arch ids) (ParsedContents arch ids) + -- ^ Returns the parsed statements, terminal and + -- abstract state. +parseFetchAndExecute ctx initRegs stmts absProcState finalRegs = do let mem = pctxMemory ctx let ainfo = pctxArchInfo ctx let absProcState' = absEvalStmts ainfo absProcState stmts @@ -962,43 +939,27 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do mapM_ (recordWriteStmt ainfo mem absProcState') stmts - -- Registers with true value set. - let trueRegs = finalRegs & boundValue ip_reg .~ t - -- Abstract state after true regs. - let trueEvalState = - let refinedTrueState = refineProcStateBounds c True $ + let trueAbsState = + -- Registers with true value set. + let trueRegs = finalRegs & boundValue ip_reg .~ t + refinedTrueState = refineProcStateBounds c True $ refineProcState c absTrue absProcState' - in absEvalStmts ainfo refinedTrueState stmts - let trueAbsState = finalAbsBlockState trueEvalState trueRegs + trueEvalState = absEvalStmts ainfo refinedTrueState stmts + in finalAbsBlockState trueEvalState trueRegs - -- Merge block state and add intra jump target. - - let tStmts = StatementList { stmtsIdent = idx+1 - , stmtsNonterm = [] - , stmtsTerm = ParsedJump trueRegs trueTgtAddr - , stmtsAbsState = trueEvalState - } - - let falseRegs = finalRegs & boundValue ip_reg .~ f - let falseEvalState = - let refinedFalseState = refineProcStateBounds c False $ + let falseAbsState = + let falseRegs = finalRegs & boundValue ip_reg .~ f + refinedFalseState = refineProcStateBounds c False $ refineProcState c absFalse absProcState' - in absEvalStmts ainfo refinedFalseState stmts - let falseAbsState = finalAbsBlockState falseEvalState falseRegs - intraJumpTargets %= ([(trueTgtAddr, trueAbsState), (falseTgtAddr, falseAbsState)]++) - let fStmts = StatementList { stmtsIdent = idx + 2 - , stmtsNonterm = [] - , stmtsTerm = ParsedJump falseRegs falseTgtAddr - , stmtsAbsState = falseEvalState - } + falseEvalState = absEvalStmts ainfo refinedFalseState stmts + in finalAbsBlockState falseEvalState falseRegs - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ParsedIte c tStmts fStmts - , stmtsAbsState = absProcState' - } - pure (ret, idx + 3) + intraJumpTargets %= ([(trueTgtAddr, trueAbsState), (falseTgtAddr, falseAbsState)]++) + pure $! ParsedContents { parsedNonterm = toList stmts + , parsedAbsState = absProcState' + , parsedTerm = ParsedBranch finalRegs c trueTgtAddr falseTgtAddr + } -- Use architecture-specific callback to check if last statement was a call. -- Note that in some cases the call is known not to return, and thus @@ -1012,13 +973,10 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do addNewFunctionAddrs $ identifyCallTargets mem abst finalRegs -- Use the call-specific code to look for new IPs. - - let r = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ParsedCall finalRegs (Just ret) - , stmtsAbsState = absProcState' - } - pure (r, idx+1) + pure $! ParsedContents { parsedNonterm = toList stmts + , parsedAbsState = absProcState' + , parsedTerm = ParsedCall finalRegs (Just ret) + } -- This block ends with a return as identified by the -- architecture-specific processing. Basic return @@ -1034,12 +992,10 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do | Just prev_stmts <- identifyReturn ainfo stmts finalRegs absProcState' -> do mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedReturn finalRegs - , stmtsAbsState = absProcState' - } - pure (ret, idx+1) + pure $! ParsedContents { parsedNonterm = toList prev_stmts + , parsedAbsState = absProcState' + , parsedTerm = ParsedReturn finalRegs + } -- Jump to a block within this function. | Just tgt_mseg <- valueAsSegmentOff mem (finalRegs^.boundValue ip_reg) @@ -1051,12 +1007,10 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do let abst = finalAbsBlockState absProcState' finalRegs let abst' = abst & setAbsIP tgt_mseg intraJumpTargets %= ((tgt_mseg, abst'):) - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ParsedJump finalRegs tgt_mseg - , stmtsAbsState = absProcState' - } - pure (ret, idx+1) + pure $! ParsedContents { parsedNonterm = toList stmts + , parsedAbsState = absProcState' + , parsedTerm = ParsedJump finalRegs tgt_mseg + } -- Block ends with what looks like a jump table. | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (finalRegs^.curIP) -> do @@ -1072,16 +1026,13 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do intraJumpTargets %= ((tgtAddr, abst'):) let term = ParsedLookupTable finalRegs jumpIndex entries - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = term - , stmtsAbsState = absProcState' - } - pure (ret,idx+1) + pure $! ParsedContents { parsedNonterm = toList stmts + , parsedAbsState = absProcState' + , parsedTerm = term + } -- Code for PLT entry - _ | 0 <- idx - , AssignedValue (Assignment valId v) <- finalRegs^.boundValue ip_reg + _ | AssignedValue (Assignment valId v) <- finalRegs^.boundValue ip_reg , ReadMem gotVal _repr <- v , Just gotAddr <- valueAsMemAddr gotVal , Just gotSegOff <- asSegmentOff mem gotAddr @@ -1094,12 +1045,10 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do , not (containsAssignId valId strippedRegs) -> do mapM_ (recordWriteStmt ainfo mem absProcState') strippedStmts - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList strippedStmts - , stmtsTerm = PLTStub strippedRegs gotSegOff r - , stmtsAbsState = absEvalStmts ainfo absProcState strippedStmts - } - pure (ret, idx+1) + pure $! ParsedContents { parsedNonterm = toList strippedStmts + , parsedAbsState = absEvalStmts ainfo absProcState strippedStmts + , parsedTerm = PLTStub strippedRegs gotSegOff r + } -- Check for tail call when the calling convention seems to be satisfied. | spVal <- finalRegs^.boundValue sp_reg @@ -1113,17 +1062,14 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do -- Block that ends with some unknown | otherwise -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ClassifyFailure finalRegs - , stmtsAbsState = absProcState' - } - pure (ret,idx+1) + pure $! ParsedContents { parsedNonterm = toList stmts + , parsedAbsState = absProcState' + , parsedTerm = ClassifyFailure finalRegs + } where finishWithTailCall :: RegisterInfo (ArchReg arch) => AbsProcessorState (ArchReg arch) ids - -> State (ParseState arch ids) ( StatementList arch ids - , State.StatementLabel) + -> State (ParseState arch ids) (ParsedContents arch ids) finishWithTailCall absProcState' = do let mem = pctxMemory ctx mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts @@ -1135,50 +1081,40 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do -- Look for new instruction pointers addNewFunctionAddrs $ identifyConcreteAddresses mem (abst^.absRegState^.curIP) - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ParsedCall finalRegs Nothing - , stmtsAbsState = absProcState' - } - seq ret $ pure (ret,idx+1) + pure $! ParsedContents { parsedNonterm = toList stmts + , parsedAbsState = absProcState' + , parsedTerm = ParsedCall finalRegs Nothing + } -- | this evalutes the statements in a block to expand the information known -- about control flow targets of this block. parseBlock :: ParseContext arch ids -- ^ Context for parsing blocks. - -> State.StatementLabel - -- ^ Index for next statements -> RegState (ArchReg arch) (Value arch ids) -- ^ Initial register values -> Block arch ids -- ^ Block to parse -> AbsProcessorState (ArchReg arch) ids -- ^ Abstract state at start of block - -> State (ParseState arch ids) ( StatementList arch ids - , State.StatementLabel) - -- ^ Returns the StatementList constructed from the - -- parsing, along with the next StatementLabel to assign - -- (StatementLists can be a recursive tree, e.g. with a - -- 'ParsedIte' in 'ParsedTermStatement'). -parseBlock ctx idx initRegs b absProcState = do - let mem = pctxMemory ctx + -> State (ParseState arch ids) (ParsedContents arch ids) + -- ^ Returns the parsed statements, terminal and pre-terminal + -- abstract state. +parseBlock ctx initRegs b absProcState = do + let mem = pctxMemory ctx let ainfo = pctxArchInfo ctx withArchConstraints ainfo $ do case blockTerm b of FetchAndExecute finalRegs -> do - parseFetchAndExecute ctx idx initRegs (Seq.fromList (blockStmts b)) absProcState finalRegs + parseFetchAndExecute ctx initRegs (Seq.fromList (blockStmts b)) absProcState finalRegs -- Do nothing when this block ends in a translation error. TranslateError _ msg -> do -- FIXME: we should propagate c back to the initial block, not just b let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) - - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = blockStmts b - , stmtsTerm = ParsedTranslateError msg - , stmtsAbsState = absProcState' - } - pure (ret, idx+1) + pure $! ParsedContents { parsedNonterm = blockStmts b + , parsedAbsState = absProcState' + , parsedTerm = ParsedTranslateError msg + } ArchTermStmt ts s -> do -- FIXME: we should propagate c back to the initial block, not just b let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) @@ -1190,12 +1126,10 @@ parseBlock ctx idx initRegs b absProcState = do Just (addr,post) -> intraJumpTargets %= ((addr, post):) Nothing -> pure () - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = blockStmts b - , stmtsTerm = ParsedArchTermStmt ts s (fst <$> r) - , stmtsAbsState = absProcState' - } - pure (ret, idx+1) + pure $! ParsedContents { parsedNonterm = blockStmts b + , parsedTerm = ParsedArchTermStmt ts s (fst <$> r) + , parsedAbsState = absProcState' + } -- | This evaluates the statements in a block to expand the information known -- about control flow targets of this block. @@ -1225,12 +1159,14 @@ addBlock src finfo initRegs sz b = do , _intraJumpTargets = [] , _newFunctionAddrs = [] } - let ((pblock,_), ps) = runState (parseBlock ctx 0 initRegs b regs) ps0 + let (pc, ps) = runState (parseBlock ctx initRegs b regs) ps0 let pb = ParsedBlock { pblockAddr = src , blockSize = sz , blockReason = foundReason finfo , blockAbstractState = foundAbstractState finfo - , blockStatementList = pblock + , pblockNonterm = parsedNonterm pc + , blockFinalAbsState = parsedAbsState pc + , pblockTerm = parsedTerm pc } let pb' = dropUnusedCodeInParsedBlock (archInfo s) pb id %= addFunBlock src pb' @@ -1244,17 +1180,13 @@ recordErrorBlock addr finfo maybeError = do s <- use curFunCtx let mem = memory s let errMsg = maybe "Unknown error" Text.pack maybeError - let stmts = StatementList - { stmtsIdent = 0 - , stmtsNonterm = [] - , stmtsTerm = ParsedTranslateError errMsg - , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) - } let pb = ParsedBlock { pblockAddr = addr , blockSize = 0 , blockReason = foundReason finfo , blockAbstractState = foundAbstractState finfo - , blockStatementList = stmts + , pblockNonterm = [] + , blockFinalAbsState = initAbsProcessorState mem (foundAbstractState finfo) + , pblockTerm = ParsedTranslateError errMsg } id %= addFunBlock addr pb diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index 97f171e3..a0eb43f7 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -21,7 +21,6 @@ representing this information. module Data.Macaw.Discovery.State ( GlobalDataInfo(..) , ParsedTermStmt(..) - , StatementList(..), StatementLabel , ParsedBlock(..) -- * The interpreter state , DiscoveryState @@ -58,7 +57,6 @@ import Data.Set (Set) import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Vector as V -import Data.Word import Numeric (showHex) import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) @@ -182,8 +180,6 @@ data ParsedTermStmt arch ids !(V.Vector (ArchSegmentOff arch)) -- | A return with the given registers. | ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) - -- | An if-then-else - | ParsedIte !(Value arch ids BoolType) !(StatementList arch ids) !(StatementList arch ids) -- | An architecture-specific statement with the registers prior to execution, and -- the given next control flow address. | ParsedArchTermStmt !(ArchTermStmt arch ids) @@ -194,20 +190,10 @@ data ParsedTermStmt arch ids -- | The classifier failed to identity the block. | ClassifyFailure !(RegState (ArchReg arch) (Value arch ids)) --- | Pretty print the block contents indented inside brackets. -ppStatementList :: ArchConstraints arch => (ArchAddrWord arch -> Doc) -> StatementList arch ids -> Doc -ppStatementList ppOff b = - text "{" <$$> - indent 2 (vcat (ppStmt ppOff <$> stmtsNonterm b) <$$> - ppTermStmt ppOff (stmtsTerm b)) <$$> - text "}" - ppTermStmt :: ArchConstraints arch - => (ArchAddrWord arch -> Doc) - -- ^ Given an address offset, this prints the value - -> ParsedTermStmt arch ids + => ParsedTermStmt arch ids -> Doc -ppTermStmt ppOff tstmt = +ppTermStmt tstmt = case tstmt of ParsedCall s Nothing -> text "tail_call" <$$> @@ -232,10 +218,6 @@ ppTermStmt ppOff tstmt = ParsedReturn s -> text "return" <$$> indent 2 (pretty s) - ParsedIte c t f -> - text "ite" <+> pretty c <$$> - ppStatementList ppOff t <$$> - ppStatementList ppOff f ParsedArchTermStmt ts s maddr -> let addrDoc = case maddr of Just a -> text ", return to" <+> text (show a) @@ -249,32 +231,7 @@ ppTermStmt ppOff tstmt = indent 2 (pretty s) instance ArchConstraints arch => Show (ParsedTermStmt arch ids) where - show = show . ppTermStmt (text . show) - ------------------------------------------------------------------------- --- StatementList - --- | The type of label for each StatementList -type StatementLabel = Word64 - --- | This is a code block after we have classified the control flow --- statement(s) that the block ends with. -data StatementList arch ids - = StatementList { stmtsIdent :: !StatementLabel - -- ^ An index for uniquely identifying the block. - -- - -- This is primarily used so that we can reference - -- which branch lead to a particular next state. - , stmtsNonterm :: !([Stmt arch ids]) - -- ^ The non-terminal statements in the block - , stmtsTerm :: !(ParsedTermStmt arch ids) - -- ^ The terminal statement in the block. - , stmtsAbsState :: !(AbsProcessorState (ArchReg arch) ids) - -- ^ The abstract state of the block just before terminal - } - -deriving instance ArchConstraints arch - => Show (StatementList arch ids) + show = show . ppTermStmt ------------------------------------------------------------------------ -- ParsedBlock @@ -291,8 +248,12 @@ data ParsedBlock arch ids , blockAbstractState :: !(AbsBlockState (ArchReg arch)) -- ^ Abstract state prior to the execution of -- this region. - , blockStatementList :: !(StatementList arch ids) - -- ^ Returns the entry block for the region + , pblockNonterm :: !([Stmt arch ids]) + -- ^ The non-terminal statements in the block + , blockFinalAbsState :: !(AbsProcessorState (ArchReg arch) ids) + -- ^ The abstract state of the block just before terminal + , pblockTerm :: !(ParsedTermStmt arch ids) + -- ^ The terminal statement in the block. } deriving instance ArchConstraints arch @@ -301,10 +262,9 @@ deriving instance ArchConstraints arch instance ArchConstraints arch => Pretty (ParsedBlock arch ids) where pretty b = - let sl = blockStatementList b - ppOff o = text (show (incAddr (toInteger o) (segoffAddr (pblockAddr b)))) + let ppOff o = text (show (incAddr (toInteger o) (segoffAddr (pblockAddr b)))) in text (show (pblockAddr b)) PP.<> text ":" <$$> - indent 2 (vcat (ppStmt ppOff <$> stmtsNonterm sl) <$$> ppTermStmt ppOff (stmtsTerm sl)) + indent 2 (vcat (ppStmt ppOff <$> pblockNonterm b) <$$> ppTermStmt (pblockTerm b)) ------------------------------------------------------------------------ -- DiscoveryFunInfo diff --git a/symbolic/src/Data/Macaw/Symbolic.hs b/symbolic/src/Data/Macaw/Symbolic.hs index 1ec3b994..998e9487 100644 --- a/symbolic/src/Data/Macaw/Symbolic.hs +++ b/symbolic/src/Data/Macaw/Symbolic.hs @@ -336,60 +336,45 @@ mkBlocksCFG archFns halloc memBaseVarMap nm addr posFn macawBlock = mkBlockLabelMap :: [M.ParsedBlock arch ids] -> MacawMonad arch ids h s (BlockLabelMap arch s) mkBlockLabelMap blks = foldM insBlock Map.empty blks where insBlock :: BlockLabelMap arch s -> M.ParsedBlock arch ids -> MacawMonad arch ids h s (BlockLabelMap arch s) - insBlock m b = insSentences (M.pblockAddr b) m [M.blockStatementList b] - - insSentences :: M.ArchSegmentOff arch - -> (BlockLabelMap arch s) - -> [M.StatementList arch ids] - -> MacawMonad arch ids h s (BlockLabelMap arch s) - insSentences _ m [] = return m - insSentences base m (s:r) = do + insBlock m b = do + let base = M.pblockAddr b n <- mmFreshNonce - insSentences base - (Map.insert (base,M.stmtsIdent s) (CR.Label n) m) - (nextStatements (M.stmtsTerm s) ++ r) + pure $! Map.insert base (CR.Label n) m -- | Normalise any term statements to returns --- i.e., remove branching, jumping, etc. -- -- This is used when translating a single Macaw block into Crucible, as Crucible -- functions must end in a return. -termStmtToReturn :: forall arch ids. M.StatementList arch ids -> M.StatementList arch ids -termStmtToReturn sl = sl { M.stmtsTerm = tm } - where - tm :: M.ParsedTermStmt arch ids - tm = case M.stmtsTerm sl of - tm0@M.ParsedReturn{} -> tm0 - M.ParsedCall r _ -> M.ParsedReturn r - M.ParsedJump r _ -> M.ParsedReturn r - M.ParsedBranch r _ _ _ -> M.ParsedReturn r - M.ParsedLookupTable r _ _ -> M.ParsedReturn r - M.ParsedIte b l r -> M.ParsedIte b (termStmtToReturn l) (termStmtToReturn r) - M.ParsedArchTermStmt _ r _ -> M.ParsedReturn r - M.ClassifyFailure r -> M.ParsedReturn r - tm0@M.PLTStub{} -> tm0 - tm0@M.ParsedTranslateError{} -> tm0 +termStmtToReturn :: forall arch ids. M.ParsedTermStmt arch ids -> M.ParsedTermStmt arch ids +termStmtToReturn tm0 = + case tm0 of + M.ParsedReturn{} -> tm0 + M.ParsedCall r _ -> M.ParsedReturn r + M.ParsedJump r _ -> M.ParsedReturn r + M.ParsedBranch r _ _ _ -> M.ParsedReturn r + M.ParsedLookupTable r _ _ -> M.ParsedReturn r + M.ParsedArchTermStmt _ r _ -> M.ParsedReturn r + M.ClassifyFailure r -> M.ParsedReturn r + M.PLTStub{} -> tm0 + M.ParsedTranslateError{} -> tm0 -- | Normalise any term statements to jumps. termStmtToJump :: forall arch ids - . M.StatementList arch ids + . M.ParsedTermStmt arch ids -> M.ArchSegmentOff arch - -> M.StatementList arch ids -termStmtToJump sl addr = sl { M.stmtsTerm = tm } - where - tm :: M.ParsedTermStmt arch ids - tm = case M.stmtsTerm sl of - M.ParsedJump r _ -> M.ParsedJump r addr - M.ParsedBranch r _ _ _ -> M.ParsedJump r addr - M.ParsedCall r _ -> M.ParsedJump r addr - M.ParsedReturn r -> M.ParsedJump r addr - M.ParsedLookupTable r _ _ -> M.ParsedJump r addr - M.ParsedIte b l r -> - M.ParsedIte b (termStmtToJump l addr) (termStmtToJump r addr) - M.ParsedArchTermStmt _ r _ -> M.ParsedJump r addr - M.ClassifyFailure r -> M.ParsedJump r addr - tm0@M.PLTStub{} -> tm0 - tm0@M.ParsedTranslateError{} -> tm0 + -> M.ParsedTermStmt arch ids +termStmtToJump tm0 addr = + case tm0 of + M.ParsedJump r _ -> M.ParsedJump r addr + M.ParsedBranch r _ _ _ -> M.ParsedJump r addr + M.ParsedCall r _ -> M.ParsedJump r addr + M.ParsedReturn r -> M.ParsedJump r addr + M.ParsedLookupTable r _ _ -> M.ParsedJump r addr + M.ParsedArchTermStmt _ r _ -> M.ParsedJump r addr + M.ClassifyFailure r -> M.ParsedJump r addr + M.PLTStub{} -> tm0 + M.ParsedTranslateError{} -> tm0 -- | Create a registerized Crucible CFG from a single Macaw 'M.ParsedBlock'. -- Note that the term statement of the block is updated to make it a return (and @@ -415,7 +400,7 @@ mkParsedBlockRegCFG :: forall h arch ids -> ST h (CR.SomeCFG (MacawExt arch) (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch)) mkParsedBlockRegCFG archFns halloc memBaseVarMap posFn b = crucGenArchConstraints archFns $ do mkCrucRegCFG archFns halloc "" $ do - let strippedBlock = b { M.blockStatementList = termStmtToReturn (M.blockStatementList b) } + let strippedBlock = b { M.pblockTerm = termStmtToReturn (M.pblockTerm b) } let entryAddr = M.pblockAddr strippedBlock @@ -448,7 +433,7 @@ mkParsedBlockRegCFG archFns halloc memBaseVarMap posFn b = crucGenArchConstraint -- Initialize value in regReg with initial registers setMachineRegs inputAtom -- Jump to function entry point - addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap entryAddr 0) + addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap entryAddr) -- Generate code for Macaw block after entry crucibleBlock <- addParsedBlock archFns memBaseVarMap blockLabelMap posFn regReg strippedBlock @@ -500,10 +485,10 @@ mkBlockPathRegCFG arch_fns halloc mem_base_var_map pos_fn blocks = let entry_addr = M.pblockAddr $ head blocks let first_blocks = zipWith (\block next_block -> - block { M.blockStatementList = termStmtToJump (M.blockStatementList block) (M.pblockAddr next_block) }) + block { M.pblockTerm = termStmtToJump (M.pblockTerm block) (M.pblockAddr next_block) }) (take (length blocks - 1) blocks) (tail blocks) - let last_block = (last blocks) { M.blockStatementList = termStmtToReturn (M.blockStatementList (last blocks)) } + let last_block = (last blocks) { M.pblockTerm = termStmtToReturn (M.pblockTerm (last blocks)) } let block_path = first_blocks ++ [last_block] -- Get type for representing Machine registers @@ -544,13 +529,12 @@ mkBlockPathRegCFG arch_fns halloc mem_base_var_map pos_fn blocks = -- Initialize value in arch_reg_struct_reg with initial registers setMachineRegs input_atom -- Jump to function entry point - addTermStmt $ CR.Jump (parsedBlockLabel block_label_map entry_addr 0) + addTermStmt $ CR.Jump (parsedBlockLabel block_label_map entry_addr) -- Generate code for Macaw blocks crucible_blocks <- forM block_path $ \block -> do let block_addr = M.pblockAddr block - let stmts = M.blockStatementList block - let label = block_label_map Map.! (block_addr, M.stmtsIdent stmts) + let label = block_label_map Map.! block_addr (first_crucible_block, first_extra_crucible_blocks, off) <- runCrucGen' block_addr label $ do arch_width <- archAddrWidth @@ -562,19 +546,9 @@ mkBlockPathRegCFG arch_fns halloc mem_base_var_map pos_fn blocks = "the current block follows the previous block in the path" addStmt $ CR.Assume cond msg - mapM_ (addMacawStmt block_addr) (M.stmtsNonterm stmts) - addMacawParsedTermStmt block_label_map block_addr (M.stmtsTerm stmts) - - let next_stmts = map ((,) off) $ nextStatements $ M.stmtsTerm stmts - addStatementList - arch_fns - mem_base_var_map - block_label_map - block_addr - (off_pos_fn block_addr) - arch_reg_struct_reg - next_stmts - (first_crucible_block:first_extra_crucible_blocks) + mapM_ (addMacawStmt block_addr) (M.pblockNonterm block) + addMacawParsedTermStmt block_label_map block_addr (M.pblockTerm block) + pure (reverse (first_crucible_block:first_extra_crucible_blocks)) pure (entry_label, init_crucible_block : init_extra_crucible_blocks ++ concat crucible_blocks) @@ -650,7 +624,7 @@ mkFunRegCFG archFns halloc memBaseVarMap nm posFn fn = crucGenArchConstraints ar -- Initialize value in regReg with initial registers setMachineRegs inputAtom -- Jump to function entry point - addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap entryAddr 0) + addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap entryAddr) -- Generate code for Macaw blocks after entry restCrucibleBlocks <- diff --git a/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs b/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs index 9c089899..cba47ed8 100644 --- a/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs +++ b/symbolic/src/Data/Macaw/Symbolic/CrucGen.hs @@ -52,7 +52,6 @@ module Data.Macaw.Symbolic.CrucGen , mmExecST , BlockLabelMap , addParsedBlock - , nextStatements , valueToCrucible , evalArchStmt , MemSegmentMap @@ -65,7 +64,6 @@ module Data.Macaw.Symbolic.CrucGen , ArchAddrWidthRepr , addrWidthIsPos , getRegs - , addStatementList , addMacawStmt , addMacawParsedTermStmt , addStmt @@ -1368,14 +1366,13 @@ addMacawBlock archFns baseAddrMap addr lbl posFn b = do addMacawTermStmt (M.blockTerm b) parsedBlockLabel :: (Ord addr, Show addr) - => Map (addr, Word64) (CR.Label s) + => Map addr (CR.Label s) -- ^ Map from block addresses to starting label -> addr - -> Word64 -> CR.Label s -parsedBlockLabel blockLabelMap addr idx = +parsedBlockLabel blockLabelMap addr = fromMaybe (error $ "Could not find entry point: " ++ show addr) $ - Map.lookup (addr, idx) blockLabelMap + Map.lookup addr blockLabelMap -- | DO NOT CALL THIS FROM USER CODE. We count on the registers not -- changing until the end of the block. @@ -1385,7 +1382,7 @@ setMachineRegs newRegs = do addStmt $ CR.SetReg regReg newRegs -- | Map from block information to Crucible label (used to generate term statements) -type BlockLabelMap arch s = Map (M.ArchSegmentOff arch, Word64) (CR.Label s) +type BlockLabelMap arch s = Map (M.ArchSegmentOff arch) (CR.Label s) addMacawParsedTermStmt :: BlockLabelMap arch s -- ^ Block label map for this function @@ -1405,17 +1402,17 @@ addMacawParsedTermStmt blockLabelMap thisAddr tstmt = do case mret of Just nextAddr -> do setMachineRegs newRegs - addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap nextAddr 0) + addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap nextAddr) Nothing -> addTermStmt $ CR.Return newRegs M.ParsedJump regs nextAddr -> do setMachineRegs =<< createRegStruct regs - addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap nextAddr 0) + addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap nextAddr) M.ParsedBranch regs c trueAddr falseAddr -> do setMachineRegs =<< createRegStruct regs crucCond <- valueToCrucible c - let tlbl = parsedBlockLabel blockLabelMap trueAddr 0 - let flbl = parsedBlockLabel blockLabelMap falseAddr 0 + let tlbl = parsedBlockLabel blockLabelMap trueAddr + let flbl = parsedBlockLabel blockLabelMap falseAddr addTermStmt $! CR.Br crucCond tlbl flbl M.ParsedLookupTable regs idx possibleAddrs -> do setMachineRegs =<< createRegStruct regs @@ -1423,15 +1420,10 @@ addMacawParsedTermStmt blockLabelMap thisAddr tstmt = do M.ParsedReturn regs -> do regValues <- createRegStruct regs addTermStmt $ CR.Return regValues - M.ParsedIte c t f -> do - crucCond <- valueToCrucible c - let tlbl = parsedBlockLabel blockLabelMap thisAddr (M.stmtsIdent t) - let flbl = parsedBlockLabel blockLabelMap thisAddr (M.stmtsIdent f) - addTermStmt $! CR.Br crucCond tlbl flbl M.ParsedArchTermStmt aterm regs mnextAddr -> do crucGenArchTermStmt archFns aterm regs case mnextAddr of - Just nextAddr -> addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap nextAddr 0) + Just nextAddr -> addTermStmt $ CR.Jump (parsedBlockLabel blockLabelMap nextAddr) -- There won't be a next instruction if, for instance, this is -- an X86 HLT instruction. TODO: We may want to do something -- else for an exit syscall, since that's a normal outcome. @@ -1508,7 +1500,7 @@ addSwitch blockLabelMap idx possibleAddrs = do thnIdxAtom <- mkAtom ptrRepr eqAtom <- mkAtom C.BoolRepr - let thnLbl = parsedBlockLabel blockLabelMap thnAddr 0 + let thnLbl = parsedBlockLabel blockLabelMap thnAddr return ( [ CR.DefineAtom thnIdxBitsAtom $ CR.EvalApp $ C.BVLit width (toInteger thnIdx) @@ -1536,44 +1528,6 @@ addSwitch blockLabelMap idx possibleAddrs = do mapM_ addStmt stmts addTermStmt termStmt -nextStatements :: M.ParsedTermStmt arch ids -> [M.StatementList arch ids] -nextStatements tstmt = - case tstmt of - M.ParsedIte _ x y -> [x, y] - _ -> [] - -addStatementList :: MacawSymbolicArchFunctions arch - -> MemSegmentMap (M.ArchAddrWidth arch) - -- ^ Base address map - -> BlockLabelMap arch s - -- ^ Map from block index to Crucible label - -> M.ArchSegmentOff arch - -- ^ Address of block that starts statements - -> (M.ArchAddrWord arch -> C.Position) - -- ^ Function for generating position from offset from start of this block. - -> CR.Reg s (ArchRegStruct arch) - -- ^ Register that stores Macaw registers - -> [(M.ArchAddrWord arch, M.StatementList arch ids)] - -> [CR.Block (MacawExt arch) s (MacawFunctionResult arch)] - -> MacawMonad arch ids h s [CR.Block (MacawExt arch) s (MacawFunctionResult arch)] -addStatementList _ _ _ _ _ _ [] rlist = - pure (reverse rlist) -addStatementList archFns baseAddrMap blockLabelMap startAddr posFn regReg ((off,stmts):rest) r = do - crucGenArchConstraints archFns $ do - let idx = M.stmtsIdent stmts - lbl <- - case Map.lookup (startAddr, idx) blockLabelMap of - Just lbl -> - pure lbl - Nothing -> - throwError $ "Internal: Could not find block with address " ++ show startAddr ++ " index " ++ show idx - (b,bs,off') <- - runCrucGen archFns baseAddrMap posFn off lbl regReg $ do - mapM_ (addMacawStmt startAddr) (M.stmtsNonterm stmts) - addMacawParsedTermStmt blockLabelMap startAddr (M.stmtsTerm stmts) - let new = (off',) <$> nextStatements (M.stmtsTerm stmts) - addStatementList archFns baseAddrMap blockLabelMap startAddr posFn regReg (new ++ rest) (b : bs ++ r) - addParsedBlock :: forall arch ids h s . MacawSymbolicArchFunctions arch -> MemSegmentMap (M.ArchAddrWidth arch) @@ -1586,14 +1540,24 @@ addParsedBlock :: forall arch ids h s -- ^ Register that stores Macaw registers -> M.ParsedBlock arch ids -> MacawMonad arch ids h s [CR.Block (MacawExt arch) s (MacawFunctionResult arch)] -addParsedBlock archFns memBaseVarMap blockLabelMap posFn regReg b = do +addParsedBlock archFns memSegMap blockLabelMap posFn regReg b = do crucGenArchConstraints archFns $ do let base = M.pblockAddr b let thisPosFn :: M.ArchAddrWord arch -> C.Position thisPosFn off = posFn r where Just r = M.incSegmentOff base (toInteger off) - addStatementList archFns memBaseVarMap blockLabelMap - (M.pblockAddr b) thisPosFn regReg [(0, M.blockStatementList b)] [] + let startAddr = M.pblockAddr b + lbl <- + case Map.lookup startAddr blockLabelMap of + Just lbl -> + pure lbl + Nothing -> + throwError $ "Internal: Could not find block with address " ++ show startAddr + (b,bs,_) <- + runCrucGen archFns memSegMap thisPosFn 0 lbl regReg $ do + mapM_ (addMacawStmt startAddr) (M.pblockNonterm b) + addMacawParsedTermStmt blockLabelMap startAddr (M.pblockTerm b) + pure (reverse (b : bs)) traverseArchStateUpdateMap :: (Applicative m) => (forall tp . e tp -> m (f tp))