WIP: upgrade Classifier monad

This commit is contained in:
Daniel Matichuk 2023-06-28 12:39:19 -07:00
parent 89c2bb3fd1
commit 7ba7124794
12 changed files with 374 additions and 90 deletions

View File

@ -70,6 +70,7 @@ library
Data.Macaw.CFG.Rewriter
Data.Macaw.DebugLogging
Data.Macaw.Discovery
Data.Macaw.Discovery.DiscoveryM
Data.Macaw.Discovery.AbsEval
Data.Macaw.Discovery.Classifier
Data.Macaw.Discovery.Classifier.JumpTable

View File

@ -6,6 +6,8 @@ This defines the architecture-specific information needed for code discovery.
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Data.Macaw.Architecture.Info
( ArchitectureInfo(..)
, postCallAbsState
@ -23,6 +25,10 @@ module Data.Macaw.Architecture.Info
-- * Unclassified blocks
, module Data.Macaw.CFG.Block
, rewriteBlock
, classifyThrow
, classifyCatch
, classifyLiftMaybe
, classifyLiftBool
) where
import Control.Applicative ( Alternative(..), liftA )
@ -43,6 +49,7 @@ import Data.Macaw.CFG.Core
import Data.Macaw.CFG.DemandSet
import Data.Macaw.CFG.Rewriter
import qualified Data.Macaw.Discovery.ParsedContents as Parsed
import qualified Data.Macaw.Discovery.DiscoveryM as DM
import Data.Macaw.Memory
@ -56,30 +63,56 @@ data NoReturnFunStatus
| MayReturnFun
-- ^ Function may retun
type ClassificationError = String
type ClassificationError arch = DM.DiscoveryTrace arch
-- | The result of block classification, which collects information about all of
-- the match failures to help diagnose shortcomings in the analysis
data Classifier o = ClassifyFailed [ClassificationError]
| ClassifySucceeded [ClassificationError] o
data Classifier arch o = ClassifyFailed [DM.DiscoveryTrace arch]
| ClassifySucceeded [DM.DiscoveryTrace arch] o
instance DM.MonadDiscoveryTrace arch (Classifier arch) where
emitDiscoveryTraces tr = ClassifySucceeded tr ()
withDiscoveryTracing d f = case f of
ClassifyFailed l -> ClassifyFailed [DM.discoveryTrace $ DM.NestedTraceData d l]
ClassifySucceeded l a -> ClassifySucceeded [DM.discoveryTrace $ DM.NestedTraceData d l] a
instance DM.MonadDiscoveryTrace arch (BlockClassifierM arch ids) where
emitDiscoveryTraces tr = BlockClassifier $ CMT.lift $ ClassifySucceeded tr ()
withDiscoveryTracing d (BlockClassifier (CMR.ReaderT m)) = BlockClassifier $ CMR.ReaderT $ \i ->
DM.withDiscoveryTracing d (m i)
-- | In the given context, set the name of the current classifier
--
-- This is used to improve the labels for each classifier failure
classifierName :: String -> BlockClassifierM arch ids a -> BlockClassifierM arch ids a
classifierName nm (BlockClassifier (CMR.ReaderT m)) = BlockClassifier $ CMR.ReaderT $ \i ->
case m i of
ClassifyFailed [] -> ClassifyFailed [nm ++ " classification failed."]
ClassifyFailed l -> ClassifyFailed (fmap ((nm ++ ": ") ++) l)
ClassifySucceeded l a -> ClassifySucceeded (fmap ((nm ++ ": ") ++) l) a
classifierName nm f = DM.withDiscoveryTracing (DM.NamedClassifier nm) f
classifyFail :: Classifier a
classifyThrow :: DM.DiscoveryTraceData arch ids s -> Classifier arch a
classifyThrow d = ClassifyFailed [DM.discoveryTrace d]
-- | Convert a given classifier into one that unconditionally succeeds, but returns a
-- 'Maybe' value, where 'Nothing' indicates that the given classifier failed.
classifyCatch :: Classifier arch a -> Classifier arch (Maybe a)
classifyCatch f = case f of
ClassifySucceeded l a -> ClassifySucceeded l (Just a)
ClassifyFailed tr -> ClassifySucceeded [DM.discoveryTrace $ DM.NestedTraceData (DM.ClassifierMonadFail "classifyCatch") tr] Nothing
classifyLiftMaybe :: Maybe a -> Classifier arch a
classifyLiftMaybe (Just a) = return a
classifyLiftMaybe Nothing = fail "classifyLiftMaybe"
classifyLiftBool :: Bool-> Classifier arch ()
classifyLiftBool True = return ()
classifyLiftBool False = fail "classifyLiftBool"
classifyFail :: Classifier arch a
classifyFail = ClassifyFailed []
classifySuccess :: a -> Classifier a
classifySuccess :: a -> Classifier arch a
classifySuccess = \x -> ClassifySucceeded [] x
classifyBind :: Classifier a -> (a -> Classifier b) -> Classifier b
classifyBind :: Classifier arch a -> (a -> Classifier arch b) -> Classifier arch b
classifyBind m f =
case m of
ClassifyFailed e -> ClassifyFailed e
@ -89,7 +122,7 @@ classifyBind m f =
ClassifyFailed e -> ClassifyFailed (l++e)
ClassifySucceeded e b -> ClassifySucceeded (l++e) b
classifyAppend :: Classifier a -> Classifier a -> Classifier a
classifyAppend :: Classifier arch a -> Classifier arch a -> Classifier arch a
classifyAppend m n =
case m of
ClassifySucceeded e a -> ClassifySucceeded e a
@ -99,22 +132,22 @@ classifyAppend m n =
ClassifySucceeded f a -> ClassifySucceeded (e++f) a
ClassifyFailed f -> ClassifyFailed (e++f)
instance Alternative Classifier where
instance Alternative (Classifier arch) where
empty = classifyFail
(<|>) = classifyAppend
instance Functor Classifier where
instance Functor (Classifier arch) where
fmap = liftA
instance Applicative Classifier where
instance Applicative (Classifier arch) where
pure = classifySuccess
(<*>) = ap
instance Monad Classifier where
instance Monad (Classifier arch) where
(>>=) = classifyBind
instance MF.MonadFail Classifier where
fail = \m -> ClassifyFailed [m]
instance MF.MonadFail (Classifier arch) where
fail = \m -> classifyThrow $ DM.ClassifierMonadFail m
------------------------------------------------------------------------
-- ParseContext
@ -182,7 +215,7 @@ type BlockClassifier arch ids = BlockClassifierM arch ids (Parsed.ParsedContents
-- matching errors
newtype BlockClassifierM arch ids a =
BlockClassifier { unBlockClassifier :: CMR.ReaderT (BlockClassifierContext arch ids)
Classifier
(Classifier arch)
a
}
deriving ( Functor
@ -197,10 +230,10 @@ newtype BlockClassifierM arch ids a =
runBlockClassifier
:: BlockClassifier arch ids
-> BlockClassifierContext arch ids
-> Classifier (Parsed.ParsedContents arch ids)
-> Classifier arch (Parsed.ParsedContents arch ids)
runBlockClassifier cl = CMR.runReaderT (unBlockClassifier cl)
liftClassifier :: Classifier a -> BlockClassifierM arch ids a
liftClassifier :: Classifier arch a -> BlockClassifierM arch ids a
liftClassifier c = BlockClassifier (CMT.lift c)
------------------------------------------------------------------------
@ -269,7 +302,7 @@ data ArchitectureInfo arch
. Memory (ArchAddrWidth arch)
-> Seq (Stmt arch ids)
-> RegState (ArchReg arch) (Value arch ids)
-> Maybe (Seq (Stmt arch ids), ArchSegmentOff arch)
-> Classifier arch (Seq (Stmt arch ids), ArchSegmentOff arch)
-- ^ Function for recognizing call statements.
--
-- Given a memory state, list of statements, and final register
@ -283,8 +316,8 @@ data ArchitectureInfo arch
, checkForReturnAddr :: forall ids
. RegState (ArchReg arch) (Value arch ids)
-> AbsProcessorState (ArchReg arch) ids
-> Bool
-- ^ @checkForReturnAddr regs s@ returns true if the location
-> Classifier arch ()
-- ^ @checkForReturnAddr regs s@ succeds if the location
-- where the return address is normally stored in regs when
-- calling a function does indeed contain the abstract value
-- associated with return addresses.
@ -298,7 +331,7 @@ data ArchitectureInfo arch
. Seq (Stmt arch ids)
-> RegState (ArchReg arch) (Value arch ids)
-> AbsProcessorState (ArchReg arch) ids
-> Maybe (Seq (Stmt arch ids))
-> Classifier arch (Seq (Stmt arch ids))
-- ^ Identify returns to the classifier.
--
-- Given a list of statements and the final state of the registers, this

View File

@ -516,7 +516,7 @@ defaultClassifier = branchClassifier
-- | This parses a block that ended with a fetch and execute instruction.
parseFetchAndExecute :: forall arch ids
. (RegisterInfo (ArchReg arch))
. ArchConstraints arch
=> ArchitectureInfo arch
-> BlockClassifierContext arch ids
-> [Stmt arch ids]
@ -525,8 +525,8 @@ parseFetchAndExecute ainfo classCtx stmts = do
case runBlockClassifier (archClassifier ainfo) classCtx of
ClassifySucceeded _ m -> m
ClassifyFailed rsns ->
ParsedContents { parsedNonterm = stmts
, parsedTerm = ClassifyFailure (classifierFinalRegState classCtx) rsns
emptyParsedContents { parsedNonterm = stmts
, parsedTerm = ClassifyFailure (classifierFinalRegState classCtx) (map showSimpleTrace rsns)
, writtenCodeAddrs = classifierWrittenAddrs classCtx
, intraJumpTargets = fromMaybe [] (useExternalTargets classCtx)
, newFunctionAddrs = []
@ -571,7 +571,7 @@ parseBlock ctx initRegs b sz absBlockState blockBnds = do
-- Do nothing when this block ends in a translation error.
TranslateError _ msg ->
ParsedContents { parsedNonterm = blockStmts b
emptyParsedContents { parsedNonterm = blockStmts b
, parsedTerm = ParsedTranslateError msg
, writtenCodeAddrs = writtenAddrs
, intraJumpTargets = []
@ -579,7 +579,7 @@ parseBlock ctx initRegs b sz absBlockState blockBnds = do
}
ArchTermStmt tstmt regs ->
let r = postArchTermStmtAbsState ainfo mem absState jmpBounds regs tstmt
in ParsedContents { parsedNonterm = blockStmts b
in emptyParsedContents { parsedNonterm = blockStmts b
, parsedTerm = ParsedArchTermStmt tstmt regs ((\(a,_,_) -> a) <$> r)
, writtenCodeAddrs = writtenAddrs
, intraJumpTargets = maybeToList r

View File

@ -172,7 +172,7 @@ branchClassifier = classifierName "Branch" $ do
let jmpBounds = classifierJumpBounds bcc
case Jmp.postBranchBounds jmpBounds finalRegs c of
Jmp.BothFeasibleBranch trueJmpState falseJmpState -> do
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList stmts
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList stmts
, Parsed.parsedTerm =
Parsed.ParsedBranch finalRegs c trueTgtAddr falseTgtAddr
, Parsed.writtenCodeAddrs = writtenAddrs
@ -181,10 +181,11 @@ branchClassifier = classifierName "Branch" $ do
, (falseTgtAddr, falseAbsState, falseJmpState)
]
, Parsed.newFunctionAddrs = []
, Parsed.parsedDiscoveryTraces = []
}
-- The false branch is impossible.
Jmp.TrueFeasibleBranch trueJmpState -> do
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList stmts
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList stmts
, Parsed.parsedTerm = Parsed.ParsedJump finalRegs trueTgtAddr
, Parsed.writtenCodeAddrs = writtenAddrs
, Parsed.intraJumpTargets =
@ -193,7 +194,7 @@ branchClassifier = classifierName "Branch" $ do
}
-- The true branch is impossible.
Jmp.FalseFeasibleBranch falseJmpState -> do
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList stmts
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList stmts
, Parsed.parsedTerm = Parsed.ParsedJump finalRegs falseTgtAddr
, Parsed.writtenCodeAddrs = writtenAddrs
, Parsed.intraJumpTargets =
@ -244,11 +245,9 @@ callClassifier = classifierName "Call" $ do
let finalRegs = classifierFinalRegState bcc
let ainfo = pctxArchInfo ctx
let mem = pctxMemory ctx
ret <- case Info.identifyCall ainfo mem (classifierStmts bcc) finalRegs of
Just (_prev_stmts, ret) -> pure ret
Nothing -> fail $ "Call classifier failed."
(_prev_stmts, ret) <- liftClassifier $ Info.identifyCall ainfo mem (classifierStmts bcc) finalRegs
Info.withArchConstraints ainfo $ do
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList (classifierStmts bcc)
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList (classifierStmts bcc)
, Parsed.parsedTerm = Parsed.ParsedCall finalRegs (Just ret)
-- The return address may be written to
-- stack, but is highly unlikely to be
@ -279,12 +278,11 @@ returnClassifier = classifierName "Return" $ do
bcc <- CMR.ask
let ainfo = pctxArchInfo (classifierParseContext bcc)
Info.withArchConstraints ainfo $ do
Just prevStmts <-
pure $ Info.identifyReturn ainfo
prevStmts <- liftClassifier $ Info.identifyReturn ainfo
(classifierStmts bcc)
(classifierFinalRegState bcc)
(classifierAbsState bcc)
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList prevStmts
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList prevStmts
, Parsed.parsedTerm = Parsed.ParsedReturn (classifierFinalRegState bcc)
, Parsed.writtenCodeAddrs = classifierWrittenAddrs bcc
, Parsed.intraJumpTargets = []
@ -319,7 +317,7 @@ directJumpClassifier = classifierName "Jump" $ do
let abst = finalAbsBlockState (classifierAbsState bcc) (classifierFinalRegState bcc)
let abst' = abst & setAbsIP tgtMSeg
let tgtBnds = Jmp.postJumpBounds (classifierJumpBounds bcc) (classifierFinalRegState bcc)
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList (classifierStmts bcc)
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList (classifierStmts bcc)
, Parsed.parsedTerm = Parsed.ParsedJump (classifierFinalRegState bcc) tgtMSeg
, Parsed.writtenCodeAddrs = classifierWrittenAddrs bcc
, Parsed.intraJumpTargets = [(tgtMSeg, abst', tgtBnds)]
@ -340,7 +338,7 @@ noreturnCallParsedContents bcc =
regs = classifierFinalRegState bcc
blockEnd = classifierEndBlock bcc
in Info.withArchConstraints (pctxArchInfo ctx) $
Parsed.ParsedContents { Parsed.parsedNonterm = F.toList (classifierStmts bcc)
Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList (classifierStmts bcc)
, Parsed.parsedTerm = Parsed.ParsedCall regs Nothing
, Parsed.writtenCodeAddrs =
filter (\a -> segoffAddr a /= blockEnd) $
@ -416,5 +414,5 @@ tailCallClassifier = classifierName "Tail call" $ do
unless (o == 0) $
fail "Expected stack height of 0"
-- Return address is pushed
unless (Info.checkForReturnAddr ainfo (classifierFinalRegState bcc) (classifierAbsState bcc)) empty
liftClassifier $ Info.checkForReturnAddr ainfo (classifierFinalRegState bcc) (classifierAbsState bcc)
pure $! noreturnCallParsedContents bcc

View File

@ -224,7 +224,7 @@ extractJumpTableSlices :: ArchConstraints arch
-> Natural -- ^ Stride
-> BVValue arch ids idxWidth
-> MemRepr tp -- ^ Type of values
-> Info.Classifier (V.Vector [MemChunk (ArchAddrWidth arch)])
-> Info.Classifier arch (V.Vector [MemChunk (ArchAddrWidth arch)])
extractJumpTableSlices jmpBounds base stride ixVal tp = do
cnt <-
case Jmp.unsignedUpperBound jmpBounds ixVal of
@ -262,7 +262,7 @@ matchBoundedMemArray
-> Jmp.IntraJumpBounds arch ids
-- ^ Bounds for jump table
-> Value arch ids tp -- ^ Value to interpret
-> Info.Classifier (Parsed.BoundedMemArray arch tp, ArchAddrValue arch ids)
-> Info.Classifier arch (Parsed.BoundedMemArray arch tp, ArchAddrValue arch ids)
matchBoundedMemArray mem aps jmpBounds val = do
AssignedValue (Assignment _ (ReadMem addr tp)) <- pure val
Just (base, offset) <- pure $ valueAsMemOffset mem aps addr
@ -306,7 +306,7 @@ matchAbsoluteJumpTable = Info.classifierName "Absolute jump table" $ do
BVMemRepr _arByteCount e -> pure e
let go :: Int
-> [MemChunk (ArchAddrWidth arch)]
-> Info.Classifier (MemSegmentOff (ArchAddrWidth arch))
-> Info.Classifier arch (MemSegmentOff (ArchAddrWidth arch))
go entryIndex contents = do
addr <- case resolveAsAddr mem endianness contents of
Just a -> pure a
@ -374,7 +374,7 @@ jumpTableClassifier = Info.classifierName "Jump table" $ do
let nextBnds = Jmp.postJumpBounds jmpBounds (Info.classifierFinalRegState bcc)
let term = Parsed.ParsedLookupTable layout (Info.classifierFinalRegState bcc) jumpIndex entries
pure $ seq abst $
Parsed.ParsedContents { Parsed.parsedNonterm = F.toList (Info.classifierStmts bcc)
Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList (Info.classifierStmts bcc)
, Parsed.parsedTerm = term
, Parsed.writtenCodeAddrs = Info.classifierWrittenAddrs bcc
, Parsed.intraJumpTargets =

View File

@ -141,7 +141,7 @@ pltStubClassifier = Info.classifierName "PLT stub" $ do
let strippedRegs = removeUnassignedRegs (Info.classifierInitRegState bcc) (Info.classifierFinalRegState bcc)
when (containsAssignId valId strippedRegs) $ do
fail $ "PLT IP must be assigned."
pure $ Parsed.ParsedContents { Parsed.parsedNonterm = F.toList strippedStmts
pure $ Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList strippedStmts
, Parsed.parsedTerm = Parsed.PLTStub strippedRegs gotSegOff (VerSym sym symVer)
, Parsed.writtenCodeAddrs = Info.classifierWrittenAddrs bcc
, Parsed.intraJumpTargets = []

View File

@ -0,0 +1,152 @@
{-|
Defines specialized monads for collecting logging information while
performing discovery
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Data.Macaw.Discovery.DiscoveryM
(
MonadDiscoveryTrace(..)
, DiscoverySTL
, DiscoverySTS
, DiscoveryT
, runDiscoveryT
, discoverySTStoSTL
, discoverySTLtoSTS
, runDiscoverySTL
, runDiscoverySTS
, showSimpleTrace
, emitDiscoveryTrace
-- re-exports
, DiscoveryTrace
, discoveryTrace
, DiscoveryTraceData(..)
, traceValues
) where
import Control.Monad.Writer.Strict
import GHC.Stack
import Data.Functor.Identity
import Control.Monad.Reader
import qualified Control.Monad.ST.Lazy as STL
import qualified Control.Monad.ST.Strict as STS
import qualified Data.STRef.Lazy as STL
import qualified Data.STRef.Strict as STS
import Data.Macaw.Discovery.ParsedContents
import Data.Parameterized
import Data.Macaw.CFG.Core (Value)
class Monad m => MonadDiscoveryTrace arch m | m -> arch where
-- | Add the given traces to the current trace
emitDiscoveryTraces :: [DiscoveryTrace arch] -> m ()
-- | Nest the tracing results from the sub-computation in
-- a 'NestedTraceData', and drop them from the outer trace
withDiscoveryTracing :: DiscoveryTraceData arch ids s -> m a -> m a
traceValues :: HasCallStack => MonadDiscoveryTrace arch m => [(String, Some (Value arch ids))] -> m ()
traceValues vals = forM_ vals $ \(nm,Some v) -> emitDiscoveryTrace $ MacawValue nm v
newtype DiscoveryT arch m a = DiscoveryT { unDiscoveryT :: WriterT ([DiscoveryTrace arch]) m a }
deriving (MonadTrans, Functor, Applicative)
runDiscoveryT :: DiscoveryT arch m a -> m (a, [DiscoveryTrace arch])
runDiscoveryT (DiscoveryT f) = runWriterT f
deriving instance Monad m => Monad (DiscoveryT arch m)
instance Monad m => MonadDiscoveryTrace arch (DiscoveryT arch m) where
emitDiscoveryTraces tr = DiscoveryT $ tell tr
withDiscoveryTracing d (DiscoveryT f) = do
(a, tr) <- DiscoveryT $ censor (\_ -> []) $ listen f
emitDiscoveryTrace (NestedTraceData d tr)
return a
type DiscoveryM arch = DiscoveryT arch Identity
emitDiscoveryTrace :: forall arch ids s m. HasCallStack => MonadDiscoveryTrace arch m => DiscoveryTraceData arch ids s -> m ()
emitDiscoveryTrace d = emitDiscoveryTraces [discoveryTrace d]
newtype DiscoveryST arch s m a = DiscoveryST { unDiscoveryST :: ReaderT (STL.STRef s [DiscoveryTrace arch]) m a }
deriving (MonadTrans, Functor, Applicative)
deriving instance Monad m => Monad (DiscoveryST arch s m)
type DiscoverySTL arch s = DiscoveryST arch s (STL.ST s)
type DiscoverySTS arch s = DiscoveryST arch s (STS.ST s)
-- type DiscoveryM arch ids s m = DiscoveryT arch ids s (STS.ST s)
discoverySTStoSTL :: DiscoverySTS arch s a -> DiscoverySTL arch s a
discoverySTStoSTL (DiscoveryST f) = do
r <- DiscoveryST $ ask
DiscoveryST $ lift $ (STL.strictToLazyST (runReaderT f r))
discoverySTLtoSTS :: DiscoverySTL arch s a -> DiscoverySTS arch s a
discoverySTLtoSTS (DiscoveryST f) = do
r <- DiscoveryST $ ask
DiscoveryST $ lift $ (STL.lazyToStrictST (runReaderT f r))
instance MonadDiscoveryTrace arch (DiscoveryST arch s (STL.ST s)) where
emitDiscoveryTraces [] = return ()
emitDiscoveryTraces tr = DiscoveryST $ ask >>= \r -> lift $ STL.modifySTRef r (\traces -> tr ++ traces)
withDiscoveryTracing d f = do
r <- DiscoveryST $ ask
oldTraces <- lift $ STL.readSTRef r
a <- f
nextTraces <- lift $ STL.readSTRef r
-- take the prefix of new traces
let (newTraces, _) = splitAt (length nextTraces - length oldTraces) nextTraces
-- discard changes from the sub-computation
lift $ STL.writeSTRef r oldTraces
emitDiscoveryTrace (NestedTraceData d newTraces)
return a
instance MonadDiscoveryTrace arch (DiscoveryST arch s (STS.ST s)) where
emitDiscoveryTraces [] = return ()
emitDiscoveryTraces tr = DiscoveryST $ ask >>= \r -> lift $ STS.modifySTRef r (\traces -> tr ++ traces)
withDiscoveryTracing d f = do
r <- DiscoveryST $ ask
oldTraces <- lift $ STS.readSTRef r
a <- f
nextTraces <- lift $ STS.readSTRef r
-- take the prefix of new traces
let (newTraces, _) = splitAt (length nextTraces - length oldTraces) nextTraces
lift $ STS.writeSTRef r oldTraces
emitDiscoveryTrace (NestedTraceData d newTraces)
return a
runDiscoverySTL :: DiscoverySTL arch s a -> STL.ST s (a, [DiscoveryTrace arch])
runDiscoverySTL f = do
r <- STL.newSTRef []
a <- runReaderT (unDiscoveryST f) r
traces <- STL.readSTRef r
return (a, traces)
runDiscoverySTS :: DiscoverySTS arch s a -> STS.ST s (a, [DiscoveryTrace arch])
runDiscoverySTS f = do
r <- STS.newSTRef []
a <- runReaderT (unDiscoveryST f) r
traces <- STS.readSTRef r
return (a, traces)
newtype DiscoveryTraceT arch m a = DiscoveryTraceT { _unDiscoveryTraceT :: WriterT [DiscoveryTrace arch] m a }
deriving (Functor, Applicative, Monad)
runDiscoveryTraceT :: DiscoveryTraceT arch m a -> m (a, [DiscoveryTrace arch])
runDiscoveryTraceT (DiscoveryTraceT f) = runWriterT f

View File

@ -2,6 +2,8 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE RankNTypes #-}
-- | Macaw AST elements used after block classification
--
-- There are two stages of code discovery:
@ -28,6 +30,11 @@ module Data.Macaw.Discovery.ParsedContents (
, isReadOnlyBoundedMemArray
-- * Pretty Printing
, ppTermStmt
, DiscoveryTrace
, DiscoveryTraceData(..)
, showSimpleTrace
, discoveryTrace
, emptyParsedContents
) where
import qualified Control.Lens as CL
@ -39,11 +46,12 @@ import Data.Word ( Word64 )
import qualified Prettyprinter as PP
import Prettyprinter ( (<+>) )
import Data.Macaw.AbsDomain.AbsState ( AbsBlockState )
import Data.Macaw.AbsDomain.AbsState ( AbsBlockState, ArchAbsValue, AbsValue )
import Data.Macaw.CFG
import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp
import qualified Data.Macaw.Memory.Permissions as Perm
import Data.Macaw.Types
import GHC.Stack
------------------------------------------------------------------------
-- BlockExploreReason
@ -294,6 +302,73 @@ parsedTermSucc ts = do
ParsedTranslateError{} -> []
ClassifyFailure{} -> []
data DiscoveryTraceData arch ids s =
ClassifierMonadFail String
| forall tp. MacawValue String (Value arch ids tp)
| forall w tp. MemWidth w => MacawAbsValue String (AbsValue w tp)
| NamedClassifier String
| FunctionAddress (ArchSegmentOff arch)
| BlockAddress (ArchSegmentOff arch)
| NestedTraceData (DiscoveryTraceData arch ids s) [DiscoveryTrace arch]
ppDiscoveryTraceData :: ArchConstraints arch
=> (DiscoveryTrace arch -> PP.Doc a)
-> DiscoveryTraceData arch ids s
-> PP.Doc a
ppDiscoveryTraceData printTrace traceData = case traceData of
ClassifierMonadFail msg -> "fail:" <> PP.indent 1 (PP.viaShow msg)
MacawValue msg v -> "Macaw Value:" <+> case null msg of
True -> PP.pretty v
False -> PP.viaShow msg <> PP.indent 1 (PP.pretty v)
MacawAbsValue msg v -> "Macaw Abstract Value:" <+> case null msg of
True -> PP.pretty v
False -> PP.viaShow msg <> PP.indent 1 (PP.pretty v)
NamedClassifier nm -> "Classifier Failure:" <+> PP.viaShow nm
FunctionAddress addr -> "Function Address:" <+> PP.pretty addr
BlockAddress addr -> "Block Address:" <+> PP.pretty addr
-- as a special case: we print out all of the bindings for a macaw 'Value'
-- if it's being nested
-- this is so that we can establish the binding context for an expression once
-- and just print the identifier for each individual expression when
-- tracing a 'MacawValue'
NestedTraceData (MacawValue msg v) subTraces ->
PP.viaShow msg <> PP.line
<> ppValueAssignments v
<> PP.indent 1 (PP.vsep (reverse $ map printTrace subTraces))
NestedTraceData traceData' subTraces ->
ppDiscoveryTraceData printTrace traceData'
-- NB: the head of the trace is the most recent tracing data, so we
-- reverse on display to give the natural ordering
<> PP.indent 1 (PP.vsep (reverse $ map printTrace subTraces))
ppDiscoveryTrace :: ArchConstraints arch
=> (forall ids s. DiscoveryTraceData arch ids s -> PP.Doc a)
-> (CallStack -> Maybe (PP.Doc a))
-> DiscoveryTrace arch
-> PP.Doc a
ppDiscoveryTrace printTraceData printCallStack tr@(DiscoveryTrace _ d) =
case printCallStack (trCallStack tr) of
Nothing -> printTraceData d
Just ppstk -> PP.vsep [ppstk, printTraceData d]
instance ArchConstraints arch => PP.Pretty (DiscoveryTraceData arch ids s) where
pretty td = ppDiscoveryTraceData PP.pretty td
instance ArchConstraints arch => PP.Pretty (DiscoveryTrace arch) where
pretty tr = ppDiscoveryTrace PP.pretty (\stk -> Just (PP.viaShow stk)) tr
data DiscoveryTrace arch = forall ids s. DiscoveryTrace
{ trCallStack :: CallStack
, _trData :: DiscoveryTraceData arch ids s
}
showSimpleTrace :: ArchConstraints arch => DiscoveryTrace arch -> String
showSimpleTrace (DiscoveryTrace _ d) = show (PP.pretty d)
discoveryTrace :: DiscoveryTraceData arch ids s -> DiscoveryTrace arch
discoveryTrace d = DiscoveryTrace callStack d
------------------------------------------------------------------------
-- ParsedBlock
@ -348,4 +423,15 @@ data ParsedContents arch ids =
--
-- Note. In a binary, these could denote the non-executable
-- segments, so they are filtered before traversing.
, parsedDiscoveryTraces :: ![DiscoveryTrace arch]
}
emptyParsedContents :: ParsedContents arch ids
emptyParsedContents = ParsedContents
{ parsedNonterm = []
, parsedTerm = ParsedTranslateError "Unexpected emptyParsedContents"
, writtenCodeAddrs = []
, intraJumpTargets = []
, newFunctionAddrs = []
, parsedDiscoveryTraces = []
}

View File

@ -89,14 +89,14 @@ isValidReturnAddress val0 =
identifyCall :: MM.Memory (MC.ArchAddrWidth ARM.AArch32)
-> Seq.Seq (MC.Stmt ARM.AArch32 ids)
-> MC.RegState (MC.ArchReg ARM.AArch32) (MC.Value ARM.AArch32 ids)
-> Maybe (Seq.Seq (MC.Stmt ARM.AArch32 ids), MC.ArchSegmentOff ARM.AArch32)
-> MAI.Classifier ARM.AArch32 (Seq.Seq (MC.Stmt ARM.AArch32 ids), MC.ArchSegmentOff ARM.AArch32)
identifyCall mem stmts0 rs
| not (null stmts0)
, Just retVal <- isValidReturnAddress (rs ^. MC.boundValue AR.arm_LR)
, Just retAddrVal <- MC.valueAsMemAddr retVal
, Just retAddr <- MM.asSegmentOff mem retAddrVal =
Just (stmts0, retAddr)
| otherwise = Nothing
return (stmts0, retAddr)
| otherwise = fail "identifyCall"
-- | Intended to identify a return statement.
--
@ -110,11 +110,10 @@ identifyCall mem stmts0 rs
identifyReturn :: Seq.Seq (MC.Stmt ARM.AArch32 ids)
-> MC.RegState (MC.ArchReg ARM.AArch32) (MC.Value ARM.AArch32 ids)
-> MA.AbsProcessorState (MC.ArchReg ARM.AArch32) ids
-> Maybe (Seq.Seq (MC.Stmt ARM.AArch32 ids))
identifyReturn stmts s finalRegSt8 =
if isReturnValue finalRegSt8 (s^.MC.boundValue MC.ip_reg)
then Just stmts
else Nothing
-> MAI.Classifier ARM.AArch32 (Seq.Seq (MC.Stmt ARM.AArch32 ids))
identifyReturn stmts s finalRegSt8 = do
isReturnValue finalRegSt8 (s^.MC.boundValue MC.ip_reg)
return stmts
-- | Determines if the supplied value is the symbolic return address
-- from Macaw, modulo any ARM semantics operations (lots of ite caused
@ -122,8 +121,8 @@ identifyReturn stmts s finalRegSt8 =
-- of the low bits (1 in T32 mode, 2 in A32 mode).
isReturnValue :: MA.AbsProcessorState (MC.ArchReg ARM.AArch32) ids
-> MC.Value ARM.AArch32 ids (MT.BVType (MC.RegAddrWidth (MC.ArchReg ARM.AArch32)))
-> Bool
isReturnValue absProcState val =
-> MAI.Classifier ARM.AArch32 ()
isReturnValue absProcState val = MAI.classifyLiftBool $
case MA.transferValue absProcState val of
MA.ReturnAddr -> True
_ -> False
@ -136,7 +135,7 @@ asReturnAddrAndConstant
-> MA.AbsProcessorState (MC.ArchReg ARM.AArch32) ids
-> MC.Value ARM.AArch32 ids (MT.BVType (MC.ArchAddrWidth ARM.AArch32))
-> MC.Value ARM.AArch32 ids (MT.BVType (MC.ArchAddrWidth ARM.AArch32))
-> MAI.Classifier (MC.ArchSegmentOff ARM.AArch32)
-> MAI.Classifier ARM.AArch32 (MC.ArchSegmentOff ARM.AArch32)
asReturnAddrAndConstant mem absProcState mRet mConstant = do
MA.ReturnAddr <- return (MA.transferValue absProcState mRet)
Just memAddr <- return (MC.valueAsMemAddr mConstant)
@ -191,7 +190,7 @@ identifyConditionalReturn
-> Seq.Seq (MC.Stmt ARM.AArch32 ids)
-> MC.RegState (MC.ArchReg ARM.AArch32) (MC.Value ARM.AArch32 ids)
-> MA.AbsProcessorState (MC.ArchReg ARM.AArch32) ids
-> MAI.Classifier ( MC.Value ARM.AArch32 ids MT.BoolType
-> MAI.Classifier ARM.AArch32 ( MC.Value ARM.AArch32 ids MT.BoolType
, MC.ArchSegmentOff ARM.AArch32
, ReturnsOnBranch
, Seq.Seq (MC.Stmt ARM.AArch32 ids)
@ -237,7 +236,7 @@ conditionalReturnClassifier = do
, abs'
, if returnBranch == ReturnsOnTrue then falseJumpState else trueJumpState
)
return Parsed.ParsedContents { Parsed.parsedNonterm = F.toList stmts'
return Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList stmts'
, Parsed.parsedTerm = Parsed.ParsedArchTermStmt term regs (Just nextIP)
, Parsed.intraJumpTargets = [fallthroughTarget]
, Parsed.newFunctionAddrs = []
@ -271,7 +270,7 @@ asConditionalCallReturnAddress
-- ^ The value of the PC at one condition polarity
-> MC.Value ARM.AArch32 ids (MT.BVType (MC.ArchAddrWidth ARM.AArch32))
-- ^ The value of the LR at the other condition polarity
-> MAI.Classifier (MC.ArchSegmentOff ARM.AArch32)
-> MAI.Classifier ARM.AArch32 (MC.ArchSegmentOff ARM.AArch32)
asConditionalCallReturnAddress mem pc_val lr_val = do
Just memAddr_pc <- return (MC.valueAsMemAddr pc_val)
Just memAddr_lr <- return (MC.valueAsMemAddr lr_val)
@ -293,7 +292,7 @@ identifyConditionalCall
:: MC.Memory 32
-> Seq.Seq (MC.Stmt ARM.AArch32 ids)
-> MC.RegState (MC.ArchReg ARM.AArch32) (MC.Value ARM.AArch32 ids)
-> MAI.Classifier ( MC.Value ARM.AArch32 ids MT.BoolType -- Condition
-> MAI.Classifier ARM.AArch32 ( MC.Value ARM.AArch32 ids MT.BoolType -- Condition
, MC.Value ARM.AArch32 ids (MT.BVType 32) -- Call target
, MC.Value ARM.AArch32 ids (MT.BVType 32) -- Raw link register value
, MC.ArchSegmentOff ARM.AArch32 -- Return address (also the fallthrough address) decoded into a segment offset
@ -350,7 +349,7 @@ conditionalCallClassifier = do
, abs'
, if callBranch == CallsOnTrue then falseJumpState else trueJumpState
)
return Parsed.ParsedContents { Parsed.parsedNonterm = F.toList stmts'
return Parsed.emptyParsedContents { Parsed.parsedNonterm = F.toList stmts'
, Parsed.parsedTerm = Parsed.ParsedArchTermStmt term regs (Just fallthroughIP)
, Parsed.intraJumpTargets = [fallthroughTarget]
, Parsed.newFunctionAddrs = extractCallTargets mem callTarget

View File

@ -64,6 +64,7 @@ import Data.Macaw.PPC.Identify ( identifyCall
import qualified Data.Macaw.PPC.PPCReg as R
import qualified Data.Macaw.PPC.Semantics.PPC32 as PPC32
import qualified Data.Macaw.PPC.Semantics.PPC64 as PPC64
import Control.Monad (void)
-- | The constructor for type tags for PowerPC
type AnyPPC = PPC.AnyPPC
@ -101,7 +102,7 @@ ppc64_linux_info binData =
, MI.absEvalArchFn = absEvalArchFn proxy
, MI.absEvalArchStmt = absEvalArchStmt proxy
, MI.identifyCall = identifyCall proxy
, MI.checkForReturnAddr = \r s -> isJust $ matchReturn s (r ^. MC.boundValue R.PPC_LNK)
, MI.checkForReturnAddr = \r s -> void $ matchReturn s (r ^. MC.boundValue R.PPC_LNK)
, MI.identifyReturn = identifyReturn proxy
, MI.rewriteArchFn = rewritePrimFn
, MI.rewriteArchStmt = rewriteStmt
@ -127,7 +128,7 @@ ppc32_linux_info =
, MI.absEvalArchFn = absEvalArchFn proxy
, MI.absEvalArchStmt = absEvalArchStmt proxy
, MI.identifyCall = identifyCall proxy
, MI.checkForReturnAddr = \r s -> isJust $ matchReturn s (r ^. MC.boundValue R.PPC_LNK)
, MI.checkForReturnAddr = \r s -> void $ matchReturn s (r ^. MC.boundValue R.PPC_LNK)
, MI.identifyReturn = identifyReturn proxy
, MI.rewriteArchFn = rewritePrimFn
, MI.rewriteArchStmt = rewriteStmt

View File

@ -1,6 +1,8 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Macaw.PPC.Identify
( identifyCall
, identifyReturn
@ -16,6 +18,8 @@ import qualified Data.Sequence as Seq
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Discovery.AbsEval as DE
import qualified Data.Macaw.Discovery.DiscoveryM as DM
import Data.Macaw.Architecture.Info (Classifier, classifyThrow)
import qualified Data.Macaw.Memory as MM
import qualified Data.Macaw.Types as MT
import qualified SemMC.Architecture.PPC as SP
@ -37,15 +41,15 @@ identifyCall :: (ppc ~ SP.AnyPPC var, PPCArchConstraints var)
-> MM.Memory (MC.ArchAddrWidth ppc)
-> Seq.Seq (MC.Stmt ppc ids)
-> MC.RegState (PPCReg var) (MC.Value ppc ids)
-> Maybe (Seq.Seq (MC.Stmt ppc ids), MC.ArchSegmentOff ppc)
-> Classifier ppc (Seq.Seq (MC.Stmt ppc ids), MC.ArchSegmentOff ppc)
identifyCall _ mem stmts0 rs
| not (null stmts0)
, MC.RelocatableValue {} <- rs ^. MC.boundValue PPC_LNK
, Just retVal <- simplifyValue (rs ^. MC.boundValue PPC_LNK)
, Just retAddrVal <- MC.valueAsMemAddr retVal
, Just retAddr <- MM.asSegmentOff mem retAddrVal =
Just (stmts0, retAddr)
| otherwise = Nothing
return $ (stmts0, retAddr)
| otherwise = fail "identifyCall"
-- | Called to determine if the instruction sequence contains a return
@ -59,16 +63,18 @@ identifyReturn :: (PPCArchConstraints var)
-> Seq.Seq (MC.Stmt (SP.AnyPPC var) ids)
-> MC.RegState (PPCReg var) (MC.Value (SP.AnyPPC var) ids)
-> MA.AbsProcessorState (PPCReg var) ids
-> Maybe (Seq.Seq (MC.Stmt (SP.AnyPPC var) ids))
-> Classifier (SP.AnyPPC var) (Seq.Seq (MC.Stmt (SP.AnyPPC var) ids))
identifyReturn _ stmts regState absState = do
Some MA.ReturnAddr <- matchReturn absState (regState ^. MC.boundValue MC.ip_reg)
return stmts
matchReturn :: (ppc ~ SP.AnyPPC var, PPCArchConstraints var)
matchReturn :: forall ppc var ids w
. (ppc ~ SP.AnyPPC var, PPCArchConstraints var)
=> MA.AbsProcessorState (PPCReg var) ids
-> MC.Value ppc ids (MT.BVType (SP.AddrWidth var))
-> Maybe (Some (MA.AbsValue w))
matchReturn absProcState' ip = matchRead ip <|> matchShift ip
-> Classifier ppc (Some (MA.AbsValue w))
matchReturn absProcState' ip = DM.withDiscoveryTracing (DM.MacawValue "IP:" ip) $
matchRead ip <|> matchShift ip
where
{- example:
r15 := (bv_shr r13 (0x2 :: [32]))
@ -78,23 +84,32 @@ matchReturn absProcState' ip = matchRead ip <|> matchShift ip
-}
matchShift r = do
MC.AssignedValue (MC.Assignment _ (MC.EvalApp (MC.BVShl _ addr (MC.BVValue _ shiftAmt)))) <- return r
DM.traceValues [("addr", Some addr)]
guard (shiftAmt == 0x2)
Some (MC.AssignedValue (MC.Assignment _ (MC.EvalApp (MC.BVShr _ addr' (MC.BVValue _ shiftAmt'))))) <- return (stripExtTrunc addr)
Some stripped <- return $ stripExtTrunc addr
DM.traceValues [("stripped", Some stripped)]
(MC.AssignedValue (MC.Assignment _ (MC.EvalApp (MC.BVShr _ addr' (MC.BVValue _ shiftAmt'))))) <- return stripped
DM.traceValues [("addr'", Some addr')]
guard (shiftAmt' == 0x2)
case MA.transferValue absProcState' addr' of
MA.ReturnAddr -> return (Some MA.ReturnAddr)
_ -> case addr' of
MC.AssignedValue (MC.Assignment _ (MC.ReadMem readAddr memRep))
| MA.ReturnAddr <- DE.absEvalReadMem absProcState' readAddr memRep -> return (Some MA.ReturnAddr)
_ -> Nothing
_ -> do
MC.AssignedValue (MC.Assignment _ (MC.ReadMem readAddr memRep)) <- return addr'
DM.traceValues [("readAddr'", Some readAddr)]
ret <- return $ DE.absEvalReadMem absProcState' readAddr memRep
() <- case ret of
MA.ReturnAddr -> return ()
_ -> classifyThrow $ (DM.MacawAbsValue "Not ReturnAddr" ret)
return (Some MA.ReturnAddr)
{- example:
r8 := (bv_add r1_0 (0x14 :: [32]))
r9 := read_mem r8 (bvbe4)
-}
matchRead :: forall w2. MC.Value ppc ids (MT.BVType w2) -> Classifier ppc (Some (MA.AbsValue w))
matchRead r = case r of
MC.AssignedValue (MC.Assignment _ (MC.ReadMem readAddr memRep))
| MA.ReturnAddr <- DE.absEvalReadMem absProcState' readAddr memRep -> return (Some MA.ReturnAddr)
_ -> Nothing
_ -> classifyThrow $ (DM.MacawValue "Not ReturnAddr" r)

View File

@ -98,6 +98,7 @@ import Data.Macaw.X86.SyscallInfo.Linux as Linux
import Data.Macaw.X86.X86Reg
import Data.Macaw.X86.Generator
import qualified Data.Macaw.Architecture.Info as DM
------------------------------------------------------------------------
-- ExploreLoc
@ -485,8 +486,8 @@ translateBlockWithRegs gen addr initRegs maxSize = do
identifyX86Call :: Memory 64
-> Seq (Stmt X86_64 ids)
-> RegState X86Reg (Value X86_64 ids)
-> Maybe (Seq (Stmt X86_64 ids), MemSegmentOff 64)
identifyX86Call mem stmts0 s = go stmts0 Seq.empty
-> Classifier X86_64 (Seq (Stmt X86_64 ids), MemSegmentOff 64)
identifyX86Call mem stmts0 s = DM.classifyLiftMaybe $ go stmts0 Seq.empty
where -- Get value of stack pointer
next_sp = s^.boundValue RSP
-- Recurse on statements.
@ -516,12 +517,10 @@ identifyX86Call mem stmts0 s = go stmts0 Seq.empty
-- return address is on top of stack.
checkForReturnAddrX86 :: forall ids
. AbsProcessorState X86Reg ids
-> Bool
checkForReturnAddrX86 absState
| Just (StackEntry _ ReturnAddr) <- Map.lookup 0 (absState^.curAbsStack) =
True
| otherwise =
False
-> Classifier X86_64 ()
checkForReturnAddrX86 absState = case Map.lookup 0 (absState^.curAbsStack) of
Just (StackEntry _ ReturnAddr) -> return ()
_ -> fail "checkForReturnAddrX86"
-- | Called to determine if the instruction sequence contains a return
-- from the current function.
@ -532,8 +531,8 @@ checkForReturnAddrX86 absState
identifyX86Return :: Seq (Stmt X86_64 ids)
-> RegState X86Reg (Value X86_64 ids)
-> AbsProcessorState X86Reg ids
-> Maybe (Seq (Stmt X86_64 ids))
identifyX86Return stmts s finalRegSt8 =
-> Classifier X86_64 (Seq (Stmt X86_64 ids))
identifyX86Return stmts s finalRegSt8 = DM.classifyLiftMaybe $
case transferValue finalRegSt8 (s^.boundValue ip_reg) of
ReturnAddr -> Just stmts
_ -> Nothing