mirror of
https://github.com/GaloisInc/macaw.git
synced 2024-12-25 23:23:18 +03:00
Cleanup and documentation pass
This commit is contained in:
parent
599a357515
commit
b9835b9767
@ -13,7 +13,6 @@ module Data.Macaw.PPC.Arch (
|
||||
PPCTermStmt(..),
|
||||
rewriteTermStmt,
|
||||
PPCStmt(..),
|
||||
valuesInPPCStmt,
|
||||
rewriteStmt,
|
||||
PPCPrimFn(..),
|
||||
rewritePrimFn,
|
||||
@ -26,7 +25,6 @@ import GHC.TypeLits
|
||||
import qualified Text.PrettyPrint.ANSI.Leijen as PP
|
||||
import qualified Data.Parameterized.TraversableFC as FC
|
||||
import qualified Data.Parameterized.TraversableF as TF
|
||||
import Data.Parameterized.Some ( Some(..) )
|
||||
import qualified Data.Macaw.CFG as MC
|
||||
import Data.Macaw.CFG.Rewriter ( Rewriter, rewriteValue, evalRewrittenArchFn )
|
||||
import qualified Data.Macaw.Memory as MM
|
||||
@ -143,9 +141,6 @@ instance FC.TraversableFC (PPCPrimFn ppc) where
|
||||
type instance MC.ArchFn PPC64.PPC = PPCPrimFn PPC64.PPC
|
||||
type instance MC.ArchFn PPC32.PPC = PPCPrimFn PPC32.PPC
|
||||
|
||||
valuesInPPCStmt :: PPCStmt ppc (MC.Value ppc ids) -> [Some (MC.Value ppc ids)]
|
||||
valuesInPPCStmt s = TF.foldMapF (\x -> [Some x]) s
|
||||
|
||||
type PPCArchConstraints ppc = ( MC.ArchReg ppc ~ PPCReg ppc
|
||||
, MC.ArchFn ppc ~ PPCPrimFn ppc
|
||||
, MC.ArchStmt ppc ~ PPCStmt ppc
|
||||
|
@ -41,9 +41,6 @@ import Data.Macaw.PPC.Generator
|
||||
import Data.Macaw.PPC.PPCReg
|
||||
import Data.Macaw.PPC.Simplify ( simplifyValue )
|
||||
|
||||
import Debug.Trace (trace)
|
||||
import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>))
|
||||
|
||||
-- | Read one instruction from the 'MM.Memory' at the given segmented offset.
|
||||
--
|
||||
-- Returns the instruction and number of bytes consumed /or/ an error.
|
||||
@ -75,13 +72,31 @@ readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do
|
||||
Just insn -> return (insn, fromIntegral bytesRead)
|
||||
Nothing -> ET.throwError (MM.InvalidInstruction (MM.relativeSegmentAddr addr) contents)
|
||||
|
||||
-- | Disassemble an instruction and terminate the current block if we run out of
|
||||
-- instructions to disassemble. We can run out if:
|
||||
--
|
||||
-- 1) We exceed the offset that macaw has told us to disassemble to
|
||||
--
|
||||
-- 2) We can't decode the IP (i.e., it isn't a constant)
|
||||
--
|
||||
-- 3) The IP after executing the semantics transformer is not equal to the
|
||||
-- expected next IP value, which indicates a jump to another block or
|
||||
-- function
|
||||
--
|
||||
-- In most of those cases, we end the block with a simple terminator. If the IP
|
||||
-- becomes a mux, we split execution using 'conditionalBranch'.
|
||||
disassembleBlock :: forall ppc ids s
|
||||
. PPCArchConstraints ppc
|
||||
=> (Value ppc ids (BVType (ArchAddrWidth ppc)) -> D.Instruction -> Maybe (PPCGenerator ppc ids s ()))
|
||||
-- ^ A function to look up the semantics for an instruction that we disassemble
|
||||
-> MM.Memory (ArchAddrWidth ppc)
|
||||
-> GenState ppc ids s
|
||||
-> MM.MemSegmentOff (ArchAddrWidth ppc)
|
||||
-- ^ The current instruction pointer
|
||||
-> MM.MemWord (ArchAddrWidth ppc)
|
||||
-- ^ The maximum offset into the bytestring that we should
|
||||
-- disassemble to; in principle, macaw can tell us to limit our
|
||||
-- search with this.
|
||||
-> DisM ppc ids s (MM.MemWord (ArchAddrWidth ppc), BlockSeq ppc ids)
|
||||
disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
|
||||
let seg = MM.msegSegment curIPAddr
|
||||
@ -110,9 +125,13 @@ disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
|
||||
addStmt (Comment (T.pack lineStr))
|
||||
transformer
|
||||
|
||||
-- Check to see if the IP has become conditionally-defined (by e.g.,
|
||||
-- a mux). If it has, we need to split execution using a primitive
|
||||
-- provided by the PPCGenerator monad.
|
||||
nextIPExpr <- getRegValue PPC_IP
|
||||
case matchConditionalBranch nextIPExpr of
|
||||
Just (cond, t_ip, f_ip) -> conditionalBranch cond (setRegVal PPC_IP t_ip) (setRegVal PPC_IP f_ip)
|
||||
Just (cond, t_ip, f_ip) ->
|
||||
conditionalBranch cond (setRegVal PPC_IP t_ip) (setRegVal PPC_IP f_ip)
|
||||
Nothing -> return ())
|
||||
case egs1 of
|
||||
Left genErr -> failAt gs off curIPAddr (GenerationError i genErr)
|
||||
@ -121,9 +140,8 @@ disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
|
||||
Just preBlock
|
||||
| Seq.null (resBlockSeq gs1 ^. frontierBlocks)
|
||||
, v <- preBlock ^. (pBlockState . curIP)
|
||||
, trace ("raw ip = " ++ show (pretty v)) True
|
||||
, Just simplifiedIP <- simplifyValue v
|
||||
, trace ("v = " ++ show (pretty simplifiedIP) ++ "\nnextIPVal = " ++ show nextIPVal ++ "\n") $ simplifiedIP == nextIPVal
|
||||
, simplifiedIP == nextIPVal
|
||||
, nextIPOffset < maxOffset
|
||||
, Just nextIPSegAddr <- MM.asSegmentOff mem nextIP -> do
|
||||
let preBlock' = (pBlockState . curIP .~ simplifiedIP) preBlock
|
||||
@ -137,6 +155,8 @@ disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
|
||||
|
||||
_ -> return (nextIPOffset, finishBlock FetchAndExecute gs1)
|
||||
|
||||
-- | Examine a value and see if it is a mux; if it is, break the mux up and
|
||||
-- return its component values (the condition and two alternatives)
|
||||
matchConditionalBranch :: (PPCArchConstraints arch)
|
||||
=> Value arch ids tp
|
||||
-> Maybe (Value arch ids BoolType, Value arch ids tp, Value arch ids tp)
|
||||
@ -161,7 +181,7 @@ tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize = do
|
||||
let gs0 = initGenState nonceGen mem startAddr (initRegState startAddr)
|
||||
let startOffset = MM.msegOffset startAddr
|
||||
(nextIPOffset, blocks) <- disassembleBlock lookupSemantics mem gs0 startAddr (startOffset + maxSize)
|
||||
traceShow blocks $ unless (nextIPOffset > startOffset) $ do
|
||||
unless (nextIPOffset > startOffset) $ do
|
||||
let reason = InvalidNextIP (fromIntegral nextIPOffset) (fromIntegral startOffset)
|
||||
failAt gs0 nextIPOffset startAddr reason
|
||||
return (F.toList (blocks ^. frontierBlocks), nextIPOffset - startOffset)
|
||||
@ -194,7 +214,7 @@ disassembleFn _ lookupSemantics mem nonceGen startAddr maxSize _ = do
|
||||
mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize))
|
||||
case mr of
|
||||
Left (blocks, off, exn) -> return (blocks, off, Just (show exn))
|
||||
Right (blocks, bytes) -> traceShow blocks $ return (blocks, bytes, Nothing)
|
||||
Right (blocks, bytes) -> return (blocks, bytes, Nothing)
|
||||
|
||||
type LocatedError ppc ids = ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc), TranslationError (ArchAddrWidth ppc))
|
||||
-- | This is a monad for error handling during disassembly
|
||||
|
@ -67,7 +67,6 @@ import Data.Macaw.CFG.Block
|
||||
import Data.Macaw.Types ( BVType, BoolType )
|
||||
import qualified Data.Macaw.Memory as MM
|
||||
import Data.Parameterized.Classes
|
||||
import qualified Data.Parameterized.Map as MapF
|
||||
import qualified Data.Parameterized.NatRepr as NR
|
||||
import qualified Data.Parameterized.Nonce as NC
|
||||
import qualified Lang.Crucible.BaseTypes as S
|
||||
@ -116,23 +115,36 @@ frontierBlocks = lens _frontierBlocks (\s v -> s { _frontierBlocks = v })
|
||||
------------------------------------------------------------------------
|
||||
-- PreBlock
|
||||
|
||||
data PreBlock ppc ids = PreBlock { pBlockIndex :: !Word64
|
||||
, pBlockAddr :: !(MM.MemSegmentOff (ArchAddrWidth ppc))
|
||||
-- ^ Starting address of function in preblock.
|
||||
, _pBlockStmts :: !(Seq.Seq (Stmt ppc ids))
|
||||
, _pBlockState :: !(RegState (PPCReg ppc) (Value ppc ids))
|
||||
, pBlockApps :: !(MapF.MapF (App (Value ppc ids)) (Assignment ppc ids))
|
||||
}
|
||||
-- | A basic block that is under construction; when we get to the end of a block
|
||||
-- (by reaching a terminator statement), we turn this into a full 'Block' (added
|
||||
-- to the 'BlockSeq').
|
||||
data PreBlock ppc ids =
|
||||
PreBlock { pBlockIndex :: !Word64
|
||||
-- ^ The index of the block we are currently constructing
|
||||
, pBlockAddr :: !(MM.MemSegmentOff (ArchAddrWidth ppc))
|
||||
-- ^ Starting address of function in preblock.
|
||||
, _pBlockStmts :: !(Seq.Seq (Stmt ppc ids))
|
||||
-- ^ The list of statements that we have constructed so far. We
|
||||
-- append to the right.
|
||||
, _pBlockState :: !(RegState (PPCReg ppc) (Value ppc ids))
|
||||
-- ^ The current register state, which is updated as we simulate
|
||||
-- instructions
|
||||
}
|
||||
|
||||
-- | An accessor for the current statements in the 'PreBlock'
|
||||
pBlockStmts :: Simple Lens (PreBlock ppc ids) (Seq.Seq (Stmt ppc ids))
|
||||
pBlockStmts = lens _pBlockStmts (\s v -> s { _pBlockStmts = v })
|
||||
|
||||
-- | An accessor for the current register state held in the 'PreBlock'
|
||||
pBlockState :: Simple Lens (PreBlock ppc ids) (RegState (PPCReg ppc) (Value ppc ids))
|
||||
pBlockState = lens _pBlockState (\s v -> s { _pBlockState = v })
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- GenState
|
||||
|
||||
-- | The state held in our state monad. It tracks the current model of the
|
||||
-- machine state as well as a collection of discovered blocks and the current
|
||||
-- block we are constructing.
|
||||
data GenState ppc ids s =
|
||||
GenState { assignIdGen :: !(NC.NonceGenerator (ST s) ids)
|
||||
, _blockSeq :: !(BlockSeq ppc ids)
|
||||
@ -141,6 +153,11 @@ data GenState ppc ids s =
|
||||
, genMemory :: !(MM.Memory (ArchAddrWidth ppc))
|
||||
}
|
||||
|
||||
-- | Initialize a fresh 'GenState'
|
||||
--
|
||||
-- This does most of what you would expect. Note that the next block ID is set
|
||||
-- to 1, since we use ID 0 for the first block (assigned to the initial
|
||||
-- 'PreBlock'.
|
||||
initGenState :: NC.NonceGenerator (ST s) ids
|
||||
-> MM.Memory (ArchAddrWidth ppc)
|
||||
-> MM.MemSegmentOff (ArchAddrWidth ppc)
|
||||
@ -154,16 +171,20 @@ initGenState nonceGen mem addr st =
|
||||
, genMemory = mem
|
||||
}
|
||||
|
||||
initRegState :: ( KnownNat (RegAddrWidth (PPCReg ppc))
|
||||
, ArchReg ppc ~ PPCReg ppc
|
||||
, 1 <= RegAddrWidth (PPCReg ppc)
|
||||
, MM.MemWidth (RegAddrWidth (PPCReg ppc))
|
||||
, ArchWidth ppc)
|
||||
-- | Create an initial register state.
|
||||
--
|
||||
-- All of the register values are set to the special 'Initial' value, which
|
||||
-- indicates that the values are not known precisely, but are some abstract
|
||||
-- initial values.
|
||||
--
|
||||
-- Aside from that, we set the initial IP to the given value (@startIP@).
|
||||
initRegState :: ( PPCArchConstraints ppc)
|
||||
=> ArchSegmentOff ppc
|
||||
-> RegState (PPCReg ppc) (Value ppc ids)
|
||||
initRegState startIP = mkRegState Initial
|
||||
& curIP .~ RelocatableValue NR.knownNat (MM.relativeSegmentAddr startIP)
|
||||
|
||||
-- | Set up an empty initial block with a provided state, address, and block ID.
|
||||
emptyPreBlock :: RegState (PPCReg ppc) (Value ppc ids)
|
||||
-> Word64
|
||||
-> MM.MemSegmentOff (RegAddrWidth (ArchReg ppc))
|
||||
@ -173,7 +194,6 @@ emptyPreBlock s0 idx addr =
|
||||
, pBlockAddr = addr
|
||||
, _pBlockStmts = Seq.empty
|
||||
, _pBlockState = s0
|
||||
, pBlockApps = MapF.empty
|
||||
}
|
||||
|
||||
blockSeq :: Simple Lens (GenState ppc ids s) (BlockSeq ppc ids)
|
||||
@ -189,7 +209,7 @@ curPPCState = blockState . pBlockState
|
||||
-- Factored-out Operations for PPCGenerator
|
||||
|
||||
-- | Set a register to a value after attempting to simplify the value as much as
|
||||
-- possible.
|
||||
-- possible (using 'simplifyValue').
|
||||
setRegVal :: (PPCArchConstraints ppc, RegAddrWidth (PPCReg ppc) ~ w)
|
||||
=> PPCReg ppc tp
|
||||
-> Value ppc ids tp
|
||||
@ -218,6 +238,9 @@ bvconcat bv1Val bv2Val repV repU repW = do
|
||||
bv1Shf <- addExpr (AppExpr (BVShl repW bv1Ext bv1Shifter))
|
||||
return $ AppExpr (BVOr repW bv1Shf bv2Ext)
|
||||
|
||||
-- | Select @n@ bits from a @w@ bit bitvector starting at index @i@
|
||||
--
|
||||
-- This code is factored out of the TH module to improve compilation times.
|
||||
bvselect :: (ArchConstraints ppc, 1 <= w, 1 <= n, 1 <= i, (i+n) <= w)
|
||||
=> Value ppc ids (BVType w)
|
||||
-> NR.NatRepr n
|
||||
@ -234,7 +257,7 @@ bvselect bvVal repN repI repW = do
|
||||
bvShf <- addExpr (AppExpr (BVShr repW bvVal (mkLit repW (NR.natValue repI))))
|
||||
return (AppExpr (Trunc bvShf repN))
|
||||
|
||||
-- Add an expression in the PPCGenerator monad. This returns a Macaw value
|
||||
-- | Add an expression in the PPCGenerator monad. This returns a Macaw value
|
||||
-- corresponding to the added expression.
|
||||
addExpr :: (ArchConstraints ppc) => Expr ppc ids tp -> PPCGenerator ppc ids s (Value ppc ids tp)
|
||||
addExpr expr = do
|
||||
@ -252,6 +275,19 @@ data GeneratorError = InvalidEncoding
|
||||
| GeneratorMessage String
|
||||
deriving (Show)
|
||||
|
||||
-- | This is a state monad (with error termination via 'ET.ExceptT') built on
|
||||
-- top of 'Ct.ContT' and 'Rd.ReaderT'.
|
||||
--
|
||||
-- We use the 'Ct.ContT' for early termination when dealing with control flow
|
||||
-- terminators (e.g., syscall) and to split for dealing with conditional
|
||||
-- branches.
|
||||
--
|
||||
-- The state is managed by explicitly passing it through the continuation.
|
||||
--
|
||||
-- The most important operation provided is the primitive 'shiftGen', which is
|
||||
-- used to build 'conditionalBranch'.
|
||||
--
|
||||
-- The base monad is 'ST', which is used for nonce generation.
|
||||
newtype PPCGenerator ppc ids s a =
|
||||
PPCGenerator { runG :: Ct.ContT (GenResult ppc ids)
|
||||
(Rd.ReaderT (GenState ppc ids s)
|
||||
@ -261,6 +297,8 @@ newtype PPCGenerator ppc ids s a =
|
||||
Rd.MonadReader (GenState ppc ids s),
|
||||
Ct.MonadCont)
|
||||
|
||||
-- | The 'Monad' instance is manually-specified so that calls to 'fail' actually
|
||||
-- use our 'ET.ExceptT' 'ET.throwError' for nicer error behavior.
|
||||
instance Monad (PPCGenerator ppc ids s) where
|
||||
return v = PPCGenerator (return v)
|
||||
PPCGenerator m >>= h = PPCGenerator (m >>= \v -> runG (h v))
|
||||
@ -284,14 +322,19 @@ instance ET.MonadError GeneratorError (PPCGenerator ppc ids s) where
|
||||
-- Right res -> return res
|
||||
-- Right res -> return res
|
||||
|
||||
-- | The type of continuations provided by 'shiftGen'
|
||||
type GenCont ppc ids s a = a -> GenState ppc ids s -> ET.ExceptT GeneratorError (ST s) (GenResult ppc ids)
|
||||
|
||||
-- | Given a final continuation to call, run the generator (and produce a final
|
||||
-- result with the given continuation @k@). Also takes an initial state.
|
||||
runGenerator :: GenCont ppc ids s a
|
||||
-> GenState ppc ids s
|
||||
-> PPCGenerator ppc ids s a
|
||||
-> ET.ExceptT GeneratorError (ST s) (GenResult ppc ids)
|
||||
runGenerator k st (PPCGenerator m) = Rd.runReaderT (Ct.runContT m (Rd.ReaderT . k)) st
|
||||
|
||||
-- | Capture the current continuation and execute an action that gets that
|
||||
-- continuation as an argument (it can be invoked as many times as desired).
|
||||
shiftGen :: (GenCont ppc ids s a -> GenState ppc ids s -> ET.ExceptT GeneratorError (ST s) (GenResult ppc ids))
|
||||
-> PPCGenerator ppc ids s a
|
||||
shiftGen f =
|
||||
@ -303,19 +346,25 @@ genResult _ s = do
|
||||
, resState = Just (s ^. blockState)
|
||||
}
|
||||
|
||||
-- | Append a statement (on the right) to the list of statements in the current
|
||||
-- 'PreBlock'.
|
||||
addStmt :: (ArchConstraints ppc) => Stmt ppc ids -> PPCGenerator ppc ids s ()
|
||||
addStmt stmt = (blockState . pBlockStmts) %= (Seq.|> stmt)
|
||||
|
||||
-- | Generate the next unique ID for an assignment using the nonce generator
|
||||
newAssignId :: PPCGenerator ppc ids s (AssignId ids tp)
|
||||
newAssignId = do
|
||||
nonceGen <- St.gets assignIdGen
|
||||
n <- liftST $ NC.freshNonce nonceGen
|
||||
return (AssignId n)
|
||||
|
||||
-- | Lift an 'ST' action into 'PPCGenerator'
|
||||
liftST :: ST s a -> PPCGenerator ppc ids s a
|
||||
liftST = PPCGenerator . lift . lift . lift
|
||||
|
||||
addAssignment :: ArchConstraints ppc
|
||||
-- | Append an assignment statement to the list of statements in the current
|
||||
-- 'PreBlock'
|
||||
addAssignment :: (ArchConstraints ppc)
|
||||
=> AssignRhs ppc ids tp
|
||||
-> PPCGenerator ppc ids s (Assignment ppc ids tp)
|
||||
addAssignment rhs = do
|
||||
@ -324,6 +373,11 @@ addAssignment rhs = do
|
||||
addStmt $ AssignStmt a
|
||||
return a
|
||||
|
||||
-- | Get all of the current register values
|
||||
--
|
||||
-- This is meant to be used to snapshot the register state at the beginning of
|
||||
-- an instruction transformer so that we can perform atomic updates to the
|
||||
-- machine state (by reading values from the captured initial state).
|
||||
getRegs :: PPCGenerator ppc ids s (RegState (PPCReg ppc) (Value ppc ids))
|
||||
getRegs = do
|
||||
genState <- St.get
|
||||
@ -341,10 +395,22 @@ getRegValue r = do
|
||||
genState <- St.get
|
||||
return (genState ^. blockState ^. pBlockState ^. boundValue r)
|
||||
|
||||
-- | A primitive for splitting execution in the presence of conditional
|
||||
-- branches.
|
||||
--
|
||||
-- This function uses the underlying continuation monad to split execution. It
|
||||
-- captures the current continuation and builds a block for the true branch and
|
||||
-- another block for the false branch. It manually threads the state between
|
||||
-- the two branches and makes updates to keep them consistent. It also joins
|
||||
-- the two exploration frontiers after processing the true and false branches so
|
||||
-- that the underlying return value contains all discovered blocks.
|
||||
conditionalBranch :: (PPCArchConstraints ppc)
|
||||
=> Value ppc ids BoolType
|
||||
-- ^ The conditional guarding the branch
|
||||
-> PPCGenerator ppc ids s ()
|
||||
-- ^ The action to take on the true branch
|
||||
-> PPCGenerator ppc ids s ()
|
||||
-- ^ The action to take on the false branch
|
||||
-> PPCGenerator ppc ids s ()
|
||||
conditionalBranch condExpr t f =
|
||||
go (fromMaybe condExpr (simplifyValue condExpr))
|
||||
|
@ -17,7 +17,13 @@ import Data.Macaw.PPC.Simplify ( simplifyValue )
|
||||
|
||||
import Debug.Trace (trace)
|
||||
|
||||
|
||||
-- | Our current heuristic is that we are issuing a (potentially conditional)
|
||||
-- call if we see an address in the link register.
|
||||
--
|
||||
-- This seems reasonable, as the only time the link register would be populated
|
||||
-- with a constant is at a call site. This might be a problem if we see a
|
||||
-- @mtlr@ and put a stack value into the link register. That might look like a
|
||||
-- call...
|
||||
identifyCall :: (PPCArchConstraints ppc)
|
||||
=> proxy ppc
|
||||
-> MM.Memory (MC.ArchAddrWidth ppc)
|
||||
@ -27,12 +33,11 @@ identifyCall :: (PPCArchConstraints ppc)
|
||||
identifyCall _ mem stmts0 rs
|
||||
| trace ("Identify call: " ++ unlines (map show stmts0)) False = undefined
|
||||
| not (null stmts0)
|
||||
, MC.AssignedValue (MC.Assignment { MC.assignRhs = MC.EvalApp app }) <- rs ^. MC.boundValue PPC_LNK
|
||||
, MC.BVAdd _ (MC.RelocatableValue {}) (MC.BVValue _ 0x4) <- app
|
||||
, MC.RelocatableValue {} <- rs ^. MC.boundValue PPC_LNK
|
||||
, Just retVal <- simplifyValue (rs ^. MC.boundValue PPC_LNK)
|
||||
, Just retAddrVal <- MC.asLiteralAddr retVal
|
||||
, Just retAddr <- MM.asSegmentOff mem retAddrVal =
|
||||
Just (Seq.fromList stmts0, retAddr)
|
||||
Just (Seq.fromList stmts0, retAddr)
|
||||
| otherwise = Nothing
|
||||
|
||||
identifyReturn :: (PPCArchConstraints ppc)
|
||||
|
Loading…
Reference in New Issue
Block a user