This fixes tail call detection, and allows architecture-specific checks.

This commit is contained in:
Joe Hendrix 2018-11-12 11:56:44 -05:00
parent 6a1a4162f1
commit bb63f9f859
No known key found for this signature in database
GPG Key ID: 8DFA5FF784098C4F
7 changed files with 69 additions and 32 deletions

View File

@ -1,5 +1,5 @@
name: macaw-base
version: 0.3.2
version: 0.3.3
author: Galois, Inc.
maintainer: jhendrix@galois.com
build-type: Simple

View File

@ -97,8 +97,23 @@ data ArchitectureInfo arch
-- Given a memory state, list of statements, and final register
-- state, the should determine if this is a call, and if so,
-- return the statements with any action to push the return
-- value to the stack removed, and provide the explicit return
-- address that the function should return to.
-- value to the stack removed, and provide the return address that
-- the function should return to.
, checkForReturnAddr :: forall ids
. RegState (ArchReg arch) (Value arch ids)
-> AbsProcessorState (ArchReg arch) ids
-> Bool
-- ^ @checkForReturnAddr regs s@ returns true if the location
-- where the return address is normally stored in regs when
-- calling a function does indeed contain the abstract value
-- associated with return addresses.
--
-- For x86 this checks if the address just above the stack is the
-- return address. For ARM, this should check the link register.
--
-- This predicate is invoked when considering if a potential tail call
-- is setup to return to the right location.
, identifyReturn :: forall ids
. [Stmt arch ids]
-> RegState (ArchReg arch) (Value arch ids)

View File

@ -693,7 +693,7 @@ data Stmt arch ids
ppStmt :: ArchConstraints arch
=> (ArchAddrWord arch -> Doc)
-- ^ Function for pretty printing an offset
-- ^ Function for pretty printing an instruction address offset
-> Stmt arch ids
-> Doc
ppStmt ppOff stmt =

View File

@ -18,6 +18,7 @@ module Data.Macaw.Discovery
( -- * DiscoveryInfo
State.DiscoveryState(..)
, State.emptyDiscoveryState
, State.trustedFunctionEntryPoints
, State.AddrSymMap
, State.funInfo
, State.exploredFunctions
@ -844,6 +845,7 @@ parseFetchAndExecute ctx idx stmts regs s = do
-- We define calls as statements that end with a write that
-- stores the pc to an address.
case () of
-- The block ends with a Mux, so we turn this into a `ParsedIte` statement.
_ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
@ -869,7 +871,7 @@ parseFetchAndExecute ctx idx stmts regs s = do
}
pure (ret, falseIdx)
-- The last statement was a call.
-- Use architecture-specific callback to check if last statement was a call.
-- Note that in some cases the call is known not to return, and thus
-- this code will never jump to the return value.
_ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do
@ -913,7 +915,7 @@ parseFetchAndExecute ctx idx stmts regs s = do
-- Jump to a block within this function.
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
-- Check
-- Check target block address is in executable segment.
, segmentFlags (segoffSegment tgt_mseg) `Perm.hasPerm` Perm.execute
-- Check the target address is not the entry point of this function.
@ -935,7 +937,8 @@ parseFetchAndExecute ctx idx stmts regs s = do
, stmtsAbsState = absProcState'
}
pure (ret, idx+1)
-- Block ends with what looks like a jump table.
-- Block ends with what looks like a jump table.
| Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
@ -957,12 +960,13 @@ parseFetchAndExecute ctx idx stmts regs s = do
}
pure (ret,idx+1)
-- Check for tail call when the stack pointer points to the return address.
--
-- TODO: this makes sense for x86, but is not correct for all architectures
| ptrType <- addrMemRepr ainfo
, sp_val <- s^.boundValue sp_reg
, ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do
-- Check for tail call when the calling convention seems to be satisfied.
| spVal <- s^.boundValue sp_reg
-- Check to see if the stack pointer points to an offset of the initial stack.
, StackOffset _ offsets <- transferValue absProcState' spVal
-- Stack stack is back to height when function was called.
, offsets == Set.singleton 0
, checkForReturnAddr ainfo s absProcState' -> do
finishWithTailCall absProcState'
-- Is this a jump to a known function entry? We're already past the
@ -1097,19 +1101,9 @@ addBlocks src finfo sz blockMap =
funAddr <- gets curFunAddr
s <- use curFunCtx
-- Combine entries of functions we've discovered thus far with
-- undiscovered functions with entries marked InitAddr, which we assume is
-- info we know from the symbol table or some other reliable source, and
-- pass in. Only used in analysis if pctxTrustKnownFns is True.
let knownFns =
if s^.trustKnownFns then
Set.union (Map.keysSet $ s^.funInfo)
(Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions)
else
Set.empty
let ctx = ParseContext { pctxMemory = memory s
, pctxArchInfo = archInfo s
, pctxKnownFnEntries = knownFns
, pctxKnownFnEntries = s^.trustedFunctionEntryPoints
, pctxFunAddr = funAddr
, pctxAddr = src
, pctxBlockMap = blockMap

View File

@ -33,7 +33,7 @@ module Data.Macaw.Discovery.State
, globalDataMap
, funInfo
, unexploredFunctions
, trustKnownFns
, trustedFunctionEntryPoints
, exploreFnPred
-- * DiscoveryFunInfo
, DiscoveryFunInfo(..)
@ -51,6 +51,8 @@ import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Parameterized.Classes
import Data.Parameterized.Some
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Vector as V
@ -310,9 +312,20 @@ data DiscoveryState arch
-- they are analyzed.
--
-- The keys in this map and `_funInfo` should be mutually disjoint.
, _trustKnownFns :: !Bool
-- ^ Should we use and depend on known function entries in
-- our analysis? E.g. used to distinguish jumps vs. tail calls
, _trustedFunctionEntryPoints :: !(Set (ArchSegmentOff arch))
-- ^ This is the set of addresses that we treat
-- as definitely belonging to function entry
-- points.
--
-- The discovery process will not allow
-- intra-procedural jumps to these addresses.
-- Jumps to these addresses must either be calls
-- or tail calls.
--
-- To ensure translation is invariant on the
-- order in which functions are visited, this
-- set should be initialized upfront, and not
-- changed.
, _exploreFnPred :: Maybe (ArchSegmentOff arch -> Bool)
-- ^ if present, this predicate decides whether to explore
-- a function at the given address or not
@ -350,7 +363,7 @@ emptyDiscoveryState mem symbols info =
, _globalDataMap = Map.empty
, _funInfo = Map.empty
, _unexploredFunctions = Map.empty
, _trustKnownFns = False
, _trustedFunctionEntryPoints = Set.empty
, _exploreFnPred = Nothing
}
@ -368,8 +381,10 @@ unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunction
funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))
funInfo = lens _funInfo (\s v -> s { _funInfo = v })
trustKnownFns :: Simple Lens (DiscoveryState arch) Bool
trustKnownFns = lens _trustKnownFns (\s v -> s { _trustKnownFns = v })
trustedFunctionEntryPoints :: Simple Lens (DiscoveryState arch) (Set (ArchSegmentOff arch))
trustedFunctionEntryPoints =
lens _trustedFunctionEntryPoints
(\s v -> s { _trustedFunctionEntryPoints = v })
exploreFnPred :: Simple Lens (DiscoveryState arch) (Maybe (ArchSegmentOff arch -> Bool))
exploreFnPred = lens _exploreFnPred (\s v -> s { _exploreFnPred = v })

View File

@ -16,7 +16,7 @@ library
containers,
flexdis86 >= 0.1.2,
lens >= 4.7,
macaw-base >= 0.3.2,
macaw-base >= 0.3.3,
mtl,
parameterized-utils,
text,

View File

@ -68,6 +68,7 @@ import Text.PrettyPrint.ANSI.Leijen (Pretty(..), text)
import Data.Macaw.AbsDomain.AbsState
( AbsBlockState
, curAbsStack
, setAbsIP
, absRegState
, StackEntry(..)
@ -503,6 +504,17 @@ identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty
-- Otherwise skip over this instruction.
| otherwise -> go prev (stmt Seq.<| after)
-- | Return true if stack pointer has been reset to original value, and
-- return address is on top of stack.
checkForReturnAddrX86 :: forall ids
. AbsProcessorState X86Reg ids
-> Bool
checkForReturnAddrX86 absState
| Just (StackEntry _ ReturnAddr) <- Map.lookup 8 (absState^.curAbsStack) =
True
| otherwise =
False
-- | Called to determine if the instruction sequence contains a return
-- from the current function.
--
@ -575,6 +587,7 @@ x86_64_info preservePred =
, absEvalArchStmt = \s _ -> s
, postCallAbsState = x86PostCallAbsState
, identifyCall = identifyX86Call
, checkForReturnAddr = \_ s -> checkForReturnAddrX86 s
, identifyReturn = identifyX86Return
, rewriteArchFn = rewriteX86PrimFn
, rewriteArchStmt = rewriteX86Stmt