Fix tail call classification (#286)

The tail call classifier came after the jump classifier, which was a problem because it is less strict than the tail call classifier, meaning it would always fire.  This commit moves direct jump to be the last classifier applied, giving the others a chance.

Includes a test case in the ARM backend.

This requires some updates to some of the expected test results, as a few blocks are now classified as tail calls that were
plain jumps before.  They really could be considered either.  I think it would be nice if these could be classified as jumps instead, but the reason they are flagged as tail calls is mostly down to the fact that their surrounding context is so simple that either interpretation works.

Correcting this would require some heuristics based on additional analysis passes.

The test harness for macaw symbolic required a few changes because the new detection of some jumps as tail calls introduces new calls into the symbolic test suites. However, the symbolic testing harness did not support calls before.  Adding support required a bit of plumbing, including a more extensive code discovery pass.


Fixes #285
This commit is contained in:
Tristan Ravitch 2022-05-10 09:29:55 -05:00 committed by GitHub
parent a5796fc955
commit 8e10643b0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 100 additions and 33 deletions

View File

@ -496,15 +496,25 @@ useExternalTargets bcc = do
-- See 'Data.Macaw.Discovery.Classifier' for the primitives necessary to define
-- new classifiers (e.g., classifiers that can produce architecture-specific
-- terminators).
--
-- Note that the direct jump classifier is last; this is to give the other
-- classifiers a chance to run and match code. In particular, tail calls and
-- direct local jumps are very difficult to distinguish from each other. Since
-- we have to choose some order between them, we put the tail call classifier
-- first so that it at least *could* fire without being completely subsumed by
-- direct jumps. This means that some control flow transfers that look like
-- direct jumps could instead be classified as tail calls. It would take some
-- higher-level heuristics (e.g., live registers at the call site, locality) to
-- distinguish the cases.
defaultClassifier :: BlockClassifier arch ids
defaultClassifier = branchClassifier
<|> noreturnCallClassifier
<|> callClassifier
<|> returnClassifier
<|> directJumpClassifier
<|> jumpTableClassifier
<|> pltStubClassifier
<|> tailCallClassifier
<|> directJumpClassifier
-- | This parses a block that ended with a fetch and execute instruction.
parseFetchAndExecute :: forall arch ids

View File

@ -57,6 +57,7 @@ test-suite macaw-aarch32-symbolic-tests
elf-edit,
filepath,
Glob >= 0.9 && < 0.11,
lens,
dismantle-arm-xml,
asl-translator,
semmc-aarch32,

View File

@ -8,10 +8,12 @@
{-# LANGUAGE TypeOperators #-}
module Main (main) where
import Control.Lens ( (^.) )
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ElfEdit as Elf
import qualified Data.Foldable as F
import qualified Data.Map as Map
import Data.Maybe ( mapMaybe )
import qualified Data.Parameterized.Classes as PC
import qualified Data.Parameterized.Nonce as PN
@ -125,7 +127,8 @@ symExTestSized :: MST.SimulationResult
-> MAI.ArchitectureInfo MA.ARM
-> TTH.Assertion
symExTestSized expected exePath saveSMT saveMacaw step ehi archInfo = do
(mem, funInfos) <- MST.runDiscovery ehi MST.toAddrSymMap archInfo
(mem, discState) <- MST.runDiscovery ehi MST.toAddrSymMap archInfo
let funInfos = Map.elems (discState ^. M.funInfo)
let testEntryPoints = mapMaybe hasTestPrefix funInfos
F.forM_ testEntryPoints $ \(name, Some dfi) -> do
step ("Testing " ++ BS8.unpack name ++ " at " ++ show (M.discoveredFunAddr dfi))
@ -144,7 +147,7 @@ symExTestSized expected exePath saveSMT saveMacaw step ehi archInfo = do
let extract = armResultExtractor archVals
logger <- makeGoalLogger saveSMT solver name exePath
let ?memOpts = LLVM.defaultMemOptions
simRes <- MST.simulateAndVerify solver logger bak execFeatures archInfo archVals mem extract dfi
simRes <- MST.simulateAndVerify solver logger bak execFeatures archInfo archVals mem extract discState dfi
TTH.assertEqual "AssertionResult" expected simRes
writeMacawIR :: (MC.ArchConstraints arch) => SaveMacaw -> String -> M.DiscoveryFunInfo arch ids -> IO ()

View File

@ -1,4 +1,5 @@
R { funcs = [ (0x10054, [ (0x10054, 32), (0x10074, 12), (0x10080, 4), (0x10084, 24), (0x1009c, 24) ])
R { funcs = [ (0x10054, [ (0x10054, 32), (0x10074, 12), (0x10080, 4) ])
, (0x10084, [ (0x10084, 24), (0x1009c, 24) ])
]
, ignoreBlocks = []
}

Binary file not shown.

View File

@ -0,0 +1,4 @@
R { funcs = [ (0x101b8, [(0x101b8, 16)])
]
, ignoreBlocks = [0x101ac]
}

View File

@ -55,6 +55,7 @@ test-suite macaw-ppc-symbolic-tests
elf-edit,
filepath,
Glob >= 0.9 && < 0.11,
lens,
dismantle-ppc,
semmc-ppc,
macaw-base,

View File

@ -8,6 +8,7 @@
{-# LANGUAGE TypeOperators #-}
module Main (main) where
import Control.Lens ( (^.) )
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ElfEdit as Elf
@ -169,7 +170,8 @@ symExTestSized :: forall v w arch
-> MAI.ArchitectureInfo arch
-> TTH.Assertion
symExTestSized expected exePath saveSMT saveMacaw step ehi loadedBinary archInfo = do
(mem, funInfos) <- MST.runDiscovery ehi (toPPCAddrNameMap loadedBinary) archInfo
(mem, discState) <- MST.runDiscovery ehi (toPPCAddrNameMap loadedBinary) archInfo
let funInfos = Map.elems (discState ^. M.funInfo)
let testEntryPoints = mapMaybe hasTestPrefix funInfos
F.forM_ testEntryPoints $ \(name, Some dfi) -> do
step ("Testing " ++ BS8.unpack name ++ " at " ++ show (M.discoveredFunAddr dfi))
@ -188,7 +190,7 @@ symExTestSized expected exePath saveSMT saveMacaw step ehi loadedBinary archInfo
let extract = ppcResultExtractor archVals
logger <- makeGoalLogger saveSMT solver name exePath
let ?memOpts = LLVM.defaultMemOptions
simRes <- MST.simulateAndVerify solver logger bak execFeatures archInfo archVals mem extract dfi
simRes <- MST.simulateAndVerify solver logger bak execFeatures archInfo archVals mem extract discState dfi
TTH.assertEqual "AssertionResult" expected simRes
writeMacawIR :: (MC.ArchConstraints arch) => SaveMacaw -> String -> M.DiscoveryFunInfo arch ids -> IO ()

View File

@ -34,6 +34,7 @@ module Data.Macaw.Symbolic.Testing (
import qualified Control.Exception as X
import qualified Control.Lens as L
import Control.Lens ( (&), (%~) )
import Control.Monad ( when )
import qualified Data.BitVector.Sized as BVS
import qualified Data.ByteString as BS
@ -48,6 +49,7 @@ import qualified Data.Macaw.Symbolic as MS
import qualified Data.Macaw.Symbolic.Memory as MSM
import qualified Data.Macaw.Types as MT
import qualified Data.Map as Map
import Data.Maybe ( listToMaybe )
import qualified Data.Parameterized.Context as Ctx
import qualified Data.Parameterized.List as PL
import qualified Data.Parameterized.NatRepr as PN
@ -56,7 +58,6 @@ import Data.Proxy ( Proxy(..) )
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import qualified Data.Text.Encoding.Error as Text
import qualified Data.Traversable as T
import GHC.TypeNats ( type (<=) )
import qualified Lang.Crucible.Analysis.Postdom as CAP
import qualified Lang.Crucible.Backend as CB
@ -129,20 +130,11 @@ runDiscovery :: (MC.ArchAddrWidth arch ~ w)
--
-- A good default is 'toAddrSymMap'
-> MAI.ArchitectureInfo arch
-> IO (MM.Memory w, [Some (MD.DiscoveryFunInfo arch)])
-> IO (MM.Memory w, MD.DiscoveryState arch)
runDiscovery ehi toEntryPoints archInfo = do
(mem, nameAddrList) <- loadELF ehi
let addrSymMap = toEntryPoints mem nameAddrList
let ds0 = MD.emptyDiscoveryState mem addrSymMap archInfo
fns <- T.forM (Map.keys addrSymMap) $ \entryPoint -> do
-- We are discovering each function in isolation for now, so we can throw
-- away the updated discovery state.
--
-- NOTE: If we extend this to support function calls, we will probably want
-- to just turn this into a fold/use the main discovery entry point.
let (_ds1, dfi) = MD.analyzeFunction entryPoint MD.UserRequest ds0
return dfi
return (mem, fns)
return (mem, MD.cfgFromAddrs archInfo mem addrSymMap (Map.keys addrSymMap) [])
data SATResult = Unsat | Sat | Unknown
deriving (Eq, Show)
@ -269,10 +261,12 @@ simulateAndVerify :: forall arch sym bak ids w t st fs
-- ^ The initial contents of static memory
-> ResultExtractor sym arch
-- ^ A function to extract the return value of a function from its post-state
-> MD.DiscoveryState arch
-- ^ The rest of the discovery state, including other discovered functions
-> MD.DiscoveryFunInfo arch ids
-- ^ The function to simulate and verify
-> IO SimulationResult
simulateAndVerify goalSolver logger bak execFeatures archInfo archVals mem (ResultExtractor withResult) dfi =
simulateAndVerify goalSolver logger bak execFeatures archInfo archVals mem (ResultExtractor withResult) discState dfi =
let sym = CB.backendGetSym bak in
MS.withArchConstraints archVals $ do
let funName = functionName dfi
@ -283,7 +277,7 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals mem (Resu
let ?recordLLVMAnnotation = \_ _ _ -> return ()
(initMem, memPtrTbl) <- MSM.newGlobalMemory (Proxy @arch) bak endianness MSM.ConcreteMutable mem
let globalMap = MSM.mapRegionPointers memPtrTbl
(memVar, stackPointer, execResult) <- simulateFunction bak execFeatures archVals halloc initMem globalMap g
(memVar, stackPointer, execResult) <- simulateFunction discState bak execFeatures archVals halloc initMem globalMap g
case execResult of
CS.TimeoutResult {} -> return SimulationTimeout
CS.AbortedResult {} -> return SimulationAborted
@ -335,7 +329,8 @@ simulateFunction :: ( ext ~ MS.MacawExt arch
, 16 <= w
, ?memOpts :: CLM.MemOptions
)
=> bak
=> MD.DiscoveryState arch
-> bak
-> [CS.GenericExecutionFeature sym]
-> MS.ArchVals arch
-> CFH.HandleAllocator
@ -346,7 +341,7 @@ simulateFunction :: ( ext ~ MS.MacawExt arch
, CLM.LLVMPtr sym w
, CS.ExecResult (MS.MacawSimulatorState sym) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch))
)
simulateFunction bak execFeatures archVals halloc initMem globalMap g = do
simulateFunction discState bak execFeatures archVals halloc initMem globalMap g = do
let sym = CB.backendGetSym bak
let symArchFns = MS.archFunctions archVals
let crucRegTypes = MS.crucArchRegTypes symArchFns
@ -383,7 +378,7 @@ simulateFunction bak execFeatures archVals halloc initMem globalMap g = do
let fnBindings = CFH.insertHandleMap (CCC.cfgHandle g) (CS.UseCFG g (CAP.postdomInfo g)) CFH.emptyHandleMap
MS.withArchEval archVals sym $ \archEvalFn -> do
let extImpl = MS.macawExtensions archEvalFn memVar globalMap lookupFunction lookupSyscall validityCheck
let extImpl = MS.macawExtensions archEvalFn memVar globalMap (lookupFunction archVals discState) lookupSyscall validityCheck
let ctx = CS.initSimContext bak CLI.llvmIntrinsicTypes halloc IO.stdout (CS.FnBindings fnBindings) extImpl MS.MacawSimulatorState
let s0 = CS.InitialState ctx initGlobals CS.defaultAbortHandler regsRepr simAction
res <- CS.executeCrucible (fmap CS.genericToExecutionFeature execFeatures) s0
@ -437,11 +432,53 @@ data ResultExtractor sym arch where
-> IO a)
-> ResultExtractor sym arch
-- | The test harness does not currently support calling functions from test cases.
--
-- It could be modified to do so.
lookupFunction :: MS.LookupFunctionHandle p sym arch
lookupFunction = MS.unsupportedFunctionCalls "macaw-symbolic-tests"
resolveAbsoluteAddress
:: (MM.MemWidth w)
=> MM.Memory w
-> MM.MemWord w
-> Maybe (MM.MemSegmentOff w)
resolveAbsoluteAddress mem addr =
listToMaybe [ segOff
| seg <- MM.memSegments mem
, let region = MM.segmentBase seg
, Just segOff <- return (MM.resolveRegionOff mem region addr)
]
addBinding
:: CFH.FnHandle args ret
-> CS.FnState p sym ext args ret
-> CS.FunctionBindings p sym ext
-> CS.FunctionBindings p sym ext
addBinding hdl fstate (CS.FnBindings binds) =
CS.FnBindings (CFH.insertHandleMap hdl fstate binds)
lookupFunction
:: ( MS.SymArchConstraints arch
, CB.IsSymInterface sym
)
=> MS.ArchVals arch
-> MD.DiscoveryState arch
-> MS.LookupFunctionHandle p sym arch
lookupFunction archVals discState = MS.LookupFunctionHandle $ \s0 _memImpl regs -> do
let regsEntry = CS.RegEntry regsRepr regs
let (_, ptrOffset) = CLM.llvmPointerView (CS.regValue (MS.lookupReg archVals regsEntry MC.ip_reg))
case WI.asBV ptrOffset of
Just bvOff
| Just segOff <- resolveAbsoluteAddress mem (fromIntegral (BVS.asUnsigned bvOff))
, Just (Some targetFn) <- Map.lookup segOff (discState L.^. MD.funInfo) -> do
let funName = functionName targetFn
halloc <- CFH.newHandleAllocator
CCC.SomeCFG g <- MS.mkFunCFG symArchFns halloc funName posFn targetFn
hdl <- CFH.mkHandle' halloc funName (Ctx.singleton regsRepr) regsRepr
let fstate = CS.UseCFG g (CAP.postdomInfo g)
let s1 = s0 & CS.stateContext . CS.functionBindings %~ addBinding hdl fstate
return (hdl, s1)
_ -> error ("Symbolic pointer offset in lookupFunction: " ++ show (WI.printSymExpr ptrOffset))
where
mem = MD.memory discState
symArchFns = MS.archFunctions archVals
crucRegTypes = MS.crucArchRegTypes symArchFns
regsRepr = CT.StructRepr crucRegTypes
-- | The test harness does not currently support making system calls from test cases.
--

View File

@ -1,3 +1,5 @@
R { funcs = [(Addr 0 0x401000, [(Addr 0 0x401000,33), (Addr 0 0x401021, 2)])]
R { funcs = [ (Addr 0 0x401000, [(Addr 0 0x401000,33)])
, (Addr 0 0x401021, [(Addr 0 0x401021, 2)])
]
, ignoreBlocks = []
}

View File

@ -7,7 +7,8 @@ R { funcs =
, (Addr 1 0x5e0, [(Addr 1 0x5e0,41),(Addr 1 0x609,1)])
, (Addr 1 0x610, [(Addr 1 0x610,27),(Addr 1 0x62b,12),(Addr 1 0x637,3),(Addr 1 0x640,2)])
, (Addr 1 0x650, [(Addr 1 0x650,40),(Addr 1 0x678,12),(Addr 1 0x684,3),(Addr 1 0x690,2)])
, (Addr 1 0x6a0, [(Addr 1 0x6a0,9),(Addr 1 0x6a9,14),(Addr 1 0x6b7,12),(Addr 1 0x6c3,5),(Addr 1 0x6c8,8),(Addr 1 0x6d0,2)])
, (Addr 1 0x6a0, [(Addr 1 0x6a0,9),(Addr 1 0x6a9,14),(Addr 1 0x6b7,12),(Addr 1 0x6c3,5),(Addr 1 0x6c8,8), (Addr 1 0x6d0,2)])
, (Addr 1 0x6d0, [(Addr 1 0x6d0,2)])
, (Addr 1 0x6e0, [(Addr 1 0x6e0,13), (Addr 1 0x6ed,5), (Addr 1 0x6f8, 12), (Addr 1 0x704, 6), (Addr 1 0x70a, 6)])
, (Addr 1 0x710, [(Addr 1 0x710,13),(Addr 1 0x71d,4)])
, (Addr 1 0x730, [(Addr 1 0x730,49),(Addr 1 0x761,5),(Addr 1 0x766,10),(Addr 1 0x770,13),(Addr 1

View File

@ -1,6 +1,7 @@
R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)])
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11)])
, (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)])
, (Addr 1 0x2fb, [(Addr 1 0x2fb, 1)])
]
, ignoreBlocks = [Addr 1 0x2fb]
, ignoreBlocks = []
}

View File

@ -48,6 +48,7 @@ test-suite macaw-x86-symbolic-tests
elf-edit,
filepath,
Glob >= 0.9 && < 0.11,
lens,
macaw-base,
macaw-symbolic,
macaw-x86,

View File

@ -7,10 +7,12 @@
{-# LANGUAGE TypeApplications #-}
module Main (main) where
import Control.Lens ( (^.) )
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ElfEdit as Elf
import qualified Data.Foldable as F
import qualified Data.Map as Map
import Data.Maybe ( mapMaybe )
import qualified Data.Parameterized.Classes as PC
import qualified Data.Parameterized.Nonce as PN
@ -108,7 +110,8 @@ mkSymExTest expected exePath = TT.askOption $ \saveSMT@(SaveSMT _) -> TT.askOpti
case Elf.headerClass (Elf.header ehi) of
Elf.ELFCLASS32 -> TTH.assertFailure "32 bit x86 binaries are not supported"
Elf.ELFCLASS64 -> do
(mem, funInfos) <- MST.runDiscovery ehi MST.toAddrSymMap MX.x86_64_linux_info
(mem, discState) <- MST.runDiscovery ehi MST.toAddrSymMap MX.x86_64_linux_info
let funInfos = Map.elems (discState ^. M.funInfo)
let testEntryPoints = mapMaybe hasTestPrefix funInfos
F.forM_ testEntryPoints $ \(name, Some dfi) -> do
step ("Testing " ++ BS8.unpack name)
@ -127,7 +130,7 @@ mkSymExTest expected exePath = TT.askOption $ \saveSMT@(SaveSMT _) -> TT.askOpti
let extract = x86ResultExtractor archVals
logger <- makeGoalLogger saveSMT solver name exePath
let ?memOpts = LLVM.defaultMemOptions
simRes <- MST.simulateAndVerify solver logger bak execFeatures MX.x86_64_linux_info archVals mem extract dfi
simRes <- MST.simulateAndVerify solver logger bak execFeatures MX.x86_64_linux_info archVals mem extract discState dfi
TTH.assertEqual "AssertionResult" expected simRes
writeMacawIR :: (MC.ArchConstraints arch) => SaveMacaw -> String -> M.DiscoveryFunInfo arch ids -> IO ()