mirror of
https://github.com/GaloisInc/macaw.git
synced 2024-11-22 14:27:51 +03:00
Revisit handling of tail calls
It turns out that we have to be more conservative with tail call identification, as incorrectly identifying a block as the target of a tail call (instead of a branch) can cause other branch classifiers to fail if that block is the target of another jump. Ultimately, we will need to give up some tail call recognition (since they are in general indistinguishable from jumps), and instead only identify known call targets as tail call candidates. With additional global analysis we could do better. Fixes #294
This commit is contained in:
parent
857bb72b31
commit
6a4f406c68
@ -497,15 +497,13 @@ useExternalTargets bcc = do
|
||||
-- new classifiers (e.g., classifiers that can produce architecture-specific
|
||||
-- terminators).
|
||||
--
|
||||
-- Note that the direct jump classifier is last; this is to give the other
|
||||
-- classifiers a chance to run and match code. In particular, tail calls and
|
||||
-- direct local jumps are very difficult to distinguish from each other. Since
|
||||
-- we have to choose some order between them, we put the tail call classifier
|
||||
-- first so that it at least *could* fire without being completely subsumed by
|
||||
-- direct jumps. This means that some control flow transfers that look like
|
||||
-- direct jumps could instead be classified as tail calls. It would take some
|
||||
-- higher-level heuristics (e.g., live registers at the call site, locality) to
|
||||
-- distinguish the cases.
|
||||
-- Note that classifiers are an instance of 'Control.Monad.Alternative', so the
|
||||
-- order they are applied in matters. While several are non-overlapping, at
|
||||
-- least the order that the direct jump and tail call classifiers are applied in
|
||||
-- matters, as they look substantially the same to the analysis. Being too eager
|
||||
-- to flag jumps as tail calls classifies the jump targets as known function
|
||||
-- entry points, which can interfere with other classifiers later in the
|
||||
-- function.
|
||||
defaultClassifier :: BlockClassifier arch ids
|
||||
defaultClassifier = branchClassifier
|
||||
<|> noreturnCallClassifier
|
||||
@ -513,8 +511,8 @@ defaultClassifier = branchClassifier
|
||||
<|> returnClassifier
|
||||
<|> jumpTableClassifier
|
||||
<|> pltStubClassifier
|
||||
<|> tailCallClassifier
|
||||
<|> directJumpClassifier
|
||||
<|> tailCallClassifier
|
||||
|
||||
-- | This parses a block that ended with a fetch and execute instruction.
|
||||
parseFetchAndExecute :: forall arch ids
|
||||
|
@ -292,8 +292,21 @@ returnClassifier = classifierName "Return" $ do
|
||||
}
|
||||
|
||||
-- | Classifies jumps to concrete addresses as unconditional jumps. Note that
|
||||
-- this logic is substantially similar to the 'callClassifier'; as such, this
|
||||
-- classifier should always be applied *after* the 'callClassifier'.
|
||||
-- this logic is substantially similar to the 'tailCallClassifier' in cases
|
||||
-- where the function does not establish a stack frame (i.e., leaf functions).
|
||||
--
|
||||
-- Note that known call targets are not eligible to be intra-procedural jump
|
||||
-- targets (see 'classifyDirectJump'). This means that we need to conservatively
|
||||
-- prefer to mis-classify terminators as jumps rather than tail calls. The
|
||||
-- downside of this choice is that code that could be considered a tail-called
|
||||
-- function may be duplicated in some cases (i.e., considered part of multiple
|
||||
-- functions).
|
||||
--
|
||||
-- The alternative interpretation (eagerly preferring tail calls) can cause a
|
||||
-- section of a function to be marked as a tail-called function, thereby
|
||||
-- blocking the 'directJumpClassifier' or the 'branchClassifier' from
|
||||
-- recognizing the "callee" as an intra-procedural jump. This results in
|
||||
-- classification failures that we don't have any mitigations for.
|
||||
directJumpClassifier :: BlockClassifier arch ids
|
||||
directJumpClassifier = classifierName "Jump" $ do
|
||||
bcc <- CMR.ask
|
||||
@ -380,6 +393,11 @@ noreturnCallClassifier = classifierName "no return call" $ do
|
||||
--
|
||||
-- The current heuristic is that the target looks like a call, except the stack
|
||||
-- height in the caller is 0.
|
||||
--
|
||||
-- Note that, in leaf functions (i.e., with no stack usage), tail calls and
|
||||
-- jumps look substantially similar. We typically apply the jump classifier
|
||||
-- first to prefer them, which means that we very rarely recognize tail calls in
|
||||
-- leaf functions.
|
||||
tailCallClassifier :: BlockClassifier arch ids
|
||||
tailCallClassifier = classifierName "Tail call" $ do
|
||||
bcc <- CMR.ask
|
||||
|
@ -1,5 +1,4 @@
|
||||
R { funcs = [ (0x10054, [ (0x10054, 32), (0x10074, 12), (0x10080, 4) ])
|
||||
, (0x10084, [ (0x10084, 24), (0x1009c, 24) ])
|
||||
R { funcs = [ (0x10054, [ (0x10054, 32), (0x10074, 12), (0x10080, 4), (0x10084, 24), (0x1009c, 24) ])
|
||||
]
|
||||
, ignoreBlocks = []
|
||||
}
|
||||
|
BIN
x86/tests/x64/inner_loop_31.exe
Executable file
BIN
x86/tests/x64/inner_loop_31.exe
Executable file
Binary file not shown.
3
x86/tests/x64/inner_loop_31.exe.expected
Normal file
3
x86/tests/x64/inner_loop_31.exe.expected
Normal file
@ -0,0 +1,3 @@
|
||||
R { funcs = [(Addr 0 0x401000, [(Addr 0 0x401000, 34), (Addr 0 0x401022, 76), (Addr 0 0x40106e, 31)])]
|
||||
, ignoreBlocks = []
|
||||
}
|
38
x86/tests/x64/inner_loop_31.s
Normal file
38
x86/tests/x64/inner_loop_31.s
Normal file
@ -0,0 +1,38 @@
|
||||
main:
|
||||
endbr64
|
||||
movabs $0x7fffffff80000000,%rcx
|
||||
movabs $0x800000007fffffff,%r13
|
||||
movabs $0x7fffffff7fffffff,%r15
|
||||
target:
|
||||
cmp %r10,%r8
|
||||
mov %r8,%rax
|
||||
mov %r10,%rbx
|
||||
mov %rcx,%rbp
|
||||
mov %r13,%r14
|
||||
cmovb %r10,%r8
|
||||
cmovb %rax,%r10
|
||||
cmovb %r13,%rcx
|
||||
cmovb %rbp,%r13
|
||||
sub %r10,%r8
|
||||
sub %r13,%rcx
|
||||
add %r15,%rcx
|
||||
test $0x1,%rax
|
||||
cmove %rax,%r8
|
||||
cmove %rbx,%r10
|
||||
cmove %rbp,%rcx
|
||||
cmove %r14,%r13
|
||||
shr %r8
|
||||
add %r13,%r13
|
||||
sub %r15,%r13
|
||||
sub $0x1,%edi
|
||||
jne target
|
||||
shr $0x20,%r15
|
||||
mov %ecx,%edx
|
||||
mov %r13d,%r12d
|
||||
shr $0x20,%rcx
|
||||
shr $0x20,%r13
|
||||
sub %r15,%rdx
|
||||
sub %r15,%rcx
|
||||
sub %r15,%r12
|
||||
sub %r15,%r13
|
||||
repz retq
|
@ -1,5 +1,4 @@
|
||||
R { funcs = [ (Addr 0 0x401000, [(Addr 0 0x401000,33)])
|
||||
, (Addr 0 0x401021, [(Addr 0 0x401021, 2)])
|
||||
R { funcs = [ (Addr 0 0x401000, [(Addr 0 0x401000,33), (Addr 0 0x401021, 2)])
|
||||
]
|
||||
, ignoreBlocks = []
|
||||
}
|
||||
|
@ -8,7 +8,6 @@ R { funcs =
|
||||
, (Addr 1 0x610, [(Addr 1 0x610,27),(Addr 1 0x62b,12),(Addr 1 0x637,3),(Addr 1 0x640,2)])
|
||||
, (Addr 1 0x650, [(Addr 1 0x650,40),(Addr 1 0x678,12),(Addr 1 0x684,3),(Addr 1 0x690,2)])
|
||||
, (Addr 1 0x6a0, [(Addr 1 0x6a0,9),(Addr 1 0x6a9,14),(Addr 1 0x6b7,12),(Addr 1 0x6c3,5),(Addr 1 0x6c8,8), (Addr 1 0x6d0,2)])
|
||||
, (Addr 1 0x6d0, [(Addr 1 0x6d0,2)])
|
||||
, (Addr 1 0x6e0, [(Addr 1 0x6e0,13), (Addr 1 0x6ed,5), (Addr 1 0x6f8, 12), (Addr 1 0x704, 6), (Addr 1 0x70a, 6)])
|
||||
, (Addr 1 0x710, [(Addr 1 0x710,13),(Addr 1 0x71d,4)])
|
||||
, (Addr 1 0x730, [(Addr 1 0x730,49),(Addr 1 0x761,5),(Addr 1 0x766,10),(Addr 1 0x770,13),(Addr 1
|
||||
|
@ -1,7 +1,6 @@
|
||||
R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)])
|
||||
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11)])
|
||||
, (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)])
|
||||
, (Addr 1 0x2fb, [(Addr 1 0x2fb, 1)])
|
||||
, (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16), (Addr 1 0x2fb, 1)])
|
||||
]
|
||||
, ignoreBlocks = []
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user