Merge branch 'master' of github.com:GaloisInc/macaw into HEAD

This commit is contained in:
Tristan Ravitch 2018-04-25 08:41:41 -07:00
commit 0eb0bd14f7
15 changed files with 256 additions and 76 deletions

14
.gitmodules vendored
View File

@ -1,12 +1,18 @@
[submodule "x86/tests/submodules/dwarf"]
path = x86/tests/submodules/dwarf
path = deps/dwarf
url = https://github.com/GaloisInc/dwarf.git
[submodule "x86/tests/submodules/elf-edit"]
path = x86/tests/submodules/elf-edit
path = deps/elf-edit
url = https://github.com/GaloisInc/elf-edit.git
[submodule "x86/tests/submodules/flexdis86"]
path = x86/tests/submodules/flexdis86
path = deps/flexdis86
url = https://github.com/GaloisInc/flexdis86.git
[submodule "x86/tests/submodules/parameterized-utils"]
path = x86/tests/submodules/parameterized-utils
path = deps/parameterized-utils
url = https://github.com/GaloisInc/parameterized-utils.git
[submodule "deps/crucible"]
path = deps/crucible
url = https://github.com/GaloisInc/crucible.git
[submodule "deps/llvm-pretty"]
path = deps/llvm-pretty
url = https://github.com/elliottt/llvm-pretty.git

View File

@ -41,13 +41,8 @@ install:
# any command which exits with a non-zero exit code causes the build to fail.
script:
- travis_wait stack --no-terminal --skip-ghc-check setup
- cd x86/tests
- stack build macaw-x86 macaw-x86-symbolic
- stack test macaw-x86
- cd ../..
# - cabal check
# - cabal haddock # tests that documentation can be generated
# - cabal sdist # tests that a source-distribution can be generated
# Check that the resulting source distribution can be built & installed.
# If there are no other `.tar.gz` files in `dist`, this can be even simpler:

View File

@ -29,13 +29,18 @@ module Data.Macaw.CFG.Core
, AssignRhs(..)
, MemRepr(..)
, memReprBytes
, readMemRepr
, readMemReprDyn
-- * Value
, Value(..)
, BVValue
, valueAsApp
, valueAsArchFn
, valueAsRhs
, valueAsMemAddr
, valueAsSegmentOff
, valueAsArrayOffset
, valueAsStaticMultiplication
, asLiteralAddr
, asBaseOffset
, asInt64Constant
@ -260,6 +265,22 @@ instance Show (MemRepr tp) where
memReprBytes :: MemRepr tp -> Integer
memReprBytes (BVMemRepr x _) = natValue x
-- | Read a word with a dynamically-chosen endianness and size
readMemRepr :: MemWidth w' => Memory w -> MemAddr w -> MemRepr (BVType w') -> Either (MemoryError w) (MemWord w')
readMemRepr mem addr (BVMemRepr size endianness) = do
bs <- readByteString mem addr (fromInteger (natValue size))
let Just val = addrRead endianness bs
Right val
-- | Like 'readMemRepr', but has a short static list of sizes for which it can
-- dispatch the 'MemWidth' constraint. Returns 'Left' for sizes other than 4 or
-- 8 bytes.
readMemReprDyn :: Memory w -> MemAddr w -> MemRepr (BVType w') -> Either (MemoryError w) (MemWord w')
readMemReprDyn mem addr repr@(BVMemRepr size _) = case () of
_ | Just Refl <- testEquality size (knownNat :: NatRepr 4) -> readMemRepr mem addr repr
| Just Refl <- testEquality size (knownNat :: NatRepr 8) -> readMemRepr mem addr repr
| otherwise -> Left $ InvalidSize addr size
instance TestEquality MemRepr where
testEquality (BVMemRepr xw xe) (BVMemRepr yw ye) =
if xe == ye then do
@ -460,6 +481,10 @@ valueAsArchFn :: Value arch ids tp -> Maybe (ArchFn arch (Value arch ids) tp)
valueAsArchFn (AssignedValue (Assignment _ (EvalArchFn a _))) = Just a
valueAsArchFn _ = Nothing
valueAsRhs :: Value arch ids tp -> Maybe (AssignRhs arch (Value arch ids) tp)
valueAsRhs (AssignedValue (Assignment _ v)) = Just v
valueAsRhs _ = Nothing
-- | This returns a segmented address if the value can be interpreted as a literal memory
-- address, and returns nothing otherwise.
valueAsMemAddr :: MemWidth (ArchAddrWidth arch)
@ -469,6 +494,33 @@ valueAsMemAddr (BVValue _ val) = Just $ absoluteAddr (fromInteger val)
valueAsMemAddr (RelocatableValue _ i) = Just i
valueAsMemAddr _ = Nothing
valueAsArrayOffset ::
Memory (ArchAddrWidth arch) ->
ArchAddrValue arch ids ->
Maybe (ArchSegmentOff arch, ArchAddrValue arch ids)
valueAsArrayOffset mem v
| Just (BVAdd w base offset) <- valueAsApp v
, Just Refl <- testEquality w (memWidth mem)
, Just ptr <- valueAsSegmentOff mem base
= Just (ptr, offset)
-- and with the other argument order
| Just (BVAdd w offset base) <- valueAsApp v
, Just Refl <- testEquality w (memWidth mem)
, Just ptr <- valueAsSegmentOff mem base
= Just (ptr, offset)
| otherwise = Nothing
valueAsStaticMultiplication ::
BVValue arch ids w ->
Maybe (Integer, BVValue arch ids w)
valueAsStaticMultiplication v
| Just (BVMul _ (BVValue _ mul) v') <- valueAsApp v = Just (mul, v')
| Just (BVMul _ v' (BVValue _ mul)) <- valueAsApp v = Just (mul, v')
| Just (BVShl _ v' (BVValue _ sh)) <- valueAsApp v = Just (2^sh, v')
| otherwise = Nothing
asLiteralAddr :: MemWidth (ArchAddrWidth arch)
=> BVValue arch ids (ArchAddrWidth arch)
-> Maybe (ArchMemAddr arch)

View File

@ -21,6 +21,7 @@ This provides information about code discovered in binaries.
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE StandaloneDeriving #-}
module Data.Macaw.Discovery
( -- * DiscoveryInfo
State.DiscoveryState
@ -63,6 +64,7 @@ module Data.Macaw.Discovery
, eliminateDeadStmts
) where
import Control.Applicative
import Control.Lens
import Control.Monad.ST
import Control.Monad.State.Strict
@ -72,6 +74,7 @@ import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Parameterized.Classes
import Data.Parameterized.NatRepr
import Data.Parameterized.Nonce
import Data.Parameterized.Some
import Data.Parameterized.TraversableF
@ -88,7 +91,6 @@ import Debug.Trace
import Data.Macaw.AbsDomain.AbsState
import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp
import Data.Macaw.AbsDomain.Refine
import qualified Data.Macaw.AbsDomain.StridedInterval as SI
import Data.Macaw.Architecture.Info
import Data.Macaw.CFG
import Data.Macaw.CFG.DemandSet
@ -381,22 +383,109 @@ mergeIntraJump src ab tgt = do
-------------------------------------------------------------------------------
-- Jump table bounds
-- | A memory read that looks like array indexing. It read 'arSize' bytes from
-- the address given by 'arBase' + 'arIx'*'arStride'.
data ArrayRead arch ids = forall w. ArrayRead
{ arBase :: ArchSegmentOff arch
, arIx :: ArchAddrValue arch ids
, arStride :: Integer
, arSize :: MemRepr (BVType w)
}
deriving instance RegisterInfo (ArchReg arch) => Show (ArrayRead arch ids)
-- | Same as 'arSize', but less typed.
arSizeBytes :: ArrayRead arch ids -> Integer
arSizeBytes (ArrayRead { arSize = s }) = memReprBytes s
data Extension = Signed | Unsigned deriving (Bounded, Enum, Eq, Ord, Read, Show)
extendDyn :: MemRepr (BVType w) -> Maybe Extension -> MemWord w -> Maybe Integer
extendDyn (BVMemRepr size _) ext w = case ext of
Nothing -> Just (memWordInteger w)
Just Unsigned -> Just (memWordInteger w)
Just Signed | Just Refl <- testEquality size (knownNat :: NatRepr 4) -> Just (memWordSigned w)
| Just Refl <- testEquality size (knownNat :: NatRepr 8) -> Just (memWordSigned w)
_ -> Nothing
data JumpTable arch ids
-- the result of the array read gives the address to jump to
= Absolute (ArrayRead arch ids) (Maybe Extension)
-- the result of the array read gives an offset from the given base address
-- (typically the base address and the array read's arBase will be identical)
| Relative (ArchSegmentOff arch) (ArrayRead arch ids) (Maybe Extension)
deriving instance RegisterInfo (ArchReg arch) => Show (JumpTable arch ids)
-- | The array read done when computing the jump table. N.B. other processing
-- may be needed on the value read in this way to know the address to jump to.
jumpTableRead :: JumpTable arch ids -> ArrayRead arch ids
jumpTableRead (Absolute r _) = r
jumpTableRead (Relative _ r _) = r
-- | After reading from the array, the result may be extended to address width;
-- if so, this says how.
jumpTableExtension :: JumpTable arch ids -> Maybe Extension
jumpTableExtension (Absolute _ e) = e
jumpTableExtension (Relative _ _ e) = e
ensure :: Alternative f => (a -> Bool) -> a -> f a
ensure p x = x <$ guard (p x)
matchArrayRead, matchReadOnlyArrayRead ::
MemWidth (ArchAddrWidth arch) =>
Memory (ArchAddrWidth arch) ->
BVValue arch ids w ->
Maybe (ArrayRead arch ids)
matchArrayRead mem val
| Just (ReadMem addr size) <- valueAsRhs val
, Just (base, offset) <- valueAsArrayOffset mem addr
, Just (stride, ixVal) <- valueAsStaticMultiplication offset
= Just ArrayRead
{ arBase = base
, arIx = ixVal
, arStride = stride
, arSize = size
}
| otherwise = Nothing
matchReadOnlyArrayRead mem val =
matchArrayRead mem val >>=
ensure (Perm.isReadonly . segmentFlags . msegSegment . arBase)
-- | Just like Some (BVValue arch ids), but doesn't run into trouble with
-- partially applying the BVValue type synonym.
data SomeBVValue arch ids = forall tp. SomeBVValue (BVValue arch ids tp)
matchExtension :: BVValue arch ids w -> (Maybe Extension, SomeBVValue arch ids)
matchExtension val
| Just (SExt val' _) <- valueAsApp val = (Just Signed , SomeBVValue val')
| Just (UExt val' _) <- valueAsApp val = (Just Unsigned, SomeBVValue val')
| otherwise = (Nothing, SomeBVValue val)
-- | Figure out if this is a jump table.
matchJumpTable :: MemWidth (ArchAddrWidth arch)
=> Memory (ArchAddrWidth arch)
-> BVValue arch ids (ArchAddrWidth arch) -- ^ Memory address that IP is read from.
-> Maybe (ArchSegmentOff arch, BVValue arch ids (ArchAddrWidth arch))
matchJumpTable mem read_addr
-- Turn the read address into base + offset.
| Just (BVAdd _ offset base_val) <- valueAsApp read_addr
, Just mseg <- valueAsSegmentOff mem base_val
-- Turn the offset into a multiple by an index.
, Just (BVMul _ (BVValue _ mul) jump_index) <- valueAsApp offset
, mul == toInteger (addrSize (memAddrWidth mem))
, Perm.isReadonly (segmentFlags (msegSegment mseg)) = do
Just (mseg, jump_index)
matchJumpTable _ _ =
Nothing
-> ArchAddrValue arch ids -- ^ Value that's assigned to the IP.
-> Maybe (JumpTable arch ids)
matchJumpTable mem ip
-- Turn a plain read address into base + offset.
| (ext, SomeBVValue ipShort) <- matchExtension ip
, Just arrayRead <- matchReadOnlyArrayRead mem ipShort
= Just (Absolute arrayRead ext)
-- gcc-style PIC jump tables on x86 use, roughly,
-- ip = jmptbl + jmptbl[index]
-- where jmptbl is a pointer to the lookup table.
| Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem ip
, (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset
, Just arrayRead <- matchReadOnlyArrayRead mem shortOffset
= Just (Relative tgtBase arrayRead ext)
matchJumpTable _ _ = Nothing
-- | This describes why we could not infer the bounds of code that looked like it
-- was accessing a jump table.
@ -410,7 +499,7 @@ showJumpTableBoundsError :: ArchConstraints arch => JumpTableBoundsError arch id
showJumpTableBoundsError err =
case err of
CouldNotInterpretAbsValue val ->
"Index interval is not a stride " ++ show val
"Index <" ++ show val ++ "> is not a stride."
UpperBoundMismatch bnd index_range ->
"Upper bound mismatch at jumpbounds "
++ show bnd
@ -424,22 +513,17 @@ showJumpTableBoundsError err =
-- not a block table.
getJumpTableBounds :: ArchitectureInfo a
-> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers.
-> ArchSegmentOff a -- ^ Base
-> BVValue a ids (ArchAddrWidth a) -- ^ Index in jump table
-> ArrayRead a ids
-> Either (JumpTableBoundsError a ids) (ArchAddrWord a)
-- ^ One past last index in jump table or nothing
getJumpTableBounds info regs base jump_index = withArchConstraints info $
case transferValue regs jump_index of
StridedInterval (SI.StridedInterval _ index_base index_range index_stride) -> do
let index_end = index_base + (index_range + 1) * index_stride
if rangeInReadonlySegment base (jumpTableEntrySize info * fromInteger index_end) then
case Jmp.unsignedUpperBound (regs^.indexBounds) jump_index of
Right (Jmp.IntegerUpperBound bnd) | bnd == index_range -> Right $! fromInteger index_end
Right bnd -> Left (UpperBoundMismatch bnd index_range)
Left msg -> Left (CouldNotFindBound msg jump_index)
else
error $ "Jump table range is not in readonly memory"
abs_value -> Left (CouldNotInterpretAbsValue abs_value)
getJumpTableBounds info regs arrayRead = withArchConstraints info $
case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of
Right (Jmp.IntegerUpperBound maxIx) ->
let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in
if rangeInReadonlySegment (arBase arrayRead) (fromInteger arrayByteSize)
then Right $! fromInteger maxIx
else error $ "Jump table range is not in readonly memory"
Left msg -> Left (CouldNotFindBound msg (arIx arrayRead))
------------------------------------------------------------------------
-- ParseState
@ -616,10 +700,9 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do
, stmtsAbsState = absProcState'
}
-- Block ends with what looks like a jump table.
| AssignedValue (Assignment _ (ReadMem ptr _)) <- debug DCFG "try jump table" $ s'^.curIP
-- Attempt to compute interval of addresses interval is over.
, Just (base, jump_idx) <- matchJumpTable mem ptr ->
case getJumpTableBounds arch_info absProcState' base jump_idx of
| Just jt <- debug DCFG "try jump table" $ matchJumpTable mem (s'^.curIP) ->
let arrayRead = jumpTableRead jt in
case getJumpTableBounds arch_info absProcState' arrayRead of
Left err ->
trace (show src ++ ": Could not compute bounds: " ++ showJumpTableBoundsError err) $ do
mapM_ (recordWriteStmt arch_info mem absProcState') stmts
@ -628,45 +711,68 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do
, stmtsTerm = ClassifyFailure s'
, stmtsAbsState = absProcState'
}
Right read_end -> do
Right maxIdx -> do
mapM_ (recordWriteStmt arch_info mem absProcState') stmts
-- Try to compute jump table bounds
let abst :: AbsBlockState (ArchReg arch)
abst = finalAbsBlockState absProcState' s'
resolveJump :: ArchAddrWord arch
-> Maybe (ArchSegmentOff arch)
resolveJump = case jt of
Absolute (ArrayRead { arSize = BVMemRepr arByteCount endianness }) Nothing
| natValue arByteCount == toInteger (addrSize (archAddrWidth arch_info)) -> \idx ->
let read_addr = relativeSegmentAddr (arBase arrayRead) & incAddr (arStride arrayRead * toInteger idx)
in case readAddr mem endianness read_addr of
Right tgt_addr
| Just tgt_mseg <- asSegmentOff mem tgt_addr
, Perm.isExecutable (segmentFlags (msegSegment tgt_mseg))
-> Just tgt_mseg
_ -> Nothing
Relative base (ArrayRead { arSize = repr }) ext -> \idx ->
let read_addr = relativeSegmentAddr (arBase arrayRead) & incAddr (arStride arrayRead * toInteger idx)
in case readMemReprDyn mem read_addr repr of
Right shortOffset
| Just offset <- extendDyn repr ext shortOffset
, let tgt_addr = relativeSegmentAddr base & incAddr offset
, Just tgt_mseg <- asSegmentOff mem tgt_addr
, Perm.isExecutable (segmentFlags (msegSegment tgt_mseg))
-> Just tgt_mseg
_ -> Nothing
Absolute _ _ -> debug DCFG
( "Found a jump table of absolute addresses, but the array elements weren't of\n"
++ "the same size as addresses. We're gonna bail and report this as a jump table\n"
++ "with no targets. Jump table info follows.\n"
++ show jt
)
(\_ -> Nothing)
seq abst $ do
-- This function resolves jump table entries.
-- It is a recursive function that has an index into the jump table.
-- If the current index can be interpreted as a intra-procedural jump,
-- then it will add that to the current procedure.
-- This returns the last address read.
let resolveJump :: [ArchSegmentOff arch]
let resolveJumps :: [ArchSegmentOff arch]
-- /\ Addresses in jump table in reverse order
-> ArchAddrWord arch
-- /\ Current index
-> State (ParseState arch ids) [ArchSegmentOff arch]
resolveJump prev idx | idx == read_end = do
resolveJumps prev idx | idx > maxIdx = do
-- Stop jump table when we have reached computed bounds.
return (reverse prev)
resolveJump prev idx = do
let read_addr = relativeSegmentAddr base & incAddr (toInteger (8 * idx))
case readAddr mem (archEndianness arch_info) read_addr of
Right tgt_addr
| Just read_mseg <- asSegmentOff mem read_addr
, Perm.isReadonly (segmentFlags (msegSegment read_mseg))
, Just tgt_mseg <- asSegmentOff mem tgt_addr
, Perm.isExecutable (segmentFlags (msegSegment tgt_mseg)) -> do
let abst' = abst & setAbsIP tgt_mseg
intraJumpTargets %= ((tgt_mseg, abst'):)
resolveJump (tgt_mseg:prev) (idx+1)
_ -> do
debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show read_end) $ do
resolveJumps prev idx = case resolveJump idx of
Just tgt_mseg -> do
let abst' = abst & setAbsIP tgt_mseg
intraJumpTargets %= ((tgt_mseg, abst'):)
resolveJumps (tgt_mseg:prev) (idx+1)
_ -> debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show maxIdx) $ do
return (reverse prev)
read_addrs <- resolveJump [] 0
read_addrs <- resolveJumps [] 0
pure StatementList { stmtsIdent = lbl_idx
, stmtsNonterm = stmts
, stmtsTerm = ParsedLookupTable s' jump_idx (V.fromList read_addrs)
, stmtsTerm = ParsedLookupTable s' (arIx arrayRead) (V.fromList read_addrs)
, stmtsAbsState = absProcState'
}

View File

@ -65,6 +65,7 @@ module Data.Macaw.Memory
, MemWidth(..)
, memWord
, memWordInteger
, memWordSigned
-- * Segment offsets
, MemSegmentOff
, viewSegmentOff
@ -259,6 +260,12 @@ newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 }
memWordInteger :: MemWord w -> Integer
memWordInteger = fromIntegral . _memWordValue
-- | Treat the word as a signed integer.
memWordSigned :: MemWidth w => MemWord w -> Integer
memWordSigned w = if i >= bound then i-2*bound else i where
i = memWordInteger w
bound = 2^(8*addrSize w-1)
instance Show (MemWord w) where
showsPrec _ (MemWord w) = showString "0x" . showHex w
@ -586,6 +593,7 @@ data MemoryError w
-- ^ We unexpectedly encountered a BSS segment/section.
| InvalidAddr !(MemAddr w)
-- ^ The data at the given address did not refer to a valid memory location.
| forall n. InvalidSize !(MemAddr w) !(NatRepr n)
instance MemWidth w => Show (MemoryError w) where
show err =
@ -601,6 +609,8 @@ instance MemWidth w => Show (MemoryError w) where
"Attempt to read zero initialized BSS memory at " ++ show a ++ "."
InvalidAddr a ->
"Attempt to interpret an invalid address: " ++ show a ++ "."
InvalidSize a n ->
"Attempt to read an invalid number of bytes (" ++ show n ++ ") from address " ++ show a ++ "."
------------------------------------------------------------------------
-- SegmentContents

1
deps/crucible vendored Submodule

@ -0,0 +1 @@
Subproject commit b8b3c0049384f18694e49e2f978982cb4021dcf7

1
deps/llvm-pretty vendored Submodule

@ -0,0 +1 @@
Subproject commit d2fb5fc1e6bf0f40a8a55a63672970cdbc6070b6

21
stack.ghc-8.2.2.yaml Normal file
View File

@ -0,0 +1,21 @@
flags:
time-locale-compat:
old-locale: false
packages:
- base
- x86
- symbolic
- x86_symbolic
- deps/crucible/crucible
- deps/crucible/crucible-llvm
- deps/dwarf
- deps/elf-edit
- deps/flexdis86
- deps/flexdis86/binary-symbols
- deps/llvm-pretty
- deps/parameterized-utils
extra-deps:
- monadLib-3.7.3
resolver: lts-11.5

View File

@ -376,6 +376,9 @@ getJumpTarget v =
case v of
F.JumpOffset _ joff -> do
(.+) <$> get rip <*> resolveJumpOffset joff
F.QWordReg r -> get (reg64Loc r)
F.Mem64 ar -> get =<< getBV64Addr ar
F.QWordImm w -> return (ValueExpr (BVValue knownNat (toInteger w)))
_ -> fail "Unexpected argument"
------------------------------------------------------------------------

View File

@ -1,15 +0,0 @@
flags:
time-locale-compat:
old-locale: false
packages:
- ../../base/
- ../
- submodules/dwarf
- submodules/elf-edit/
- submodules/flexdis86/
- submodules/flexdis86/binary-symbols
- submodules/parameterized-utils/
resolver: lts-11.5
allow-newer: true