Merge remote-tracking branch 'origin/master' into cfg-rewriting

This commit is contained in:
Luke Maurer 2018-07-24 17:11:20 -07:00
commit 0c2076d54d
45 changed files with 1437 additions and 1306 deletions

View File

@ -39,7 +39,7 @@ library
IntervalMap >= 0.5, IntervalMap >= 0.5,
lens >= 4.7, lens >= 4.7,
mtl, mtl,
parameterized-utils >= 0.1.6, parameterized-utils >= 1.0.1,
text, text,
vector, vector,
QuickCheck >= 2.7 QuickCheck >= 2.7

View File

@ -14,7 +14,6 @@ module Data.Macaw.Architecture.Info
-- * Unclassified blocks -- * Unclassified blocks
, module Data.Macaw.CFG.Block , module Data.Macaw.CFG.Block
, rewriteBlock , rewriteBlock
, disassembleAndRewrite
) where ) where
import Control.Monad.ST import Control.Monad.ST
@ -187,12 +186,3 @@ rewriteBlock info rwctx b = do
, blockStmts = tgtStmts , blockStmts = tgtStmts
, blockTerm = tgtTermStmt , blockTerm = tgtTermStmt
} }
-- | Translate code into blocks and simplify the resulting blocks.
disassembleAndRewrite :: ArchitectureInfo arch -> DisassembleFn arch
disassembleAndRewrite ainfo mem nonceGen addr max_size ab =
withArchConstraints ainfo $ do
(bs0,sz, maybeError) <- disassembleFn ainfo mem nonceGen addr max_size ab
ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) (rewriteArchStmt ainfo)
bs1 <- traverse (rewriteBlock ainfo ctx) bs0
pure (bs1, sz, maybeError)

View File

@ -1,5 +1,5 @@
{-| {-|
Copyright : (c) Galois, Inc 2015-2017 Copyright : (c) Galois, Inc 2015-2018
Maintainer : Joe Hendrix <jhendrix@galois.com> Maintainer : Joe Hendrix <jhendrix@galois.com>
This defines a data type `App` for representing operations that can be This defines a data type `App` for representing operations that can be

View File

@ -36,7 +36,6 @@ module Data.Macaw.CFG.Core
, valueAsMemAddr , valueAsMemAddr
, valueAsSegmentOff , valueAsSegmentOff
, valueAsStaticMultiplication , valueAsStaticMultiplication
, asLiteralAddr
, asBaseOffset , asBaseOffset
, asInt64Constant , asInt64Constant
, IPAlignment(..) , IPAlignment(..)
@ -189,7 +188,7 @@ data Value arch ids tp where
RelocatableValue :: !(AddrWidthRepr (ArchAddrWidth arch)) RelocatableValue :: !(AddrWidthRepr (ArchAddrWidth arch))
-> !(ArchMemAddr arch) -> !(ArchMemAddr arch)
-> Value arch ids (BVType (ArchAddrWidth arch)) -> Value arch ids (BVType (ArchAddrWidth arch))
-- | Reference to a symbol identifier. -- | This denotes the address of a symbol identifier in the binary.
-- --
-- This appears when dealing with relocations. -- This appears when dealing with relocations.
SymbolValue :: !(AddrWidthRepr (ArchAddrWidth arch)) SymbolValue :: !(AddrWidthRepr (ArchAddrWidth arch))
@ -294,6 +293,12 @@ mkLit n v = BVValue n (v .&. mask)
bvValue :: (KnownNat n, 1 <= n) => Integer -> Value arch ids (BVType n) bvValue :: (KnownNat n, 1 <= n) => Integer -> Value arch ids (BVType n)
bvValue i = mkLit knownNat i bvValue i = mkLit knownNat i
-- | Return the right-hand side if this is an assignment.
valueAsRhs :: Value arch ids tp -> Maybe (AssignRhs arch (Value arch ids) tp)
valueAsRhs (AssignedValue (Assignment _ v)) = Just v
valueAsRhs _ = Nothing
-- | Return the value evaluated if this is from an `App`.
valueAsApp :: Value arch ids tp -> Maybe (App (Value arch ids) tp) valueAsApp :: Value arch ids tp -> Maybe (App (Value arch ids) tp)
valueAsApp (AssignedValue (Assignment _ (EvalApp a))) = Just a valueAsApp (AssignedValue (Assignment _ (EvalApp a))) = Just a
valueAsApp _ = Nothing valueAsApp _ = Nothing
@ -303,10 +308,6 @@ valueAsArchFn :: Value arch ids tp -> Maybe (ArchFn arch (Value arch ids) tp)
valueAsArchFn (AssignedValue (Assignment _ (EvalArchFn a _))) = Just a valueAsArchFn (AssignedValue (Assignment _ (EvalArchFn a _))) = Just a
valueAsArchFn _ = Nothing valueAsArchFn _ = Nothing
valueAsRhs :: Value arch ids tp -> Maybe (AssignRhs arch (Value arch ids) tp)
valueAsRhs (AssignedValue (Assignment _ v)) = Just v
valueAsRhs _ = Nothing
-- | This returns a segmented address if the value can be interpreted as a literal memory -- | This returns a segmented address if the value can be interpreted as a literal memory
-- address, and returns nothing otherwise. -- address, and returns nothing otherwise.
valueAsMemAddr :: MemWidth (ArchAddrWidth arch) valueAsMemAddr :: MemWidth (ArchAddrWidth arch)
@ -333,13 +334,6 @@ valueAsStaticMultiplication v
= Just (2^shl, l') = Just (2^shl, l')
| otherwise = Nothing | otherwise = Nothing
asLiteralAddr :: MemWidth (ArchAddrWidth arch)
=> BVValue arch ids (ArchAddrWidth arch)
-> Maybe (ArchMemAddr arch)
asLiteralAddr = valueAsMemAddr
{-# DEPRECATED asLiteralAddr "Use valueAsMemAddr" #-}
-- | Returns a segment offset associated with the value if one can be defined. -- | Returns a segment offset associated with the value if one can be defined.
valueAsSegmentOff :: Memory (ArchAddrWidth arch) valueAsSegmentOff :: Memory (ArchAddrWidth arch)
-> BVValue arch ids (ArchAddrWidth arch) -> BVValue arch ids (ArchAddrWidth arch)

View File

@ -23,9 +23,11 @@ module Data.Macaw.CFG.Rewriter
) where ) where
import Control.Lens import Control.Lens
import Control.Monad.State.Strict
import Control.Monad.ST import Control.Monad.ST
import Control.Monad.State.Strict
import Data.Bits import Data.Bits
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Parameterized.Map (MapF) import Data.Parameterized.Map (MapF)
import qualified Data.Parameterized.Map as MapF import qualified Data.Parameterized.Map as MapF
import Data.Parameterized.NatRepr import Data.Parameterized.NatRepr
@ -40,14 +42,22 @@ import Data.Macaw.Types
data RewriteContext arch s src tgt data RewriteContext arch s src tgt
= RewriteContext { rwctxNonceGen :: !(NonceGenerator (ST s) tgt) = RewriteContext { rwctxNonceGen :: !(NonceGenerator (ST s) tgt)
-- ^ Generator for making new nonces in the target ST monad -- ^ Generator for making new nonces in the target ST monad
, rwctxArchFn :: !(forall tp , rwctxArchFn
. ArchFn arch (Value arch src) tp :: !(forall tp
-> Rewriter arch s src tgt (Value arch tgt tp)) . ArchFn arch (Value arch src) tp
-> Rewriter arch s src tgt (Value arch tgt tp))
-- ^ Rewriter for architecture-specific functions
, rwctxArchStmt
:: !(ArchStmt arch (Value arch src) -> Rewriter arch s src tgt ())
-- ^ Rewriter for architecture-specific statements -- ^ Rewriter for architecture-specific statements
, rwctxArchStmt :: !(ArchStmt arch (Value arch src) -> Rewriter arch s src tgt ()) , rwctxConstraints
-- ^ Rewriter for architecture-specific statements :: !(forall a . (RegisterInfo (ArchReg arch) => a) -> a)
, rwctxConstraints :: (forall a . (RegisterInfo (ArchReg arch) => a) -> a)
-- ^ Constraints needed during rewriting. -- ^ Constraints needed during rewriting.
, rwctxSectionAddrMap
:: !(Map SectionIndex (ArchSegmentOff arch))
-- ^ Map from section indices to the address they are loaded at
-- if any.
-- This is used to replace section references with their address.
, rwctxCache :: !(STRef s (MapF (AssignId src) (Value arch tgt))) , rwctxCache :: !(STRef s (MapF (AssignId src) (Value arch tgt)))
-- ^ A reference to a map from source assignment -- ^ A reference to a map from source assignment
-- identifiers to the updated value. -- identifiers to the updated value.
@ -67,13 +77,16 @@ mkRewriteContext :: RegisterInfo (ArchReg arch)
-> Rewriter arch s src tgt (Value arch tgt tp)) -> Rewriter arch s src tgt (Value arch tgt tp))
-> (ArchStmt arch (Value arch src) -> (ArchStmt arch (Value arch src)
-> Rewriter arch s src tgt ()) -> Rewriter arch s src tgt ())
-> Map SectionIndex (ArchSegmentOff arch)
-- ^ Map from loaded section indices to their address.
-> ST s (RewriteContext arch s src tgt) -> ST s (RewriteContext arch s src tgt)
mkRewriteContext nonceGen archFn archStmt = do mkRewriteContext nonceGen archFn archStmt secAddrMap = do
ref <- newSTRef MapF.empty ref <- newSTRef MapF.empty
pure $! RewriteContext { rwctxNonceGen = nonceGen pure $! RewriteContext { rwctxNonceGen = nonceGen
, rwctxArchFn = archFn , rwctxArchFn = archFn
, rwctxArchStmt = archStmt , rwctxArchStmt = archStmt
, rwctxConstraints = \a -> a , rwctxConstraints = \a -> a
, rwctxSectionAddrMap = secAddrMap
, rwctxCache = ref , rwctxCache = ref
} }
@ -266,18 +279,22 @@ rewriteApp app = do
pure x pure x
BVAdd w (BVValue _ x) (BVValue _ y) -> do BVAdd w (BVValue _ x) (BVValue _ y) -> do
pure (BVValue w (toUnsigned w (x + y))) pure (BVValue w (toUnsigned w (x + y)))
-- Move constant to right -- If first argument is constant and second is not, then commute.
BVAdd w (BVValue _ x) y -> do BVAdd w (BVValue _ x) y -> do
rewriteApp (BVAdd w y (BVValue w x)) rewriteApp $ BVAdd w y (BVValue w x)
-- (x + yc) + zc -> x + (yc + zc) -- (x + yc) + zc -> x + (yc + zc)
BVAdd w (valueAsApp -> Just (BVAdd _ x (BVValue _ yc))) (BVValue _ zc) -> do BVAdd w (valueAsApp -> Just (BVAdd _ x (BVValue _ yc))) (BVValue _ zc) -> do
rewriteApp (BVAdd w x (BVValue w (toUnsigned w (yc + zc)))) rewriteApp $ BVAdd w x (BVValue w (toUnsigned w (yc + zc)))
-- (x - yc) + zc -> x + (zc - yc) -- (x - yc) + zc -> x + (zc - yc)
BVAdd w (valueAsApp -> Just (BVSub _ x (BVValue _ yc))) (BVValue _ zc) -> do BVAdd w (valueAsApp -> Just (BVSub _ x (BVValue _ yc))) (BVValue _ zc) -> do
rewriteApp (BVAdd w x (BVValue w (toUnsigned w (zc - yc)))) rewriteApp $ BVAdd w x (BVValue w (toUnsigned w (zc - yc)))
-- (xc - y) + zc => (xc + zc) - y -- (xc - y) + zc => (xc + zc) - y
BVAdd w (valueAsApp -> Just (BVSub _ (BVValue _ xc) y)) (BVValue _ zc) -> do BVAdd w (valueAsApp -> Just (BVSub _ (BVValue _ xc) y)) (BVValue _ zc) -> do
rewriteApp (BVSub w (BVValue w (toUnsigned w (xc + zc))) y) rewriteApp $ BVSub w (BVValue w (toUnsigned w (xc + zc))) y
-- Increment address by a constant.
BVAdd _ (RelocatableValue r a) (BVValue _ c) ->
pure $ RelocatableValue r (incAddr c a)
-- addr a + (c - addr b) => c + (addr a - addr b) -- addr a + (c - addr b) => c + (addr a - addr b)
BVAdd w (RelocatableValue _ a) (valueAsApp -> Just (BVSub _ c (RelocatableValue _ b))) BVAdd w (RelocatableValue _ a) (valueAsApp -> Just (BVSub _ c (RelocatableValue _ b)))
@ -553,12 +570,18 @@ rewriteAssignRhs rhs =
ReadMem addr repr -> do ReadMem addr repr -> do
tgtAddr <- rewriteValue addr tgtAddr <- rewriteValue addr
evalRewrittenRhs (ReadMem tgtAddr repr) evalRewrittenRhs (ReadMem tgtAddr repr)
CondReadMem repr cond addr def -> do CondReadMem repr cond0 addr0 def0 -> do
rhs' <- CondReadMem repr cond <- rewriteValue cond0
<$> rewriteValue cond addr <- rewriteValue addr0
<*> rewriteValue addr case () of
<*> rewriteValue def _ | BoolValue b <- cond ->
evalRewrittenRhs rhs' if b then
evalRewrittenRhs (ReadMem addr repr)
else
rewriteValue def0
_ -> do
def <- rewriteValue def0
evalRewrittenRhs (CondReadMem repr cond addr def)
EvalArchFn archFn _repr -> do EvalArchFn archFn _repr -> do
f <- Rewriter $ gets $ rwctxArchFn . rwContext f <- Rewriter $ gets $ rwctxArchFn . rwContext
f archFn f archFn
@ -569,7 +592,16 @@ rewriteValue v =
BoolValue b -> pure (BoolValue b) BoolValue b -> pure (BoolValue b)
BVValue w i -> pure (BVValue w i) BVValue w i -> pure (BVValue w i)
RelocatableValue w a -> pure (RelocatableValue w a) RelocatableValue w a -> pure (RelocatableValue w a)
SymbolValue w a -> pure (SymbolValue w a) SymbolValue repr sym -> do
ctx <- Rewriter $ gets rwContext
rwctxConstraints ctx $ do
let secIdxAddrMap = rwctxSectionAddrMap ctx
case sym of
SectionIdentifier secIdx
| Just val <- Map.lookup secIdx secIdxAddrMap -> do
pure $! RelocatableValue repr (relativeSegmentAddr val)
_ -> do
pure $! SymbolValue repr sym
AssignedValue (Assignment aid _) -> Rewriter $ do AssignedValue (Assignment aid _) -> Rewriter $ do
ref <- gets $ rwctxCache . rwContext ref <- gets $ rwctxCache . rwContext
srcMap <- lift $ readSTRef ref srcMap <- lift $ readSTRef ref

File diff suppressed because it is too large Load Diff

View File

@ -133,7 +133,7 @@ data ParsedTermStmt arch ids
-- an unsigned number) is larger than the number of entries in the vector, then the -- an unsigned number) is larger than the number of entries in the vector, then the
-- result is undefined. -- result is undefined.
| ParsedLookupTable !(RegState (ArchReg arch) (Value arch ids)) | ParsedLookupTable !(RegState (ArchReg arch) (Value arch ids))
!(BVValue arch ids (ArchAddrWidth arch)) !(ArchAddrValue arch ids)
!(V.Vector (ArchSegmentOff arch)) !(V.Vector (ArchSegmentOff arch))
-- | A return with the given registers. -- | A return with the given registers.
| ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) | ParsedReturn !(RegState (ArchReg arch) (Value arch ids))
@ -279,6 +279,9 @@ instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- DiscoveryState -- DiscoveryState
type UnexploredFunctionMap arch =
Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch))
-- | Information discovered about the program -- | Information discovered about the program
data DiscoveryState arch data DiscoveryState arch
= DiscoveryState { memory :: !(Memory (ArchAddrWidth arch)) = DiscoveryState { memory :: !(Memory (ArchAddrWidth arch))
@ -294,7 +297,7 @@ data DiscoveryState arch
, _funInfo :: !(Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch))) , _funInfo :: !(Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))
-- ^ Map from function addresses to discovered information about function -- ^ Map from function addresses to discovered information about function
, _unexploredFunctions , _unexploredFunctions
:: !(Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch))) :: !(UnexploredFunctionMap arch)
-- ^ This maps addresses that have been marked as -- ^ This maps addresses that have been marked as
-- functions, but not yet analyzed to the reason -- functions, but not yet analyzed to the reason
-- they are analyzed. -- they are analyzed.
@ -328,7 +331,7 @@ ppDiscoveryStateBlocks info = withDiscoveryArchConstraints info $
emptyDiscoveryState :: Memory (ArchAddrWidth arch) emptyDiscoveryState :: Memory (ArchAddrWidth arch)
-- ^ State of memory -- ^ State of memory
-> AddrSymMap (ArchAddrWidth arch) -> AddrSymMap (ArchAddrWidth arch)
-- ^ Map from addresses -- ^ Map from addresses to their symbol name (if any)
-> ArchitectureInfo arch -> ArchitectureInfo arch
-- ^ architecture/OS specific information -- ^ architecture/OS specific information
-> DiscoveryState arch -> DiscoveryState arch
@ -351,7 +354,7 @@ globalDataMap = lens _globalDataMap (\s v -> s { _globalDataMap = v })
-- | List of functions to explore next. -- | List of functions to explore next.
unexploredFunctions unexploredFunctions
:: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch))) :: Simple Lens (DiscoveryState arch) (UnexploredFunctionMap arch)
unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunctions = v }) unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunctions = v })
-- | Get information for specific functions -- | Get information for specific functions

View File

@ -58,6 +58,7 @@ module Data.Macaw.Memory
, module Data.BinarySymbols , module Data.BinarySymbols
, DropError(..) , DropError(..)
, dropErrorAsMemError , dropErrorAsMemError
, splitSegmentRangeList
, dropSegmentRangeListBytes , dropSegmentRangeListBytes
, takeSegmentPrefix , takeSegmentPrefix
-- * MemWord -- * MemWord
@ -75,6 +76,7 @@ module Data.Macaw.Memory
, msegSegment , msegSegment
, msegOffset , msegOffset
, msegAddr , msegAddr
, msegByteCountAfter
, incSegmentOff , incSegmentOff
, diffSegmentOff , diffSegmentOff
, contentsAfterSegmentOff , contentsAfterSegmentOff
@ -90,10 +92,11 @@ module Data.Macaw.Memory
-- ** Undefined symbol infomration -- ** Undefined symbol infomration
, SymbolRequirement(..) , SymbolRequirement(..)
, SymbolUndefType(..) , SymbolUndefType(..)
-- * Section addresses
, memAddSectionAddr
, memSectionAddrMap
-- * General purposes addrs -- * General purposes addrs
, MemAddr , MemAddr(..)
, addrBase
, addrOffset
, absoluteAddr , absoluteAddr
, relativeAddr , relativeAddr
, relativeSegmentAddr , relativeSegmentAddr
@ -121,8 +124,10 @@ module Data.Macaw.Memory
, bsWord8 , bsWord8
, bsWord16be , bsWord16be
, bsWord16le , bsWord16le
, bsWord32
, bsWord32be , bsWord32be
, bsWord32le , bsWord32le
, bsWord64
, bsWord64be , bsWord64be
, bsWord64le , bsWord64le
, AddrSymMap , AddrSymMap
@ -234,6 +239,11 @@ bsWord32le bs
| otherwise = w 0 .|. w 1 .|. w 2 .|. w 3 | otherwise = w 0 .|. w 1 .|. w 2 .|. w 3
where w i = fromIntegral (BS.index bs i) `shiftL` (i `shiftL` 3) where w i = fromIntegral (BS.index bs i) `shiftL` (i `shiftL` 3)
-- | Convert a bytestring to an unsigned with the given endianness.
bsWord32 :: Endianness -> BS.ByteString -> Word32
bsWord32 BigEndian = bsWord32be
bsWord32 LittleEndian = bsWord32le
bsWord64be :: BS.ByteString -> Word64 bsWord64be :: BS.ByteString -> Word64
bsWord64be bs bsWord64be bs
| BS.length bs /= 8 = error "bsWord64be given bytestring with bad length." | BS.length bs /= 8 = error "bsWord64be given bytestring with bad length."
@ -246,6 +256,10 @@ bsWord64le bs
| otherwise = w 0 .|. w 1 .|. w 2 .|. w 3 .|. w 4 .|. w 5 .|. w 6 .|. w 7 | otherwise = w 0 .|. w 1 .|. w 2 .|. w 3 .|. w 4 .|. w 5 .|. w 6 .|. w 7
where w i = fromIntegral (BS.index bs i) `shiftL` (i `shiftL` 3) where w i = fromIntegral (BS.index bs i) `shiftL` (i `shiftL` 3)
bsWord64 :: Endianness -> BS.ByteString -> Word64
bsWord64 BigEndian = bsWord64be
bsWord64 LittleEndian = bsWord64le
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- MemBase -- MemBase
@ -260,12 +274,6 @@ newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 }
memWordInteger :: MemWord w -> Integer memWordInteger :: MemWord w -> Integer
memWordInteger = fromIntegral . _memWordValue memWordInteger = fromIntegral . _memWordValue
-- | Treat the word as a signed integer.
memWordSigned :: MemWidth w => MemWord w -> Integer
memWordSigned w = if i >= bound then i-2*bound else i where
i = memWordInteger w
bound = 2^(8*addrSize w-1)
instance Show (MemWord w) where instance Show (MemWord w) where
showsPrec _ (MemWord w) = showString "0x" . showHex w showsPrec _ (MemWord w) = showString "0x" . showHex w
@ -294,7 +302,7 @@ class (1 <= w) => MemWidth w where
-- The argument is not evaluated. -- The argument is not evaluated.
addrSize :: p w -> Int addrSize :: p w -> Int
-- Rotates the value by the given index. -- | Rotates the value by the given index.
addrRotate :: MemWord w -> Int -> MemWord w addrRotate :: MemWord w -> Int -> MemWord w
-- | Read an address with the given endianess. -- | Read an address with the given endianess.
@ -302,6 +310,12 @@ class (1 <= w) => MemWidth w where
-- This returns nothing if the bytestring is too short. -- This returns nothing if the bytestring is too short.
addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w) addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w)
-- | Treat the word as a signed integer.
memWordSigned :: MemWidth w => MemWord w -> Integer
memWordSigned w = if i >= bound then i-2*bound else i
where i = memWordInteger w
bound = 2^(addrBitSize w-1)
-- | Returns number of bits in address. -- | Returns number of bits in address.
addrBitSize :: MemWidth w => p w -> Int addrBitSize :: MemWidth w => p w -> Int
addrBitSize w = 8 * addrSize w addrBitSize w = 8 * addrSize w
@ -355,14 +369,12 @@ instance MemWidth w => Bounded (MemWord w) where
instance MemWidth 32 where instance MemWidth 32 where
addrWidthRepr _ = Addr32 addrWidthRepr _ = Addr32
addrWidthMod _ = 0xffffffff addrWidthMod _ = 0xffffffff
addrRotate (MemWord w) i = MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) addrRotate (MemWord w) i =
MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i))
addrSize _ = 4 addrSize _ = 4
addrRead e s addrRead e s
| BS.length s < 4 = Nothing | BS.length s < 4 = Nothing
| otherwise = | otherwise = Just $ MemWord $ fromIntegral $ bsWord32 e s
case e of
BigEndian -> Just $ MemWord $ fromIntegral $ bsWord32be s
LittleEndian -> Just $ MemWord $ fromIntegral $ bsWord32le s
instance MemWidth 64 where instance MemWidth 64 where
addrWidthRepr _ = Addr64 addrWidthRepr _ = Addr64
@ -371,10 +383,7 @@ instance MemWidth 64 where
addrSize _ = 8 addrSize _ = 8
addrRead e s addrRead e s
| BS.length s < 8 = Nothing | BS.length s < 8 = Nothing
| otherwise = | otherwise = Just $ MemWord $ bsWord64 e s
case e of
BigEndian -> Just $ MemWord $ fromIntegral $ bsWord64be s
LittleEndian -> Just $ MemWord $ fromIntegral $ bsWord64le s
-- | Number of bytes in an address -- | Number of bytes in an address
addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a
@ -482,53 +491,63 @@ data SymbolInfo =
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Relocation -- Relocation
-- | Information about a relocation.
--
-- A "relocation" is essentially a region of memory in a binary whose
-- ultimate value is unknown when the binary was generated. For
-- executables and shared libraries, relocation values are assigned
-- during loading or even deferred until needed at runtime (lazy
-- runtime linking). For object files, relocations values are
-- assigned during linking either by assigning values directly or
-- generating dynamic relocation entries for the loader to resolve.
--
-- This structure contains the information needed to compute the value
-- stored in memory, and whether there are constraints on that value.
data Relocation w
= Relocation { relocationSym :: !SymbolIdentifier
-- ^ The base address of the relocation
, relocationOffset :: !(MemWord w)
-- ^ A constant value to add to the base
-- to compute the relocation
, relocationIsRel :: !Bool
-- ^ If this is true, then one should subtract
-- address of where the relocation is stored from
-- the value computed by the symbol + offset. If
-- false, then do not subtract.
, relocationSize :: !Int
-- ^ Number of bytes available to write the
-- relocation address to. This and `relocationSign`
-- affect the ultimate values the relocation is
-- allowed to take.
, relocationIsSigned :: !Bool
-- ^ This indicates if the value stored will be
-- interpreted as an signed or unsigned number.
--
-- It is expected that the value stored is in the
-- range determined by the number of bytes and
-- whether those bytes will be interpreted as
-- signed.
, relocationEndianness :: !Endianness
-- ^ The byte order used to encode the relocation in
-- memory.
}
-- | Short encoding of endianness for relocation pretty printing
showEnd :: Endianness -> ShowS showEnd :: Endianness -> ShowS
showEnd LittleEndian = showString "LE" showEnd LittleEndian = showString "LE"
showEnd BigEndian = showString "BE" showEnd BigEndian = showString "BE"
-- | Information about a relocation
data Relocation w
= AbsoluteRelocation !SymbolIdentifier !(MemWord w) !Endianness !Int
-- ^ @AbsoluteRelocation addr off end size@ denotes an
-- address of the relocation plus the offset stored
-- with the given endianess.
--
-- The @size@ field is the number of bytes the relocation is stored
-- at, and when inserting the relocation value it should only use
-- that many bytes. If the address + offset is greater than or equal to
-- @2^(8*n)@, then updating the relocation target should fail. This is
-- used to support relocation types such as @R_X86_64_32@. We do not
-- currently support signed versions like @R_X86_64_32S@.
| RelativeRelocation !SymbolIdentifier !(MemWord w) !Endianness !Int
-- ^ @RelativeRelocation addr off end cnt@ denotes a relocation
-- that stores the value of @addr + off - this_addr@ (where
-- @this_addr@ is the address the relocation is stored at as a
-- signed value in @cnt@ bytes with endianess @end@.
-- | Return size of relocation in bytes
relocSize :: forall w . MemWidth w => Relocation w -> MemWord w
relocSize (AbsoluteRelocation _ _ _ cnt) = fromIntegral cnt
relocSize (RelativeRelocation _ _ _ cnt) = fromIntegral cnt
instance Show (Relocation w) where instance Show (Relocation w) where
showsPrec _ (AbsoluteRelocation base off end cnt) = showsPrec _ r =
showString "[areloc," showString "[areloc,"
. shows base . shows (relocationSym r)
. showChar ',' . showChar ','
. showHex (memWordInteger off) . showHex (memWordInteger (relocationOffset r))
. showChar ',' . showChar ','
. showEnd end . shows (8*relocationSize r)
. showChar ',' . (if relocationIsRel r then showString ",PC" else id)
. shows (8*cnt) . (if relocationIsSigned r then showString ",S" else id)
. showChar ']' . showChar ',' . showEnd (relocationEndianness r)
showsPrec _ (RelativeRelocation base off end cnt) =
showString "[rreloc,"
. shows base
. showHex (memWordInteger off)
. showChar ','
. showEnd end
. showChar ','
. shows (8*cnt)
. showChar ']' . showChar ']'
------------------------------------------------------------------------ ------------------------------------------------------------------------
@ -541,12 +560,15 @@ data SegmentRange (w :: Nat)
= ByteRegion !BS.ByteString = ByteRegion !BS.ByteString
-- ^ A region with specificed bytes -- ^ A region with specificed bytes
| RelocationRegion !(Relocation w) | RelocationRegion !(Relocation w)
-- ^ A region whose contents are computed using the expression
-- denoted by the relocation.
| BSSRegion !(MemWord w) | BSSRegion !(MemWord w)
-- ^ A region containing the given number of zero-initialized bytes. -- ^ A region containing the given number of zero-initialized
-- bytes.
rangeSize :: forall w . MemWidth w => SegmentRange w -> MemWord w rangeSize :: forall w . MemWidth w => SegmentRange w -> MemWord w
rangeSize (ByteRegion bs) = fromIntegral (BS.length bs) rangeSize (ByteRegion bs) = fromIntegral (BS.length bs)
rangeSize (RelocationRegion r) = relocSize r rangeSize (RelocationRegion r) = fromIntegral (relocationSize r)
rangeSize (BSSRegion sz) = sz rangeSize (BSSRegion sz) = sz
ppByte :: Word8 -> String -> String ppByte :: Word8 -> String -> String
@ -561,57 +583,6 @@ instance Show (SegmentRange w) where
showList [] = id showList [] = id
showList (h : r) = showsPrec 10 h . showList r showList (h : r) = showsPrec 10 h . showList r
takeSegmentPrefix :: MemWidth w => [SegmentRange w] -> MemWord w -> [SegmentRange w]
takeSegmentPrefix _ 0 = []
takeSegmentPrefix rngs c = do
let rest l d | c > d = takeSegmentPrefix l (c - d)
| otherwise = []
case rngs of
[] -> []
ByteRegion b : l ->
ByteRegion (BS.take (fromIntegral c) b)
: rest l (fromIntegral (BS.length b))
RelocationRegion r : l ->
RelocationRegion r
: rest l (relocSize r)
BSSRegion d : l ->
BSSRegion (min d c)
: rest l d
------------------------------------------------------------------------
-- MemoryError
-- | Type of errors that may occur when reading memory.
data MemoryError w
= AccessViolation !(MemAddr w)
-- ^ Memory could not be read, because it was not defined.
| PermissionsError !(MemAddr w)
-- ^ Memory could not be read due to insufficient permissions.
| UnexpectedRelocation !(MemAddr w) !(Relocation w) !String
-- ^ Read from location that partially overlaps a relocated entry
| UnexpectedBSS !(MemAddr w)
-- ^ We unexpectedly encountered a BSS segment/section.
| InvalidAddr !(MemAddr w)
-- ^ The data at the given address did not refer to a valid memory location.
| forall n. InvalidSize !(MemAddr w) !(NatRepr n)
instance MemWidth w => Show (MemoryError w) where
show err =
case err of
AccessViolation a ->
"Access violation at " ++ show a ++ "."
PermissionsError a ->
"Insufficient permissions at " ++ show a ++ "."
UnexpectedRelocation a r msg ->
"Attempt to read an unexpected relocation entry at " ++ show a ++ ":\n"
++ " " ++ show r ++ "\n" ++ msg
UnexpectedBSS a ->
"Attempt to read zero initialized BSS memory at " ++ show a ++ "."
InvalidAddr a ->
"Attempt to interpret an invalid address: " ++ show a ++ "."
InvalidSize a n ->
"Attempt to read an invalid number of bytes (" ++ show n ++ ") from address " ++ show a ++ "."
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- SegmentContents -- SegmentContents
@ -629,50 +600,10 @@ contentsSize (SegmentContents m) =
Nothing -> 0 Nothing -> 0
Just ((start, c),_) -> start + rangeSize c Just ((start, c),_) -> start + rangeSize c
-- | De-construct a 'SegmentContents' into its constituent ranges -- | Deconstruct a 'SegmentContents' into its constituent ranges
contentsRanges :: SegmentContents w -> [(MemWord w, SegmentRange w)] contentsRanges :: SegmentContents w -> [(MemWord w, SegmentRange w)]
contentsRanges = Map.toList . segContentsMap contentsRanges = Map.toList . segContentsMap
-- | Return list of contents from given word or an error if this we can't cleanly
-- partition a relocation
-- due to a relocation.
contentsAfterSegmentOff :: MemWidth w
=> MemSegmentOff w
-> Either (MemoryError w) [SegmentRange w]
contentsAfterSegmentOff mseg = do
-- Get offset within segment to get
let off = msegOffset mseg
-- Get complete contents of segment
let contents = segmentContents (msegSegment mseg)
-- Split the map into all segments starting strictly before offset,
-- memory starting at offset (if any), and contents strictly after offset.
let (premap,mv,post) = Map.splitLookup off (segContentsMap contents)
case mv of
-- If something starts at offset, then return it and everything after.
Just v -> Right $ v : Map.elems post
-- If no memory starts exactly at offset, then
-- look at the last segment starting before offset.
Nothing ->
case Map.maxViewWithKey premap of
-- This implies nothing starts before the segment offset, which should not be
-- allowed
Nothing -> error $ "Memory.contentsAfterSegmentOff invalid contents"
-- If last segment is a byte region then we drop elements before offset.
Just ((pre_off, ByteRegion bs),_) -> do
let v = ByteRegion (BS.drop (fromIntegral (off - pre_off)) bs)
Right $ v : Map.elems post
-- If last segment is a BSS region, then we drop elements before offset.
Just ((pre_off, BSSRegion sz),_) -> do
let v = BSSRegion (sz - fromIntegral (off - pre_off))
Right $ v : Map.elems post
-- If last segment is a symbolic reference, then the code is asking
-- us to partition a symbolic reference in two, which we cannot do.
Just ((_, RelocationRegion r),_) ->
Left (UnexpectedRelocation (relativeSegmentAddr mseg) r "caso")
contentsList :: SegmentContents w -> [(MemWord w, SegmentRange w)]
contentsList (SegmentContents m) = Map.toList m
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Code for injecting relocations into segments. -- Code for injecting relocations into segments.
@ -875,12 +806,16 @@ instance MemWidth w => Show (MemSegment w) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Memory -- Memory
-- | Map from region index to map of offset to segment.
type SegmentOffsetMap w = Map.Map RegionIndex (Map.Map (MemWord w) (MemSegment w)) type SegmentOffsetMap w = Map.Map RegionIndex (Map.Map (MemWord w) (MemSegment w))
-- | The state of the memory. -- | The state of the memory.
data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w) data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w)
-- ^ Return the address width of the memory -- ^ Return the address width of the memory
, memSegmentMap :: !(SegmentOffsetMap w) , memSegmentMap :: !(SegmentOffsetMap w)
-- ^ Segment map
, memSectionAddrMap :: !(Map SectionIndex (MemSegmentOff w))
-- ^ Map from section indices to addresses.
} }
-- | Get memory segments. -- | Get memory segments.
@ -891,6 +826,14 @@ memSegments m = concatMap Map.elems (Map.elems (memSegmentMap m))
memWidth :: Memory w -> NatRepr w memWidth :: Memory w -> NatRepr w
memWidth = addrWidthNatRepr . memAddrWidth memWidth = addrWidthNatRepr . memAddrWidth
-- | Add a new section index to address entry.
memAddSectionAddr :: SectionIndex -> MemSegmentOff w -> Memory w -> Memory w
memAddSectionAddr idx addr mem
| Map.member idx (memSectionAddrMap mem) =
error $ "memAddSectionAddr: duplicate index " ++ show idx
| otherwise =
mem { memSectionAddrMap = Map.insert idx addr (memSectionAddrMap mem) }
instance MemWidth w => Show (Memory w) where instance MemWidth w => Show (Memory w) where
show = show . memSegments show = show . memSegments
@ -898,6 +841,7 @@ instance MemWidth w => Show (Memory w) where
emptyMemory :: AddrWidthRepr w -> Memory w emptyMemory :: AddrWidthRepr w -> Memory w
emptyMemory w = Memory { memAddrWidth = w emptyMemory w = Memory { memAddrWidth = w
, memSegmentMap = Map.empty , memSegmentMap = Map.empty
, memSectionAddrMap = Map.empty
} }
-- | Get executable segments. -- | Get executable segments.
@ -935,12 +879,11 @@ insertMemSegment :: MemSegment w
-> Either (InsertError w) (Memory w) -> Either (InsertError w) (Memory w)
insertMemSegment seg mem = addrWidthClass (memAddrWidth mem) $ do insertMemSegment seg mem = addrWidthClass (memAddrWidth mem) $ do
absMap <- insertSegmentOffsetMap seg (memSegmentMap mem) absMap <- insertSegmentOffsetMap seg (memSegmentMap mem)
pure $ Memory { memAddrWidth = memAddrWidth mem pure $ mem { memSegmentMap = absMap }
, memSegmentMap = absMap
}
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- MemSegmentOff -- MemSegmentOff
-- | A pair containing a segment and offset. -- | A pair containing a segment and offset.
-- --
-- Functions that return a segment-offset pair enforce that the offset -- Functions that return a segment-offset pair enforce that the offset
@ -950,6 +893,12 @@ data MemSegmentOff w = MemSegmentOff { msegSegment :: !(MemSegment w)
} }
deriving (Eq, Ord) deriving (Eq, Ord)
-- | Return the number of bytes in the segment after this address.
msegByteCountAfter :: MemWidth w => MemSegmentOff w -> Integer
msegByteCountAfter segOff = sz - off
where sz = toInteger (segmentSize (msegSegment segOff))
off = toInteger (msegOffset segOff)
{-# DEPRECATED viewSegmentOff "Use msegSegment and msegOffset." #-} {-# DEPRECATED viewSegmentOff "Use msegSegment and msegOffset." #-}
viewSegmentOff :: MemSegmentOff w -> (MemSegment w, MemWord w) viewSegmentOff :: MemSegmentOff w -> (MemSegment w, MemWord w)
viewSegmentOff mseg = (msegSegment mseg, msegOffset mseg) viewSegmentOff mseg = (msegSegment mseg, msegOffset mseg)
@ -993,7 +942,8 @@ clearSegmentOffLeastBit (MemSegmentOff seg off) = MemSegmentOff seg (off .&. com
-- Returns 'Nothing' if the result would be out of range. -- Returns 'Nothing' if the result would be out of range.
incSegmentOff :: MemWidth w => MemSegmentOff w -> Integer -> Maybe (MemSegmentOff w) incSegmentOff :: MemWidth w => MemSegmentOff w -> Integer -> Maybe (MemSegmentOff w)
incSegmentOff (MemSegmentOff seg off) inc incSegmentOff (MemSegmentOff seg off) inc
| 0 <= next && next <= toInteger (segmentSize seg) = Just $ MemSegmentOff seg (fromInteger next) | 0 <= next && next <= toInteger (segmentSize seg) =
Just $ MemSegmentOff seg (fromInteger next)
| otherwise = Nothing | otherwise = Nothing
where next = toInteger off + inc where next = toInteger off + inc
@ -1026,7 +976,7 @@ memAsAddrPairs :: Memory w
-> [(MemSegmentOff w, MemSegmentOff w)] -> [(MemSegmentOff w, MemSegmentOff w)]
memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do
seg <- memSegments mem seg <- memSegments mem
(contents_offset,r) <- contentsList (segmentContents seg) (contents_offset,r) <- contentsRanges (segmentContents seg)
let sz = addrSize mem let sz = addrSize mem
case r of case r of
ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do
@ -1109,6 +1059,99 @@ instance MemWidth w => Show (MemAddr w) where
instance MemWidth w => Pretty (MemAddr w) where instance MemWidth w => Pretty (MemAddr w) where
pretty = text . show pretty = text . show
------------------------------------------------------------------------
-- MemoryError
-- | Type of errors that may occur when reading memory.
data MemoryError w
= AccessViolation !(MemAddr w)
-- ^ Memory could not be read, because it was not defined.
| PermissionsError !(MemAddr w)
-- ^ Memory could not be read due to insufficient permissions.
| UnexpectedRelocation !(MemAddr w) !(Relocation w)
-- ^ Read from location that partially overlaps a relocated entry
| UnexpectedByteRelocation !(MemAddr w) !(Relocation w)
-- ^ An relocation appeared when reading a byte.
| Unsupported32ImmRelocation !(MemAddr w) !(Relocation w)
-- ^ An unsupported relocation appeared when reading a 32-bit immediate.
| UnsupportedJumpOffsetRelocation !(MemAddr w) !(Relocation w)
-- ^ An unsupported relocation appeared when reading a jump offset.
| UnexpectedBSS !(MemAddr w)
-- ^ We unexpectedly encountered a BSS segment/section.
| InvalidAddr !(MemAddr w)
-- ^ The data at the given address did not refer to a valid memory location.
| InvalidRead !(MemSegmentOff w) !Word64
-- ^ Can't read the given number of bytes from the offset as that is outside
-- allocated memory.
| forall n. InvalidSize !(MemAddr w) !(NatRepr n)
instance MemWidth w => Show (MemoryError w) where
show err =
case err of
AccessViolation a ->
"Access violation at " ++ show a ++ "."
PermissionsError a ->
"Insufficient permissions at " ++ show a ++ "."
UnexpectedRelocation a r ->
"Attempt to read an unexpected relocation entry at " ++ show a ++ ":\n"
++ " " ++ show r
UnexpectedByteRelocation a r ->
"Attempt to read a relocation as a byte at " ++ show a ++ ":\n"
++ " " ++ show r
Unsupported32ImmRelocation a r ->
"Attempt to read an unsupported relocation as a 32-bit immediate at " ++ show a ++ ":\n"
++ " " ++ show r
UnsupportedJumpOffsetRelocation a r ->
"Attempt to read an unsupported relocation as a jump offset at " ++ show a ++ ":\n"
++ " " ++ show r
UnexpectedBSS a ->
"Attempt to read zero initialized BSS memory at " ++ show a ++ "."
InvalidAddr a ->
"Attempt to interpret an invalid address: " ++ show a ++ "."
InvalidRead a c ->
"Read " ++ show c ++ " bytes if after defined memory " ++ show a ++ "."
InvalidSize a n ->
"Attempt to read an invalid number of bytes (" ++ show n ++ ") from address " ++ show a ++ "."
------------------------------------------------------------------------
-- Reading contents
-- | Return list of contents from given word or an error if this we can't cleanly
-- partition a relocation
-- due to a relocation.
contentsAfterSegmentOff :: MemWidth w
=> MemSegmentOff w
-> Either (MemoryError w) [SegmentRange w]
contentsAfterSegmentOff mseg = do
-- Get offset within segment to get
let off = msegOffset mseg
-- Get complete contents of segment
let contents = segmentContents (msegSegment mseg)
-- Split the map into all segments starting strictly before offset,
-- memory starting at offset (if any), and contents strictly after offset.
let (premap,mv,post) = Map.splitLookup off (segContentsMap contents)
case mv of
-- If something starts at offset, then return it and everything after.
Just v -> Right $ v : Map.elems post
-- If no memory starts exactly at offset, then
-- look at the last segment starting before offset.
Nothing ->
case Map.maxViewWithKey premap of
-- This implies nothing starts before the segment offset, which should not be
-- allowed
Nothing -> error $ "Memory.contentsAfterSegmentOff invalid contents"
-- If last segment is a byte region then we drop elements before offset.
Just ((preOff, ByteRegion bs),_) -> do
let v = ByteRegion (BS.drop (fromIntegral (off - preOff)) bs)
Right $ v : Map.elems post
-- If last segment is a BSS region, then we drop elements before offset.
Just ((preOff, BSSRegion sz),_) -> do
let v = BSSRegion (sz - fromIntegral (off - preOff))
Right $ v : Map.elems post
-- If last segment is a symbolic reference, then the code is asking
-- us to partition a symbolic reference in two, which we cannot do.
Just ((_, RelocationRegion r),_) ->
Left $ UnexpectedRelocation (relativeSegmentAddr mseg) r
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- AddrSymMap -- AddrSymMap
@ -1117,7 +1160,34 @@ instance MemWidth w => Pretty (MemAddr w) where
type AddrSymMap w = Map.Map (MemSegmentOff w) BSC.ByteString type AddrSymMap w = Map.Map (MemSegmentOff w) BSC.ByteString
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- DropError -- Split segment range list.
-- | @takeSegmentPrefix ranges cnt@ attempts to read @cnt@ bytes from
-- @ranges@.
--
-- It is a total function, and will return @ranges@ if it contains
-- less than @cnt@ bytes. It may also return more than @cnt@ bytes as
-- if a relocation region spans across the break, it will return the
-- region.
takeSegmentPrefix :: forall w
. MemWidth w => [SegmentRange w] -> MemWord w -> [SegmentRange w]
takeSegmentPrefix _ 0 = []
takeSegmentPrefix rngs c = do
let rest :: [SegmentRange w] -> MemWord w -> [SegmentRange w]
rest l d | c > d = takeSegmentPrefix l (c - d)
| otherwise = []
case rngs of
[] -> []
ByteRegion b : l ->
ByteRegion (BS.take (fromIntegral c) b)
: rest l (fromIntegral (BS.length b))
RelocationRegion r : l ->
RelocationRegion r
: rest l (fromIntegral (relocationSize r))
BSSRegion d : l ->
BSSRegion (min d c)
: rest l d
-- | An error that occured when droping bytes. -- | An error that occured when droping bytes.
data DropError w data DropError w
@ -1125,8 +1195,50 @@ data DropError w
| DropInvalidAddr | DropInvalidAddr
dropErrorAsMemError :: MemAddr w -> DropError w -> MemoryError w dropErrorAsMemError :: MemAddr w -> DropError w -> MemoryError w
dropErrorAsMemError a (DropUnexpectedRelocation r) = UnexpectedRelocation a r "dropErr" dropErrorAsMemError a (DropUnexpectedRelocation r) = UnexpectedRelocation a r
dropErrorAsMemError a DropInvalidAddr = InvalidAddr a dropErrorAsMemError a DropInvalidAddr = InvalidAddr a
splitSegmentRangeList' :: MemWidth w
=> [SegmentRange w]
-> Int
-> [SegmentRange w]
-> Either (DropError w) ([SegmentRange w], [SegmentRange w])
splitSegmentRangeList' prev c next
| c <= 0 = Right (reverse prev, next)
splitSegmentRangeList' _ _ [] = Left DropInvalidAddr
splitSegmentRangeList' prev cnt (reg@(ByteRegion bs) : rest) = do
let sz = BS.length bs
if cnt < sz then do
let taken = ByteRegion (BS.take cnt bs):prev
let dropped = ByteRegion (BS.drop cnt bs) : rest
pure $ (reverse taken, dropped)
else do
splitSegmentRangeList' (reg:prev) (cnt - sz) rest
splitSegmentRangeList' prev cnt (reg@(RelocationRegion r):rest) = do
let sz = relocationSize r
if toInteger cnt < toInteger sz then
Left (DropUnexpectedRelocation r)
else do
splitSegmentRangeList' (reg:prev) (cnt - fromIntegral sz) rest
splitSegmentRangeList' prev cnt (reg@(BSSRegion sz): rest) =
if toInteger cnt < toInteger sz then do
let taken = BSSRegion (fromIntegral cnt):prev
let dropped = BSSRegion (sz - fromIntegral cnt) : rest
pure $ (reverse taken, dropped)
else
splitSegmentRangeList' (reg:prev) (cnt - fromIntegral sz) rest
-- | Given a segment data and a number of bytes `c`, this partitions the data in
-- two data regions. The first contains the first `c` bytes in the data; the second
-- contains the rest of the data.
--
-- This will return an exception if the size of the data is too small or the partition
-- would split a relocation entry.
splitSegmentRangeList :: MemWidth w
=> [SegmentRange w]
-> Int
-> Either (DropError w) ([SegmentRange w], [SegmentRange w])
splitSegmentRangeList l c = splitSegmentRangeList' [] c l
-- | Given a contiguous list of segment ranges and a number of bytes to drop, this -- | Given a contiguous list of segment ranges and a number of bytes to drop, this
-- returns the remaining segment ranges or throws an error. -- returns the remaining segment ranges or throws an error.
@ -1135,26 +1247,7 @@ dropSegmentRangeListBytes :: forall w
=> [SegmentRange w] => [SegmentRange w]
-> Int -> Int
-> Either (DropError w) [SegmentRange w] -> Either (DropError w) [SegmentRange w]
dropSegmentRangeListBytes ranges 0 = Right ranges dropSegmentRangeListBytes l c = snd <$> splitSegmentRangeList l c
dropSegmentRangeListBytes (ByteRegion bs : rest) cnt = do
let sz = BS.length bs
if sz > cnt then
Right $ ByteRegion (BS.drop cnt bs) : rest
else
dropSegmentRangeListBytes rest (cnt - sz)
dropSegmentRangeListBytes (RelocationRegion r:rest) cnt = do
let sz = fromIntegral (relocSize r)
if sz > cnt then
Left (DropUnexpectedRelocation r)
else
dropSegmentRangeListBytes rest (cnt - sz)
dropSegmentRangeListBytes (BSSRegion sz : rest) cnt =
if toInteger sz > toInteger cnt then
Right $ BSSRegion (sz - fromIntegral cnt) : rest
else
dropSegmentRangeListBytes rest (cnt - fromIntegral sz)
dropSegmentRangeListBytes [] _ =
Left DropInvalidAddr
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Memory symbol -- Memory symbol
@ -1193,41 +1286,50 @@ addrContentsAfter mem addr = do
-- --
-- This is a helper method for @readByteString@ below. -- This is a helper method for @readByteString@ below.
readByteString' :: MemWidth w readByteString' :: MemWidth w
=> BS.ByteString => MemSegmentOff w
-- ^ Bytestring read so far (prepended to output) -- ^ Initial starting address
-> [BS.ByteString]
-- ^ Bytestring read so far (in reverse order)
-> [SegmentRange w] -> [SegmentRange w]
-- ^ Remaining segments to read from. -- ^ Remaining segments to read from.
-> MemAddr w
-- ^ Address we are reading from (used for error reporting)
-> Word64 -> Word64
-- ^ Number of bytes to read. -- ^ Number of bytes remaining to read.
-> Either (MemoryError w) BS.ByteString -> Either (MemoryError w) [BS.ByteString]
readByteString' _ _ _ 0 = pure BS.empty readByteString' _ prev _ 0 =
readByteString' _ [] addr _ = Left $! InvalidAddr addr pure $! prev
readByteString' prev (ByteRegion bs:rest) addr sz = readByteString' _ _ [] _ = error "internal: readByteString' given too many bytes."
if toInteger sz <= toInteger (BS.length bs) then readByteString' initAddr prev (ByteRegion bs:rest) cnt =
pure $ prev <> BS.take (fromIntegral sz) bs if toInteger cnt <= toInteger (BS.length bs) then
pure $! BS.take (fromIntegral cnt) bs : prev
else do else do
let addr' = incAddr (fromIntegral (BS.length bs)) addr let cnt' = cnt - fromIntegral (BS.length bs)
let sz' = sz - fromIntegral (BS.length bs) readByteString' initAddr (bs:prev) rest cnt'
readByteString' (prev <> bs) rest addr' sz' readByteString' initAddr prev (RelocationRegion r:_) _ = do
readByteString' _ (RelocationRegion r:_) addr _ = do let cnt = sum (toInteger . BS.length <$> prev)
Left $! UnexpectedRelocation addr r "readBS" let addr = incAddr cnt (relativeSegmentAddr initAddr)
readByteString' prev (BSSRegion cnt:rest) addr sz = Left $! UnexpectedRelocation addr r
if toInteger sz <= toInteger cnt then readByteString' initAddr prev (BSSRegion sz:rest) cnt =
pure $ prev <> BS.replicate (fromIntegral sz) 0 if toInteger cnt <= toInteger sz then
pure $! BS.replicate (fromIntegral cnt) 0 : prev
else do else do
let addr' = incAddr (toInteger sz) addr let cnt' = cnt - fromIntegral sz
let sz' = sz - fromIntegral cnt let next = BS.replicate (fromIntegral sz) 0 : prev
seq addr' $ seq cnt' $ seq next $
readByteString' (prev <> BS.replicate (fromIntegral cnt) 0) rest addr' sz' readByteString' initAddr next rest cnt'
-- | Attemtp to read a bytestring of the given length -- | Attemtp to read a bytestring of the given length
readByteString :: Memory w -> MemAddr w -> Word64 -> Either (MemoryError w) BS.ByteString readByteString :: Memory w
readByteString mem addr sz = addrWidthClass (memAddrWidth mem) $ do -> MemAddr w
-> Word64 -- ^ Number of bytes to read
-> Either (MemoryError w) BS.ByteString
readByteString mem addr cnt = addrWidthClass (memAddrWidth mem) $ do
segOff <- resolveMemAddr mem addr segOff <- resolveMemAddr mem addr
-- Check read is in range.
when (toInteger cnt > msegByteCountAfter segOff) $ do
Left $! InvalidRead segOff cnt
-- Get contents after segment
l <- contentsAfterSegmentOff segOff l <- contentsAfterSegmentOff segOff
readByteString' BS.empty l addr sz mconcat . reverse <$> readByteString' segOff [] l cnt
-- | Read an address from the value in the segment or report a memory -- | Read an address from the value in the segment or report a memory
-- error. -- error.
@ -1239,7 +1341,7 @@ readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do
let sz = fromIntegral (addrSize addr) let sz = fromIntegral (addrSize addr)
bs <- readByteString mem addr sz bs <- readByteString mem addr sz
case addrRead end bs of case addrRead end bs of
Just val -> Right $ MemAddr 0 val Just val -> Right $ MemAddr 0 val
Nothing -> error $ "readAddr internal error: readByteString result too short." Nothing -> error $ "readAddr internal error: readByteString result too short."
-- | Read a single byte. -- | Read a single byte.

View File

@ -119,19 +119,6 @@ flagsForSectionFlags f =
where flagIf :: ElfSectionFlags w -> Perm.Flags -> Perm.Flags where flagIf :: ElfSectionFlags w -> Perm.Flags -> Perm.Flags
flagIf ef pf = if f `Elf.hasPermissions` ef then pf else Perm.none flagIf ef pf = if f `Elf.hasPermissions` ef then pf else Perm.none
------------------------------------------------------------------------
-- RegionAdjust
-- | This captures how to translate addresses in the Elf file to
-- regions in the memory object.
data RegionAdjust
= RegionAdjust { regionIndex :: !RegionIndex
-- ^ Region index for new segments
, regionOffset :: !Integer
-- ^ Offset from region to automatically add to
-- segment/sections during loading.
}
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- MemLoader -- MemLoader
@ -312,6 +299,7 @@ type RelocationResolver tp
= SymbolVector = SymbolVector
-> Elf.RelEntry tp -> Elf.RelEntry tp
-> MemWord (Elf.RelocationWidth tp) -> MemWord (Elf.RelocationWidth tp)
-- ^ Offset
-> RelocResolver (Relocation (Elf.RelocationWidth tp)) -> RelocResolver (Relocation (Elf.RelocationWidth tp))
data SomeRelocationResolver w data SomeRelocationResolver w
@ -334,47 +322,92 @@ resolveSymbol (SymbolVector symtab) symIdx = do
relocError $ RelocationBadSymbolIndex $ fromIntegral symIdx relocError $ RelocationBadSymbolIndex $ fromIntegral symIdx
Just sym -> pure $ sym Just sym -> pure $ sym
resolveRelocationAddr :: SymbolVector -- | This attempts to resolve an index in the symbol table to the
-- identifier information needed to resolve its loaded address.
resolveRelocationSym :: SymbolVector
-- ^ A vector mapping symbol indices to the -- ^ A vector mapping symbol indices to the
-- associated symbol information. -- associated symbol information.
-> Word32 -> Word32
-- ^ Index in the symbol table this refers to. -- ^ Index in the symbol table this refers to.
-> RelocResolver SymbolIdentifier -> RelocResolver SymbolIdentifier
resolveRelocationAddr symtab symIdx = do resolveRelocationSym symtab symIdx = do
sym <- resolveSymbol symtab symIdx sym <- resolveSymbol symtab symIdx
case symbolDef sym of case symbolDef sym of
DefinedSymbol{} -> do DefinedSymbol{} -> do
pure $ SymbolRelocation (symbolName sym) (symbolVersion sym) pure $ SymbolRelocation (symbolName sym) (symbolVersion sym)
SymbolSection idx -> do SymbolSection idx -> do
pure $ SectionBaseRelocation idx pure $ SectionIdentifier idx
SymbolFile _ -> do SymbolFile _ -> do
relocError $ RelocationFileUnsupported relocError $ RelocationFileUnsupported
UndefinedSymbol{} -> do UndefinedSymbol{} -> do
pure $ SymbolRelocation (symbolName sym) (symbolVersion sym) pure $ SymbolRelocation (symbolName sym) (symbolVersion sym)
-- | Attempt to resolve an X86_64 specific symbol. -- | Attempt to resolve an X86_64 specific symbol.
relaTargetX86_64 :: SomeRelocationResolver 64 relaTargetX86_64 :: SymbolVector
relaTargetX86_64 = SomeRelocationResolver $ \symtab rel off -> -> Elf.RelEntry Elf.X86_64_RelocationType
-> MemWord 64
-- ^ Offset of symbol
-> RelocResolver (Relocation 64)
relaTargetX86_64 symtab rel off =
case Elf.relType rel of case Elf.relType rel of
Elf.R_X86_64_JUMP_SLOT -> do Elf.R_X86_64_JUMP_SLOT -> do
addr <- resolveRelocationAddr symtab (Elf.relSym rel) sym <- resolveRelocationSym symtab (Elf.relSym rel)
pure $ AbsoluteRelocation addr off LittleEndian 8 pure $ Relocation { relocationSym = sym
, relocationOffset = off
, relocationIsRel = False
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
}
Elf.R_X86_64_PC32 -> do Elf.R_X86_64_PC32 -> do
addr <- resolveRelocationAddr symtab (Elf.relSym rel) sym <- resolveRelocationSym symtab (Elf.relSym rel)
pure $ RelativeRelocation addr off LittleEndian 4 pure $ Relocation { relocationSym = sym
, relocationOffset = off
, relocationIsRel = True
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = LittleEndian
}
Elf.R_X86_64_32 -> do Elf.R_X86_64_32 -> do
addr <- resolveRelocationAddr symtab (Elf.relSym rel) sym <- resolveRelocationSym symtab (Elf.relSym rel)
pure $ AbsoluteRelocation addr off LittleEndian 4 pure $ Relocation { relocationSym = sym
, relocationOffset = off
, relocationIsRel = False
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = LittleEndian
}
Elf.R_X86_64_32S -> do
sym <- resolveRelocationSym symtab (Elf.relSym rel)
pure $ Relocation { relocationSym = sym
, relocationOffset = off
, relocationIsRel = False
, relocationSize = 4
, relocationIsSigned = True
, relocationEndianness = LittleEndian
}
Elf.R_X86_64_64 -> do Elf.R_X86_64_64 -> do
addr <- resolveRelocationAddr symtab (Elf.relSym rel) sym <- resolveRelocationSym symtab (Elf.relSym rel)
pure $ AbsoluteRelocation addr off LittleEndian 8 pure $ Relocation { relocationSym = sym
, relocationOffset = off
, relocationIsRel = False
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
}
-- R_X86_64_GLOB_DAT are used to update GOT entries with their -- R_X86_64_GLOB_DAT are used to update GOT entries with their
-- target address. They are similar to R_x86_64_64 except appear -- target address. They are similar to R_x86_64_64 except appear
-- inside dynamically linked executables/libraries, and are often -- inside dynamically linked executables/libraries, and are often
-- loaded lazily. We just use the eager AbsoluteRelocation here. -- loaded lazily. We just use the eager AbsoluteRelocation here.
Elf.R_X86_64_GLOB_DAT -> do Elf.R_X86_64_GLOB_DAT -> do
addr <- resolveRelocationAddr symtab (Elf.relSym rel) sym <- resolveRelocationSym symtab (Elf.relSym rel)
pure $ AbsoluteRelocation addr off LittleEndian 8 pure $ Relocation { relocationSym = sym
, relocationOffset = off
, relocationIsRel = False
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
}
-- Jhx Note. These will be needed to support thread local variables. -- Jhx Note. These will be needed to support thread local variables.
-- Elf.R_X86_64_TPOFF32 -> undefined -- Elf.R_X86_64_TPOFF32 -> undefined
@ -398,10 +431,6 @@ relaTargetARM = SomeRelocationResolver $ \_symtab rel _maddend ->
-- TargetSymbol <$> resolveSymbol symtab rel -- TargetSymbol <$> resolveSymbol symtab rel
tp -> relocError $ RelocationUnsupportedType (show tp) tp -> relocError $ RelocationUnsupportedType (show tp)
-} -}
{-
000000613ff8 003c00000006 R_X86_64_GLOB_DAT 0000000000000000 __gmon_start__ + 0
-}
-- | Creates a relocation map from the contents of a dynamic section. -- | Creates a relocation map from the contents of a dynamic section.
@ -413,7 +442,7 @@ withRelocationResolver
-> MemLoader w a -> MemLoader w a
withRelocationResolver hdr f = withRelocationResolver hdr f =
case (Elf.headerClass hdr, Elf.headerMachine hdr) of case (Elf.headerClass hdr, Elf.headerMachine hdr) of
(Elf.ELFCLASS64, Elf.EM_X86_64) -> f relaTargetX86_64 (Elf.ELFCLASS64, Elf.EM_X86_64) -> f (SomeRelocationResolver relaTargetX86_64)
-- (Elf.ELFCLASS32, Elf.EM_ARM) -> f relaTargetARM -- (Elf.ELFCLASS32, Elf.EM_ARM) -> f relaTargetARM
(_,mach) -> throwError $ UnsupportedArchitecture (show mach) (_,mach) -> throwError $ UnsupportedArchitecture (show mach)
@ -684,7 +713,8 @@ reprConstraints Addr64 x = x
-- | Return a memory segment for elf segment if it loadable. -- | Return a memory segment for elf segment if it loadable.
memSegmentForElfSegment :: (MemWidth w, Monad m, Integral (ElfWordType w)) memSegmentForElfSegment :: (MemWidth w, Monad m, Integral (ElfWordType w))
=> ResolveFn v m w => ResolveFn v m w
-> RegionAdjust -- ^ Index for segment -> MemAddr w
-- ^ Base address to use for segments.
-> L.ByteString -> L.ByteString
-- ^ Complete contents of Elf file. -- ^ Complete contents of Elf file.
-> AddrOffsetMap w v -> AddrOffsetMap w v
@ -692,28 +722,27 @@ memSegmentForElfSegment :: (MemWidth w, Monad m, Integral (ElfWordType w))
-> Elf.Phdr w -> Elf.Phdr w
-- ^ Program header entry -- ^ Program header entry
-> m (MemSegment w) -> m (MemSegment w)
memSegmentForElfSegment resolver regAdj contents relocMap phdr = memSegmentForElfSegment resolver base contents relocMap phdr =
memSegment resolver (regionIndex regAdj) relocMap (fromInteger base) flags dta sz memSegment resolver (addrBase base) relocMap off flags dta sz
where {- seg = Elf.phdrSegment phdr -} where off = addrOffset base + fromIntegral (Elf.phdrSegmentVirtAddr phdr)
dta = sliceL (Elf.phdrFileRange phdr) contents dta = sliceL (Elf.phdrFileRange phdr) contents
sz = fromIntegral $ Elf.phdrMemSize phdr sz = fromIntegral $ Elf.phdrMemSize phdr
base = regionOffset regAdj + toInteger (Elf.phdrSegmentVirtAddr phdr)
flags = flagsForSegmentFlags (Elf.phdrSegmentFlags phdr) flags = flagsForSegmentFlags (Elf.phdrSegmentFlags phdr)
-- | Load an elf file into memory. -- | Load an elf file into memory.
insertElfSegment :: RegionAdjust insertElfSegment :: MemAddr w
-- ^ Where to load region -- ^ Base address to use for loading program header.
-> ElfFileSectionMap (ElfWordType w) -> ElfFileSectionMap (ElfWordType w)
-> L.ByteString -> L.ByteString
-> RelocMap w v -> RelocMap w v
-- ^ Relocations to apply in loading section. -- ^ Relocations to apply in loading section.
-> Elf.Phdr w -> Elf.Phdr w
-> MemLoader w () -> MemLoader w ()
insertElfSegment regAdj shdrMap contents (RelocMap relocMap resolver) phdr = do insertElfSegment base shdrMap contents (RelocMap relocMap resolver) phdr = do
w <- uses mlsMemory memAddrWidth w <- uses mlsMemory memAddrWidth
reprConstraints w $ do reprConstraints w $ do
when (Elf.phdrMemSize phdr > 0) $ do when (Elf.phdrMemSize phdr > 0) $ do
seg <- memSegmentForElfSegment resolver regAdj contents relocMap phdr seg <- memSegmentForElfSegment resolver base contents relocMap phdr
let seg_idx = Elf.phdrSegmentIndex phdr let seg_idx = Elf.phdrSegmentIndex phdr
loadMemSegment ("Segment " ++ show seg_idx) seg loadMemSegment ("Segment " ++ show seg_idx) seg
let phdr_offset = Elf.fromFileOffset (Elf.phdrFileStart phdr) let phdr_offset = Elf.fromFileOffset (Elf.phdrFileStart phdr)
@ -727,16 +756,19 @@ insertElfSegment regAdj shdrMap contents (RelocMap relocMap resolver) phdr = do
fail $ "Found section header that overlaps with program header." fail $ "Found section header that overlaps with program header."
let sec_offset = fromIntegral $ shdr_start - phdr_offset let sec_offset = fromIntegral $ shdr_start - phdr_offset
let Just addr = resolveSegmentOff seg sec_offset let Just addr = resolveSegmentOff seg sec_offset
mlsMemory %= memAddSectionAddr (fromElfSectionIndex elfIdx) addr
mlsIndexMap %= Map.insert elfIdx (addr, sec) mlsIndexMap %= Map.insert elfIdx (addr, sec)
_ -> fail "Unexpected shdr interval" _ -> fail "Unexpected shdr interval"
-- | Load an elf file into memory with the given options. -- | Load an elf file into memory by parsing segments.
memoryForElfSegments memoryForElfSegments
:: forall w :: forall w
. RegionAdjust . MemAddr w
-- ^ This is used as the base address to load Elf segments at (the virtual
-- address is treated as relative to this.
-> Elf w -> Elf w
-> MemLoader w () -> MemLoader w ()
memoryForElfSegments regAdj e = do memoryForElfSegments base e = do
let l = elfLayout e let l = elfLayout e
let hdr = Elf.elfLayoutHeader l let hdr = Elf.elfLayoutHeader l
let w = elfAddrWidth (elfClass e) let w = elfAddrWidth (elfClass e)
@ -755,28 +787,43 @@ memoryForElfSegments regAdj e = do
, let sec = shdr^._1 , let sec = shdr^._1
, let end = start + elfSectionFileSize sec , let end = start + elfSectionFileSize sec
] ]
mapM_ (insertElfSegment regAdj intervals contents relocMap) mapM_ (insertElfSegment base intervals contents relocMap)
(filter (\p -> Elf.phdrSegmentType p == Elf.PT_LOAD) ph) (filter (\p -> Elf.phdrSegmentType p == Elf.PT_LOAD) ph)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Elf section loading -- Elf section loading
-- | Contains the name of a section we allocate and whether
-- relocations are used.
type AllocatedSectionInfo = (B.ByteString, Bool)
allocatedNames :: AllocatedSectionInfo -> [B.ByteString]
allocatedNames (nm,False) = [nm]
allocatedNames (nm,True) = [nm, ".rela" <> nm]
allocatedSectionInfo :: [AllocatedSectionInfo]
allocatedSectionInfo =
[ (,) ".text" True
, (,) ".eh_frame" True
, (,) ".data" True
, (,) ".bss" False
, (,) ".rodata" True
]
allowedSectionNames :: Set B.ByteString allowedSectionNames :: Set B.ByteString
allowedSectionNames = Set.fromList allowedSectionNames = Set.fromList
[ "" $ concatMap allocatedNames allocatedSectionInfo
, ".text", ".rela.text" ++ [ ""
, ".data", ".rela.data" , ".text.hot"
, ".bss" , ".text.unlikely"
, ".tbss" , ".tbss"
, ".tdata", ".rela.tdata" , ".tdata", ".rela.tdata"
, ".rodata", ".rela.rodata" , ".comment"
, ".comment" , ".note.GNU-stack"
, ".note.GNU-stack" , ".shstrtab"
, ".eh_frame", ".rela.eh_frame" , ".symtab"
, ".shstrtab" , ".strtab"
, ".symtab" ]
, ".strtab"
]
-- | Map from section names to information about them. -- | Map from section names to information about them.
type SectionNameMap w = Map SectionName [ElfSection (ElfWordType w)] type SectionNameMap w = Map SectionName [ElfSection (ElfWordType w)]
@ -835,6 +882,7 @@ insertAllocatedSection hdr symtab sectionMap regIdx nm = do
-- Add entry to map elf section index to start in segment. -- Add entry to map elf section index to start in segment.
let elfIdx = ElfSectionIndex (elfSectionIndex sec) let elfIdx = ElfSectionIndex (elfSectionIndex sec)
let Just addr = resolveSegmentOff seg 0 let Just addr = resolveSegmentOff seg 0
mlsMemory %= memAddSectionAddr (fromElfSectionIndex elfIdx) addr
mlsIndexMap %= Map.insert elfIdx (addr, sec) mlsIndexMap %= Map.insert elfIdx (addr, sec)
-- | Create the symbol vector from -- | Create the symbol vector from
@ -864,10 +912,8 @@ memoryForElfSections e = do
sectionMap = foldlOf elfSections insSec Map.empty e sectionMap = foldlOf elfSections insSec Map.empty e
where insSec m sec = Map.insertWith (\new old -> old ++ new) (elfSectionName sec) [sec] m where insSec m sec = Map.insertWith (\new old -> old ++ new) (elfSectionName sec) [sec] m
symtab <- symtabSymbolVector e symtab <- symtabSymbolVector e
insertAllocatedSection hdr symtab sectionMap 1 ".text" forM_ (zip [1..] allocatedSectionInfo) $ \(idx, (nm,_)) -> do
insertAllocatedSection hdr symtab sectionMap 2 ".data" insertAllocatedSection hdr symtab sectionMap idx nm
insertAllocatedSection hdr symtab sectionMap 3 ".bss"
insertAllocatedSection hdr symtab sectionMap 4 ".rodata"
-- TODO: Figure out what to do about .tdata and .tbss -- TODO: Figure out what to do about .tdata and .tbss
-- Check for other section names that we do not support." -- Check for other section names that we do not support."
let unsupportedKeys = Map.keysSet sectionMap `Set.difference ` allowedSectionNames let unsupportedKeys = Map.keysSet sectionMap `Set.difference ` allowedSectionNames
@ -899,20 +945,27 @@ adjustedLoadRegionIndex e loadOpt =
-- information tends to be more precise. -- information tends to be more precise.
memoryForElf :: LoadOptions memoryForElf :: LoadOptions
-> Elf w -> Elf w
-> Either String (SectionIndexMap w, Memory w, [MemLoadWarning]) -> Either String ( Memory w
memoryForElf opt e = do , [MemSymbol w] -- Function symbols
, [MemLoadWarning]
, [SymbolResolutionError]
)
memoryForElf opt e = reprConstraints (elfAddrWidth (elfClass e)) $ do
let end = case Elf.elfData e of let end = case Elf.elfData e of
Elf.ELFDATA2LSB -> LittleEndian Elf.ELFDATA2LSB -> LittleEndian
Elf.ELFDATA2MSB -> BigEndian Elf.ELFDATA2MSB -> BigEndian
runMemLoader end (emptyMemory (elfAddrWidth (elfClass e))) $ do (secMap, mem, warnings) <-
case Elf.elfType e of runMemLoader end (emptyMemory (elfAddrWidth (elfClass e))) $ do
Elf.ET_REL -> case Elf.elfType e of
memoryForElfSections e Elf.ET_REL ->
_ -> do memoryForElfSections e
let regAdj = RegionAdjust { regionIndex = adjustedLoadRegionIndex e opt _ -> do
, regionOffset = loadRegionBaseOffset opt let base = MemAddr { addrBase = adjustedLoadRegionIndex e opt
} , addrOffset = fromInteger (loadRegionBaseOffset opt)
memoryForElfSegments regAdj e }
memoryForElfSegments base e
let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e
pure (mem, funcSymbols, warnings, symErrs)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Elf symbol utilities -- Elf symbol utilities
@ -935,28 +988,27 @@ instance Show SymbolResolutionError where
show (CouldNotResolveAddr sym) = "Could not resolve address of " ++ BSC.unpack sym ++ "." show (CouldNotResolveAddr sym) = "Could not resolve address of " ++ BSC.unpack sym ++ "."
show MultipleSymbolTables = "Elf contains multiple symbol tables." show MultipleSymbolTables = "Elf contains multiple symbol tables."
-- | Find an absolute symbol, of any time, not just function. -- | Find an symbol of any type -- not just functions.
resolveElfFuncSymbolAny' :: resolveElfSymbol :: Memory w -- ^ Memory object from Elf file.
Memory w -- ^ Memory object from Elf file. -> SectionIndexMap w -- ^ Section index mp from memory
-> SectionIndexMap w -- ^ Section index mp from memory -> Int -- ^ Index of symbol
-> Int -- ^ Index of symbol -> ElfSymbolTableEntry (ElfWordType w)
-> ElfSymbolTableEntry (ElfWordType w) -> Maybe (Either SymbolResolutionError (MemSymbol w))
-> Either SymbolResolutionError (MemSymbol w) resolveElfSymbol mem secMap idx ste
resolveElfFuncSymbolAny' mem secMap idx ste
-- Check symbol is defined -- Check symbol is defined
| Elf.steIndex ste == Elf.SHN_UNDEF = Left $ UndefSymbol (Elf.steName ste) | Elf.steIndex ste == Elf.SHN_UNDEF = Nothing
-- Check symbol name is non-empty -- Check symbol name is non-empty
| Elf.steName ste == "" = Left $ EmptySymbolName idx (Elf.steType ste) | Elf.steName ste == "" = Just $ Left $ EmptySymbolName idx (Elf.steType ste)
-- Lookup absolute symbol -- Lookup absolute symbol
| Elf.steIndex ste == Elf.SHN_ABS = reprConstraints (memAddrWidth mem) $ do | Elf.steIndex ste == Elf.SHN_ABS = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste let val = Elf.steValue ste
case resolveAddr mem 0 (fromIntegral val) of case resolveAddr mem 0 (fromIntegral val) of
Just addr -> Right $ Just addr -> Just $ Right $
MemSymbol { memSymbolName = Elf.steName ste MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr , memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste) , memSymbolSize = fromIntegral (Elf.steSize ste)
} }
Nothing -> Left $ CouldNotResolveAddr (Elf.steName ste) Nothing -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste)
-- Lookup symbol stored in specific section -- Lookup symbol stored in specific section
| otherwise = reprConstraints (memAddrWidth mem) $ do | otherwise = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste let val = Elf.steValue ste
@ -964,41 +1016,12 @@ resolveElfFuncSymbolAny' mem secMap idx ste
Just (base,sec) Just (base,sec)
| elfSectionAddr sec <= val && val < elfSectionAddr sec + Elf.elfSectionSize sec | elfSectionAddr sec <= val && val < elfSectionAddr sec + Elf.elfSectionSize sec
, off <- toInteger val - toInteger (elfSectionAddr sec) , off <- toInteger val - toInteger (elfSectionAddr sec)
, Just addr <- incSegmentOff base off -> do , Just addr <- incSegmentOff base off -> Just $ do
Right $ MemSymbol { memSymbolName = Elf.steName ste Right $ MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr , memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste) , memSymbolSize = fromIntegral (Elf.steSize ste)
} }
_ -> Left $ CouldNotResolveAddr (Elf.steName ste) _ -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste)
-- | Find an absolute symbol, of any time, not just function.
resolveElfFuncSymbolAny ::
Memory w -- ^ Memory object from Elf file.
-> SectionIndexMap w -- ^ Section index mp from memory
-> Int -- ^ Index of symbol
-> ElfSymbolTableEntry (ElfWordType w)
-> Maybe (Either SymbolResolutionError (MemSymbol w))
resolveElfFuncSymbolAny mem secMap idx ste
| Elf.steIndex ste == Elf.SHN_UNDEF = Nothing
| otherwise = Just (resolveElfFuncSymbolAny' mem secMap idx ste)
-- | This resolves an Elf symbol into a MemSymbol if it is likely a
-- pointer to a resolved function.
resolveElfFuncSymbol :: Memory w -- ^ Memory object from Elf file.
-> SectionIndexMap w -- ^ Section index mp from memory
-> Int -- ^ Index of symbol
-> ElfSymbolTableEntry (ElfWordType w)
-> Maybe (Either SymbolResolutionError (MemSymbol w))
resolveElfFuncSymbol mem secMap idx ste
-- Check this is a defined function symbol
-- Some NO_TYPE entries appear to correspond to functions, so we include those.
| (Elf.steType ste `elem` [ Elf.STT_FUNC, Elf.STT_NOTYPE ]) == False =
Nothing
-- Check symbol is defined
| Elf.steIndex ste == Elf.SHN_UNDEF = Nothing
-- Check symbol name is non-empty
| Elf.steName ste == "" = Just $ (resolveElfFuncSymbolAny' mem secMap idx ste)
| otherwise = Just (resolveElfFuncSymbolAny' mem secMap idx ste)
-- | Resolve symbol table entries defined in this Elf file to -- | Resolve symbol table entries defined in this Elf file to
-- a mem symbol -- a mem symbol
@ -1016,11 +1039,12 @@ resolveElfFuncSymbols mem secMap e =
case Elf.elfSymtab e of case Elf.elfSymtab e of
[] -> ([], []) [] -> ([], [])
[tbl] -> [tbl] ->
let entries = V.toList (Elf.elfSymbolTableEntries tbl) let entries = zip [0..] (V.toList (Elf.elfSymbolTableEntries tbl))
in partitionEithers (mapMaybe (uncurry (resolveElfFuncSymbol mem secMap)) (zip [0..] entries)) isRelevant (_,ste) = Elf.steType ste == Elf.STT_FUNC
funcEntries = filter isRelevant entries
in partitionEithers (mapMaybe (uncurry (resolveElfSymbol mem secMap)) funcEntries)
_ -> ([MultipleSymbolTables], []) _ -> ([MultipleSymbolTables], [])
-- | Resolve symbol table entries to the addresses in a memory. -- | Resolve symbol table entries to the addresses in a memory.
-- --
-- It takes the memory constructed from the Elf file, the section -- It takes the memory constructed from the Elf file, the section
@ -1037,11 +1061,9 @@ resolveElfFuncSymbolsAny mem secMap e =
[] -> ([], []) [] -> ([], [])
[tbl] -> [tbl] ->
let entries = V.toList (Elf.elfSymbolTableEntries tbl) let entries = V.toList (Elf.elfSymbolTableEntries tbl)
in partitionEithers (mapMaybe (uncurry (resolveElfFuncSymbolAny mem secMap)) (zip [0..] entries)) in partitionEithers (mapMaybe (uncurry (resolveElfSymbol mem secMap)) (zip [0..] entries))
_ -> ([MultipleSymbolTables], []) _ -> ([MultipleSymbolTables], [])
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- resolveElfContents -- resolveElfContents
@ -1075,19 +1097,16 @@ resolveElfContents :: LoadOptions
resolveElfContents loadOpts e = resolveElfContents loadOpts e =
case Elf.elfType e of case Elf.elfType e of
Elf.ET_REL -> do Elf.ET_REL -> do
(secMap, mem, warnings) <- memoryForElf loadOpts e (mem, funcSymbols, warnings, symErrs) <- memoryForElf loadOpts e
let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e
pure (fmap show warnings ++ fmap show symErrs, mem, Nothing, funcSymbols) pure (fmap show warnings ++ fmap show symErrs, mem, Nothing, funcSymbols)
Elf.ET_EXEC -> do Elf.ET_EXEC -> do
(secMap, mem, warnings) <- memoryForElf loadOpts e (mem, funcSymbols, warnings, symErrs) <- memoryForElf loadOpts e
let (entryWarn, mentry) = getElfEntry loadOpts mem e let (entryWarn, mentry) = getElfEntry loadOpts mem e
let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e Right (fmap show warnings ++ fmap show symErrs ++ entryWarn, mem, mentry, funcSymbols)
Right (fmap show warnings ++ entryWarn ++ fmap show symErrs, mem, mentry, funcSymbols)
Elf.ET_DYN -> do Elf.ET_DYN -> do
(secMap, mem, warnings) <- memoryForElf loadOpts e (mem, funcSymbols, warnings, symErrs) <- memoryForElf loadOpts e
let (entryWarn, mentry) = getElfEntry loadOpts mem e let (entryWarn, mentry) = getElfEntry loadOpts mem e
let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e pure (fmap show warnings ++ fmap show symErrs ++ entryWarn, mem, mentry, funcSymbols)
pure (fmap show warnings ++ entryWarn ++ fmap show symErrs, mem, mentry, funcSymbols)
Elf.ET_CORE -> do Elf.ET_CORE -> do
Left $ "Reopt does not support loading core files." Left $ "Reopt does not support loading core files."
tp -> do tp -> do

View File

@ -13,8 +13,12 @@ optimization.
module Data.Macaw.SCFG module Data.Macaw.SCFG
( SCFG(..) ( SCFG(..)
, SCFGBlock(..) , SCFGBlock(..)
, CallingConvention(..) , CallingConvention
, Stmt(..)
, TermStmt(..) , TermStmt(..)
, Value(..)
, AssignId(..)
, BlockIndex(..)
, module Data.Macaw.CFG.App , module Data.Macaw.CFG.App
) where ) where

2
deps/crucible vendored

@ -1 +1 @@
Subproject commit 31f432e23c94855273477f6db08cd9c73072e930 Subproject commit 60a10a7933bcfab2c444dd436aee82eed449cec9

2
deps/elf-edit vendored

@ -1 +1 @@
Subproject commit ed210d94e17b48be8905b6389f10d906dfc32b2e Subproject commit 78ca989cb31058968c5dc4e01009ee705eaeea09

2
deps/flexdis86 vendored

@ -1 +1 @@
Subproject commit f50f3a78776287e66a4b25e7f172a55b2818f2b2 Subproject commit 9304269acef1c82e9ae3676649574e371ac54c36

@ -1 +1 @@
Subproject commit 898020da43fbf9989a8877c1404c05767bb5df98 Subproject commit a30e12292371bc9abe6ccc631a0409df059449d9

View File

@ -19,4 +19,5 @@ packages:
extra-deps: extra-deps:
- monadLib-3.7.3 - monadLib-3.7.3
- panic-0.4.0.1
resolver: lts-11.5 resolver: lts-11.5

View File

@ -60,14 +60,14 @@ import What4.Symbol (userSymbol)
import qualified Lang.Crucible.Analysis.Postdom as C import qualified Lang.Crucible.Analysis.Postdom as C
import Lang.Crucible.Backend import Lang.Crucible.Backend
import qualified Lang.Crucible.CFG.Core as C import qualified Lang.Crucible.CFG.Core as C
import qualified Lang.Crucible.CFG.Extension as C
import qualified Lang.Crucible.CFG.Reg as CR import qualified Lang.Crucible.CFG.Reg as CR
import qualified Lang.Crucible.CFG.SSAConversion as C import qualified Lang.Crucible.CFG.SSAConversion as C
import qualified Lang.Crucible.FunctionHandle as C import qualified Lang.Crucible.FunctionHandle as C
import qualified Lang.Crucible.Simulator as C
import qualified Lang.Crucible.Simulator.ExecutionTree as C import qualified Lang.Crucible.Simulator.ExecutionTree as C
import qualified Lang.Crucible.Simulator.Intrinsics as C
import qualified Lang.Crucible.Simulator.GlobalState as C import qualified Lang.Crucible.Simulator.GlobalState as C
import qualified Lang.Crucible.Simulator.OverrideSim as C
import qualified Lang.Crucible.Simulator.RegMap as C
import System.IO (stdout) import System.IO (stdout)
@ -498,7 +498,7 @@ execMacawStmtExtension archStmtFn mvar globs (LFH lookupH) s0 st =
M.BoolTypeRepr -> freshConstant sym nm C.BaseBoolRepr M.BoolTypeRepr -> freshConstant sym nm C.BaseBoolRepr
_ -> error ("MacawFreshSymbolic: XXX type " ++ show t) _ -> error ("MacawFreshSymbolic: XXX type " ++ show t)
return (v,st) return (v,st)
where sym = C.stateSymInterface st where sym = st^.C.stateSymInterface
MacawLookupFunctionHandle _ args -> do MacawLookupFunctionHandle _ args -> do
(hv, st') <- doLookupFunctionHandle lookupH st mvar (C.regValue args) (hv, st') <- doLookupFunctionHandle lookupH st mvar (C.regValue args)
@ -576,7 +576,7 @@ macawExtensions f mvar globs lookupH =
-- | Run the simulator over a contiguous set of code. -- | Run the simulator over a contiguous set of code.
runCodeBlock :: forall sym arch blocks runCodeBlock :: forall sym arch blocks
. IsSymInterface sym . (C.IsSyntaxExtension (MacawExt arch), IsSymInterface sym)
=> sym => sym
-> MacawSymbolicArchFunctions arch -> MacawSymbolicArchFunctions arch
-- ^ Translation functions -- ^ Translation functions
@ -612,8 +612,8 @@ runCodeBlock sym archFns archEval halloc (initMem,globs) lookupH g regStruct = d
} }
-- Create the symbolic simulator state -- Create the symbolic simulator state
let initGlobals = C.insertGlobal mvar initMem C.emptyGlobals let initGlobals = C.insertGlobal mvar initMem C.emptyGlobals
let s = C.initSimState ctx initGlobals C.defaultErrorHandler let s = C.initSimState ctx initGlobals C.defaultAbortHandler
a <- C.runOverrideSim s macawStructRepr $ do a <- C.executeCrucible s $ C.runOverrideSim macawStructRepr $ do
let args :: C.RegMap sym (MacawFunctionArgs arch) let args :: C.RegMap sym (MacawFunctionArgs arch)
args = C.RegMap (Ctx.singleton (C.RegEntry macawStructRepr regStruct)) args = C.RegMap (Ctx.singleton (C.RegEntry macawStructRepr regStruct))
crucGenArchConstraints archFns $ crucGenArchConstraints archFns $

View File

@ -194,7 +194,7 @@ doGetGlobal ::
, CrucibleState s sym ext rtp blocks r ctx , CrucibleState s sym ext rtp blocks r ctx
) )
doGetGlobal st mvar globs addr = do doGetGlobal st mvar globs addr = do
let sym = stateSymInterface st let sym = st^.stateSymInterface
mem <- getMem st mvar mem <- getMem st mvar
regionNum <- natLit sym (fromIntegral (M.addrBase addr)) regionNum <- natLit sym (fromIntegral (M.addrBase addr))
offset <- bvLit sym (M.addrWidthNatRepr (M.addrWidthRepr addr)) (M.memWordInteger (M.addrOffset addr)) offset <- bvLit sym (M.addrWidthNatRepr (M.addrWidthRepr addr)) (M.memWordInteger (M.addrOffset addr))
@ -205,7 +205,19 @@ doGetGlobal st mvar globs addr = do
, "*** Region: " ++ show (M.addrBase addr) , "*** Region: " ++ show (M.addrBase addr)
, "*** Address: " ++ show addr , "*** Address: " ++ show addr
] ]
-- <<<<<<< HEAD
Just ptr -> return (ptr, st) Just ptr -> return (ptr, st)
-- =======
-- Just region ->
-- do mem <- getMem st mvar
-- let sym = st^.stateSymInterface
-- let w = M.addrWidthRepr addr
-- LeqProof <- pure $ addrWidthAtLeast16 w
-- let ?ptrWidth = M.addrWidthNatRepr w
-- off <- bvLit sym ?ptrWidth (M.memWordInteger (M.addrOffset addr))
-- res <- doPtrAddOffset sym mem region off
-- return (res, st)
-- >>>>>>> master
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@ -401,7 +413,7 @@ doReadMem st mvar globs w (BVMemRepr bytes endian) ptr0 =
do mem <- getMem st mvar do mem <- getMem st mvar
checkEndian mem endian checkEndian mem endian
let sym = stateSymInterface st let sym = st^.stateSymInterface
ty = bitvectorType (toBytes (widthVal bytes)) ty = bitvectorType (toBytes (widthVal bytes))
bitw = natMultiply (knownNat @8) bytes bitw = natMultiply (knownNat @8) bytes
@ -441,7 +453,7 @@ doCondReadMem st mvar globs w (BVMemRepr bytes endian) cond0 ptr0 def0 =
def = regValue def0 def = regValue def0
mem <- getMem st mvar mem <- getMem st mvar
checkEndian mem endian checkEndian mem endian
let sym = stateSymInterface st let sym = st^.stateSymInterface
ty = bitvectorType (toBytes (widthVal bytes)) ty = bitvectorType (toBytes (widthVal bytes))
bitw = natMultiply (knownNat @8) bytes bitw = natMultiply (knownNat @8) bytes
@ -490,7 +502,7 @@ doWriteMem st mvar globs w (BVMemRepr bytes endian) ptr0 val =
do mem <- getMem st mvar do mem <- getMem st mvar
checkEndian mem endian checkEndian mem endian
let sym = stateSymInterface st let sym = st^.stateSymInterface
ty = bitvectorType (toBytes (widthVal bytes)) ty = bitvectorType (toBytes (widthVal bytes))
LeqProof <- pure $ addrWidthIsPos w LeqProof <- pure $ addrWidthIsPos w
@ -547,7 +559,7 @@ ptrOp ::
ptrOp k st mvar w x0 y0 = ptrOp k st mvar w x0 y0 =
do mem <- getMem st mvar do mem <- getMem st mvar
LeqProof <- return (addrWidthIsPos w) LeqProof <- return (addrWidthIsPos w)
let sym = stateSymInterface st let sym = st^.stateSymInterface
x = regValue x0 x = regValue x0
y = regValue y0 y = regValue y0

View File

@ -50,7 +50,6 @@ import Control.Lens
import Control.Monad.Cont import Control.Monad.Cont
import Control.Monad.Except import Control.Monad.Except
import Control.Monad.ST import Control.Monad.ST
import qualified Data.Foldable as Fold
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Parameterized.Classes import Data.Parameterized.Classes
import qualified Data.Parameterized.Map as MapF import qualified Data.Parameterized.Map as MapF
@ -144,9 +143,6 @@ initGenState :: NonceGenerator (ST st_s) ids
-> GenState st_s ids -> GenState st_s ids
initGenState nonce_gen mem addr s = initGenState nonce_gen mem addr s =
GenState { assignIdGen = nonce_gen GenState { assignIdGen = nonce_gen
, _blockSeq = BlockSeq { _nextBlockID = 1
, _frontierBlocks = Seq.empty
}
, _blockState = emptyPreBlock s 0 addr , _blockState = emptyPreBlock s 0 addr
, genAddr = addr , genAddr = addr
, genMemory = mem , genMemory = mem
@ -156,13 +152,12 @@ initGenState nonce_gen mem addr s =
returnWithError :: GenState st_s ids returnWithError :: GenState st_s ids
-> X86TranslateError 64 -> X86TranslateError 64
-> ST st_s (BlockSeq ids, MemWord 64, Maybe (X86TranslateError 64)) -> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64))
returnWithError gs err = returnWithError gs err =
let curIPAddr = genAddr gs let curIPAddr = genAddr gs
term = (`TranslateError` Text.pack (show err)) term s = TranslateError s (Text.pack (show err))
b = finishBlock' (gs^.blockState) term b = finishBlock (gs^.blockState) term
res = seq b $ gs^.blockSeq & frontierBlocks %~ (Seq.|> b) in return (b, msegOffset curIPAddr, Just err)
in return (res, msegOffset curIPAddr, Just err)
-- | Translate block, returning blocks read, ending -- | Translate block, returning blocks read, ending
-- PC, and an optional error. and ending PC. -- PC, and an optional error. and ending PC.
@ -173,7 +168,7 @@ disassembleBlockImpl :: forall st_s ids
-- ^ Maximum offset for this addr. -- ^ Maximum offset for this addr.
-> [SegmentRange 64] -> [SegmentRange 64]
-- ^ List of contents to read next. -- ^ List of contents to read next.
-> ST st_s (BlockSeq ids, MemWord 64, Maybe (X86TranslateError 64)) -> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64))
disassembleBlockImpl gs max_offset contents = do disassembleBlockImpl gs max_offset contents = do
let curIPAddr = genAddr gs let curIPAddr = genAddr gs
case readInstruction' curIPAddr contents of case readInstruction' curIPAddr contents of
@ -191,7 +186,7 @@ disassembleBlockImpl gs max_offset contents = do
returnWithError gs (UnsupportedInstruction (genAddr gs) i) returnWithError gs (UnsupportedInstruction (genAddr gs) i)
Just exec -> do Just exec -> do
gsr <- gsr <-
runExceptT $ runX86Generator (\() s -> pure (mkGenResult s)) gs $ do runExceptT $ runX86Generator gs $ do
let next_ip_word = fromIntegral $ segmentOffset seg + off let next_ip_word = fromIntegral $ segmentOffset seg + off
let line = show curIPAddr ++ ": " ++ show (F.ppInstruction next_ip_word i) let line = show curIPAddr ++ ": " ++ show (F.ppInstruction next_ip_word i)
addStmt (Comment (Text.pack line)) addStmt (Comment (Text.pack line))
@ -200,18 +195,16 @@ disassembleBlockImpl gs max_offset contents = do
Left msg -> do Left msg -> do
returnWithError gs (ExecInstructionError (genAddr gs) i msg) returnWithError gs (ExecInstructionError (genAddr gs) i msg)
Right res -> do Right res -> do
case resState res of case res of
-- If IP after interpretation is the next_ip, there are no blocks, and we -- If IP after interpretation is the next_ip, there are no blocks, and we
-- haven't crossed max_offset, then keep running. -- haven't crossed max_offset, then keep running.
Just p_b UnfinishedGenResult p_b
| Seq.null (resBlockSeq res ^. frontierBlocks) | RelocatableValue _ v <- p_b^.(pBlockState . curIP)
, RelocatableValue _ v <- p_b^.(pBlockState . curIP)
, v == next_ip , v == next_ip
-- Check to see if we should continue -- Check to see if we should continue
, next_ip_off < max_offset , next_ip_off < max_offset
, Just next_ip_segaddr <- resolveSegmentOff seg next_ip_off -> do , Just next_ip_segaddr <- resolveSegmentOff seg next_ip_off -> do
let gs2 = GenState { assignIdGen = assignIdGen gs let gs2 = GenState { assignIdGen = assignIdGen gs
, _blockSeq = resBlockSeq res
, _blockState = p_b , _blockState = p_b
, genAddr = next_ip_segaddr , genAddr = next_ip_segaddr
, genMemory = genMemory gs , genMemory = genMemory gs
@ -226,8 +219,7 @@ disassembleBlockImpl gs max_offset contents = do
Right contents' -> Right contents' ->
disassembleBlockImpl gs2 max_offset contents' disassembleBlockImpl gs2 max_offset contents'
_ -> do _ -> do
let gs3 = finishBlock FetchAndExecute res return (finishGenResult res, next_ip_off, Nothing)
return (gs3, next_ip_off, Nothing)
-- | Disassemble block, returning either an error, or a list of blocks -- | Disassemble block, returning either an error, or a list of blocks
-- and ending PC. -- and ending PC.
@ -237,12 +229,12 @@ disassembleBlock :: forall s
-> ExploreLoc -> ExploreLoc
-> MemWord 64 -> MemWord 64
-- ^ Maximum number of bytes in ths block. -- ^ Maximum number of bytes in ths block.
-> ST s ([Block X86_64 s], MemWord 64, Maybe (X86TranslateError 64)) -> ST s (Block X86_64 s, MemWord 64, Maybe (X86TranslateError 64))
disassembleBlock mem nonce_gen loc max_size = do disassembleBlock mem nonce_gen loc max_size = do
let addr = loc_ip loc let addr = loc_ip loc
let gs = initGenState nonce_gen mem addr (initX86State loc) let gs = initGenState nonce_gen mem addr (initX86State loc)
let sz = msegOffset addr + max_size let sz = msegOffset addr + max_size
(gs', next_ip_off, maybeError) <- (b, next_ip_off, maybeError) <-
case addrContentsAfter mem (relativeSegmentAddr addr) of case addrContentsAfter mem (relativeSegmentAddr addr) of
Left msg -> Left msg ->
returnWithError gs (FlexdisMemoryError msg) returnWithError gs (FlexdisMemoryError msg)
@ -250,7 +242,7 @@ disassembleBlock mem nonce_gen loc max_size = do
disassembleBlockImpl gs sz contents disassembleBlockImpl gs sz contents
assert (next_ip_off > msegOffset addr) $ do assert (next_ip_off > msegOffset addr) $ do
let block_sz = next_ip_off - msegOffset addr let block_sz = next_ip_off - msegOffset addr
pure (Fold.toList (gs'^.frontierBlocks), block_sz, maybeError) pure (b, block_sz, maybeError)
-- | The abstract state for a function begining at a given address. -- | The abstract state for a function begining at a given address.
initialX86AbsState :: MemSegmentOff 64 -> AbsBlockState X86Reg initialX86AbsState :: MemSegmentOff 64 -> AbsBlockState X86Reg
@ -289,6 +281,7 @@ transferAbsValue r f =
ReadFSBase -> TopV ReadFSBase -> TopV
ReadGSBase -> TopV ReadGSBase -> TopV
CPUID _ -> TopV CPUID _ -> TopV
CMPXCHG8B{} -> TopV
RDTSC -> TopV RDTSC -> TopV
XGetBV _ -> TopV XGetBV _ -> TopV
PShufb{} -> TopV PShufb{} -> TopV
@ -338,7 +331,7 @@ tryDisassembleBlockFromAbsState :: forall s ids
-- ^ Maximum size of this block -- ^ Maximum size of this block
-> AbsBlockState X86Reg -> AbsBlockState X86Reg
-- ^ Abstract state of processor for defining state. -- ^ Abstract state of processor for defining state.
-> ExceptT String (ST s) ([Block X86_64 ids], MemWord 64, Maybe String) -> ExceptT String (ST s) (Block X86_64 ids, MemWord 64, Maybe String)
tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do
t <- t <-
case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of
@ -355,7 +348,7 @@ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do
} }
let gs = initGenState nonce_gen mem addr (initX86State loc) let gs = initGenState nonce_gen mem addr (initX86State loc)
let off = msegOffset addr let off = msegOffset addr
(gs', next_ip_off, maybeError) <- lift $ (b, next_ip_off, maybeError) <- lift $
case addrContentsAfter mem (relativeSegmentAddr addr) of case addrContentsAfter mem (relativeSegmentAddr addr) of
Left msg -> Left msg ->
returnWithError gs (FlexdisMemoryError msg) returnWithError gs (FlexdisMemoryError msg)
@ -363,8 +356,7 @@ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do
disassembleBlockImpl gs (off + max_size) contents disassembleBlockImpl gs (off + max_size) contents
assert (next_ip_off > off) $ do assert (next_ip_off > off) $ do
let sz = next_ip_off - off let sz = next_ip_off - off
let blocks = Fold.toList (gs'^.frontierBlocks) pure $! (b, sz, show <$> maybeError)
pure $! (blocks, sz, show <$> maybeError)
-- | Disassemble block, returning either an error, or a list of blocks -- | Disassemble block, returning either an error, or a list of blocks
-- and ending PC. -- and ending PC.
@ -382,7 +374,7 @@ disassembleBlockFromAbsState mem nonce_gen addr max_size ab = do
mr <- runExceptT $ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab mr <- runExceptT $ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab
case mr of case mr of
Left msg -> pure ([], 0, Just msg) Left msg -> pure ([], 0, Just msg)
Right r -> pure r Right (b,sz, merr) -> pure ([b],sz,merr)
-- | Attempt to identify the write to a stack return address, returning -- | Attempt to identify the write to a stack return address, returning
-- instructions prior to that write and return values. -- instructions prior to that write and return values.

View File

@ -73,7 +73,7 @@ instance HasRepr SIMDWidth NatRepr where
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- RepValSize -- RepValSize
-- | Rep value -- | A value for distinguishing between 1,2,4 and 8 byte values.
data RepValSize w data RepValSize w
= (w ~ 8) => ByteRepVal = (w ~ 8) => ByteRepVal
| (w ~ 16) => WordRepVal | (w ~ 16) => WordRepVal
@ -309,9 +309,31 @@ data X86PrimFn f tp where
-- | Read the 'GS' base address. -- | Read the 'GS' base address.
ReadGSBase :: X86PrimFn f (BVType 64) ReadGSBase :: X86PrimFn f (BVType 64)
-- | The CPUID instruction. -- | The CPUID instruction.
CPUID :: !(f (BVType 32)) -> X86PrimFn f (BVType 128)
-- --
-- Given value in eax register, this returns the concatenation of eax:ebx:ecx:edx. -- Given value in eax register, this returns the concatenation of eax:ebx:ecx:edx.
CPUID :: !(f (BVType 32)) -> X86PrimFn f (BVType 128)
-- | This implements the logic for the cmpxchg8b instruction
--
-- Given a statement, `CMPXCHG8B addr eax ebx ecx edx` this executes the following logic:
-- temp64 <- read addr
-- if edx:eax == tmp then do
-- *addr := ecx:ebx
-- return (true, eax, edx)
-- else
-- return (false, trunc 32 temp64, trunc 32 (temp64 >> 32))
CMPXCHG8B :: !(f (BVType 64))
-- ^ Address to read
-> !(f (BVType 32))
-- ^ Value in EAX
-> !(f (BVType 32))
-- ^ Value in EBX
-> !(f (BVType 32))
-- ^ Value in ECX
-> !(f (BVType 32))
-- ^ Value in EDX
-> X86PrimFn f (TupleType [BoolType, BVType 32, BVType 32])
-- | The RDTSC instruction. -- | The RDTSC instruction.
-- --
-- This returns the current time stamp counter a 64-bit value that will -- This returns the current time stamp counter a 64-bit value that will
@ -331,7 +353,7 @@ data X86PrimFn f tp where
-- @res[i] = x[j] where j = s[i](0..l) -- @res[i] = x[j] where j = s[i](0..l)
-- where @msb(y)@ returns the most-significant bit in byte @y@. -- where @msb(y)@ returns the most-significant bit in byte @y@.
PShufb :: (1 <= w) => !(SIMDWidth w) -> !(f (BVType w)) -> !(f (BVType w)) -> X86PrimFn f (BVType w) PShufb :: (1 <= w) => !(SIMDWidth w) -> !(f (BVType w)) -> !(f (BVType w)) -> X86PrimFn f (BVType w)
-- | Compares to memory regions -- | Compares to memory regions and return the number of bytes that were the same.
MemCmp :: !Integer MemCmp :: !Integer
-- /\ Number of bytes per value. -- /\ Number of bytes per value.
-> !(f (BVType 64)) -> !(f (BVType 64))
@ -537,7 +559,7 @@ data X86PrimFn f tp where
-> !(f (BVType 80)) -> !(f (BVType 80))
-> X86PrimFn f tp -> X86PrimFn f tp
{- | Unary operation on a vector. Should have no side effects. -} -- | Unary operation on a vector. Should have no side effects.
VOp1 :: (1 <= n) => VOp1 :: (1 <= n) =>
!(NatRepr n) -> {- /\ width of input/result -} !(NatRepr n) -> {- /\ width of input/result -}
!AVXOp1 -> {- /\ do this operation -} !AVXOp1 -> {- /\ do this operation -}
@ -597,6 +619,7 @@ instance HasRepr (X86PrimFn f) TypeRepr where
ReadFSBase -> knownRepr ReadFSBase -> knownRepr
ReadGSBase -> knownRepr ReadGSBase -> knownRepr
CPUID{} -> knownRepr CPUID{} -> knownRepr
CMPXCHG8B{} -> knownRepr
RDTSC{} -> knownRepr RDTSC{} -> knownRepr
XGetBV{} -> knownRepr XGetBV{} -> knownRepr
PShufb w _ _ -> BVTypeRepr (typeRepr w) PShufb w _ _ -> BVTypeRepr (typeRepr w)
@ -651,6 +674,7 @@ instance TraversableFC X86PrimFn where
ReadFSBase -> pure ReadFSBase ReadFSBase -> pure ReadFSBase
ReadGSBase -> pure ReadGSBase ReadGSBase -> pure ReadGSBase
CPUID v -> CPUID <$> go v CPUID v -> CPUID <$> go v
CMPXCHG8B a ax bx cx dx -> CMPXCHG8B <$> go a <*> go ax <*> go bx <*> go cx <*> go dx
RDTSC -> pure RDTSC RDTSC -> pure RDTSC
XGetBV v -> XGetBV <$> go v XGetBV v -> XGetBV <$> go v
PShufb w x y -> PShufb w <$> go x <*> go y PShufb w x y -> PShufb w <$> go x <*> go y
@ -694,6 +718,7 @@ instance IsArchFn X86PrimFn where
ReadFSBase -> pure $ text "fs.base" ReadFSBase -> pure $ text "fs.base"
ReadGSBase -> pure $ text "gs.base" ReadGSBase -> pure $ text "gs.base"
CPUID code -> sexprA "cpuid" [ pp code ] CPUID code -> sexprA "cpuid" [ pp code ]
CMPXCHG8B a ax bx cx dx -> sexprA "cmpxchg8b" [ pp a, pp ax, pp bx, pp cx, pp dx ]
RDTSC -> pure $ text "rdtsc" RDTSC -> pure $ text "rdtsc"
XGetBV code -> sexprA "xgetbv" [ pp code ] XGetBV code -> sexprA "xgetbv" [ pp code ]
PShufb _ x s -> sexprA "pshufb" [ pp x, pp s ] PShufb _ x s -> sexprA "pshufb" [ pp x, pp s ]
@ -744,6 +769,7 @@ x86PrimFnHasSideEffects f =
ReadFSBase -> False ReadFSBase -> False
ReadGSBase -> False ReadGSBase -> False
CPUID{} -> False CPUID{} -> False
CMPXCHG8B{} -> True
RDTSC -> False RDTSC -> False
XGetBV{} -> False XGetBV{} -> False
PShufb{} -> False PShufb{} -> False
@ -781,40 +807,40 @@ x86PrimFnHasSideEffects f =
-- X86Stmt -- X86Stmt
-- | An X86 specific statement. -- | An X86 specific statement.
data X86Stmt (v :: Type -> *) data X86Stmt (v :: Type -> *) where
= forall tp . WriteLoc :: !(X86PrimLoc tp) -> !(v tp) -> X86Stmt v
WriteLoc !(X86PrimLoc tp) !(v tp) StoreX87Control :: !(v (BVType 64)) -> X86Stmt v
| StoreX87Control !(v (BVType 64)) -- ^ Store the X87 control register in the given address.
-- ^ Store the X87 control register in the given location.
| MemCopy !Integer
!(v (BVType 64))
!(v (BVType 64))
!(v (BVType 64))
!(v BoolType)
-- ^ Copy a region of memory from a source buffer to a destination buffer.
--
-- In an expression @MemCopy bc v src dest dir@
-- * @bc@ is the number of bytes to copy at a time (1,2,4,8)
-- * @v@ is the number of values to move.
-- * @src@ is the start of source buffer.
-- * @dest@ is the start of destination buffer.
-- * @dir@ is a flag that indicates whether direction of move:
-- * 'True' means we should decrement buffer pointers after each copy.
-- * 'False' means we should increment the buffer pointers after each copy.
| forall n .
MemSet !(v (BVType 64))
-- /\ Number of values to assign
!(v (BVType n))
-- /\ Value to assign
!(v (BVType 64))
-- /\ Address to start assigning from.
!(v BoolType)
-- /\ Direction flag
| EMMS RepMovs :: !(RepValSize w)
-- ^ Empty MMX technology State. Sets the x87 FPU tag word to empty. -> !(v (BVType 64))
-- Probably OK to use this for both EMMS FEMMS, the second being a -> !(v (BVType 64))
-- a faster version from AMD 3D now. -> !(v (BVType 64))
-> !(v BoolType)
-> X86Stmt v
-- ^ Copy a region of memory from a source buffer to a destination buffer.
--
-- In an expression @RepMovs bc cnt src dest dir@
-- * @bc@ denotes the bytes to copy at a time.
-- * @cnt@ is the number of values to move.
-- * @src@ is the start of source buffer.
-- * @dest@ is the start of destination buffer.
-- * @dir@ is a flag that indicates whether direction of move:
-- * 'True' means we should decrement buffer pointers after each copy.
-- * 'False' means we should increment the buffer pointers after each copy.
MemSet :: !(v (BVType 64))
-- /\ Number of values to assign
-> !(v (BVType n))
-- /\ Value to assign
-> !(v (BVType 64))
-- /\ Address to start assigning from.
-> !(v BoolType)
-- /\ Direction flag
-> X86Stmt v
EMMS :: X86Stmt v
-- ^ Empty MMX technology State. Sets the x87 FPU tag word to empty.
-- Probably OK to use this for both EMMS FEMMS, the second being a a
-- faster version from AMD 3D now.
instance FunctorF X86Stmt where instance FunctorF X86Stmt where
fmapF = fmapFDefault fmapF = fmapFDefault
@ -827,7 +853,7 @@ instance TraversableF X86Stmt where
case stmt of case stmt of
WriteLoc loc v -> WriteLoc loc <$> go v WriteLoc loc v -> WriteLoc loc <$> go v
StoreX87Control v -> StoreX87Control <$> go v StoreX87Control v -> StoreX87Control <$> go v
MemCopy bc v src dest dir -> MemCopy bc <$> go v <*> go src <*> go dest <*> go dir RepMovs bc v src dest dir -> RepMovs bc <$> go v <*> go src <*> go dest <*> go dir
MemSet v src dest dir -> MemSet <$> go v <*> go src <*> go dest <*> go dir MemSet v src dest dir -> MemSet <$> go v <*> go src <*> go dest <*> go dir
EMMS -> pure EMMS EMMS -> pure EMMS
@ -836,9 +862,9 @@ instance IsArchStmt X86Stmt where
case stmt of case stmt of
WriteLoc loc rhs -> pretty loc <+> text ":=" <+> pp rhs WriteLoc loc rhs -> pretty loc <+> text ":=" <+> pp rhs
StoreX87Control addr -> pp addr <+> text ":= x87_control" StoreX87Control addr -> pp addr <+> text ":= x87_control"
MemCopy sz cnt src dest rev -> RepMovs sz cnt src dest rev ->
text "memcopy" <+> parens (hcat $ punctuate comma args) text "repMovs" <+> parens (hcat $ punctuate comma args)
where args = [pretty sz, pp cnt, pp src, pp dest, pp rev] where args = [pretty (repValSizeByteCount sz), pp cnt, pp src, pp dest, pp rev]
MemSet cnt val dest d -> MemSet cnt val dest d ->
text "memset" <+> parens (hcat $ punctuate comma args) text "memset" <+> parens (hcat $ punctuate comma args)
where args = [pp cnt, pp val, pp dest, pp d] where args = [pp cnt, pp val, pp dest, pp d]

View File

@ -199,8 +199,8 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where
-- Throw error if we try to read a relocation as a symbolic reference -- Throw error if we try to read a relocation as a symbolic reference
BSSRegion _:_ -> do BSSRegion _:_ -> do
throwMemoryError $ UnexpectedBSS (msAddr ms) throwMemoryError $ UnexpectedBSS (msAddr ms)
RelocationRegion r:_ -> do RelocationRegion r:_ ->
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "byte0" throwMemoryError $ UnexpectedByteRelocation (msAddr ms) r
ByteRegion bs:rest -> do ByteRegion bs:rest -> do
if BS.null bs then do if BS.null bs then do
throwMemoryError $ AccessViolation (msAddr ms) throwMemoryError $ AccessViolation (msAddr ms)
@ -219,23 +219,27 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where
BSSRegion _:_ -> do BSSRegion _:_ -> do
throwMemoryError $ UnexpectedBSS (msAddr ms) throwMemoryError $ UnexpectedBSS (msAddr ms)
RelocationRegion r:rest -> do RelocationRegion r:rest -> do
case r of let sym = relocationSym r
AbsoluteRelocation sym off end szCnt -> do let off = relocationOffset r
unless (szCnt == 4 && end == LittleEndian) $ do let isGood
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "dimm0" = relocationIsRel r == False
let ms' = ms { msOffset = msOffset ms + 4 && relocationSize r == 4
, msNext = rest && relocationEndianness r == LittleEndian
} when (not isGood) $ do
seq ms' $ MBR $ put ms' throwMemoryError $ Unsupported32ImmRelocation (msAddr ms) r
pure $ Flexdis.Imm32SymbolOffset sym (fromIntegral off) -- Returns whether the bytes in this relocation are thought of as signed or unsigned.
-- RelativeOffset addr ioff (fromIntegral off) let signed = relocationIsSigned r
RelativeRelocation _addr _off _end _szCnt -> do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "dimm1" let ms' = ms { msOffset = msOffset ms + 4
, msNext = rest
}
seq ms' $ MBR $ put ms'
pure $ Flexdis.Imm32SymbolOffset sym (fromIntegral off) signed
ByteRegion bs:rest -> do ByteRegion bs:rest -> do
v <- getUnsigned32 bs v <- getUnsigned32 bs
updateMSByteString ms bs rest 4 updateMSByteString ms bs rest 4
pure $! Flexdis.Imm32Concrete v pure $! Flexdis.Imm32Concrete (fromIntegral v)
readJump sz = do readJump sz = do
ms <- MBR get ms <- MBR get
@ -247,20 +251,22 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where
BSSRegion _:_ -> do BSSRegion _:_ -> do
throwMemoryError $ UnexpectedBSS (msAddr ms) throwMemoryError $ UnexpectedBSS (msAddr ms)
RelocationRegion r:rest -> do RelocationRegion r:rest -> do
case r of let sym = relocationSym r
AbsoluteRelocation{} -> do let off = relocationOffset r
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump0" -- Sanity checks
RelativeRelocation addr off end szCnt -> do let isGood
when (szCnt /= jsizeCount sz) $ do = relocationIsRel r
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump1" && relocationSize r == jsizeCount sz
when (end /= LittleEndian) $ do && relocationIsSigned r == False
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump2" && relocationEndianness r == LittleEndian
let ms' = ms { msOffset = msOffset ms + fromIntegral (jsizeCount sz) when (not isGood) $ do
, msNext = rest throwMemoryError $ UnsupportedJumpOffsetRelocation (msAddr ms) r
} let ms' = ms { msOffset = msOffset ms + fromIntegral (jsizeCount sz)
seq ms' $ MBR $ put ms' , msNext = rest
let ioff = fromIntegral $ msOffset ms - msStart ms }
pure $ Flexdis.RelativeOffset addr ioff (fromIntegral off) seq ms' $ MBR $ put ms'
let ioff = fromIntegral $ msOffset ms - msStart ms
pure $ Flexdis.RelativeOffset sym ioff (fromIntegral off)
ByteRegion bs:rest -> do ByteRegion bs:rest -> do
(v,c) <- getJumpBytes bs sz (v,c) <- getJumpBytes bs sz
updateMSByteString ms bs rest (fromIntegral c) updateMSByteString ms bs rest (fromIntegral c)
@ -275,7 +281,6 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- readInstruction -- readInstruction
-- | Read instruction at a given memory address. -- | Read instruction at a given memory address.
readInstruction' :: MemSegmentOff 64 readInstruction' :: MemSegmentOff 64
-- ^ Address to read from. -- ^ Address to read from.

View File

@ -24,12 +24,11 @@ module Data.Macaw.X86.Generator
, addArchTermStmt , addArchTermStmt
, evalArchFn , evalArchFn
, evalAssignRhs , evalAssignRhs
, shiftX86GCont
, asAtomicStateUpdate , asAtomicStateUpdate
, getState , getState
-- * GenResult -- * GenResult
, GenResult(..) , GenResult(..)
, finishBlock , finishGenResult
-- * PreBlock -- * PreBlock
, PreBlock , PreBlock
, emptyPreBlock , emptyPreBlock
@ -37,15 +36,9 @@ module Data.Macaw.X86.Generator
, pBlockState , pBlockState
, pBlockStmts , pBlockStmts
, pBlockApps , pBlockApps
, finishBlock' , finishBlock
-- * Misc
, BlockSeq(..)
, nextBlockID
, frontierBlocks
-- * GenState -- * GenState
, GenState(..) , GenState(..)
, mkGenResult
, blockSeq
, blockState , blockState
, curX86State , curX86State
-- * Expr -- * Expr
@ -186,53 +179,28 @@ pBlockApps :: Simple Lens (PreBlock ids) (MapF (App (Value X86_64 ids)) (Assign
pBlockApps = lens _pBlockApps (\s v -> s { _pBlockApps = v }) pBlockApps = lens _pBlockApps (\s v -> s { _pBlockApps = v })
-- | Finishes the current block, if it is started. -- | Finishes the current block, if it is started.
finishBlock' :: PreBlock ids finishBlock :: PreBlock ids
-> (RegState X86Reg (Value X86_64 ids) -> TermStmt X86_64 ids) -> (RegState X86Reg (Value X86_64 ids) -> TermStmt X86_64 ids)
-> Block X86_64 ids -> Block X86_64 ids
finishBlock' pre_b term = finishBlock preBlock term =
Block { blockLabel = pBlockIndex pre_b Block { blockLabel = pBlockIndex preBlock
, blockStmts = toList (pre_b^.pBlockStmts) , blockStmts = toList (preBlock^.pBlockStmts)
, blockTerm = term (pre_b^.pBlockState) , blockTerm = term (preBlock^.pBlockState)
} }
------------------------------------------------------------------------
-- BlockSeq
-- | List of blocks generated so far, and an index for generating new block labels.
data BlockSeq ids = BlockSeq
{ _nextBlockID :: !Word64
-- ^ Index of next block
, _frontierBlocks :: !(Seq (Block X86_64 ids))
-- ^ Blocks added to CFG
}
-- | Control flow blocs generated so far.
nextBlockID :: Simple Lens (BlockSeq ids) Word64
nextBlockID = lens _nextBlockID (\s v -> s { _nextBlockID = v })
-- | Blocks that are not in CFG that end with a FetchAndExecute,
-- which we need to analyze to compute new potential branch targets.
frontierBlocks :: Simple Lens (BlockSeq ids) (Seq (Block X86_64 ids))
frontierBlocks = lens _frontierBlocks (\s v -> s { _frontierBlocks = v })
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- GenResult -- GenResult
-- | The final result from the block generator. -- | The final result from the block generator.
data GenResult ids = GenResult { resBlockSeq :: !(BlockSeq ids) data GenResult ids
, resState :: !(Maybe (PreBlock ids)) = UnfinishedGenResult !(PreBlock ids)
} | FinishedGenResult !(Block X86_64 ids)
-- | Finishes the current block, if it is started. -- | Finishes the current block, if it is started.
finishBlock :: (RegState X86Reg (Value X86_64 ids) -> TermStmt X86_64 ids) finishGenResult :: GenResult ids
-> GenResult ids -> Block X86_64 ids
-> BlockSeq ids finishGenResult (UnfinishedGenResult pre_b) = finishBlock pre_b FetchAndExecute
finishBlock term st = finishGenResult (FinishedGenResult blk) = blk
case resState st of
Nothing -> resBlockSeq st
Just pre_b ->
let b = finishBlock' pre_b term
in seq b $ resBlockSeq st & frontierBlocks %~ (Seq.|> b)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- GenState -- GenState
@ -241,10 +209,8 @@ finishBlock term st =
data GenState st_s ids = GenState data GenState st_s ids = GenState
{ assignIdGen :: !(NonceGenerator (ST st_s) ids) { assignIdGen :: !(NonceGenerator (ST st_s) ids)
-- ^ 'NonceGenerator' for generating 'AssignId's -- ^ 'NonceGenerator' for generating 'AssignId's
, _blockSeq :: !(BlockSeq ids)
-- ^ Blocks generated so far.
, _blockState :: !(PreBlock ids) , _blockState :: !(PreBlock ids)
-- ^ Current block -- ^ Block that we are processing.
, genAddr :: !(MemSegmentOff 64) , genAddr :: !(MemSegmentOff 64)
-- ^ Address of instruction we are translating -- ^ Address of instruction we are translating
, genMemory :: !(Memory 64) , genMemory :: !(Memory 64)
@ -261,16 +227,6 @@ data GenState st_s ids = GenState
are working with an SSE instruction. -} are working with an SSE instruction. -}
} }
-- | Create a gen result from a state result.
mkGenResult :: GenState st_s ids -> GenResult ids
mkGenResult s = GenResult { resBlockSeq = s^.blockSeq
, resState = Just (s^.blockState)
}
-- | Control flow blocs generated so far.
blockSeq :: Simple Lens (GenState st_s ids) (BlockSeq ids)
blockSeq = lens _blockSeq (\s v -> s { _blockSeq = v })
genRegUpdates :: Simple Lens (GenState st_s ids) (MapF.MapF X86Reg (Value X86_64 ids)) genRegUpdates :: Simple Lens (GenState st_s ids) (MapF.MapF X86Reg (Value X86_64 ids))
genRegUpdates = lens _genRegUpdates (\s v -> s { _genRegUpdates = v }) genRegUpdates = lens _genRegUpdates (\s v -> s { _genRegUpdates = v })
@ -316,20 +272,11 @@ type X86GCont st_s ids a
-> ExceptT Text (ST st_s) (GenResult ids) -> ExceptT Text (ST st_s) (GenResult ids)
-- | Run an 'X86Generator' starting from a given state -- | Run an 'X86Generator' starting from a given state
runX86Generator :: X86GCont st_s ids a runX86Generator :: GenState st_s ids
-> GenState st_s ids -> X86Generator st_s ids ()
-> X86Generator st_s ids a
-> ExceptT Text (ST st_s) (GenResult ids) -> ExceptT Text (ST st_s) (GenResult ids)
runX86Generator k st (X86G m) = runReaderT (runContT m (ReaderT . k)) st runX86Generator st (X86G m) = runReaderT (runContT m (ReaderT . k)) st
where k () s = pure $! UnfinishedGenResult (s^.blockState)
-- | Capture the current continuation and 'GenState' in an 'X86Generator'
shiftX86GCont :: (X86GCont st_s ids a
-> GenState st_s ids
-> ExceptT Text (ST st_s) (GenResult ids))
-> X86Generator st_s ids a
shiftX86GCont f =
X86G $ ContT $ \k -> ReaderT $ \s -> f (runReaderT . k) s
getState :: X86Generator st_s ids (GenState st_s ids) getState :: X86Generator st_s ids (GenState st_s ids)
getState = X86G ask getState = X86G ask
@ -381,17 +328,13 @@ addArchStmt = addStmt . ExecArchStmt
-- | execute a primitive instruction. -- | execute a primitive instruction.
addArchTermStmt :: X86TermStmt ids -> X86Generator st ids () addArchTermStmt :: X86TermStmt ids -> X86Generator st ids ()
addArchTermStmt ts = do addArchTermStmt ts = do
shiftX86GCont $ \_ s0 -> do X86G $ ContT $ \_ -> ReaderT $ \s0 -> do
-- Get last block. -- Get last block.
let p_b = s0 ^. blockState let p_b = s0 ^. blockState
-- Create finished block. -- Create finished block.
let fin_b = finishBlock' p_b $ ArchTermStmt ts let fin_b = finishBlock p_b $ ArchTermStmt ts
seq fin_b $
-- Return early -- Return early
return GenResult return $! FinishedGenResult fin_b
{ resBlockSeq = s0 ^.blockSeq & frontierBlocks %~ (Seq.|> fin_b)
, resState = Nothing
}
-- | Are we in AVX mode? -- | Are we in AVX mode?
isAVX :: X86Generator st ids Bool isAVX :: X86Generator st ids Bool

View File

@ -21,7 +21,7 @@ module Data.Macaw.X86.Getters
, getSignExtendedValue , getSignExtendedValue
, truncateBVValue , truncateBVValue
, getCallTarget , getCallTarget
, getJumpTarget , doJump
, HasRepSize(..) , HasRepSize(..)
, getAddrRegOrSegment , getAddrRegOrSegment
, getAddrRegSegmentOrImm , getAddrRegSegmentOrImm
@ -54,7 +54,7 @@ import qualified Flexdis86 as F
import GHC.TypeLits (KnownNat) import GHC.TypeLits (KnownNat)
import Data.Macaw.CFG import Data.Macaw.CFG
import Data.Macaw.Types (BVType, n8, n16, n32, n64, typeWidth) import Data.Macaw.Types
import Data.Macaw.X86.Generator import Data.Macaw.X86.Generator
import Data.Macaw.X86.Monad import Data.Macaw.X86.Monad
import Data.Macaw.X86.X86Reg (X86Reg(..)) import Data.Macaw.X86.X86Reg (X86Reg(..))
@ -107,6 +107,27 @@ reg64Loc = fullRegister . X86_GP
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Getters -- Getters
getImm32 :: F.Imm32 -> BVExpr ids 32
getImm32 (F.Imm32Concrete w) = bvLit n32 (toInteger w)
getImm32 (F.Imm32SymbolOffset sym off _) =
bvTrunc' n32 (ValueExpr (SymbolValue Addr64 sym))
.+ bvLit n32 (toInteger off)
-- | Return the value of a 32-bit displacement.
getDisplacement32 :: F.Displacement -> BVExpr ids 32
getDisplacement32 F.NoDisplacement = bvLit n32 0
getDisplacement32 (F.Disp8 x) = bvLit n32 (toInteger x)
getDisplacement32 (F.Disp32 x) = getImm32 x
-- | Return the value of a 32-bit displacement.
getDisplacement64 :: F.Displacement -> BVExpr ids 64
getDisplacement64 F.NoDisplacement = bvLit n64 0
getDisplacement64 (F.Disp8 x) = bvLit n64 (toInteger x)
getDisplacement64 (F.Disp32 (F.Imm32Concrete x)) = bvLit n64 (toInteger x)
getDisplacement64 (F.Disp32 (F.Imm32SymbolOffset sym off _)) =
ValueExpr (SymbolValue Addr64 sym)
.+ bvLit n64 (toInteger off)
-- | Calculates the address corresponding to an AddrRef -- | Calculates the address corresponding to an AddrRef
getBVAddress :: F.AddrRef -> X86Generator st ids (BVExpr ids 64) getBVAddress :: F.AddrRef -> X86Generator st ids (BVExpr ids 64)
getBVAddress ar = getBVAddress ar =
@ -122,26 +143,28 @@ getBVAddress ar =
Just (i, r) -> Just (i, r) ->
bvTrunc n32 . (bvLit n32 (toInteger i) .*) bvTrunc n32 . (bvLit n32 (toInteger i) .*)
<$> get (reg32Loc r) <$> get (reg32Loc r)
let offset = uext n64 (base .+ scale .+ bvLit n32 (toInteger (F.displacementInt i32)))
let offset = uext n64 (base .+ scale .+ getDisplacement32 i32)
mk_absolute seg offset mk_absolute seg offset
F.IP_Offset_32 _seg _i32 -> fail "IP_Offset_32" F.IP_Offset_32 _seg _i32 -> fail "IP_Offset_32"
F.Offset_32 _seg _w32 -> F.Offset_32 _seg _w32 ->
fail "Offset_32" fail "Offset_32"
F.Offset_64 seg w64 -> do F.Offset_64 seg w64 -> do
mk_absolute seg (bvLit n64 (toInteger w64)) mk_absolute seg (bvLit n64 (toInteger w64))
F.Addr_64 seg m_r64 m_int_r64 i32 -> do F.Addr_64 seg m_r64 m_int_r64 disp -> do
base <- case m_r64 of base <- case m_r64 of
Nothing -> return v0_64 Nothing -> return v0_64
Just r -> get (reg64Loc r) Just r -> get (reg64Loc r)
scale <- case m_int_r64 of scale <-
Nothing -> return v0_64 case m_int_r64 of
Just (i, r) -> bvTrunc n64 . (bvLit n64 (toInteger i) .*) Nothing -> return v0_64
<$> get (reg64Loc r) Just (i, r) ->
let offset = base .+ scale .+ bvLit n64 (toInteger i32) bvTrunc n64 . (bvLit n64 (toInteger i) .*) <$> get (reg64Loc r)
let offset = base .+ scale .+ getDisplacement64 disp
mk_absolute seg offset mk_absolute seg offset
F.IP_Offset_64 seg i32 -> do F.IP_Offset_64 seg disp -> do
ip_val <- get rip ipVal <- get rip
mk_absolute seg (bvLit n64 (toInteger i32) .+ ip_val) mk_absolute seg (ipVal .+ getDisplacement64 disp)
where where
v0_64 = bvLit n64 0 v0_64 = bvLit n64 0
-- | Add the segment base to compute an absolute address. -- | Add the segment base to compute an absolute address.
@ -188,8 +211,6 @@ getBV128Addr ar = (`MemoryAddr` xmmMemRepr) <$> getBVAddress ar
getBV256Addr :: F.AddrRef -> X86Generator st ids (Location (Addr ids) (BVType 256)) getBV256Addr :: F.AddrRef -> X86Generator st ids (Location (Addr ids) (BVType 256))
getBV256Addr ar = (`MemoryAddr` ymmMemRepr) <$> getBVAddress ar getBV256Addr ar = (`MemoryAddr` ymmMemRepr) <$> getBVAddress ar
readBVAddress :: F.AddrRef -> MemRepr tp -> X86Generator st ids (Expr ids tp) readBVAddress :: F.AddrRef -> MemRepr tp -> X86Generator st ids (Expr ids tp)
readBVAddress ar repr = get . (`MemoryAddr` repr) =<< getBVAddress ar readBVAddress ar repr = get . (`MemoryAddr` repr) =<< getBVAddress ar
@ -252,21 +273,13 @@ getBVLocation l expected = do
Nothing -> Nothing ->
fail $ "Widths aren't equal: " ++ show (typeWidth v) ++ " and " ++ show expected fail $ "Widths aren't equal: " ++ show (typeWidth v) ++ " and " ++ show expected
getImm32 :: F.Imm32 -> X86Generator st ids (BVExpr ids 32)
getImm32 (F.Imm32Concrete w) =
pure $ bvLit n32 (toInteger w)
getImm32 (F.Imm32SymbolOffset sym off) = do
let symExpr = ValueExpr $ SymbolValue Addr64 sym
let offExpr = bvLit n64 (toInteger off)
pure $ bvTrunc' n32 (symExpr .+ offExpr)
-- | Return a bitvector value. -- | Return a bitvector value.
getSomeBVValue :: F.Value -> X86Generator st ids (SomeBV (Expr ids)) getSomeBVValue :: F.Value -> X86Generator st ids (SomeBV (Expr ids))
getSomeBVValue v = getSomeBVValue v =
case v of case v of
F.ByteImm w -> pure $! SomeBV $ bvLit n8 $ toInteger w F.ByteImm w -> pure $! SomeBV $ bvLit n8 $ toInteger w
F.WordImm w -> pure $! SomeBV $ bvLit n16 $ toInteger w F.WordImm w -> pure $! SomeBV $ bvLit n16 $ toInteger w
F.DWordImm i -> SomeBV <$> getImm32 i F.DWordImm i -> pure $! SomeBV $ getImm32 i
F.QWordImm w -> pure $! SomeBV $ bvLit n64 $ toInteger w F.QWordImm w -> pure $! SomeBV $ bvLit n64 $ toInteger w
F.JumpOffset _ _ -> fail "Jump Offset should not be treated as a BVValue." F.JumpOffset _ _ -> fail "Jump Offset should not be treated as a BVValue."
_ -> do _ -> do
@ -348,15 +361,15 @@ truncateBVValue n (SomeBV v)
| otherwise = | otherwise =
fail $ "Widths isn't >=: " ++ show (typeWidth v) ++ " and " ++ show n fail $ "Widths isn't >=: " ++ show (typeWidth v) ++ " and " ++ show n
resolveJumpOffset :: F.JumpOffset -> X86Generator s ids (BVExpr ids 64) resolveJumpOffset :: GenState st_s ids -> F.JumpOffset -> BVExpr ids 64
resolveJumpOffset (F.FixedOffset off) = resolveJumpOffset _ (F.FixedOffset off) = bvLit n64 (toInteger off)
pure $ bvLit n64 (toInteger off) resolveJumpOffset s (F.RelativeOffset symId insOff off) =
resolveJumpOffset (F.RelativeOffset symId insOff off) = do symVal .+ bvLit n64 (toInteger off) .- ValueExpr (RelocatableValue arepr relocAddr)
arepr <- memAddrWidth . genMemory <$> getState where arepr = memAddrWidth (genMemory s)
let symVal = ValueExpr (SymbolValue arepr symId) symVal = ValueExpr (SymbolValue arepr symId)
addrOff <- genAddr <$> getState addrOff = genAddr s
let relocAddr = relativeAddr (msegSegment addrOff) (msegOffset addrOff + fromIntegral insOff) relocAddr = relativeAddr (msegSegment addrOff) (msegOffset addrOff + fromIntegral insOff)
pure $ symVal .+ bvLit n64 (toInteger off) .- ValueExpr (RelocatableValue arepr relocAddr)
-- | Return the target of a call or jump instruction. -- | Return the target of a call or jump instruction.
getCallTarget :: F.Value getCallTarget :: F.Value
@ -366,19 +379,33 @@ getCallTarget v =
F.Mem64 ar -> get =<< getBV64Addr ar F.Mem64 ar -> get =<< getBV64Addr ar
F.QWordReg r -> get (reg64Loc r) F.QWordReg r -> get (reg64Loc r)
F.JumpOffset _ joff -> do F.JumpOffset _ joff -> do
(.+) <$> get rip <*> resolveJumpOffset joff s <- getState
(.+ resolveJumpOffset s joff) <$> get rip
_ -> fail "Unexpected argument" _ -> fail "Unexpected argument"
-- | Return the target of a call or jump instruction. -- | Return the target of a call or jump instruction.
getJumpTarget :: F.Value doJump :: Expr ids BoolType -> F.Value -> X86Generator st ids ()
-> X86Generator st ids (BVExpr ids 64) doJump cond v =
getJumpTarget v =
case v of case v of
F.JumpOffset _ joff -> do F.JumpOffset _ joff -> do
(.+) <$> get rip <*> resolveJumpOffset joff s <- getState
F.QWordReg r -> get (reg64Loc r) modify rip $ \ipVal -> mux cond (ipVal .+ resolveJumpOffset s joff) ipVal
F.Mem64 ar -> get =<< getBV64Addr ar
F.QWordImm w -> return (ValueExpr (BVValue knownNat (toInteger w))) F.QWordReg r -> do
ipVal <- get (reg64Loc r)
modify rip $ mux cond ipVal
F.Mem64 ar -> do
condVal <- eval cond
-- Address to read jump target from
ipAddr <- eval =<< getBVAddress ar
-- Get exiting ip value if we need it.
oldIpVal <- eval =<< get rip
-- Read the new memory if cond is true, and otherwise return old value.
ipVal <- evalAssignRhs $ CondReadMem qwordMemRepr condVal ipAddr oldIpVal
-- Set the ip value.
rip .= ipVal
F.QWordImm w -> do
modify rip $ mux cond $ bvKLit (toInteger w)
_ -> fail "Unexpected argument" _ -> fail "Unexpected argument"
------------------------------------------------------------------------ ------------------------------------------------------------------------
@ -412,7 +439,7 @@ getAddrRegSegmentOrImm v =
case v of case v of
F.ByteImm w -> pure $ Some $ HasRepSize ByteRepVal $ bvLit n8 (toInteger w) F.ByteImm w -> pure $ Some $ HasRepSize ByteRepVal $ bvLit n8 (toInteger w)
F.WordImm w -> pure $ Some $ HasRepSize WordRepVal $ bvLit n16 (toInteger w) F.WordImm w -> pure $ Some $ HasRepSize WordRepVal $ bvLit n16 (toInteger w)
F.DWordImm i -> Some . HasRepSize DWordRepVal <$> getImm32 i F.DWordImm i -> pure $ Some $ HasRepSize DWordRepVal $ getImm32 i
F.QWordImm w -> pure $ Some $ HasRepSize QWordRepVal $ bvLit n64 (toInteger w) F.QWordImm w -> pure $ Some $ HasRepSize QWordRepVal $ bvLit n64 (toInteger w)
_ -> do _ -> do
Some (HasRepSize rep l) <- getAddrRegOrSegment v Some (HasRepSize rep l) <- getAddrRegOrSegment v

View File

@ -154,11 +154,6 @@ module Data.Macaw.X86.Monad
, Data.Macaw.X86.Generator.eval , Data.Macaw.X86.Generator.eval
, Data.Macaw.X86.Generator.evalArchFn , Data.Macaw.X86.Generator.evalArchFn
, Data.Macaw.X86.Generator.addArchTermStmt , Data.Macaw.X86.Generator.addArchTermStmt
, ifte_
, when_
, unless_
, memcopy
, memcmp
, memset , memset
, even_parity , even_parity
, fnstcw , fnstcw
@ -181,15 +176,12 @@ import Control.Lens hiding ((.=))
import Control.Monad import Control.Monad
import qualified Data.Bits as Bits import qualified Data.Bits as Bits
import Data.Macaw.CFG import Data.Macaw.CFG
import Data.Macaw.CFG.Block
import Data.Macaw.Memory (Endianness(..)) import Data.Macaw.Memory (Endianness(..))
import Data.Macaw.Types import Data.Macaw.Types
import Data.Maybe import Data.Maybe
import Data.Parameterized.Classes import Data.Parameterized.Classes
import Data.Parameterized.NatRepr import Data.Parameterized.NatRepr
import qualified Data.Sequence as Seq
import qualified Flexdis86 as F import qualified Flexdis86 as F
import Flexdis86.Segment ( Segment )
import Flexdis86.Sizes (SizeConstraint(..)) import Flexdis86.Sizes (SizeConstraint(..))
import GHC.TypeLits as TypeLits import GHC.TypeLits as TypeLits
import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>))
@ -518,7 +510,7 @@ data Location addr (tp :: Type) where
DebugReg :: !F.DebugReg DebugReg :: !F.DebugReg
-> Location addr (BVType 64) -> Location addr (BVType 64)
SegmentReg :: !Segment SegmentReg :: !F.Segment
-> Location addr (BVType 16) -> Location addr (BVType 16)
X87ControlReg :: !(X87_ControlReg w) X87ControlReg :: !(X87_ControlReg w)
@ -1613,162 +1605,6 @@ modify r f = do
x <- get r x <- get r
r .= f x r .= f x
-- | Perform an if-then-else
ifte_ :: Expr ids BoolType
-> X86Generator st ids ()
-> X86Generator st ids ()
-> X86Generator st ids ()
-- Implement ifte_
-- Note that this implementation will run any code appearing after the ifte_
-- twice, once for the true branch and once for the false branch.
--
-- This could be changed to run the code afterwards once, but the cost would be
-- defining a way to merge processor states from the different branches, and making
-- sure that expression assignments generated in one branch were not referred to in
-- another branch.
--
-- One potential design change, not implemented here, would be to run both branches,
-- up to the point where they merge, and if the resulting PC is in the same location,
-- to merge in that case, otherwise to run them separately.
--
-- This would support the cmov instruction, but result in divergence for branches, which
-- I think is what we want.
ifte_ c_expr t f = eval c_expr >>= go
where
go (BoolValue True) = t
go (BoolValue False) = f
go cond =
shiftX86GCont $ \c s0 -> do
let p_b = s0 ^.blockState
let st = p_b^.pBlockState
let t_block_label = s0^.blockSeq^.nextBlockID
let s2 = s0 & blockSeq . nextBlockID +~ 1
& blockSeq . frontierBlocks .~ Seq.empty
& blockState .~ emptyPreBlock st t_block_label (genAddr s0)
-- Run true block.
t_seq <- finishBlock FetchAndExecute <$> runX86Generator c s2 t
-- Run false block
let f_block_label = t_seq^.nextBlockID
let s5 = GenState { assignIdGen = assignIdGen s0
, _blockSeq =
BlockSeq { _nextBlockID = t_seq^.nextBlockID + 1
, _frontierBlocks = Seq.empty
}
, _blockState = emptyPreBlock st f_block_label (genAddr s0)
, genAddr = genAddr s0
, genMemory = genMemory s0
, _genRegUpdates = _genRegUpdates s0
, avxMode = avxMode s0
}
f_seq <- finishBlock FetchAndExecute <$> runX86Generator c s5 f
-- Join results together.
let fin_b = finishBlock' p_b (\_ -> Branch cond t_block_label f_block_label)
seq fin_b $
return
GenResult { resBlockSeq =
BlockSeq { _nextBlockID = _nextBlockID f_seq
, _frontierBlocks = (s0^.blockSeq^.frontierBlocks Seq.|> fin_b)
Seq.>< t_seq^.frontierBlocks
Seq.>< f_seq^.frontierBlocks
}
, resState = Nothing
}
-- | Run a step if condition holds.
when_ :: Expr ids BoolType -> X86Generator st ids () -> X86Generator st ids ()
when_ p x = ifte_ p x (return ())
-- | Run a step if condition is false.
unless_ :: Expr ids BoolType -> X86Generator st ids () -> X86Generator st ids ()
unless_ p = ifte_ p (return ())
-- | Move n bits at a time, with count moves
--
-- Semantic sketch. The effect on memory should be like @memcopy@
-- below, not like @memcopy2@. These sketches ignore the issue of
-- copying in chunks of size `bytes`, which should only be an
-- efficiency concern.
--
-- @
-- void memcopy(int bytes, int copies, char *src, char *dst, int reversed) {
-- int maybeFlip = reversed ? -1 : 1;
-- for (int c = 0; c < copies; ++c) {
-- for (int b = 0; b < bytes; ++b) {
-- int offset = maybeFlip * (b + c * bytes);
-- *(dst + offset) = *(src + offset);
-- }
-- }
-- }
-- @
--
-- Compare with:
--
-- @
-- void memcopy2(int bytes, int copies, char *src, char *dst, int reversed) {
-- int maybeFlip = reversed ? -1 : 1;
-- /* The only difference from `memcopy` above: here the same memory is
-- copied whether `reversed` is true or false -- only the order of
-- copies changes -- whereas above different memory is copied for
-- each direction. */
-- if (reversed) {
-- /* Start at the end and work backwards. */
-- src += copies * bytes - 1;
-- dst += copies * bytes - 1;
-- }
-- for (int c = 0; c < copies; ++c) {
-- for (int b = 0; b < bytes; ++b) {
-- int offset = maybeFlip * (b + c * bytes);
-- *(dst + offset) = *(src + offset);
-- }
-- }
-- }
-- @
memcopy :: Integer
-- ^ Number of bytes to copy at a time (1,2,4,8)
-> BVExpr ids 64
-- ^ Number of values to move.
-> Addr ids
-- ^ Start of source buffer
-> Addr ids
-- ^ Start of destination buffer.
-> Expr ids BoolType
-- ^ Flag indicates direction of move:
-- True means we should decrement buffer pointers after each copy.
-- False means we should increment the buffer pointers after each copy.
-> X86Generator st ids ()
memcopy val_sz count src dest is_reverse = do
count_v <- eval count
src_v <- eval src
dest_v <- eval dest
is_reverse_v <- eval is_reverse
addArchStmt $ MemCopy val_sz count_v src_v dest_v is_reverse_v
-- | Compare the memory regions. Returns the number of elements which are
-- identical. If the direction is 0 then it is increasing, otherwise decreasing.
--
-- See `memcopy` above for explanation of which memory regions are
-- compared: the regions copied there are compared here.
memcmp :: Integer
-- ^ Number of bytes to compare at a time {1, 2, 4, 8}
-> BVExpr ids 64
-- ^ Number of elementes to compare
-> Addr ids
-- ^ Pointer to first buffer
-> Addr ids
-- ^ Pointer to second buffer
-> Expr ids BoolType
-- ^ Flag indicates direction of copy:
-- True means we should decrement buffer pointers after each copy.
-- False means we should increment the buffer pointers after each copy.
-> X86Generator st ids (BVExpr ids 64)
memcmp sz count src dest is_reverse = do
count_v <- eval count
is_reverse_v <- eval is_reverse
src_v <- eval src
dest_v <- eval dest
evalArchFn (MemCmp sz count_v src_v dest_v is_reverse_v)
-- | Set memory to the given value, for the number of words (nbytes -- | Set memory to the given value, for the number of words (nbytes
-- = count * typeWidth v) -- = count * typeWidth v)
memset :: (1 <= n) memset :: (1 <= n)
@ -1800,7 +1636,7 @@ fnstcw addr = do
addArchStmt =<< StoreX87Control <$> eval addr addArchStmt =<< StoreX87Control <$> eval addr
-- | Return the base address of the given segment. -- | Return the base address of the given segment.
getSegmentBase :: Segment -> X86Generator st ids (Addr ids) getSegmentBase :: F.Segment -> X86Generator st ids (Addr ids)
getSegmentBase seg = getSegmentBase seg =
case seg of case seg of
F.FS -> evalArchFn ReadFSBase F.FS -> evalArchFn ReadFSBase

View File

@ -18,6 +18,7 @@ module Data.Macaw.X86.Semantics
( execInstruction ( execInstruction
) where ) where
import Control.Lens ((^.))
import Control.Monad (when) import Control.Monad (when)
import qualified Data.Bits as Bits import qualified Data.Bits as Bits
import Data.Foldable import Data.Foldable
@ -27,6 +28,7 @@ import Data.Parameterized.Classes
import qualified Data.Parameterized.List as P import qualified Data.Parameterized.List as P
import Data.Parameterized.NatRepr import Data.Parameterized.NatRepr
import Data.Parameterized.Some import Data.Parameterized.Some
import Data.Parameterized.TraversableF
import Data.Proxy import Data.Proxy
import Data.Word import Data.Word
import qualified Flexdis86 as F import qualified Flexdis86 as F
@ -335,7 +337,7 @@ def_mov =
(F.DWordReg r, F.DWordSignedImm i) -> do (F.DWordReg r, F.DWordSignedImm i) -> do
reg32Loc r .= bvLit n32 (toInteger i) reg32Loc r .= bvLit n32 (toInteger i)
(F.DWordReg r, F.DWordImm i) -> do (F.DWordReg r, F.DWordImm i) -> do
(reg32Loc r .=) =<< getImm32 i reg32Loc r .= getImm32 i
(F.DWordReg r, F.Mem32 src) -> do (F.DWordReg r, F.Mem32 src) -> do
v <- get =<< getBV32Addr src v <- get =<< getBV32Addr src
reg32Loc r .= v reg32Loc r .= v
@ -421,24 +423,24 @@ def_cmpxchg = defBinaryLV "cmpxchg" $ \d s -> do
let p = a .=. temp let p = a .=. temp
zf_loc .= p zf_loc .= p
d .= mux p s temp d .= mux p s temp
modify acc $ \old -> mux p temp old modify acc $ mux p temp
def_cmpxchg8b :: InstructionDef def_cmpxchg8b :: InstructionDef
def_cmpxchg8b = def_cmpxchg8b =
defUnaryKnown "cmpxchg8b" $ \loc -> do defUnary "cmpxchg8b" $ \_ bloc -> do
temp64 <- get loc loc <-
edx_eax <- bvCat <$> get edx <*> get eax case bloc of
let p = edx_eax .=. temp64 F.Mem64 ar -> eval =<< getBVAddress ar
zf_loc .= p F.VoidMem ar -> eval =<< getBVAddress ar
ifte_ p _ -> fail "Unexpected argument to cmpxchg8b"
(do ecx_ebx <- bvCat <$> get ecx <*> get ebx eaxVal <- eval =<< get eax
loc .= ecx_ebx ebxVal <- eval =<< get ebx
) ecxVal <- eval =<< get ecx
(do let (upper,lower) = bvSplit temp64 edxVal <- eval =<< get edx
edx .= upper res <- evalArchFn (CMPXCHG8B loc eaxVal ebxVal ecxVal edxVal)
eax .= lower zf_loc .= app (TupleField knownRepr res P.index0)
loc .= edx_eax -- FIXME: this store is redundant, but it is in the ISA, so we do it. eax .= app (TupleField knownRepr res P.index1)
) edx .= app (TupleField knownRepr res P.index2)
def_movsx :: InstructionDef def_movsx :: InstructionDef
def_movsx = defBinaryLVge "movsx" $ \l v -> l .= sext (typeWidth l) v def_movsx = defBinaryLVge "movsx" $ \l v -> l .= sext (typeWidth l) v
@ -968,8 +970,8 @@ exec_ror l count = do
-- ** Bit and Byte Instructions -- ** Bit and Byte Instructions
getBT16RegOffset :: F.Value -> X86Generator st ids (BVExpr ids 16) getBT16RegOffset :: F.Value -> X86Generator st ids (BVExpr ids 16)
getBT16RegOffset val = getBT16RegOffset idx =
case val of case idx of
F.ByteImm i -> do F.ByteImm i -> do
pure $ bvLit n16 $ toInteger $ i Bits..&. 0xF pure $ bvLit n16 $ toInteger $ i Bits..&. 0xF
F.WordReg ir -> do F.WordReg ir -> do
@ -1029,18 +1031,24 @@ def_bt mnem act = defBinary mnem $ \_ base_loc idx -> do
iv <- getBT64RegOffset idx iv <- getBT64RegOffset idx
act knownNat (reg64Loc r) iv act knownNat (reg64Loc r) iv
set_bt_flags set_bt_flags
(F.Mem16 base, F.ByteImm i) -> do (F.Mem16 base, _) -> do
when (i >= 16) $ fail $ mnem ++ " given invalid index." case idx of
loc <- getBV16Addr base F.ByteImm i -> do
act knownNat loc (bvLit knownNat (toInteger i)) when (i >= 16) $ fail $ mnem ++ " given invalid index."
set_bt_flags baseAddr <- getBVAddress base
(F.Mem16 base, F.WordReg ir) -> do let loc = MemoryAddr baseAddr wordMemRepr
off <- get $! reg16Loc ir act knownNat loc (bvLit knownNat (toInteger i))
base_addr <- getBVAddress base F.WordReg ir -> do
let word_off = sext' n64 $ bvSar off (bvLit knownNat 4) off <- get $! reg16Loc ir
let loc = MemoryAddr (base_addr .+ word_off) wordMemRepr
let iv = off .&. bvLit knownNat 15 loc <- do
act knownNat loc iv baseAddr <- getBVAddress base
let wordOff = sext' n64 $ bvSar off (bvLit knownNat 4)
pure $! MemoryAddr (baseAddr .+ wordOff) wordMemRepr
let iv = off .&. bvLit knownNat 15
act knownNat loc iv
_ -> error $ "bt given unexpected index."
set_bt_flags set_bt_flags
(F.Mem32 base, F.ByteImm i) -> do (F.Mem32 base, F.ByteImm i) -> do
when (i >= 32) $ fail $ mnem ++ " given invalid index." when (i >= 32) $ fail $ mnem ++ " given invalid index."
@ -1050,9 +1058,9 @@ def_bt mnem act = defBinary mnem $ \_ base_loc idx -> do
set_bt_flags set_bt_flags
(F.Mem32 base, F.DWordReg ir) -> do (F.Mem32 base, F.DWordReg ir) -> do
off <- get $! reg32Loc ir off <- get $! reg32Loc ir
base_addr <- getBVAddress base baseAddr <- getBVAddress base
let dword_off = sext' n64 $ bvSar off (bvLit knownNat 5) let dwordOff = sext' n64 $ bvSar off (bvLit knownNat 5)
let loc = MemoryAddr (base_addr .+ dword_off) dwordMemRepr let loc = MemoryAddr (baseAddr .+ dwordOff) dwordMemRepr
let iv = off .&. bvLit knownNat 31 let iv = off .&. bvLit knownNat 31
act knownNat loc iv act knownNat loc iv
set_bt_flags set_bt_flags
@ -1064,9 +1072,9 @@ def_bt mnem act = defBinary mnem $ \_ base_loc idx -> do
set_bt_flags set_bt_flags
(F.Mem64 base, F.QWordReg ir) -> do (F.Mem64 base, F.QWordReg ir) -> do
off <- get $! reg64Loc ir off <- get $! reg64Loc ir
let qword_off = bvSar off (bvLit knownNat 6) let qwordOff = bvSar off (bvLit knownNat 6)
base_addr <- getBVAddress base baseAddr <- getBVAddress base
let loc = MemoryAddr (base_addr .+ qword_off) qwordMemRepr let loc = MemoryAddr (baseAddr .+ qwordOff) qwordMemRepr
let iv = off .&. bvLit knownNat 63 let iv = off .&. bvLit knownNat 63
act knownNat loc iv act knownNat loc iv
set_bt_flags set_bt_flags
@ -1123,14 +1131,11 @@ def_jcc_list =
defConditionals "j" $ \mnem cc -> defConditionals "j" $ \mnem cc ->
defUnary mnem $ \_ v -> do defUnary mnem $ \_ v -> do
a <- cc a <- cc
when_ a $ do doJump a v
tgt <- getJumpTarget v
rip .= tgt
def_jmp :: InstructionDef def_jmp :: InstructionDef
def_jmp = defUnary "jmp" $ \_ v -> do def_jmp = defUnary "jmp" $ \_ v -> do
tgt <- getJumpTarget v doJump true v
rip .= tgt
def_ret :: InstructionDef def_ret :: InstructionDef
def_ret = defVariadic "ret" $ \_ vs -> def_ret = defVariadic "ret" $ \_ vs ->
@ -1156,54 +1161,51 @@ def_ret = defVariadic "ret" $ \_ vs ->
-- FIXME: probably doesn't work for 32 bit address sizes -- FIXME: probably doesn't work for 32 bit address sizes
-- arguments are only for the size, they are fixed at rsi/rdi -- arguments are only for the size, they are fixed at rsi/rdi
exec_movs :: 1 <= w
=> Bool -- Flag indicating if RepPrefix appeared before instruction
-> NatRepr w -- Number of bytes to move at a time.
-> X86Generator st ids ()
exec_movs False w = do
let bytesPerOp = bvLit n64 (natValue w)
let repr = BVMemRepr w LittleEndian
-- The direction flag indicates post decrement or post increment.
df <- get df_loc
src <- get rsi
dest <- get rdi
v' <- get $ MemoryAddr src repr
MemoryAddr dest repr .= v'
rsi .= mux df (src .- bytesPerOp) (src .+ bytesPerOp)
rdi .= mux df (dest .- bytesPerOp) (dest .+ bytesPerOp)
exec_movs True w = do
-- FIXME: aso modifies this
let count_reg = rcx
bytesPerOp = natValue w
bytesPerOpv = bvLit n64 bytesPerOp
-- The direction flag indicates post decrement or post increment.
df <- get df_loc
src <- get rsi
dest <- get rdi
count <- get count_reg
let total_bytes = count .* bytesPerOpv
-- FIXME: we might need direction for overlapping regions
count_reg .= bvLit n64 (0::Integer)
memcopy bytesPerOp count src dest df
rsi .= mux df (src .- total_bytes) (src .+ total_bytes)
rdi .= mux df (dest .- total_bytes) (dest .+ total_bytes)
def_movs :: InstructionDef def_movs :: InstructionDef
def_movs = defBinary "movs" $ \ii loc _ -> do def_movs = defBinary "movs" $ \ii loc _ -> do
case loc of let pfx = F.iiPrefixes ii
F.Mem8 F.Addr_64{} -> Some w <-
exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 1) case loc of
F.Mem16 F.Addr_64{} -> F.Mem8{} -> pure (Some ByteRepVal)
exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 2) F.Mem16{} -> pure (Some WordRepVal)
F.Mem32 F.Addr_64{} -> F.Mem32{} -> pure (Some DWordRepVal)
exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 4) F.Mem64{} -> pure (Some QWordRepVal)
F.Mem64 F.Addr_64{} -> _ -> error "Bad argument to movs"
exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 8) let bytesPerOp = bvLit n64 (repValSizeByteCount w)
_ -> fail "Bad argument to movs" df <- get df_loc
src <- get rsi
dest <- get rdi
case pfx^.F.prLockPrefix of
F.RepPrefix -> do
when (pfx^.F.prASO) $ do
fail "Rep prefix semantics not defined when address size override is true."
-- The direction flag indicates post decrement or post increment.
count <- get rcx
addArchStmt =<< traverseF eval (RepMovs w count src dest df)
-- We adjust rsi and rdi by a negative value if df is true.
-- The formula is organized so that the bytesPerOp literal is
-- passed to the multiply and we can avoid non-linear arithmetic.
let adj = bytesPerOp .* mux df (bvNeg count) count
rcx .= bvLit n64 (0::Integer)
rsi .= src .+ adj
rdi .= dest .+ adj
F.NoLockPrefix -> do
let repr = repValSizeMemRepr w
-- The direction flag indicates post decrement or post increment.
v' <- get $ MemoryAddr src repr
MemoryAddr dest repr .= v'
-- We adjust rsi and rdi by a negative value if df is true.
-- The formula is organized so that the bytesPerOp literal is
-- passed to the multiply and we can avoid non-linear arithmetic.
let adj = mux df (bvNeg bytesPerOp) bytesPerOp
rsi .= src .+ adj
rdi .= dest .+ adj
_ -> do
fail "movs given unsupported lock prefix"
-- FIXME: can also take rep prefix
-- FIXME: we ignore the aso here.
-- | CMPS/CMPSB Compare string/Compare byte string -- | CMPS/CMPSB Compare string/Compare byte string
-- CMPS/CMPSW Compare string/Compare word string -- CMPS/CMPSW Compare string/Compare word string
-- CMPS/CMPSD Compare string/Compare doubleword string -- CMPS/CMPSD Compare string/Compare doubleword string
@ -1221,26 +1223,39 @@ exec_cmps repz_pfx rval = repValHasSupportedWidth rval $ do
let bytesPerOp' = bvLit n64 bytesPerOp let bytesPerOp' = bvLit n64 bytesPerOp
if repz_pfx then do if repz_pfx then do
count <- get rcx count <- get rcx
unless_ (count .=. bvKLit 0) $ do count_v <- eval count
nsame <- memcmp bytesPerOp count v_rsi v_rdi df src_v <- eval v_rsi
let equal = (nsame .=. count) dest_v <- eval v_rdi
nwordsSeen = mux equal count (count .- (nsame .+ bvKLit 1)) is_reverse_v <- eval df
nsame <- evalArchFn $ MemCmp bytesPerOp count_v src_v dest_v is_reverse_v
let equal = (nsame .=. count)
nwordsSeen = mux equal count (count .- (nsame .+ bvKLit 1))
-- we need to set the flags as if the last comparison was done, hence this. -- we need to set the flags as if the last comparison was done, hence this.
let lastWordBytes = (nwordsSeen .- bvKLit 1) .* bytesPerOp' let lastWordBytes = (nwordsSeen .- bvKLit 1) .* bytesPerOp'
lastSrc = mux df (v_rsi .- lastWordBytes) (v_rsi .+ lastWordBytes) lastSrc1 <- eval $ mux df (v_rsi .- lastWordBytes) (v_rsi .+ lastWordBytes)
lastDest = mux df (v_rdi .- lastWordBytes) (v_rdi .+ lastWordBytes) lastSrc2 <- eval $ mux df (v_rdi .- lastWordBytes) (v_rdi .+ lastWordBytes)
-- we do this to make it obvious so repz cmpsb ; jz ... is clear
let nbytesSeen = nwordsSeen .* bytesPerOp'
v' <- get $ MemoryAddr lastDest repr -- Determine if count ever ran.
exec_cmp (MemoryAddr lastSrc repr) v' -- FIXME: right way around? nzCount <- eval $ count .=/=. bvKLit 0
-- we do this to make it obvious so repz cmpsb ; jz ... is clear src1Val <- evalAssignRhs $ CondReadMem repr nzCount lastSrc1 (mkLit knownNat 0)
zf_loc .= equal src2Val <- evalAssignRhs $ CondReadMem repr nzCount lastSrc2 (mkLit knownNat 0)
let nbytesSeen = nwordsSeen .* bytesPerOp'
rsi .= mux df (v_rsi .- nbytesSeen) (v_rsi .+ nbytesSeen) -- Set result value.
rdi .= mux df (v_rdi .- nbytesSeen) (v_rdi .+ nbytesSeen) let res = src1Val .- src2Val
rcx .= (count .- nwordsSeen) -- Set flags
pf_val <- even_parity (least_byte res)
modify of_loc $ mux (ValueExpr nzCount) $ ssub_overflows src1Val src2Val
modify af_loc $ mux (ValueExpr nzCount) $ usub4_overflows src1Val src2Val
modify cf_loc $ mux (ValueExpr nzCount) $ usub_overflows src1Val src2Val
modify sf_loc $ mux (ValueExpr nzCount) $ msb res
modify pf_loc $ mux (ValueExpr nzCount) $ pf_val
modify rsi $ mux (ValueExpr nzCount) $ mux df (v_rsi .- nbytesSeen) (v_rsi .+ nbytesSeen)
modify rdi $ mux (ValueExpr nzCount) $ mux df (v_rdi .- nbytesSeen) (v_rdi .+ nbytesSeen)
modify rcx $ mux (ValueExpr nzCount) $ count .- nwordsSeen
else do else do
v' <- get $ MemoryAddr v_rdi repr v' <- get $ MemoryAddr v_rdi repr
exec_cmp (MemoryAddr v_rsi repr) v' -- FIXME: right way around? exec_cmp (MemoryAddr v_rsi repr) v' -- FIXME: right way around?

View File

@ -6,22 +6,21 @@ module ElfX64Linux (
elfX64LinuxTests elfX64LinuxTests
) where ) where
import Control.Arrow ( first ) import Control.Lens ( (^.) )
import Control.Lens ( (^.) )
import Control.Monad ( unless ) import Control.Monad ( unless )
import qualified Control.Monad.Catch as C import qualified Control.Monad.Catch as C
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.Foldable as F import qualified Data.Foldable as F
import qualified Data.Map as M import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as S import qualified Data.Set as S
import Data.Typeable ( Typeable ) import Data.Typeable ( Typeable )
import Data.Word ( Word64 ) import Data.Word ( Word64 )
import System.FilePath ( dropExtension, replaceExtension ) import System.FilePath
import qualified Test.Tasty as T import qualified Test.Tasty as T
import qualified Test.Tasty.HUnit as T import qualified Test.Tasty.HUnit as T
import Text.Printf ( printf ) import Text.Printf ( printf )
import Text.Read ( readMaybe ) import Text.Read ( readMaybe )
import Numeric (showHex)
import qualified Data.ElfEdit as E import qualified Data.ElfEdit as E
@ -31,34 +30,22 @@ import qualified Data.Macaw.Memory.ElfLoader as MM
import qualified Data.Macaw.Discovery as MD import qualified Data.Macaw.Discovery as MD
import qualified Data.Macaw.X86 as RO import qualified Data.Macaw.X86 as RO
-- | This is an offset we use to change the load address of the text section of -- |
-- the binary.
--
-- The current binaries are position independent. This means that the load
-- address is around 0x100. The problem is that there are constant offsets from
-- the stack that are in this range; the abstract interpretation in AbsState.hs
-- then interprets stack offsets as code pointers (since small integer values
-- look like code pointers when the code is mapped at these low addresses).
-- This throws off the abstract interpretation by hitting a "stack offset + code
-- pointer" case, which resolves to Top.
--
-- This offset forces macaw to load the binary at a higher address where this
-- accidental overlap is less likely. We still need a more principled solution
-- to this problem.
-- Note (JHx): The code appears to be working without this, so I've disableed it.
--loadOffset :: Word64
--loadOffset = 0x400000
elfX64LinuxTests :: [FilePath] -> T.TestTree elfX64LinuxTests :: [FilePath] -> T.TestTree
elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest
data Addr = Addr Int Word64
deriving (Read,Show,Eq)
-- ^ An address is a region index and offset
-- | The type of expected results for test cases -- | The type of expected results for test cases
data ExpectedResult = data ExpectedResult =
R { funcs :: [(Word64, [(Word64, Integer)])] R { funcs :: [(Addr, [(Addr, Integer)])]
-- ^ The first element of the pair is the address of entry point -- ^ The first element of the pair is the address of entry point
-- of the function. The list is a list of the addresses of the -- of the function. The list is a list of the addresses of the
-- basic blocks in the function (including the first block). -- basic blocks in the function (including the first block) and the
, ignoreBlocks :: [Word64] -- size of the block
, ignoreBlocks :: [Addr]
-- ^ This is a list of discovered blocks to ignore. This is -- ^ This is a list of discovered blocks to ignore. This is
-- basically just the address of the instruction after the exit -- basically just the address of the instruction after the exit
-- syscall, as macaw doesn't know that exit never returns and -- syscall, as macaw doesn't know that exit never returns and
@ -66,32 +53,32 @@ data ExpectedResult =
} }
deriving (Read, Show, Eq) deriving (Read, Show, Eq)
-- | Given a path to an expected file, this
mkTest :: FilePath -> T.TestTree mkTest :: FilePath -> T.TestTree
mkTest fp = T.testCase fp $ withELF exeFilename (testDiscovery fp) mkTest fp = T.testCase fp $ withELF elfFilename (testDiscovery fp)
where where
asmFilename = dropExtension fp elfFilename = dropExtension fp
exeFilename = replaceExtension asmFilename "exe"
-- | Run a test over a given expected result filename and the ELF file -- | Run a test over a given expected result filename and the ELF file
-- associated with it -- associated with it
testDiscovery :: FilePath -> E.Elf 64 -> IO () testDiscovery :: FilePath -> E.Elf 64 -> IO ()
testDiscovery expectedFilename elf = testDiscovery expectedFilename elf =
withMemory MM.Addr64 elf $ \mem mentry -> do withMemory MM.Addr64 elf $ \mem entries -> do
let Just entryPoint = mentry let di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty entries []
di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty [entryPoint] []
let memIdx = case E.elfType elf of
E.ET_DYN -> 1
E.ET_EXEC -> 0
eidx -> error $ "Unexpected elf type " ++ show eidx
expectedString <- readFile expectedFilename expectedString <- readFile expectedFilename
case readMaybe expectedString of case readMaybe expectedString of
Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
Just er -> do Just er -> do
let toSegOff :: Word64 -> MM.MemSegmentOff 64 let toSegOff :: Addr -> MM.MemSegmentOff 64
toSegOff off = toSegOff (Addr idx off) = do
case MM.resolveAddr mem memIdx (fromIntegral off) of let addr :: MM.MemAddr 64
addr = MM.MemAddr idx (fromIntegral off)
case MM.asSegmentOff mem addr of
Just a -> a Just a -> a
Nothing -> error $ "Could not resolve offset " ++ showHex off "" Nothing -> do
let ppSeg seg = " Segment: " ++ show (MM.relativeAddr seg 0)
error $ "Could not resolve address : " ++ show addr ++ "\n"
++ unlines (fmap ppSeg (MM.memSegments mem))
let expectedEntries = M.fromList let expectedEntries = M.fromList
[ (toSegOff entry [ (toSegOff entry
, S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts) , S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts)
@ -141,7 +128,7 @@ withMemory :: forall w m a
. (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w)) . (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w))
=> MM.AddrWidthRepr w => MM.AddrWidthRepr w
-> E.Elf w -> E.Elf w
-> (MM.Memory w -> Maybe (MM.MemSegmentOff w) -> m a) -> (MM.Memory w -> [MM.MemSegmentOff w] -> m a)
-> m a -> m a
withMemory _relaWidth e k = do withMemory _relaWidth e k = do
let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing
@ -151,9 +138,9 @@ withMemory _relaWidth e k = do
} }
case MM.resolveElfContents opt e of case MM.resolveElfContents opt e of
Left err -> C.throwM (MemoryLoadError err) Left err -> C.throwM (MemoryLoadError err)
Right (warn, mem, mentry, _sym) -> Right (warn, mem, mentry, syms) ->
if length warn >= 3 then if null warn then
k mem mentry k mem (maybeToList mentry ++ fmap MM.memSymbolStart syms)
else else
error $ "Warnings while loading Elf " ++ show warn error $ "Warnings while loading Elf " ++ show warn

View File

@ -7,8 +7,7 @@ import qualified ElfX64Linux as T
main :: IO () main :: IO ()
main = do main = do
x64AsmTests <- namesMatching "tests/x64/*.s.expected" x64AsmTests <- namesMatching "tests/x64/*.expected"
T.defaultMain $ T.testGroup "ReoptTests" [ T.defaultMain $ T.testGroup "ReoptTests" [
T.elfX64LinuxTests x64AsmTests T.elfX64LinuxTests x64AsmTests
] ]

View File

@ -14,7 +14,10 @@ test-tail-call.exe: test-tail-call.c
%.s: %.c %.s: %.c
gcc -fno-stack-protector -foptimize-sibling-calls -S -c $< -o $@ gcc -fno-stack-protector -foptimize-sibling-calls -S -c $< -o $@
%.o: %.c
gcc -fno-stack-protector -c $< -o $@
.PRECIOUS: %.s .PRECIOUS: %.s
clean: clean:
rm *.s *.exe rm -f *.o *.s *.exe

View File

@ -0,0 +1,3 @@
R { funcs = [(Addr 1 0x2b1, [(Addr 1 0x2b1, 14), (Addr 1 0x2ce , 16), (Addr 1 0x2bf, 15)])]
, ignoreBlocks = [Addr 1 0x2de]
}

View File

@ -1,3 +0,0 @@
R { funcs = [(0x2b1, [(0x2b1, 14), (0x2ce, 16), (0x2bf, 15)])]
, ignoreBlocks = [0x2de]
}

View File

@ -0,0 +1,6 @@
R { funcs = [ (Addr 1 0x336, [(Addr 1 0x336, 64), (Addr 1 0x376, 22)])
, (Addr 1 0x2be, [(Addr 1 0x2be, 90), (Addr 1 0x318, 30)])
, (Addr 1 0x2b1, [(Addr 1 0x2b1, 13)])
]
, ignoreBlocks = [Addr 1 0x38c]
}

View File

@ -1,6 +0,0 @@
R { funcs = [ (0x336, [(0x336, 64), (0x376, 22)])
, (0x2be, [(0x2be, 90), (0x318, 30)])
, (0x2b1, [(0x2b1, 13)])
]
, ignoreBlocks = [0x38c]
}

View File

@ -0,0 +1,5 @@
R { funcs = [ (Addr 1 0x2b1, [(Addr 1 0x2b1, 14)])
, (Addr 1 0x2bf, [(Addr 1 0x2bf, 35), (Addr 1 0x2e2, 22)])
]
, ignoreBlocks = [Addr 1 0x2f8]
}

View File

@ -1,5 +0,0 @@
R { funcs = [ (0x2b1, [(0x2b1, 14)])
, (0x2bf, [(0x2bf, 35), (0x2e2, 22)])
]
, ignoreBlocks = [0x2f8]
}

View File

@ -0,0 +1,4 @@
R { funcs = [(Addr 0 0x400144,
[(Addr 0 0x400144, 14), (Addr 0 0x40014c, 6), (Addr 0 0x400152, 7)])]
, ignoreBlocks = []
}

View File

@ -1,3 +0,0 @@
R { funcs = [(0x400144, [(0x400144, 14), (0x40014c, 6), (0x400152, 7)])]
, ignoreBlocks = []
}

View File

@ -0,0 +1,32 @@
int a(int x);
int b();
int c();
int d();
int e();
int lookup(int i) {
switch (i) {
case 0:
return a(1);
case 1:
return b();
case 2:
return c();
case 3:
return d();
case 4:
return e();
case 5:
return 5;
case 6:
return 1123213;
case 7:
return 191286;
case 8:
return 921312;
default:
return 0;
}
}

Binary file not shown.

View File

@ -0,0 +1,13 @@
R { funcs = [(Addr 1 0
, [ (Addr 1 0, 9), (Addr 1 9,9), (Addr 1 0x18, 6)
, (Addr 1 0x20, 6)
, (Addr 1 0x30, 6)
, (Addr 1 0x40, 6)
, (Addr 1 0x50, 10)
, (Addr 1 0x60, 7)
, (Addr 1 0x70, 7)
, (Addr 1 0x80, 7)
, (Addr 1 0x90, 7)
, (Addr 1 0xa0, 3)])]
, ignoreBlocks = []
}

View File

@ -0,0 +1,3 @@
R { funcs = [(Addr 1 0x2b1, [(Addr 1 0x2b1, 20)])]
, ignoreBlocks = [Addr 1 0x2c5]
}

View File

@ -1,3 +0,0 @@
R { funcs = [(0x2b1, [(0x2b1, 20)])]
, ignoreBlocks = [0x2c5]
}

View File

@ -0,0 +1,6 @@
R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)])
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11), (Addr 1 0x2c0, 7)])
, (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)])
]
, ignoreBlocks = [Addr 1 0x2fb]
}

View File

@ -1,5 +0,0 @@
R { funcs = [ (0x2d0, [(0x2d0, 11), (0x2c0, 7)])
, (0x2e0, [(0x2e0, 11), (0x2eb, 16)])
]
, ignoreBlocks = [0x2fb]
}

View File

@ -31,12 +31,11 @@ module Data.Macaw.X86.Crucible
) where ) where
import Data.Parameterized.NatRepr import Control.Lens ((^.))
import Data.Bits hiding (xor)
import Data.Parameterized.Context.Unsafe (empty,extend) import Data.Parameterized.Context.Unsafe (empty,extend)
import Data.Parameterized.NatRepr
import Data.Bits (shiftR, (.&.))
import Data.Word (Word8) import Data.Word (Word8)
import Data.Bits (shiftL,testBit)
import GHC.TypeLits (KnownNat) import GHC.TypeLits (KnownNat)
import What4.Interface hiding (IsExpr) import What4.Interface hiding (IsExpr)
@ -72,8 +71,8 @@ funcSemantics ::
SymFuns sym -> SymFuns sym ->
M.X86PrimFn (AtomWrapper (RegEntry sym)) mt -> M.X86PrimFn (AtomWrapper (RegEntry sym)) mt ->
S sym rtp bs r ctx -> IO (RegValue sym t, S sym rtp bs r ctx) S sym rtp bs r ctx -> IO (RegValue sym t, S sym rtp bs r ctx)
funcSemantics fs x s = do let sym = Sym { symIface = stateSymInterface s funcSemantics fs x s = do let sym = Sym { symIface = s^.stateSymInterface
, symTys = stateIntrinsicTypes s , symTys = s^.stateIntrinsicTypes
, symFuns = fs , symFuns = fs
} }
v <- pureSem sym x v <- pureSem sym x
@ -140,7 +139,7 @@ pureSem :: (IsSymInterface sym) =>
IO (RegValue sym (ToCrucibleType mt)) -- ^ Resulting value IO (RegValue sym (ToCrucibleType mt)) -- ^ Resulting value
pureSem sym fn = pureSem sym fn =
case fn of case fn of
M.CMPXCHG8B{} -> error "CMPXCHG8B"
M.XGetBV {} -> error "XGetBV" M.XGetBV {} -> error "XGetBV"
M.ReadLoc {} -> error "ReadLoc" M.ReadLoc {} -> error "ReadLoc"
M.PShufb {} -> error "PShufb" M.PShufb {} -> error "PShufb"

View File

@ -101,15 +101,18 @@ getReg ::
forall n t f. (Idx n (ArchRegContext M.X86_64) t) => RegAssign f -> f t forall n t f. (Idx n (ArchRegContext M.X86_64) t) => RegAssign f -> f t
getReg x = x ^. (field @n) getReg x = x ^. (field @n)
x86RegName' :: M.X86Reg tp -> String
x86RegName' M.X86_IP = "ip"
x86RegName' (M.X86_GP r) = show r
x86RegName' (M.X86_FlagReg r) = show r
x86RegName' (M.X87_StatusReg r) = show r
x86RegName' M.X87_TopReg = "x87Top"
x86RegName' (M.X87_TagReg r) = "x87Tag" ++ show r
x86RegName' (M.X87_FPUReg r) = show r
x86RegName' (M.X86_YMMReg r) = show r
x86RegName :: M.X86Reg tp -> C.SolverSymbol x86RegName :: M.X86Reg tp -> C.SolverSymbol
x86RegName M.X86_IP = C.systemSymbol "!ip" x86RegName r = C.systemSymbol $ "r!" ++ x86RegName' r
x86RegName (M.X86_GP r) = C.systemSymbol $ "!" ++ show r
x86RegName (M.X86_FlagReg r) = C.systemSymbol $ "!" ++ show r
x86RegName (M.X87_StatusReg r) = C.systemSymbol $ "!x87Status" ++ show r
x86RegName M.X87_TopReg = C.systemSymbol $ "!x87Top"
x86RegName (M.X87_TagReg r) = C.systemSymbol $ "!x87Tag" ++ show r
x86RegName (M.X87_FPUReg r) = C.systemSymbol $ "!" ++ show r
x86RegName (M.X86_YMMReg r) = C.systemSymbol $ "!" ++ show r
gpReg :: Int -> M.X86Reg (M.BVType 64) gpReg :: Int -> M.X86Reg (M.BVType 64)
gpReg = M.X86_GP . F.Reg64 . fromIntegral gpReg = M.X86_GP . F.Reg64 . fromIntegral

View File

@ -32,10 +32,11 @@ import qualified Lang.Crucible.Backend as C
import qualified Lang.Crucible.Backend.Simple as C import qualified Lang.Crucible.Backend.Simple as C
import qualified Lang.Crucible.CFG.Core as C import qualified Lang.Crucible.CFG.Core as C
import qualified Lang.Crucible.FunctionHandle as C import qualified Lang.Crucible.FunctionHandle as C
import Lang.Crucible.LLVM.DataLayout (EndianForm(LittleEndian))
import qualified Lang.Crucible.LLVM.MemModel as C
import qualified Lang.Crucible.LLVM.MemModel.Pointer as C
import qualified Lang.Crucible.Simulator.ExecutionTree as C import qualified Lang.Crucible.Simulator.ExecutionTree as C
import qualified Lang.Crucible.Simulator.RegValue as C import qualified Lang.Crucible.Simulator.RegValue as C
import qualified Lang.Crucible.LLVM.MemModel.Pointer as C
mkReg :: (C.IsSymInterface sym, M.HasRepr (M.ArchReg arch) M.TypeRepr) mkReg :: (C.IsSymInterface sym, M.HasRepr (M.ArchReg arch) M.TypeRepr)
=> MS.MacawSymbolicArchFunctions arch => MS.MacawSymbolicArchFunctions arch
@ -65,8 +66,7 @@ main = do
let loadOpt :: Elf.LoadOptions let loadOpt :: Elf.LoadOptions
loadOpt = Elf.LoadOptions { Elf.loadRegionIndex = Just 1 loadOpt = Elf.LoadOptions { Elf.loadRegionIndex = Just 1
, Elf.loadRegionBaseOffset = ??? , Elf.loadRegionBaseOffset = 0
-- , Elf.includeBSS = False
} }
putStrLn "Read elf" putStrLn "Read elf"
elfContents <- BS.readFile "tests/add_ubuntu64.o" elfContents <- BS.readFile "tests/add_ubuntu64.o"
@ -121,9 +121,17 @@ main = do
symFuns <- MX.newSymFuns sym symFuns <- MX.newSymFuns sym
putStrLn "Run code block" putStrLn "Run code block"
execResult <- MS.runCodeBlock sym x86ArchFns (MX.x86_64MacawEvalFn symFuns) halloc ??? ??? g regs initMem <- C.emptyMem LittleEndian
let globalMap :: MS.GlobalMap sym MX.X86_64
globalMap = Map.empty
let lookupFn :: MS.LookupFunctionHandle sym MX.X86_64
lookupFn _mem _regs = do
fail "Could not find function handle."
execResult <-
MS.runCodeBlock sym x86ArchFns (MX.x86_64MacawEvalFn symFuns)
halloc (initMem, globalMap) lookupFn g regs
case execResult of case execResult of
(_,C.FinishedExecution _ (C.TotalRes _pair))-> do (_,C.FinishedResult _ (C.TotalRes _pair))-> do
putStrLn "Done" putStrLn "Done"
_ -> do _ -> do
fail "Partial execution returned." fail "Partial execution returned."