Relocation support; various cleanups.

This patch adds initial support for relocations in Macaw code
discovery, and adds other refactoring.

* It introduces a SymbolValue constructor to represent references to
  symbols within Macaw.
* The various cases for x86 mov are made explicit after the flexdis refactor
  broke the previous code.  We should now support segment register movs and
  give better error messages when seeing mov with control or debug registers.
* The generic exception operation is replaced with Hlt and UD2 terminal
  x86-specific statements.
* CodeAddrReason is split into FunctionExploreReason and BlockExploreReason to
  clarify whether a function or block was discovered.
* The Macaw pretty printer is changed to use write_mem in place of pointer syntax.
* Various other refactoring is made to clarify code.
This commit is contained in:
Joe Hendrix 2018-04-23 11:24:21 -07:00
parent 0b8e95b0b0
commit 097edda1ef
No known key found for this signature in database
GPG Key ID: 8DFA5FF784098C4F
23 changed files with 917 additions and 561 deletions

View File

@ -31,6 +31,7 @@ library
base >= 4,
ansi-wl-pprint,
binary,
binary-symbols,
bytestring,
containers >= 0.5.8.1,
elf-edit >= 0.29,

View File

@ -1206,6 +1206,7 @@ transferValue c v = do
FinSet $ Set.singleton $ toInteger addr
| otherwise ->
TopV
SymbolValue{} -> TopV
-- Invariant: v is in m
AssignedValue a ->
fromMaybe (error $ "Missing assignment for " ++ show (assignId a))

View File

@ -180,6 +180,8 @@ unsignedUpperBound bnds v =
BVValue _ i -> Right (IntegerUpperBound i)
RelocatableValue{} ->
Left "Relocatable values do not have bounds."
SymbolValue{} ->
Left "Symbol values do not have bounds."
AssignedValue a ->
case MapF.lookup (assignId a) (bnds^.assignUpperBound) of
Just bnd -> Right bnd

View File

@ -297,13 +297,8 @@ addIntraproceduralJumpTarget fun_info src_block dest_addr = do -- record the ed
valueUses :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch))
=> Value arch ids tp
-> FunctionArgsM arch ids (RegisterSet (ArchReg arch))
valueUses v =
zoom assignmentCache $
foldValueCached (\_ _ -> mempty)
(\_ -> mempty)
(\r -> Set.singleton (Some r))
(\_ regs -> regs)
v
valueUses v = zoom assignmentCache $ foldValueCached fns v
where fns = emptyValueFold { foldInput = Set.singleton . Some }
-- | Record that a block demands the value of certain registers.
recordBlockDemand :: ( OrdF (ArchReg arch)

View File

@ -137,12 +137,17 @@ data ArchitectureInfo arch
-- The architecture-specific statement
-> ArchTermStmt arch ids
-> Maybe (ArchSegmentOff arch, AbsBlockState (ArchReg arch)))
-- ^ This takes an abstract state from before executing an abs state, and an
-- architecture-specific terminal statement, and returns the next address within
-- the procedure that the statement jumps to along with the updated abstract state.
-- ^ This takes an abstract state from before executing an abs
-- state, and an architecture-specific terminal statement.
--
-- Note that per their documentation, architecture specific statements may return to at
-- most one location within a function.
-- If the statement does not return to this function, this
-- function should return `Nothing`. Otherwise, it should
-- returns the next address within the procedure that the
-- statement jumps to along with the updated abstract state.
--
-- Note that per their documentation, architecture specific
-- statements may return to at most one location within a
-- function.
}
-- | Apply optimizations to a terminal statement.

View File

@ -70,6 +70,8 @@ module Data.Macaw.CFG.Core
, ArchTermStmt
, RegAddrWord
, RegAddrWidth
-- * Utilities
, addrWidthTypeRepr
-- * RegisterInfo
, RegisterInfo(..)
, asStackAddrOffset
@ -149,6 +151,11 @@ bracketsep (h:l) = vcat $
++ fmap (text "," <+>) l
++ [text "}"]
-- | A type repr for the address width
addrWidthTypeRepr :: AddrWidthRepr w -> TypeRepr (BVType w)
addrWidthTypeRepr Addr32 = BVTypeRepr knownNat
addrWidthTypeRepr Addr64 = BVTypeRepr knownNat
------------------------------------------------------------------------
-- AssignId
@ -328,24 +335,28 @@ instance FoldableFC (ArchFn arch) => FoldableFC (AssignRhs arch) where
-- Value and Assignment, AssignRhs declarations.
-- | A value at runtime.
data Value arch ids tp
= forall n
. (tp ~ BVType n, 1 <= n)
=> BVValue !(NatRepr n) !Integer
data Value arch ids tp where
BVValue :: (1 <= n) => !(NatRepr n) -> !Integer -> Value arch ids (BVType n)
-- ^ A constant bitvector
--
-- The integer should be between 0 and 2^n-1.
| (tp ~ BoolType)
=> BoolValue !Bool
BoolValue :: !Bool -> Value arch ids BoolType
-- ^ A constant Boolean
| ( tp ~ BVType (ArchAddrWidth arch)
, 1 <= ArchAddrWidth arch
)
=> RelocatableValue !(NatRepr (ArchAddrWidth arch)) !(ArchMemAddr arch)
RelocatableValue :: !(AddrWidthRepr (ArchAddrWidth arch))
-> !(ArchMemAddr arch)
-> Value arch ids (BVType (ArchAddrWidth arch))
-- ^ A memory address
| AssignedValue !(Assignment arch ids tp)
SymbolValue :: !(AddrWidthRepr (ArchAddrWidth arch))
-> !SymbolIdentifier
-> Value arch ids (BVType (ArchAddrWidth arch))
-- ^ Reference to a symbol identifier.
--
-- This appears when dealing with relocations.
AssignedValue :: !(Assignment arch ids tp)
-> Value arch ids tp
-- ^ Value from an assignment statement.
| Initial !(ArchReg arch tp)
Initial :: !(ArchReg arch tp)
-> Value arch ids tp
-- ^ Represents the value assigned to the register when the block started.
-- | An assignment consists of a unique location identifier and a right-
@ -370,7 +381,8 @@ instance ( HasRepr (ArchReg arch) TypeRepr
typeRepr (BoolValue _) = BoolTypeRepr
typeRepr (BVValue w _) = BVTypeRepr w
typeRepr (RelocatableValue w _) = BVTypeRepr w
typeRepr (RelocatableValue w _) = addrWidthTypeRepr w
typeRepr (SymbolValue w _) = addrWidthTypeRepr w
typeRepr (AssignedValue a) = typeRepr (assignRhs a)
typeRepr (Initial r) = typeRepr r
@ -392,12 +404,16 @@ instance OrdF (ArchReg arch)
compareF BVValue{} _ = LTF
compareF _ BVValue{} = GTF
compareF (RelocatableValue _ x) (RelocatableValue _ y) =
fromOrdering (compare x y)
compareF RelocatableValue{} _ = LTF
compareF _ RelocatableValue{} = GTF
compareF (SymbolValue _ x) (SymbolValue _ y) =
fromOrdering (compare x y)
compareF SymbolValue{} _ = LTF
compareF _ SymbolValue{} = GTF
compareF (AssignedValue x) (AssignedValue y) =
compareF (assignId x) (assignId y)
compareF AssignedValue{} _ = LTF
@ -613,8 +629,8 @@ ppValue p (BVValue w i)
-- TODO: We may want to report an error here.
parenIf (p > colonPrec) $
text (show i) <+> text "::" <+> brackets (text (show w))
ppValue p (RelocatableValue _ a) = parenIf (p > plusPrec) $ text (show a)
ppValue _ (SymbolValue _ a) = text (show a)
ppValue _ (AssignedValue a) = ppAssignId (assignId a)
ppValue _ (Initial r) = text (showF r) PP.<> text "_0"
@ -792,7 +808,7 @@ ppStmt :: ArchConstraints arch
ppStmt ppOff stmt =
case stmt of
AssignStmt a -> pretty a
WriteMem a _ rhs -> text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs
WriteMem a _ rhs -> text "write_mem" <+> prettyPrec 11 a <+> ppValue 0 rhs
PlaceHolderStmt vals name ->
text ("PLACEHOLDER: " ++ name)
<+> parens (hcat $ punctuate comma $ viewSome (ppValue 0) <$> vals)

View File

@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Data.Macaw.CFG.DemandSet
@ -86,6 +87,7 @@ addValueDemands v = do
BoolValue{} -> pure ()
BVValue{} -> pure ()
RelocatableValue{} -> pure ()
SymbolValue{} -> pure ()
AssignedValue a -> addAssignmentDemands a
Initial{} -> pure ()

View File

@ -240,6 +240,11 @@ rewriteApp app = do
BVAdd w (valueAsApp -> Just (BVSub _ (BVValue _ xc) y)) (BVValue _ zc) -> do
rewriteApp (BVSub w (BVValue w (toUnsigned w (xc + zc))) y)
-- addr a + (c - addr b) => c + (addr a - addr b)
BVAdd w (RelocatableValue _ a) (valueAsApp -> Just (BVSub _ c (RelocatableValue _ b)))
| Just d <- diffAddr a b ->
rewriteApp $ BVAdd w c (BVValue w (toUnsigned w d))
-- x - yc = x + (negate yc)
BVSub w x (BVValue _ yc) -> do
rewriteApp (BVAdd w x (BVValue w (toUnsigned w (negate yc))))
@ -407,6 +412,7 @@ rewriteValue v =
BoolValue b -> pure (BoolValue b)
BVValue w i -> pure (BVValue w i)
RelocatableValue w a -> pure (RelocatableValue w a)
SymbolValue w a -> pure (SymbolValue w a)
AssignedValue (Assignment aid _) -> Rewriter $ do
ref <- gets $ rwctxCache . rwContext
srcMap <- lift $ readSTRef ref

View File

@ -35,7 +35,8 @@ module Data.Macaw.Discovery
, Data.Macaw.Discovery.cfgFromAddrs
, Data.Macaw.Discovery.cfgFromAddrsAndState
, Data.Macaw.Discovery.markAddrsAsFunction
, State.CodeAddrReason(..)
, State.FunctionExploreReason(..)
, State.BlockExploreReason(..)
, Data.Macaw.Discovery.analyzeFunction
, Data.Macaw.Discovery.exploreMemPointers
, Data.Macaw.Discovery.analyzeDiscoveredFunctions
@ -125,24 +126,6 @@ concretizeAbsCodePointers _mem StridedInterval{} = [] -- FIXME: this case doesn'
concretizeAbsCodePointers _mem _ = []
{-
printAddrBacktrace :: Map (ArchMemAddr arch) (FoundAddr arch)
-> ArchMemAddr arch
-> CodeAddrReason (ArchAddrWidth arch)
-> [String]
printAddrBacktrace found_map addr rsn = do
let pp msg = show addr ++ ": " ++ msg
let prev prev_addr =
case Map.lookup prev_addr found_map of
Just found_info -> printAddrBacktrace found_map prev_addr (foundReason found_info)
Nothing -> error $ "Unknown reason for address " ++ show prev_addr
case rsn of
InWrite src -> pp ("Written to memory in block at address " ++ show src ++ ".") : prev src
NextIP src -> pp ("Target IP for " ++ show src ++ ".") : prev src
CallTarget src -> pp ("Target IP of call at " ++ show src ++ ".") : prev src
InitAddr -> [pp "Initial entry point."]
CodePointerInMem src -> [pp ("Memory address " ++ show src ++ " contained code.")]
SplitAt src -> pp ("Split from read of " ++ show src ++ ".") : prev src
-- | Return true if this address was added because of the contents of a global address
-- in memory initially.
--
@ -234,7 +217,7 @@ rangeInReadonlySegment mseg size =
-- DiscoveryState utilities
-- | Mark a escaped code pointer as a function entry.
markAddrAsFunction :: CodeAddrReason (ArchAddrWidth arch)
markAddrAsFunction :: FunctionExploreReason (ArchAddrWidth arch)
-- ^ Information about why the code address was discovered
--
-- Used for debugging
@ -251,7 +234,7 @@ markAddrAsFunction rsn addr s
s & unexploredFunctions %~ Map.insertWith (\_ old -> old) addr rsn
_ -> s
-- | Mark a list of addresses as function entries with the same reason.
markAddrsAsFunction :: CodeAddrReason (ArchAddrWidth arch)
markAddrsAsFunction :: FunctionExploreReason (ArchAddrWidth arch)
-> [ArchSegmentOff arch]
-> DiscoveryState arch
-> DiscoveryState arch
@ -262,13 +245,13 @@ markAddrsAsFunction rsn addrs s0 = foldl' (\s a -> markAddrAsFunction rsn a s) s
-- | An address that has been found to be reachable.
data FoundAddr arch
= FoundAddr { foundReason :: !(CodeAddrReason (ArchAddrWidth arch))
= FoundAddr { foundReason :: !(BlockExploreReason (ArchAddrWidth arch))
-- ^ The reason the address was found to be containing code.
, foundAbstractState :: !(AbsBlockState (ArchReg arch))
-- ^ The abstract state formed from post-states that reach this address.
}
foundReasonL :: Lens' (FoundAddr arch) (CodeAddrReason (ArchAddrWidth arch))
foundReasonL :: Lens' (FoundAddr arch) (BlockExploreReason (ArchAddrWidth arch))
foundReasonL = lens foundReason (\old new -> old { foundReason = new })
------------------------------------------------------------------------
@ -276,7 +259,8 @@ foundReasonL = lens foundReason (\old new -> old { foundReason = new })
-- | The state for the function exploration monad (funM)
data FunState arch s ids
= FunState { funNonceGen :: !(NonceGenerator (ST s) ids)
= FunState { funReason :: !(FunctionExploreReason (ArchAddrWidth arch))
, funNonceGen :: !(NonceGenerator (ST s) ids)
, curFunAddr :: !(ArchSegmentOff arch)
, _curFunCtx :: !(DiscoveryState arch)
-- ^ Discovery state without this function
@ -540,6 +524,7 @@ identifyCallTargets absState ip = do
case ip of
BVValue _ x -> segOffAddrs $ resolveAbsoluteAddr mem (fromInteger x)
RelocatableValue _ a -> segOffAddrs $ asSegmentOff mem a
SymbolValue{} -> def
AssignedValue a ->
case assignRhs a of
-- See if we can get a value out of a concrete memory read.
@ -845,7 +830,7 @@ transferBlocks src finfo sz block_map =
, blockStatementList = pblock
}
id %= addFunBlock src pb
curFunCtx %= markAddrsAsFunction (InWrite src) (ps^.writtenCodeAddrs)
curFunCtx %= markAddrsAsFunction (PossibleWriteEntry src) (ps^.writtenCodeAddrs)
. markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs)
mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
@ -920,7 +905,7 @@ analyzeBlocks logBlock st =
mkFunState :: NonceGenerator (ST s) ids
-> DiscoveryState arch
-> CodeAddrReason (ArchAddrWidth arch)
-> FunctionExploreReason (ArchAddrWidth arch)
-- ^ Reason to provide for why we are analyzing this function
--
-- This can be used to figure out why we decided a
@ -928,10 +913,11 @@ mkFunState :: NonceGenerator (ST s) ids
-> ArchSegmentOff arch
-> FunState arch s ids
mkFunState gen s rsn addr = do
let faddr = FoundAddr { foundReason = rsn
let faddr = FoundAddr { foundReason = FunctionEntryPoint
, foundAbstractState = mkInitialAbsState (archInfo s) (memory s) addr
}
in FunState { funNonceGen = gen
in FunState { funReason = rsn
, funNonceGen = gen
, curFunAddr = addr
, _curFunCtx = s
, _curFunBlocks = Map.empty
@ -944,10 +930,10 @@ mkFunInfo :: FunState arch s ids -> DiscoveryFunInfo arch ids
mkFunInfo fs =
let addr = curFunAddr fs
s = fs^.curFunCtx
info = archInfo s
nm = withArchConstraints info $
nm = withArchConstraints (archInfo s) $
fromMaybe (BSC.pack (show addr)) (Map.lookup addr (symbolNames s))
in DiscoveryFunInfo { discoveredFunAddr = addr
in DiscoveryFunInfo { discoveredFunReason = funReason fs
, discoveredFunAddr = addr
, discoveredFunName = nm
, _parsedBlocks = fs^.curFunBlocks
}
@ -961,7 +947,7 @@ analyzeFunction :: (ArchSegmentOff arch -> ST s ())
-- ^ Logging function to call when analyzing a new block.
-> ArchSegmentOff arch
-- ^ The address to explore
-> CodeAddrReason (ArchAddrWidth arch)
-> FunctionExploreReason (ArchAddrWidth arch)
-- ^ Reason to provide for why we are analyzing this function
--
-- This can be used to figure out why we decided a
@ -1062,7 +1048,7 @@ cfgFromAddrsAndState initial_state init_addrs mem_words =
-- Resolve functions with logging
resolveFuns :: MemWidth (RegAddrWidth (ArchReg arch))
=> (ArchSegmentOff arch -> CodeAddrReason (ArchAddrWidth arch) -> ST s Bool)
=> (ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> ST s Bool)
-- ^ Callback for discovered functions
--
-- Should return true if we should analyze the function and false otherwise.
@ -1141,6 +1127,16 @@ discoveryLogFn disOpt _ (AnalyzeBlock addr) = ioToST $ do
hFlush stderr
ppFunReason :: MemWidth w => FunctionExploreReason w -> String
ppFunReason rsn =
case rsn of
InitAddr -> ""
UserRequest -> ""
PossibleWriteEntry a -> " (written at " ++ show a ++ ")"
CallTarget a -> " (called at " ++ show a ++ ")"
CodePointerInMem a -> " (in initial memory at " ++ show a ++ ")"
-- | Explore until we have found all functions we can.
--
-- This function is intended to make it easy to explore functions, and
@ -1169,10 +1165,10 @@ completeDiscoveryState ainfo disOpt mem initEntries symMap funPred = stToIO $ wi
| exploreFunctionSymbols disOpt =
initState & markAddrsAsFunction InitAddr (Map.keys symMap)
| otherwise = initState
let analyzeFn addr _rsn = ioToST $ do
let analyzeFn addr rsn = ioToST $ do
let b = funPred addr
when (b && logAtAnalyzeFunction disOpt) $ do
hPutStrLn stderr $ "Analyzing function: " ++ ppSymbol addr symMap
hPutStrLn stderr $ "Analyzing function: " ++ ppSymbol addr symMap ++ ppFunReason rsn
hFlush stderr
pure $! b
let analyzeBlock _ addr = ioToST $ do

View File

@ -37,8 +37,9 @@ module Data.Macaw.Discovery.State
-- * DiscoveryFunInfo
, DiscoveryFunInfo(..)
, parsedBlocks
-- * CodeAddrRegion
, CodeAddrReason(..)
-- * Reasons for exploring
, FunctionExploreReason(..)
, BlockExploreReason(..)
-- * DiscoveryState utilities
, RegConstraint
) where
@ -62,26 +63,41 @@ import Data.Macaw.CFG
import Data.Macaw.Types
------------------------------------------------------------------------
-- CodeAddrReason
-- BlockExploreReason
-- | This describes the source of an address that was marked as containing code.
data CodeAddrReason w
= InWrite !(MemSegmentOff w)
-- ^ Exploring because the given block writes it to memory.
| NextIP !(MemSegmentOff w)
-- ^ Exploring because the given block jumps here.
-- | This describes why we started exploring a given function.
data FunctionExploreReason w
= PossibleWriteEntry !(MemSegmentOff w)
-- ^ Exploring because code at the given block writes it to memory.
| CallTarget !(MemSegmentOff w)
-- ^ Exploring because address terminates with a call that jumps here.
| InitAddr
-- ^ Identified as an entry point from initial information
| CodePointerInMem !(MemSegmentOff w)
-- ^ A code pointer that was stored at the given address.
| SplitAt !(MemSegmentOff w) !(CodeAddrReason w)
-- ^ Added because the address split this block after it had been disassembled. Also includes the reason we thought the block should be there before we split it.
| UserRequest
-- ^ The user requested that we analyze this address as a function.
deriving (Eq, Show)
------------------------------------------------------------------------
-- BlockExploreReason
-- | This describes why we are exploring a given block within a function.
data BlockExploreReason w
-- =- InWrite !(MemSegmentOff w)
-- ^ Exploring because the given block writes it to memory.
= NextIP !(MemSegmentOff w)
-- ^ Exploring because the given block jumps here.
| FunctionEntryPoint
-- ^ Identified as an entry point from initial information
| SplitAt !(MemSegmentOff w) !(BlockExploreReason w)
-- ^ Added because the address split this block after it had been
-- disassembled. Also includes the reason we thought the block
-- should be there before we split it.
-- | UserRequest
-- ^ The user requested that we analyze this address as a function.
deriving (Eq, Show)
------------------------------------------------------------------------
-- GlobalDataInfo
@ -215,7 +231,7 @@ data ParsedBlock arch ids
-- ^ Address of region
, blockSize :: !(ArchAddrWord arch)
-- ^ The size of the region of memory covered by this.
, blockReason :: !(CodeAddrReason (ArchAddrWidth arch))
, blockReason :: !(BlockExploreReason (ArchAddrWidth arch))
-- ^ Reason that we marked this address as
-- the start of a basic block.
, blockAbstractState :: !(AbsBlockState (ArchReg arch))
@ -241,7 +257,8 @@ instance ArchConstraints arch
-- | Information discovered about a particular function
data DiscoveryFunInfo arch ids
= DiscoveryFunInfo { discoveredFunAddr :: !(ArchSegmentOff arch)
= DiscoveryFunInfo { discoveredFunReason :: !(FunctionExploreReason (ArchAddrWidth arch))
, discoveredFunAddr :: !(ArchSegmentOff arch)
-- ^ Address of function entry block.
, discoveredFunName :: !BSC.ByteString
-- ^ Name of function should be unique for program
@ -276,7 +293,8 @@ data DiscoveryState arch
-- inferred about it.
, _funInfo :: !(Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))
-- ^ Map from function addresses to discovered information about function
, _unexploredFunctions :: !(Map (ArchSegmentOff arch) (CodeAddrReason (ArchAddrWidth arch)))
, _unexploredFunctions
:: !(Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch)))
-- ^ This maps addresses that have been marked as
-- functions, but not yet analyzed to the reason
-- they are analyzed.
@ -333,7 +351,7 @@ globalDataMap = lens _globalDataMap (\s v -> s { _globalDataMap = v })
-- | List of functions to explore next.
unexploredFunctions
:: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (CodeAddrReason (ArchAddrWidth arch)))
:: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch)))
unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunctions = v })
-- | Get information for specific functions

View File

@ -8,6 +8,7 @@ a value without revisiting shared subterms.
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
@ -15,6 +16,8 @@ a value without revisiting shared subterms.
{-# LANGUAGE UndecidableInstances #-}
module Data.Macaw.Fold
( Data.Parameterized.TraversableFC.FoldableFC(..)
, ValueFold(..)
, emptyValueFold
, foldValueCached
) where
@ -27,39 +30,59 @@ import Data.Parameterized.TraversableFC
import Data.Macaw.CFG
data ValueFold arch ids r = ValueFold
{ foldBoolValue :: !(Bool -> r)
, foldBVValue :: !(forall n . NatRepr n -> Integer -> r)
, foldAddr :: !(ArchMemAddr arch -> r)
, foldIdentifier :: !(SymbolIdentifier -> r)
, foldInput :: !(forall utp . ArchReg arch utp -> r)
, foldAssign :: !(forall utp . AssignId ids utp -> r -> r)
}
-- | Empty value fold returns mempty for each non-recursive fold, and the
-- identify of @foldAssign@
emptyValueFold :: Monoid r => ValueFold arch ids r
emptyValueFold =
ValueFold { foldBoolValue = \_ -> mempty
, foldBVValue = \_ _ -> mempty
, foldAddr = \_ -> mempty
, foldIdentifier = \_ -> mempty
, foldInput = \_ -> mempty
, foldAssign = \_ r -> r
}
-- | This folds over elements of a values in a values.
--
-- It memoizes values so that it only evaluates assignments with the same id
-- once.
foldValueCached :: forall r arch ids tp
. (Monoid r, FoldableFC (ArchFn arch))
=> (forall n. NatRepr n -> Integer -> r)
-- ^ Function for literals
-> (ArchMemAddr arch -> r)
-- ^ Function for memwords
-> (forall utp . ArchReg arch utp -> r)
-- ^ Function for input registers
-> (forall utp . AssignId ids utp -> r -> r)
-- ^ Function for assignments
=> ValueFold arch ids r
-> Value arch ids tp
-> State (Map (Some (AssignId ids)) r) r
foldValueCached litf rwf initf assignf = go
foldValueCached fns = go
where
go :: forall tp'
. Value arch ids tp'
-> State (Map (Some (AssignId ids)) r) r
go v =
case v of
BoolValue b -> return (litf (knownNat :: NatRepr 1) (if b then 1 else 0))
BVValue sz i -> return $ litf sz i
RelocatableValue _ a -> pure $ rwf a
Initial r -> return $ initf r
BoolValue b ->
pure $! foldBoolValue fns b
BVValue sz i ->
pure $! foldBVValue fns sz i
RelocatableValue _ a ->
pure $! foldAddr fns a
SymbolValue _ a ->
pure $! foldIdentifier fns a
Initial r ->
pure $! foldInput fns r
AssignedValue (Assignment a_id rhs) -> do
m <- get
case Map.lookup (Some a_id) m of
Just v' ->
return $ assignf a_id v'
pure $! foldAssign fns a_id v'
Nothing -> do
rhs_v <- foldrFC (\v' mrhs -> mappend <$> go v' <*> mrhs) (pure mempty) rhs
modify' $ Map.insert (Some a_id) rhs_v
return (assignf a_id rhs_v)
pure $! foldAssign fns a_id rhs_v

View File

@ -1,5 +1,5 @@
{-|
Copyright : (c) Galois Inc, 2015-2016
Copyright : (c) Galois Inc, 2015-2018
Maintainer : jhendrix@galois.com
Declares 'Memory', a type for representing segmented memory with permissions.
@ -53,10 +53,11 @@ module Data.Macaw.Memory
, segmentSize
, SegmentRange(..)
, Relocation(..)
, RelocationAddr(..)
, module Data.BinarySymbols
, DropError(..)
, dropErrorAsMemError
, dropSegmentRangeListBytes
, takeSegmentPrefix
-- * MemWord
, MemWord
, MemWidth(..)
@ -77,9 +78,9 @@ module Data.Macaw.Memory
, clearSegmentOffLeastBit
, memAsAddrPairs
-- * Symbols
, SymbolRef(..)
, SymbolInfo(..)
, SymbolVersion(..)
, SymbolDef(..)
, SymbolBinding(..)
-- ** Defined symbol information
, SymbolPrecedence(..)
, SymbolDefType(..)
@ -125,6 +126,8 @@ module Data.Macaw.Memory
) where
import Control.Exception (assert)
import Control.Monad
import Data.BinarySymbols
import Data.Bits
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
@ -274,6 +277,8 @@ class (1 <= w) => MemWidth w where
addrRotate :: MemWord w -> Int -> MemWord w
-- | Read an address with the given endianess.
--
-- This returns nothing if the bytestring is too short.
addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w)
-- | Returns number of bits in address.
@ -358,21 +363,6 @@ addrWidthClass Addr64 x = x
------------------------------------------------------------------------
-- Symbol Information
-- | Characterized the version information on a symbol
data SymbolVersion
= UnversionedSymbol
-- ^ The symbol had no or the default *global* version information.
| ObjectSymbol
-- ^ The symbol comes from an object file and hence does not
-- have GNU version information. Version information
-- may be part of the symbol name however.
| VersionedSymbol !BS.ByteString !BS.ByteString
-- ^ A symbol with version information from version information
-- in a shared library or executable.
--
-- The first value is the name of the shared object. The second
-- is the version associated with the symbol.
-- | Describes symbol precedence
data SymbolPrecedence
= SymbolStrong
@ -418,13 +408,10 @@ data SymbolUndefType
| SymbolUndefObject
-- ^ This symbol is intended to denote some data.
type SectionIndex = Word16
-- | This defines information about the symbol related to whether
-- it is defined (and if so how it binds) or undefined (and if so what
-- requiremens there are for a match).
data SymbolDef
data SymbolBinding
= DefinedSymbol !SymbolPrecedence !SymbolDefType
-- ^ The symbol is defined and globally visible.
--
@ -433,13 +420,17 @@ data SymbolDef
-- and the linker is not allowed to replace the symbol. Is
-- false, then the linker will use a strong symbol if it exists,
-- and one of the weak symbols if it does not.
--
-- The address is the address the symbol was loaded at. It may
-- not be a valid segment offset if the original binary used
-- symbols at unexpected addresses.
| SymbolSection !SectionIndex
-- ^ The symbol denotes a section in an object file with the
-- given index. These are primarily intended for relocations.
--
-- The symbol version should be @UnversionedSymbol@ with this.
| SymbolFile
-- ^ This symbol denotes a file name
| SymbolFile !BS.ByteString
-- ^ This symbol denotes a file name with the given string
--
-- The symbol version should be @UnversionedSymbol@ with this.
| UndefinedSymbol !SymbolRequirement !SymbolUndefType
@ -451,46 +442,24 @@ data SymbolDef
-- the linker cannot find a definition, then it must throw an
-- error.
type SymbolName = BS.ByteString
-- | The name of a symbol along with optional version information.
--
-- Note that this is used for referencing undefined symbols, while
-- @MemSymbol@ is used for defined symbols.
data SymbolRef =
SymbolRef { symbolName :: !SymbolName
-- | This provides information about a symbol in the file.
data SymbolInfo =
SymbolInfo { symbolName :: !SymbolName
-- ^ The name of the symbol
--
-- Symbols are used for many purposes in a file.
-- Symbol names may not be unique, and may even be
-- empty. For example, Elf files uses the empty name
-- for section symbols. On ARM, "$a", "$d" and "$t"
-- are used to indicate regions of ARM code, data, thumb.
, symbolVersion :: !SymbolVersion
-- ^ Version information used to constrain when one
-- symbol matches another.
, symbolDef :: !SymbolDef
, symbolDef :: !SymbolBinding
}
------------------------------------------------------------------------
-- SegmentRange
-- | Denotes an address referenced by a relocation.
data RelocationAddr
= SymbolRelocation !SymbolName !SymbolVersion
-- ^ Denotes the address of the symbol that matches the name and version constraints.
| SectionBaseRelocation !SectionIndex
-- ^ Denotes the address of the section with the given address.
instance Show RelocationAddr where
showsPrec _ (SymbolRelocation nm ver) =
case ver of
UnversionedSymbol -> showString (BSC.unpack nm)
ObjectSymbol -> showString (BSC.unpack nm)
VersionedSymbol symName soName ->
showString (BSC.unpack nm)
. showChar '@' . showString (BSC.unpack symName)
. showChar '(' . showString (BSC.unpack soName) . showChar ')'
showsPrec _ (SectionBaseRelocation idx) =
showString "section_" . shows idx
showOff :: Integer -> ShowS
showOff 0 = id
showOff off = showString " + 0x" . showHex off
-- Relocation
showEnd :: Endianness -> ShowS
showEnd LittleEndian = showString "LE"
@ -498,10 +467,18 @@ showEnd BigEndian = showString "BE"
-- | Information about a relocation
data Relocation w
= AbsoluteRelocation !RelocationAddr !(MemWord w) !Endianness
-- ^ Denotes the address of the relocation plus the offset stored
= AbsoluteRelocation !SymbolIdentifier !(MemWord w) !Endianness !Int
-- ^ @AbsoluteRelocation addr off end size@ denotes an
-- address of the relocation plus the offset stored
-- with the given endianess.
| RelativeRelocation !RelocationAddr !(MemWord w) !Endianness !Int
--
-- The @size@ field is the number of bytes the relocation is stored
-- at, and when inserting the relocation value it should only use
-- that many bytes. If the address + offset is greater than or equal to
-- @2^(8*n)@, then updating the relocation target should fail. This is
-- used to support relocation types such as @R_X86_64_32@. We do not
-- currently support signed versions like @R_X86_64_32S@.
| RelativeRelocation !SymbolIdentifier !(MemWord w) !Endianness !Int
-- ^ @RelativeRelocation addr off end cnt@ denotes a relocation
-- that stores the value of @addr + off - this_addr@ (where
-- @this_addr@ is the address the relocation is stored at as a
@ -509,15 +486,32 @@ data Relocation w
-- | Return size of relocation in bytes
relocSize :: forall w . MemWidth w => Relocation w -> MemWord w
relocSize (AbsoluteRelocation _ o _) = fromIntegral (addrSize o)
relocSize (AbsoluteRelocation _ _ _ cnt) = fromIntegral cnt
relocSize (RelativeRelocation _ _ _ cnt) = fromIntegral cnt
instance Show (Relocation w) where
showsPrec _ (AbsoluteRelocation base off end) =
showString "absolute(" . shows base . showOff (memWordInteger off) . showChar ')' . showEnd end
showsPrec _ (AbsoluteRelocation base off end cnt) =
showString "[areloc,"
. shows base
. showChar ','
. showHex (memWordInteger off)
. showChar ','
. showEnd end
. showChar ','
. shows (8*cnt)
. showChar ']'
showsPrec _ (RelativeRelocation base off end cnt) =
showString "relative(" . shows base . showOff (memWordInteger off) . showChar ')' . showEnd end
. showChar '@' . shows (8*cnt)
showString "[rreloc,"
. shows base
. showHex (memWordInteger off)
. showChar ','
. showEnd end
. showChar ','
. shows (8*cnt)
. showChar ']'
------------------------------------------------------------------------
-- SegmentRange
-- | Defines a portion of a segment.
--
@ -534,16 +528,66 @@ rangeSize (ByteRegion bs) = fromIntegral (BS.length bs)
rangeSize (RelocationRegion r) = relocSize r
rangeSize (BSSRegion sz) = sz
ppByte :: Word8 -> String -> String
ppByte w | w < 16 = showChar '0' . showHex w
| otherwise = showHex w
instance Show (SegmentRange w) where
showsPrec _ (ByteRegion bs) = \s -> foldr ppByte s (BS.unpack bs)
where ppByte w | w < 16 = showChar '0' . showHex w
| otherwise = showHex w
showsPrec p (RelocationRegion r) = showsPrec p r
showsPrec _ (BSSRegion sz) = showString "bss[" . shows sz . showChar ']'
showsPrec _ (BSSRegion sz) = showString "[bss," . shows sz . showChar ']'
showList [] = id
showList (h : r) = showsPrec 10 h . showList r
takeSegmentPrefix :: MemWidth w => [SegmentRange w] -> MemWord w -> [SegmentRange w]
takeSegmentPrefix _ 0 = []
takeSegmentPrefix rngs c = do
let rest l d | c > d = takeSegmentPrefix l (c - d)
| otherwise = []
case rngs of
[] -> []
ByteRegion b : l ->
ByteRegion (BS.take (fromIntegral c) b)
: rest l (fromIntegral (BS.length b))
RelocationRegion r : l ->
RelocationRegion r
: rest l (relocSize r)
BSSRegion d : l ->
BSSRegion (min d c)
: rest l d
------------------------------------------------------------------------
-- MemoryError
-- | Type of errors that may occur when reading memory.
data MemoryError w
= AccessViolation !(MemAddr w)
-- ^ Memory could not be read, because it was not defined.
| PermissionsError !(MemAddr w)
-- ^ Memory could not be read due to insufficient permissions.
| UnexpectedRelocation !(MemAddr w) !(Relocation w) !String
-- ^ Read from location that partially overlaps a relocated entry
| UnexpectedBSS !(MemAddr w)
-- ^ We unexpectedly encountered a BSS segment/section.
| InvalidAddr !(MemAddr w)
-- ^ The data at the given address did not refer to a valid memory location.
instance MemWidth w => Show (MemoryError w) where
show err =
case err of
AccessViolation a ->
"Access violation at " ++ show a ++ "."
PermissionsError a ->
"Insufficient permissions at " ++ show a ++ "."
UnexpectedRelocation a r msg ->
"Attempt to read an unexpected relocation entry at " ++ show a ++ ":\n"
++ " " ++ show r ++ "\n" ++ msg
UnexpectedBSS a ->
"Attempt to read zero initialized BSS memory at " ++ show a ++ "."
InvalidAddr a ->
"Attempt to interpret an invalid address: " ++ show a ++ "."
------------------------------------------------------------------------
-- SegmentContents
@ -595,8 +639,8 @@ contentsAfterSegmentOff mseg = do
Right $ v : Map.elems post
-- If last segment is a symbolic reference, then the code is asking
-- us to partition a symbolic reference in two, which we cannot do.
Just ((_, RelocationRegion{}),_) ->
Left (UnexpectedRelocation (relativeSegmentAddr mseg))
Just ((_, RelocationRegion r),_) ->
Left (UnexpectedRelocation (relativeSegmentAddr mseg) r "caso")
contentsList :: SegmentContents w -> [(MemWord w, SegmentRange w)]
contentsList (SegmentContents m) = Map.toList m
@ -629,9 +673,11 @@ allSymbolData (PresymbolData contents bssSize) =
-- | Take the given amount of data out of presymbol data.
takeSegment :: MemWidth w => Int64 -> PresymbolData -> [SegmentRange w]
takeSegment cnt (PresymbolData contents bssSize) =
singleSegment (L.take cnt contents)
++ bssSegment (min (cnt - L.length contents) bssSize)
takeSegment cnt (PresymbolData contents bssSize)
| L.null contents = bssSegment (min cnt bssSize)
| otherwise =
ByteRegion (L.toStrict (L.take cnt contents))
: bssSegment (min (cnt - L.length contents) bssSize)
-- | @dropSegment cnt dta@ drops @cnt@ bytes from @dta@.
dropSegment :: Int64 -> PresymbolData -> PresymbolData
@ -664,15 +710,17 @@ byteSegments :: forall v m w
-> L.ByteString -- ^ File contents for segment.
-> Int64 -- ^ Expected size
-> m [SegmentRange w]
byteSegments resolver relocMap initBase contents0 regionSize =
bytesToSegmentsAscending [] symbolPairs 0 (mkPresymbolData contents0 regionSize)
byteSegments resolver relocMap initBase contents0 regionSize
| end <= initBase =
error $ "regionSize should be a positive number that does not overflow address space."
| otherwise =
bytesToSegmentsAscending [] symbolPairs initBase (mkPresymbolData contents0 regionSize)
where -- Parse the map to get a list of symbols starting at base0.
symbolPairs :: [(MemWord w, v)]
symbolPairs
= Map.toList
$ Map.dropWhileAntitone (< initBase) relocMap
-- Get last address for this region
end :: MemWord w
end = initBase + fromIntegral regionSize
@ -680,29 +728,34 @@ byteSegments resolver relocMap initBase contents0 regionSize =
bytesToSegmentsAscending :: [SegmentRange w]
-> [(MemWord w, v)]
-- ^ List of relocations to process in order.
-> MemWord w -- ^ Number of bytes so far.
-> MemWord w
-- ^ Address we are currently at
-- This should be guaranteed to be at most @end@.
-> PresymbolData
-- ^ The remaining bytes in memory
-- including a number extra bss.
-> m [SegmentRange w]
bytesToSegmentsAscending pre ((addr,v):rest) ioff contents
| addr >= end = do
pure $ reverse pre ++ allSymbolData contents
| addr - initBase < ioff = do
-- Skip relocations that are before current adddredd
bytesToSegmentsAscending pre rest ioff contents
| otherwise = do
-- We only consider relocations that are in the range of this segment,
-- so we require the difference between the address and initBase is
-- less than regionSize
| addr < end = do
when (addr < ioff) $ do
error "Encountered overlapping relocations."
mr <- resolver v contents
let addrOff = addr - initBase
case mr of
Just (r,rsz) | addrOff >= ioff -> do
let addrDiff = addrOff - ioff
Just (r,rsz) -> do
when (rsz < 1 || ioff + rsz > end) $ do
error $ "Region size " ++ show rsz ++ " is out of range."
-- Get number of bytes between this address offset and the current offset."
let addrDiff = addr - ioff
let post = dropSegment (fromIntegral (addrDiff + rsz)) contents
let pre' = [RelocationRegion r]
++ reverse (takeSegment (fromIntegral addrDiff) contents)
++ pre
bytesToSegmentsAscending pre' rest (ioff + rsz) post
bytesToSegmentsAscending pre' rest (addr + rsz) post
_ -> do
-- Skipping relocation
bytesToSegmentsAscending pre rest ioff contents
bytesToSegmentsAscending pre _ _ contents =
pure $ reverse pre ++ allSymbolData contents
@ -1038,13 +1091,13 @@ type AddrSymMap w = Map.Map (MemSegmentOff w) BSC.ByteString
------------------------------------------------------------------------
-- DropError
-- | An error that occured when droping byes.
data DropError
= DropUnexpectedRelocation
-- | An error that occured when droping bytes.
data DropError w
= DropUnexpectedRelocation !(Relocation w)
| DropInvalidAddr
dropErrorAsMemError :: MemAddr w -> DropError -> MemoryError w
dropErrorAsMemError a DropUnexpectedRelocation = UnexpectedRelocation a
dropErrorAsMemError :: MemAddr w -> DropError w -> MemoryError w
dropErrorAsMemError a (DropUnexpectedRelocation r) = UnexpectedRelocation a r "dropErr"
dropErrorAsMemError a DropInvalidAddr = InvalidAddr a
-- | Given a contiguous list of segment ranges and a number of bytes to drop, this
@ -1053,7 +1106,7 @@ dropSegmentRangeListBytes :: forall w
. MemWidth w
=> [SegmentRange w]
-> Int
-> Either DropError [SegmentRange w]
-> Either (DropError w) [SegmentRange w]
dropSegmentRangeListBytes ranges 0 = Right ranges
dropSegmentRangeListBytes (ByteRegion bs : rest) cnt = do
let sz = BS.length bs
@ -1061,10 +1114,10 @@ dropSegmentRangeListBytes (ByteRegion bs : rest) cnt = do
Right $ ByteRegion (BS.drop cnt bs) : rest
else
dropSegmentRangeListBytes rest (cnt - sz)
dropSegmentRangeListBytes (RelocationRegion{}:rest) cnt = do
let sz = addrSize (error "rangeSize nat evaluated" :: NatRepr w)
dropSegmentRangeListBytes (RelocationRegion r:rest) cnt = do
let sz = fromIntegral (relocSize r)
if sz > cnt then
Left DropUnexpectedRelocation
Left (DropUnexpectedRelocation r)
else
dropSegmentRangeListBytes rest (cnt - sz)
dropSegmentRangeListBytes (BSSRegion sz : rest) cnt =
@ -1075,41 +1128,6 @@ dropSegmentRangeListBytes (BSSRegion sz : rest) cnt =
dropSegmentRangeListBytes [] _ =
Left DropInvalidAddr
------------------------------------------------------------------------
-- MemoryError
-- | Type of errors that may occur when reading memory.
data MemoryError w
= UserMemoryError (MemAddr w) !String
-- ^ the memory reader threw an unspecified error at the given location.
| InvalidInstruction (MemAddr w) ![SegmentRange w]
-- ^ The memory reader could not parse the value starting at the given address.
| AccessViolation (MemAddr w)
-- ^ Memory could not be read, because it was not defined.
| PermissionsError (MemAddr w)
-- ^ Memory could not be read due to insufficient permissions.
| UnexpectedRelocation (MemAddr w)
-- ^ Read from location that partially overlaps a relocated entry
| UnexpectedBSS (MemAddr w)
-- ^ We unexpectedly encountered a BSS segment/section.
| InvalidAddr (MemAddr w)
-- ^ The data at the given address did not refer to a valid memory location.
instance MemWidth w => Show (MemoryError w) where
show (UserMemoryError _ msg) = msg
show (InvalidInstruction start contents) =
"Invalid instruction at " ++ show start ++ ": " ++ showList contents ""
show (AccessViolation a) =
"Access violation at " ++ show a ++ "."
show (PermissionsError a) =
"Insufficient permissions at " ++ show a ++ "."
show (UnexpectedRelocation a) =
"Attempt to read an unexpected relocation entry at " ++ show a ++ "."
show (UnexpectedBSS a) =
"Attempt to read zero initialized BSS memory at " ++ show a ++ "."
show (InvalidAddr a) =
"Attempt to interpret an invalid address: " ++ show a ++ "."
------------------------------------------------------------------------
-- Memory symbol
@ -1143,14 +1161,21 @@ addrContentsAfter mem addr = do
addrWidthClass (memAddrWidth mem) $
contentsAfterSegmentOff =<< resolveMemAddr mem addr
-- | Read a bytestring from a sequence of statements.
--
-- This is a helper method for @readByteString@ below.
readByteString' :: MemWidth w
=> BS.ByteString
-- ^ Bytestring read so far (prepended to output)
-> [SegmentRange w]
-- ^ Remaining segments to read from.
-> MemAddr w
-- ^ Address we are reading from (used for error reporting)
-> Word64
-- ^ Number of bytes to read.
-> Either (MemoryError w) BS.ByteString
readByteString' _ _ _ 0 = pure BS.empty
readByteString' _ [] addr _ = Left (InvalidAddr addr)
readByteString' _ [] addr _ = Left $! InvalidAddr addr
readByteString' prev (ByteRegion bs:rest) addr sz =
if toInteger sz <= toInteger (BS.length bs) then
pure $ prev <> BS.take (fromIntegral sz) bs
@ -1158,23 +1183,26 @@ readByteString' prev (ByteRegion bs:rest) addr sz =
let addr' = incAddr (fromIntegral (BS.length bs)) addr
let sz' = sz - fromIntegral (BS.length bs)
readByteString' (prev <> bs) rest addr' sz'
readByteString' _ (RelocationRegion{}:_) addr _ = do
Left (UnexpectedRelocation addr)
readByteString' _ (RelocationRegion r:_) addr _ = do
Left $! UnexpectedRelocation addr r "readBS"
readByteString' prev (BSSRegion cnt:rest) addr sz =
if toInteger sz <= toInteger cnt then
pure $ prev <> BS.replicate (fromIntegral sz) 0
else do
let addr' = incAddr (toInteger sz) addr
let sz' = sz - fromIntegral cnt
seq addr' $
readByteString' (prev <> BS.replicate (fromIntegral cnt) 0) rest addr' sz'
-- | Attemtp to read a bytestring of the given length
readByteString :: Memory w -> MemAddr w -> Word64 -> Either (MemoryError w) BS.ByteString
readByteString mem addr sz = do
l <- addrContentsAfter mem addr
addrWidthClass (memAddrWidth mem) $ readByteString' BS.empty l addr sz
readByteString mem addr sz = addrWidthClass (memAddrWidth mem) $ do
segOff <- resolveMemAddr mem addr
l <- contentsAfterSegmentOff segOff
readByteString' BS.empty l addr sz
-- | Read an address from the value in the segment or report a memory error.
-- | Read an address from the value in the segment or report a memory
-- error.
readAddr :: Memory w
-> Endianness
-> MemAddr w
@ -1182,10 +1210,11 @@ readAddr :: Memory w
readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do
let sz = fromIntegral (addrSize addr)
bs <- readByteString mem addr sz
let Just val = addrRead end bs
Right $ MemAddr 0 val
case addrRead end bs of
Just val -> Right $ MemAddr 0 val
Nothing -> error $ "readAddr internal error: readByteString result too short."
-- | Read a big endian word16
-- | Read a single byte.
readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8
readWord8 mem addr = bsWord8 <$> readByteString mem addr 1

View File

@ -1,5 +1,5 @@
{-|
Copyright) Galois Inc, 2016
Copyright : Galois Inc, 2016
Maintainer : jhendrix@galois.com
Operations for creating a view of memory from an elf file.
@ -15,6 +15,7 @@ Operations for creating a view of memory from an elf file.
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
module Data.Macaw.Memory.ElfLoader
( SectionIndexMap
, memoryForElf
@ -137,7 +138,6 @@ data RegionAdjust
-- MemLoader
type SectionName = B.ByteString
type SymbolName = B.ByteString
data MemLoadWarning
= SectionNotAlloc !SectionName
@ -145,7 +145,12 @@ data MemLoadWarning
| MultipleDynamicSegments
| OverlappingLoadableSegments
| RelocationParseFailure !String
| RelaAndRelPresent
| DynamicRelaAndRelPresent
-- ^ Issued if the dynamic section contains table for DT_REL and
-- DT_RELA.
| DuplicateRelocationSections !B.ByteString
-- ^ @DuplicateRelocationSections nm@ is issued if we encounter
-- both section ".rela$nm" and ".rel$nm".
| UnsupportedSection !SectionName
| UnknownDefinedSymbolBinding !SymbolName Elf.ElfSymbolBinding
| UnknownDefinedSymbolType !SymbolName Elf.ElfSymbolType
@ -172,9 +177,12 @@ instance Show MemLoadWarning where
"File segments containing overlapping addresses; skipping relocations."
show (RelocationParseFailure msg) =
"Error parsing relocations: " ++ msg
show RelaAndRelPresent =
"Relocations contain both explicit and implicit addend form;"
++ " choosing to use only explicit addends."
show DynamicRelaAndRelPresent =
"Dynamic section contains contain offsets for both DT_REL and DT_RELA relocation tables; "
++ " Using only DT_RELA relocations."
show (DuplicateRelocationSections (BSC.unpack -> nm)) =
"File contains both .rela" ++ nm ++ " and .rel" ++ nm
++ " sections; Using only .rela" ++ nm ++ " sections."
show (UnsupportedSection nm) =
"Do not support section " ++ BSC.unpack nm
show (UnknownDefinedSymbolBinding nm bnd) =
@ -292,7 +300,7 @@ type ElfFileSectionMap v = IntervalMap v (ElfSection v)
-- | Map from symbol indices to the associated resolved symbol.
--
-- This drops the first symbol in Elf since that refers to no symbol
newtype SymbolVector = SymbolVector (V.Vector SymbolRef)
newtype SymbolVector = SymbolVector (V.Vector SymbolInfo)
type RelocResolver = Either RelocationError
@ -308,13 +316,18 @@ type RelocationResolver tp
-> MemWord (Elf.RelocationWidth tp)
-> RelocResolver (Relocation (Elf.RelocationWidth tp))
data SomeRelocationResolver w
= forall tp
. (Elf.IsRelocationType tp, w ~ Elf.RelocationWidth tp)
=> SomeRelocationResolver (RelocationResolver tp)
-- | Attempts to resolve a relocation entry into a specific target.
resolveSymbol :: SymbolVector
-- ^ A vector mapping symbol indices to the
-- associated symbol information.
-> Word32
-- ^ Offset of symbol
-> RelocResolver SymbolRef
-> RelocResolver SymbolInfo
resolveSymbol (SymbolVector symtab) symIdx = do
when (symIdx == 0) $
relocError $ RelocationZeroSymbol
@ -328,7 +341,7 @@ resolveRelocationAddr :: SymbolVector
-- associated symbol information.
-> Elf.RelEntry tp
-- ^ A relocation entry
-> RelocResolver RelocationAddr
-> RelocResolver SymbolIdentifier
resolveRelocationAddr symtab rel = do
sym <- resolveSymbol symtab (Elf.relSym rel)
case symbolDef sym of
@ -336,15 +349,16 @@ resolveRelocationAddr symtab rel = do
pure $ SymbolRelocation (symbolName sym) (symbolVersion sym)
SymbolSection idx -> do
pure $ SectionBaseRelocation idx
SymbolFile -> do
SymbolFile _ -> do
relocError $ RelocationFileUnsupported
UndefinedSymbol{} -> do
pure $ SymbolRelocation (symbolName sym) (symbolVersion sym)
-- | Attempt to resolve an X86_64 specific symbol.
relaTargetX86_64 :: RelocationResolver Elf.X86_64_RelocationType
relaTargetX86_64 symtab rel off =
relaTargetX86_64 :: SomeRelocationResolver 64
relaTargetX86_64 = SomeRelocationResolver $ \symtab rel off ->
case Elf.relType rel of
-- JHX Note. These have been commented out until we can validate them.
-- Elf.R_X86_64_GLOB_DAT -> do
-- checkZeroAddend
-- TargetSymbol <$> resolveSymbol symtab rel
@ -355,16 +369,23 @@ relaTargetX86_64 symtab rel off =
Elf.R_X86_64_PC32 -> do
addr <- resolveRelocationAddr symtab rel
pure $ RelativeRelocation addr off LittleEndian 4
Elf.R_X86_64_32 -> do
addr <- resolveRelocationAddr symtab rel
pure $ AbsoluteRelocation addr off LittleEndian 4
Elf.R_X86_64_64 -> do
addr <- resolveRelocationAddr symtab rel
pure $ AbsoluteRelocation addr off LittleEndian
pure $ AbsoluteRelocation addr off LittleEndian 8
-- Jhx Note. These will be needed to support thread local variables.
-- Elf.R_X86_64_TPOFF32 -> undefined
-- Elf.R_X86_64_GOTTPOFF -> undefined
tp -> relocError $ RelocationUnsupportedType (show tp)
{-
This has been diabled until we get actual ARM support.
-- | Attempt to resolve an ARM specific symbol.
relaTargetARM :: RelocationResolver Elf.ARM_RelocationType
relaTargetARM _symtab rel _maddend =
relaTargetARM :: SomeRelocationResolver 32
relaTargetARM = SomeRelocationResolver $ \_symtab rel _maddend ->
case Elf.relType rel of
-- Elf.R_ARM_GLOB_DAT -> do
-- checkZeroAddend rel
@ -374,23 +395,19 @@ relaTargetARM _symtab rel _maddend =
-- checkZeroAddend rel
-- TargetSymbol <$> resolveSymbol symtab rel
tp -> relocError $ RelocationUnsupportedType (show tp)
-}
-- | Creates a relocation map from the contents of a dynamic section.
withRelocationResolver
:: forall w a
. Elf.ElfHeader w
-> (forall tp
. (w ~ Elf.RelocationWidth tp
, Elf.IsRelocationType tp
, Integral (Elf.ElfIntType w)
)
=> RelocationResolver tp
-> (SomeRelocationResolver w
-> MemLoader w a)
-> MemLoader w a
withRelocationResolver hdr f =
case (Elf.headerClass hdr, Elf.headerMachine hdr) of
(Elf.ELFCLASS64, Elf.EM_X86_64) -> f relaTargetX86_64
(Elf.ELFCLASS32, Elf.EM_ARM) -> f relaTargetARM
-- (Elf.ELFCLASS32, Elf.EM_ARM) -> f relaTargetARM
(_,mach) -> throwError $ UnsupportedArchitecture (show mach)
data RelocMap w v = RelocMap !(AddrOffsetMap w v) !(ResolveFn v (MemLoader w) w)
@ -454,7 +471,8 @@ resolveRel end symtab resolver rel presym = do
Right r -> do
pure $ Just (r, fromIntegral cnt)
mkRelocMap :: Elf.ElfHeader w
mkRelocMap :: Elf.ElfData
-> Elf.ElfHeader w
-- ^ format for Elf file
-> SymbolVector
-- ^ Map from symbol indices to associated symbol
@ -463,15 +481,12 @@ mkRelocMap :: Elf.ElfHeader w
-> Maybe L.ByteString
-- ^ Buffer containing relocation entries in Rela format
-> MemLoader w (Some (RelocMap w))
mkRelocMap _hdr _symtab Nothing Nothing = do
mkRelocMap _dta _hdr _symtab Nothing Nothing = do
pure $! Some $ emptyRelocMap
mkRelocMap hdr symtab mrelBuffer (Just relaBuffer) = do
mkRelocMap dta hdr symtab _mrelBuffer (Just relaBuffer) = do
w <- uses mlsMemory memAddrWidth
when (isJust mrelBuffer) $ do
addWarning $ RelaAndRelPresent
reprConstraints w $ do
withRelocationResolver hdr $ \resolver -> do
let dta = Elf.headerData hdr
withRelocationResolver hdr $ \(SomeRelocationResolver resolver) -> do
case Elf.elfRelaEntries dta relaBuffer of
Left msg -> do
addWarning (RelocationParseFailure msg)
@ -480,11 +495,10 @@ mkRelocMap hdr symtab mrelBuffer (Just relaBuffer) = do
-- Create the relocation map using the above information
let m = Map.fromList [ (fromIntegral (Elf.relaOffset r), r) | r <- relocs ]
pure $ Some $ RelocMap m (resolveRela symtab resolver)
mkRelocMap hdr symtab (Just relBuffer) Nothing = do
mkRelocMap dta hdr symtab (Just relBuffer) Nothing = do
w <- uses mlsMemory memAddrWidth
reprConstraints w $ do
withRelocationResolver hdr $ \resolver -> do
let dta = Elf.headerData hdr
withRelocationResolver hdr $ \(SomeRelocationResolver resolver) -> do
case Elf.elfRelEntries dta relBuffer of
Left msg -> do
addWarning (RelocationParseFailure msg)
@ -531,45 +545,53 @@ resolveUndefinedSymbolType nm tp =
mkDefinedSymbol :: SymbolName
-> Elf.ElfSymbolBinding
-> SymbolDefType
-> MemLoader w SymbolDef
-> MemLoader w SymbolBinding
mkDefinedSymbol nm bnd tp = do
prec <- resolveDefinedSymbolPrec nm bnd
pure $ DefinedSymbol prec tp
pure $! DefinedSymbol prec tp
symbolDefTypeMap :: Map Elf.ElfSymbolType SymbolDefType
symbolDefTypeMap = Map.fromList
[ (,) Elf.STT_OBJECT SymbolDefObject
, (,) Elf.STT_FUNC SymbolDefFunc
, (,) Elf.STT_TLS SymbolDefThreadLocal
, (,) Elf.STT_GNU_IFUNC SymbolDefIFunc
]
resolveDefinedSymbolDef :: ElfSymbolTableEntry wtp
-> MemLoader w SymbolDef
-> MemLoader w SymbolBinding
resolveDefinedSymbolDef sym = do
let nm = Elf.steName sym
let bnd = Elf.steBind sym
let idx = Elf.steIndex sym
case Elf.steType sym of
Elf.STT_OBJECT ->
mkDefinedSymbol nm bnd SymbolDefObject
Elf.STT_FUNC ->
mkDefinedSymbol nm bnd SymbolDefFunc
Elf.STT_TLS ->
mkDefinedSymbol nm bnd SymbolDefThreadLocal
Elf.STT_GNU_IFUNC ->
mkDefinedSymbol nm bnd SymbolDefIFunc
Elf.STT_SECTION -> do
Elf.STT_SECTION
| idx < Elf.SHN_LOPROC -> do
when (nm /= "") $ do
addWarning $ ExpectedSectionSymbolNameEmpty nm
when (bnd /= Elf.STB_LOCAL) $ do
addWarning $ ExpectedSectionSymbolLocal
if idx < Elf.SHN_LOPROC then
pure $ SymbolSection (Elf.fromElfSectionIndex idx)
else do
| otherwise -> do
addWarning $ InvalidSectionSymbolIndex idx
mkDefinedSymbol nm bnd SymbolDefUnknown
Elf.STT_FILE -> do
pure $ SymbolFile nm
tp -> do
dtp <-
case Map.lookup tp symbolDefTypeMap of
Just dtp ->
pure dtp
Nothing -> do
addWarning $ UnknownDefinedSymbolType nm tp
mkDefinedSymbol nm bnd SymbolDefUnknown
pure SymbolDefUnknown
mkDefinedSymbol nm bnd dtp
-- | Create a symbol ref from Elf versioned symbol from a shared
-- object or executable.
mkSymbolRef :: ElfSymbolTableEntry wtp
-> SymbolVersion
-> MemLoader w SymbolRef
-> MemLoader w SymbolInfo
mkSymbolRef sym ver = do
let nm = Elf.steName sym
def <-
@ -589,7 +611,7 @@ mkSymbolRef sym ver = do
UndefinedSymbol SymbolRequired
<$> resolveUndefinedSymbolType nm (Elf.steType sym)
pure $
SymbolRef { symbolName = Elf.steName sym
SymbolInfo { symbolName = Elf.steName sym
, symbolVersion = ver
, symbolDef = def
}
@ -597,8 +619,8 @@ mkSymbolRef sym ver = do
-- | Create a symbol ref from Elf versioned symbol from a shared
-- object or executable.
mkDynamicSymbolRef :: Elf.VersionedSymbol tp
-> MemLoader w SymbolRef
mkDynamicSymbolRef :: Elf.VersionedSymbol wtp
-> MemLoader w SymbolInfo
mkDynamicSymbolRef (sym, mverId) = do
let ver = case mverId of
Elf.VersionLocal -> UnversionedSymbol
@ -631,15 +653,21 @@ dynamicRelocationMap hdr ph contents =
symentries <- runDynamic (Elf.dynSymTable dynSection)
symtab <-
SymbolVector <$> traverse mkDynamicSymbolRef (V.drop 1 symentries)
maybeRelBuf <- runDynamic $ Elf.dynRelBuffer dynSection
maybeRelaBuf <- runDynamic $ Elf.dynRelaBuffer dynSection
mkRelocMap hdr symtab maybeRelBuf maybeRelaBuf
mRelBuffer <- runDynamic $ Elf.dynRelBuffer dynSection
mRelaBuffer <- runDynamic $ Elf.dynRelaBuffer dynSection
when (isJust mRelBuffer && isJust mRelaBuffer) $ do
addWarning $ DynamicRelaAndRelPresent
mkRelocMap (Elf.headerData hdr) hdr symtab mRelBuffer mRelaBuffer
------------------------------------------------------------------------
-- Elf segment loading
reprConstraints :: AddrWidthRepr w
-> ((Bits (ElfWordType w), Integral (ElfWordType w), Show (ElfWordType w), MemWidth w) => a)
-> ((Bits (ElfWordType w)
, Integral (Elf.ElfIntType w)
, Integral (ElfWordType w)
, Show (ElfWordType w)
, MemWidth w) => a)
-> a
reprConstraints Addr32 x = x
reprConstraints Addr64 x = x
@ -772,7 +800,7 @@ insertAllocatedSection hdr symtab sectionMap regIdx nm = do
Nothing -> pure ()
Just sec -> do
mRelBuffer <- fmap (fmap (L.fromStrict . elfSectionData)) $
findSection sectionMap (".rela" <> nm)
findSection sectionMap (".rel" <> nm)
mRelaBuffer <- fmap (fmap (L.fromStrict . elfSectionData)) $
findSection sectionMap (".rela" <> nm)
-- Build relocation map
@ -789,8 +817,10 @@ insertAllocatedSection hdr symtab sectionMap regIdx nm = do
-- Get bytes as a lazy bytesize
let bytes = L.fromStrict (elfSectionData sec)
-- Create memory segment
when (isJust mRelBuffer && isJust mRelaBuffer) $ do
addWarning $ DuplicateRelocationSections nm
Some (RelocMap relocMap resolver) <-
mkRelocMap hdr symtab mRelBuffer mRelaBuffer
mkRelocMap (Elf.headerData hdr) hdr symtab mRelBuffer mRelaBuffer
seg <-
memSegment resolver regIdx relocMap (fromIntegral base) flags bytes secSize
-- Load memory segment.
@ -811,9 +841,7 @@ symtabSymbolVector e =
let entries = Elf.elfSymbolTableEntries elfSymTab
-- let lclCnt = fromIntegral $ Elf.elfSymbolTableLocalEntries elfSymTab
-- Create an unversioned symbol from symbol table.
let mk :: ElfSymbolTableEntry wtp -> MemLoader w SymbolRef
mk ent = mkSymbolRef ent ObjectSymbol
SymbolVector <$> traverse mk (V.drop 1 entries)
SymbolVector <$> traverse (`mkSymbolRef` ObjectSymbol) (V.drop 1 entries)
-- | Load allocated Elf sections into memory.
--
@ -886,16 +914,56 @@ memoryForElf opt e = do
data SymbolResolutionError
= EmptySymbolName !Int !Elf.ElfSymbolType
-- ^ Symbol names must be non-empty
| UndefSymbol !BSC.ByteString
-- ^ Symbol was in the undefined section.
| CouldNotResolveAddr !BSC.ByteString
-- ^ Symbol address could not be resolved.
| MultipleSymbolTables
-- ^ The elf file contained multiple symbol tables
instance Show SymbolResolutionError where
show (EmptySymbolName idx tp ) = "Symbol Num " ++ show idx ++ " " ++ show tp ++ " has an empty name."
show (EmptySymbolName idx tp ) =
"Symbol Num " ++ show idx ++ " " ++ show tp ++ " has an empty name."
show (UndefSymbol nm) = "Symbol " ++ BSC.unpack nm ++ " is in the text section."
show (CouldNotResolveAddr sym) = "Could not resolve address of " ++ BSC.unpack sym ++ "."
show MultipleSymbolTables = "Elf contains multiple symbol tables."
-- | Find an absolute symbol, of any time, not just function.
resolveElfFuncSymbolAny' ::
Memory w -- ^ Memory object from Elf file.
-> SectionIndexMap w -- ^ Section index mp from memory
-> Int -- ^ Index of symbol
-> ElfSymbolTableEntry (ElfWordType w)
-> Either SymbolResolutionError (MemSymbol w)
resolveElfFuncSymbolAny' mem secMap idx ste
-- Check symbol is defined
| Elf.steIndex ste == Elf.SHN_UNDEF = Left $ UndefSymbol (Elf.steName ste)
-- Check symbol name is non-empty
| Elf.steName ste == "" = Left $ EmptySymbolName idx (Elf.steType ste)
-- Lookup absolute symbol
| Elf.steIndex ste == Elf.SHN_ABS = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste
case resolveAddr mem 0 (fromIntegral val) of
Just addr -> Right $
MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste)
}
Nothing -> Left $ CouldNotResolveAddr (Elf.steName ste)
-- Lookup symbol stored in specific section
| otherwise = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste
case Map.lookup (Elf.steIndex ste) secMap of
Just (base,sec)
| elfSectionAddr sec <= val && val < elfSectionAddr sec + Elf.elfSectionSize sec
, off <- toInteger val - toInteger (elfSectionAddr sec)
, Just addr <- incSegmentOff base off -> do
Right $ MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste)
}
_ -> Left $ CouldNotResolveAddr (Elf.steName ste)
-- | Find an absolute symbol, of any time, not just function.
resolveElfFuncSymbolAny ::
Memory w -- ^ Memory object from Elf file.
@ -904,37 +972,8 @@ resolveElfFuncSymbolAny ::
-> ElfSymbolTableEntry (ElfWordType w)
-> Maybe (Either SymbolResolutionError (MemSymbol w))
resolveElfFuncSymbolAny mem secMap idx ste
-- Check symbol is defined
| Elf.steIndex ste == Elf.SHN_UNDEF = Nothing
-- Check symbol name is non-empty
| Elf.steName ste == "" = Just $ Left $ EmptySymbolName idx (Elf.steType ste)
-- Lookup absolute symbol
| Elf.steIndex ste == Elf.SHN_ABS = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste
case resolveAddr mem 0 (fromIntegral val) of
Just addr -> Just $ Right $
MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste)
}
Nothing -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste)
-- Lookup symbol stored in specific section
| otherwise = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste
case Map.lookup (Elf.steIndex ste) secMap of
Just (base,sec)
| elfSectionAddr sec <= val && val < elfSectionAddr sec + Elf.elfSectionSize sec
, off <- toInteger val - toInteger (elfSectionAddr sec)
, Just addr <- incSegmentOff base off -> do
Just $ Right $ MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste)
}
_ -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste)
| otherwise = Just (resolveElfFuncSymbolAny' mem secMap idx ste)
-- | This resolves an Elf symbol into a MemSymbol if it is likely a
-- pointer to a resolved function.
@ -951,30 +990,8 @@ resolveElfFuncSymbol mem secMap idx ste
-- Check symbol is defined
| Elf.steIndex ste == Elf.SHN_UNDEF = Nothing
-- Check symbol name is non-empty
| Elf.steName ste == "" = Just $ Left $ EmptySymbolName idx (Elf.steType ste)
-- Lookup absolute symbol
| Elf.steIndex ste == Elf.SHN_ABS = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste
case resolveAddr mem 0 (fromIntegral val) of
Just addr -> Just $ Right $
MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste)
}
Nothing -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste)
-- Lookup symbol stored in specific section
| otherwise = reprConstraints (memAddrWidth mem) $ do
let val = Elf.steValue ste
case Map.lookup (Elf.steIndex ste) secMap of
Just (base,sec)
| elfSectionAddr sec <= val && val < elfSectionAddr sec + Elf.elfSectionSize sec
, off <- toInteger val - toInteger (elfSectionAddr sec)
, Just addr <- incSegmentOff base off -> do
Just $ Right $ MemSymbol { memSymbolName = Elf.steName ste
, memSymbolStart = addr
, memSymbolSize = fromIntegral (Elf.steSize ste)
}
_ -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste)
| Elf.steName ste == "" = Just $ (resolveElfFuncSymbolAny' mem secMap idx ste)
| otherwise = Just (resolveElfFuncSymbolAny' mem secMap idx ste)
-- | Resolve symbol table entries defined in this Elf file to
-- a mem symbol
@ -1048,7 +1065,7 @@ resolveElfContents :: LoadOptions
, Maybe (MemSegmentOff w) -- Entry point(s)
, [MemSymbol w] -- Function symbols
)
resolveElfContents loadOpts e = do
resolveElfContents loadOpts e =
case Elf.elfType e of
Elf.ET_REL -> do
(secMap, mem, warnings) <- memoryForElf loadOpts e

View File

@ -60,7 +60,6 @@ import Data.Parameterized.Some
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Flexdis86 as F
import Text.PrettyPrint.ANSI.Leijen (Pretty(..), text)
@ -89,7 +88,6 @@ import Data.Macaw.CFG.DemandSet
import qualified Data.Macaw.Memory.Permissions as Perm
import Data.Macaw.Types
( n8
, n64
, HasRepr(..)
)
import Data.Macaw.X86.ArchTypes
@ -129,7 +127,7 @@ rootLoc ip = ExploreLoc { loc_ip = ip
initX86State :: ExploreLoc -- ^ Location to explore from.
-> RegState X86Reg (Value X86_64 ids)
initX86State loc = mkRegState Initial
& curIP .~ RelocatableValue knownNat (relativeSegmentAddr (loc_ip loc))
& curIP .~ RelocatableValue Addr64 (relativeSegmentAddr (loc_ip loc))
& boundValue X87_TopReg .~ mkLit knownNat (toInteger (loc_x87_top loc))
& boundValue DF .~ BoolValue (loc_df_flag loc)
@ -152,42 +150,15 @@ initGenState nonce_gen mem addr s =
, _blockState = emptyPreBlock s 0 addr
, genAddr = addr
, genMemory = mem
, _genRegUpdates = MapF.empty
, avxMode = False
, _genRegUpdates = MapF.empty
}
-- | Describes the reason the translation error occured.
data X86TranslateErrorReason
= DecodeError (MemoryError 64)
-- ^ A memory error occured in decoding with Flexdis
| UnsupportedInstruction F.InstructionInstance
-- ^ The instruction is not supported by the translator
| ExecInstructionError F.InstructionInstance Text
-- ^ An error occured when trying to translate the instruction
-- | Describes an error that occured in translation
data X86TranslateError = X86TranslateError { transErrorAddr :: !(MemSegmentOff 64)
, transErrorReason :: !X86TranslateErrorReason
}
instance Show X86TranslateError where
show err =
case transErrorReason err of
DecodeError me ->
"Memory error at " ++ addr ++ ": " ++ show me
UnsupportedInstruction i ->
"Unsupported instruction at " ++ addr ++ ": " ++ show i
ExecInstructionError i msg ->
"Error in interpretting instruction at " ++ addr ++ ": " ++ show i ++ "\n "
++ Text.unpack msg
where addr = show (transErrorAddr err)
returnWithError :: GenState st_s ids
-> X86TranslateErrorReason
-> ST st_s (BlockSeq ids, MemWord 64, Maybe X86TranslateError)
returnWithError gs rsn =
-> X86TranslateError 64
-> ST st_s (BlockSeq ids, MemWord 64, Maybe (X86TranslateError 64))
returnWithError gs err =
let curIPAddr = genAddr gs
err = X86TranslateError curIPAddr rsn
term = (`TranslateError` Text.pack (show err))
b = finishBlock' (gs^.blockState) term
res = seq b $ gs^.blockSeq & frontierBlocks %~ (Seq.|> b)
@ -202,32 +173,32 @@ disassembleBlockImpl :: forall st_s ids
-- ^ Maximum offset for this addr.
-> [SegmentRange 64]
-- ^ List of contents to read next.
-> ST st_s (BlockSeq ids, MemWord 64, Maybe X86TranslateError)
-> ST st_s (BlockSeq ids, MemWord 64, Maybe (X86TranslateError 64))
disassembleBlockImpl gs max_offset contents = do
let curIPAddr = genAddr gs
case readInstruction' curIPAddr contents of
Left msg -> do
returnWithError gs (DecodeError msg)
returnWithError gs msg
Right (i, next_ip_off) -> do
let seg = msegSegment curIPAddr
let off = msegOffset curIPAddr
let next_ip :: MemAddr 64
next_ip = relativeAddr seg next_ip_off
let next_ip_val :: BVValue X86_64 ids 64
next_ip_val = RelocatableValue n64 next_ip
next_ip_val = RelocatableValue Addr64 next_ip
case execInstruction (ValueExpr next_ip_val) i of
Nothing -> do
returnWithError gs (UnsupportedInstruction i)
returnWithError gs (UnsupportedInstruction (genAddr gs) i)
Just exec -> do
gsr <-
runExceptT $ runX86Generator (\() s -> pure (mkGenResult s)) gs $ do
let next_ip_word = fromIntegral $ segmentOffset seg + off
let line = show curIPAddr ++ ": " ++ show (F.ppInstruction next_ip_word i)
addStmt (Comment (Text.pack line))
asAtomicStateUpdate (relativeSegmentAddr curIPAddr) exec
exec
case gsr of
Left msg -> do
returnWithError gs (ExecInstructionError i msg)
returnWithError gs (ExecInstructionError (genAddr gs) i msg)
Right res -> do
case resState res of
-- If IP after interpretation is the next_ip, there are no blocks, and we
@ -247,10 +218,11 @@ disassembleBlockImpl gs max_offset contents = do
, _genRegUpdates = _genRegUpdates gs
, avxMode = avxMode gs
}
case dropSegmentRangeListBytes contents (fromIntegral (next_ip_off - off)) of
Left msg -> do
let err = dropErrorAsMemError (relativeSegmentAddr curIPAddr) msg
returnWithError gs (DecodeError err)
returnWithError gs (FlexdisMemoryError err)
Right contents' ->
disassembleBlockImpl gs2 max_offset contents'
_ -> do
@ -265,7 +237,7 @@ disassembleBlock :: forall s
-> ExploreLoc
-> MemWord 64
-- ^ Maximum number of bytes in ths block.
-> ST s ([Block X86_64 s], MemWord 64, Maybe X86TranslateError)
-> ST s ([Block X86_64 s], MemWord 64, Maybe (X86TranslateError 64))
disassembleBlock mem nonce_gen loc max_size = do
let addr = loc_ip loc
let gs = initGenState nonce_gen mem addr (initX86State loc)
@ -273,7 +245,7 @@ disassembleBlock mem nonce_gen loc max_size = do
(gs', next_ip_off, maybeError) <-
case addrContentsAfter mem (relativeSegmentAddr addr) of
Left msg ->
returnWithError gs (DecodeError msg)
returnWithError gs (FlexdisMemoryError msg)
Right contents ->
disassembleBlockImpl gs sz contents
assert (next_ip_off > msegOffset addr) $ do
@ -386,7 +358,7 @@ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do
(gs', next_ip_off, maybeError) <- lift $
case addrContentsAfter mem (relativeSegmentAddr addr) of
Left msg ->
returnWithError gs (DecodeError msg)
returnWithError gs (FlexdisMemoryError msg)
Right contents -> do
disassembleBlockImpl gs (off + max_size) contents
assert (next_ip_off > off) $ do
@ -499,6 +471,10 @@ postX86TermStmtAbsState preservePred mem s regs tstmt =
}
Just (nextIP, absEvalCall params s nextIP)
_ -> error $ "Sycall could not interpret next IP"
Hlt ->
Nothing
UD2 ->
Nothing
-- | Common architecture information for X86_64

View File

@ -38,7 +38,6 @@ module Data.Macaw.X86.ArchTypes
) where
import Data.Bits
import Data.Int
import Data.Word(Word8)
import Data.Macaw.CFG
import Data.Macaw.CFG.Rewriter
@ -95,10 +94,20 @@ repValSizeByteCount = memReprBytes . repValSizeMemRepr
------------------------------------------------------------------------
-- X86TermStmt
data X86TermStmt ids = X86Syscall
data X86TermStmt ids
= X86Syscall
-- ^ A system call
| Hlt
-- ^ The halt instruction.
--
-- In protected mode outside ring 0, this just raised a GP(0) exception.
| UD2
-- ^ This raises a invalid opcode instruction.
instance PrettyF X86TermStmt where
prettyF X86Syscall = text "x86_syscall"
prettyF Hlt = text "hlt"
prettyF UD2 = text "ud2"
------------------------------------------------------------------------
-- X86PrimLoc
@ -155,7 +164,7 @@ data SSE_Cmp
-- ^ Neither value is a NaN, no signalling on QNaN
deriving (Eq, Ord)
sseCmpEntries :: [(Int8, SSE_Cmp, String)]
sseCmpEntries :: [(Word8, SSE_Cmp, String)]
sseCmpEntries =
[ (0, EQ_OQ, "EQ_OQ")
, (1, LT_OS, "LT_OS")
@ -167,7 +176,7 @@ sseCmpEntries =
, (7, ORD_Q, "ORD_Q")
]
sseIdxCmpMap :: Map.Map Int8 SSE_Cmp
sseIdxCmpMap :: Map.Map Word8 SSE_Cmp
sseIdxCmpMap = Map.fromList [ (idx,val) | (idx, val, _) <- sseCmpEntries ]
sseCmpNameMap :: Map.Map SSE_Cmp String
@ -180,7 +189,7 @@ instance Show SSE_Cmp where
-- The nothing case should never occur.
Nothing -> "Unexpected name"
lookupSSECmp :: Int8 -> Maybe SSE_Cmp
lookupSSECmp :: Word8 -> Maybe SSE_Cmp
lookupSSECmp i = Map.lookup i sseIdxCmpMap
-- | A binary SSE operation
@ -871,3 +880,5 @@ rewriteX86TermStmt :: X86TermStmt src -> Rewriter X86_64 s src tgt (X86TermStmt
rewriteX86TermStmt f =
case f of
X86Syscall -> pure X86Syscall
Hlt -> pure Hlt
UD2 -> pure UD2

View File

@ -11,6 +11,7 @@ Macaw memory object.
{-# LANGUAGE UndecidableInstances #-}
module Data.Macaw.X86.Flexdis
( MemoryByteReader
, X86TranslateError(..)
, runMemoryByteReader
, readInstruction
, readInstruction'
@ -18,7 +19,11 @@ module Data.Macaw.X86.Flexdis
import Control.Monad.Except
import Control.Monad.State.Strict
import Data.Bits
import qualified Data.ByteString as BS
import Data.Int
import Data.Text (Text)
import Data.Text as Text
import Data.Word
import Data.Macaw.Memory
@ -30,28 +35,12 @@ import Flexdis86.ByteReader
------------------------------------------------------------------------
-- MemStream
data PrevData w = PrevData { prevBytes :: [Word8]
, prevRanges :: [SegmentRange w]
}
emptyPrevData :: PrevData w
emptyPrevData = PrevData { prevBytes = [], prevRanges = [] }
consByte :: Word8 -> PrevData w -> PrevData w
consByte w pd = pd { prevBytes = w:prevBytes pd
}
prevSegments :: PrevData w -> [SegmentRange w]
prevSegments pd | null (prevBytes pd) = reverse (prevRanges pd)
| otherwise = reverse (prevRanges pd) ++ [ByteRegion (BS.pack (prevBytes pd))]
-- | A stream of memory
data MemStream w = MS { msSegment :: !(MemSegment w)
data MemStream w = MS { msInitial :: ![SegmentRange w]
, msSegment :: !(MemSegment w)
-- ^ The current segment
, msStart :: !(MemWord w)
-- ^ The initial offset for the stream.
, msPrev :: !(PrevData w)
-- ^ The values read so far.
, msOffset :: !(MemWord w)
-- ^ The current address
, msNext :: ![SegmentRange w]
@ -59,18 +48,45 @@ data MemStream w = MS { msSegment :: !(MemSegment w)
}
msStartAddr :: MemWidth w => MemStream w -> MemAddr w
msStartAddr ms = relativeSegmentAddr segOff
where Just segOff = resolveSegmentOff (msSegment ms) (msStart ms)
msStartAddr ms = relativeAddr (msSegment ms) (msStart ms)
msAddr :: MemWidth w => MemStream w -> MemAddr w
msAddr ms = relativeSegmentAddr segOff
where Just segOff = resolveSegmentOff (msSegment ms) (msOffset ms)
msAddr ms = relativeAddr (msSegment ms) (msOffset ms)
------------------------------------------------------------------------
-- MemoryByteReader
newtype MemoryByteReader w a = MBR { unMBR :: ExceptT (MemoryError w) (State (MemStream w)) a }
deriving (Functor, Applicative, MonadError (MemoryError w))
-- | Describes the reason the translation error occured.
data X86TranslateError w
= FlexdisMemoryError !(MemoryError w)
-- ^ A memory error occured in decoding with Flexdis
| InvalidInstruction !(MemAddr w) ![SegmentRange w]
-- ^ The memory reader could not parse the value starting at the given address
-- the last byte read was at the offset.
| UserMemoryError !(MemAddr w) !String
-- ^ the memory reader threw an unspecified error at the given location.
| UnsupportedInstruction !(MemSegmentOff w) !Flexdis.InstructionInstance
-- ^ The instruction is not supported by the translator
| ExecInstructionError !(MemSegmentOff w) !Flexdis.InstructionInstance Text
-- ^ An error occured when trying to translate the instruction
instance MemWidth w => Show (X86TranslateError w) where
show err =
case err of
FlexdisMemoryError me ->
show me
InvalidInstruction start rng ->
"Invalid instruction at " ++ show start ++ ": " ++ show rng
UserMemoryError addr msg ->
"Memory error " ++ show addr ++ ": " ++ msg
UnsupportedInstruction addr i ->
"Unsupported instruction at " ++ show addr ++ ": " ++ show i
ExecInstructionError addr i msg ->
"Error in interpretting instruction at " ++ show addr ++ ": " ++ show i ++ "\n "
++ Text.unpack msg
newtype MemoryByteReader w a = MBR { unMBR :: ExceptT (X86TranslateError w) (State (MemStream w)) a }
deriving (Functor, Applicative, MonadError (X86TranslateError w))
instance MemWidth w => Monad (MemoryByteReader w) where
return = MBR . return
@ -83,11 +99,11 @@ instance MemWidth w => Monad (MemoryByteReader w) where
runMemoryByteReader' :: MemSegmentOff w -- ^ Starting segment
-> [SegmentRange w] -- ^ Data to read next.
-> MemoryByteReader w a -- ^ Byte reader to read values from.
-> Either (MemoryError w) (a, MemWord w)
-> Either (X86TranslateError w) (a, MemWord w)
runMemoryByteReader' addr contents (MBR m) = do
let ms0 = MS { msSegment = msegSegment addr
let ms0 = MS { msInitial = contents
, msSegment = msegSegment addr
, msStart = msegOffset addr
, msPrev = emptyPrevData
, msOffset = msegOffset addr
, msNext = contents
}
@ -105,15 +121,73 @@ runMemoryByteReader :: Memory w
-- Added so we can check for read and/or execute permission.
-> MemSegmentOff w -- ^ Starting segment
-> MemoryByteReader w a -- ^ Byte reader to read values from.
-> Either (MemoryError w) (a, MemWord w)
-> Either (X86TranslateError w) (a, MemWord w)
runMemoryByteReader mem reqPerm addr m =
addrWidthClass (memAddrWidth mem) $ do
let seg = msegSegment addr
if not (segmentFlags seg `Perm.hasPerm` reqPerm) then
Left $ PermissionsError (relativeSegmentAddr addr)
else do
contents <- addrContentsAfter mem (relativeSegmentAddr addr)
runMemoryByteReader' addr contents m
Left $ FlexdisMemoryError $ PermissionsError (relativeSegmentAddr addr)
else
case addrContentsAfter mem (relativeSegmentAddr addr) of
Right contents -> runMemoryByteReader' addr contents m
Left e -> Left (FlexdisMemoryError e)
throwMemoryError :: MemoryError w -> MemoryByteReader w a
throwMemoryError e = MBR $ throwError (FlexdisMemoryError e)
sbyte :: (Bits w, Num w) => Word8 -> Int -> w
sbyte w o = fromIntegral i8 `shiftL` (8*o)
where i8 :: Int8
i8 = fromIntegral w
ubyte :: (Bits w, Num w) => Word8 -> Int -> w
ubyte w o = fromIntegral w `shiftL` (8*o)
jsizeCount :: Flexdis.JumpSize -> Int
jsizeCount Flexdis.JSize8 = 1
jsizeCount Flexdis.JSize16 = 2
jsizeCount Flexdis.JSize32 = 4
getUnsigned32 :: MemWidth w => BS.ByteString -> MemoryByteReader w Word32
getUnsigned32 s =
case BS.unpack s of
w0:w1:w2:w3:_ -> do
pure $! ubyte w3 3 .|. ubyte w2 2 .|. ubyte w1 1 .|. ubyte w0 0
_ -> do
ms <- MBR get
throwMemoryError $ AccessViolation (msAddr ms)
getJumpBytes :: MemWidth w => BS.ByteString -> Flexdis.JumpSize -> MemoryByteReader w (Int64, Int)
getJumpBytes s sz =
case (sz, BS.unpack s) of
(Flexdis.JSize8, w0:_) -> do
pure (sbyte w0 0, 1)
(Flexdis.JSize16, w0:w1:_) -> do
pure (sbyte w1 1 .|. ubyte w0 0, 2)
(Flexdis.JSize32, _) -> do
v <- getUnsigned32 s
pure (fromIntegral (fromIntegral v :: Int32), 4)
_ -> do
ms <- MBR get
throwMemoryError $ AccessViolation (msAddr ms)
updateMSByteString :: MemWidth w
=> MemStream w
-> BS.ByteString
-> [SegmentRange w]
-> MemWord w
-> MemoryByteReader w ()
updateMSByteString ms bs rest c = do
let bs' = BS.drop (fromIntegral (memWordInteger c)) bs
let ms' = ms { msOffset = msOffset ms + c
, msNext =
if BS.null bs' then
rest
else
ByteRegion bs' : rest
}
seq ms' $ MBR $ put ms'
instance MemWidth w => ByteReader (MemoryByteReader w) where
readByte = do
@ -121,26 +195,82 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where
-- If remaining bytes are empty
case msNext ms of
[] ->
MBR $ throwError $ AccessViolation (msAddr ms)
throwMemoryError $ AccessViolation (msAddr ms)
-- Throw error if we try to read a relocation as a symbolic reference
BSSRegion _:_ -> do
MBR $ throwError $ UnexpectedBSS (msAddr ms)
RelocationRegion{}:_ -> do
MBR $ throwError $ UnexpectedRelocation (msAddr ms)
throwMemoryError $ UnexpectedBSS (msAddr ms)
RelocationRegion r:_ -> do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "byte0"
ByteRegion bs:rest -> do
if BS.null bs then do
throwError $ AccessViolation (msAddr ms)
throwMemoryError $ AccessViolation (msAddr ms)
else do
let v = BS.head bs
let ms' = ms { msPrev = consByte v (msPrev ms)
, msOffset = msOffset ms + 1
, msNext = ByteRegion (BS.tail bs) : rest
updateMSByteString ms bs rest 1
pure $! v
readDImm = do
ms <- MBR get
-- If remaining bytes are empty
case msNext ms of
[] ->
throwMemoryError $ AccessViolation (msAddr ms)
-- Throw error if we try to read a relocation as a symbolic reference
BSSRegion _:_ -> do
throwMemoryError $ UnexpectedBSS (msAddr ms)
RelocationRegion r:rest -> do
case r of
AbsoluteRelocation sym off end szCnt -> do
unless (szCnt == 4 && end == LittleEndian) $ do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "dimm0"
let ms' = ms { msOffset = msOffset ms + 4
, msNext = rest
}
MBR $ v <$ put ms'
seq ms' $ MBR $ put ms'
pure $ Flexdis.Imm32SymbolOffset sym (fromIntegral off)
-- RelativeOffset addr ioff (fromIntegral off)
RelativeRelocation _addr _off _end _szCnt -> do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "dimm1"
ByteRegion bs:rest -> do
v <- getUnsigned32 bs
updateMSByteString ms bs rest 4
pure $! Flexdis.Imm32Concrete v
readJump sz = do
ms <- MBR get
-- If remaining bytes are empty
case msNext ms of
[] ->
throwMemoryError $ AccessViolation (msAddr ms)
-- Throw error if we try to read a relocation as a symbolic reference
BSSRegion _:_ -> do
throwMemoryError $ UnexpectedBSS (msAddr ms)
RelocationRegion r:rest -> do
case r of
AbsoluteRelocation{} -> do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump0"
RelativeRelocation addr off end szCnt -> do
when (szCnt /= jsizeCount sz) $ do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump1"
when (end /= LittleEndian) $ do
throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump2"
let ms' = ms { msOffset = msOffset ms + fromIntegral (jsizeCount sz)
, msNext = rest
}
seq ms' $ MBR $ put ms'
let ioff = fromIntegral $ msOffset ms - msStart ms
pure $ Flexdis.RelativeOffset addr ioff (fromIntegral off)
ByteRegion bs:rest -> do
(v,c) <- getJumpBytes bs sz
updateMSByteString ms bs rest (fromIntegral c)
pure (Flexdis.FixedOffset v)
invalidInstruction = do
ms <- MBR $ get
throwError $ InvalidInstruction (msStartAddr ms) (prevSegments (msPrev ms))
throwError $ InvalidInstruction (msStartAddr ms)
(takeSegmentPrefix (msInitial ms) (msOffset ms - msStart ms))
------------------------------------------------------------------------
-- readInstruction
@ -150,12 +280,12 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where
readInstruction' :: MemSegmentOff 64
-- ^ Address to read from.
-> [SegmentRange 64] -- ^ Data to read next.
-> Either (MemoryError 64)
-> Either (X86TranslateError 64)
(Flexdis.InstructionInstance, MemWord 64)
readInstruction' addr contents = do
let seg = msegSegment addr
if not (segmentFlags seg `Perm.hasPerm` Perm.execute) then
Left $ PermissionsError (relativeSegmentAddr addr)
Left $ FlexdisMemoryError $ PermissionsError (relativeSegmentAddr addr)
else do
runMemoryByteReader' addr contents Flexdis.disassembleInstruction
@ -163,8 +293,9 @@ readInstruction' addr contents = do
readInstruction :: Memory 64
-> MemSegmentOff 64
-- ^ Address to read from.
-> Either (MemoryError 64)
-> Either (X86TranslateError 64)
(Flexdis.InstructionInstance, MemWord 64)
readInstruction mem addr = do
readInstruction' addr
=<< addrContentsAfter mem (relativeSegmentAddr addr)
case addrContentsAfter mem (relativeSegmentAddr addr) of
Left e -> Left (FlexdisMemoryError e)
Right l -> readInstruction' addr l

View File

@ -26,6 +26,7 @@ module Data.Macaw.X86.Generator
, evalAssignRhs
, shiftX86GCont
, asAtomicStateUpdate
, getState
-- * GenResult
, GenResult(..)
, finishBlock
@ -321,6 +322,7 @@ runX86Generator :: X86GCont st_s ids a
-> ExceptT Text (ST st_s) (GenResult ids)
runX86Generator k st (X86G m) = runReaderT (runContT m (ReaderT . k)) st
-- | Capture the current continuation and 'GenState' in an 'X86Generator'
shiftX86GCont :: (X86GCont st_s ids a
-> GenState st_s ids

View File

@ -20,13 +20,16 @@ module Data.Macaw.X86.Getters
, getBVValue
, getSignExtendedValue
, truncateBVValue
, getCallTarget
, getJumpTarget
, HasRepSize(..)
, getAddrRegOrSegment
, getAddrRegSegmentOrImm
, readXMMValue
, readYMMValue
, getImm32
-- * Utilities
, reg8Loc
, reg16Loc
, reg32Loc
, reg64Loc
@ -50,8 +53,7 @@ import Data.Parameterized.Some
import qualified Flexdis86 as F
import GHC.TypeLits (KnownNat)
import Data.Macaw.CFG (MemRepr(..))
import Data.Macaw.Memory (Endianness(..))
import Data.Macaw.CFG
import Data.Macaw.Types (BVType, n8, n16, n32, n64, typeWidth)
import Data.Macaw.X86.Generator
import Data.Macaw.X86.Monad
@ -81,11 +83,15 @@ xmmMemRepr = BVMemRepr (knownNat :: NatRepr 16) LittleEndian
ymmMemRepr :: MemRepr (BVType 256)
ymmMemRepr = BVMemRepr (knownNat :: NatRepr 32) LittleEndian
------------------------------------------------------------------------
-- Utilities
-- | Return a location from a 16-bit register
reg8Loc :: F.Reg8 -> Location addr (BVType 8)
reg8Loc (F.LowReg8 r) = reg_low8 $ X86_GP $ F.Reg64 r
reg8Loc (F.HighReg8 r) = reg_high8 $ X86_GP $ F.Reg64 r
reg8Loc _ = error "internal: Unepxected byteReg"
-- | Return a location from a 16-bit register
reg16Loc :: F.Reg16 -> Location addr (BVType 16)
reg16Loc = reg_low16 . X86_GP . F.reg16_reg
@ -98,7 +104,6 @@ reg32Loc = reg_low32 . X86_GP . F.reg32_reg
reg64Loc :: F.Reg64 -> Location addr (BVType 64)
reg64Loc = fullRegister . X86_GP
------------------------------------------------------------------------
-- Getters
@ -120,7 +125,8 @@ getBVAddress ar =
let offset = uext n64 (base .+ scale .+ bvLit n32 (toInteger (F.displacementInt i32)))
mk_absolute seg offset
F.IP_Offset_32 _seg _i32 -> fail "IP_Offset_32"
F.Offset_32 _seg _w32 -> fail "Offset_32"
F.Offset_32 _seg _w32 ->
fail "Offset_32"
F.Offset_64 seg w64 -> do
mk_absolute seg (bvLit n64 (toInteger w64))
F.Addr_64 seg m_r64 m_int_r64 i32 -> do
@ -147,7 +153,8 @@ getBVAddress ar =
-- We could nevertheless call 'getSegmentBase' in all cases
-- here, but that adds a lot of noise to the AST in the common
-- case of segments other than FS or GS.
| seg == F.CS || seg == F.DS || seg == F.ES || seg == F.SS = return offset
| seg == F.CS || seg == F.DS || seg == F.ES || seg == F.SS =
return offset
-- The FS and GS segments can be non-zero based in 64-bit mode.
| otherwise = do
base <- getSegmentBase seg
@ -217,16 +224,17 @@ getSomeBVLocation v =
F.FPMem32 ar -> getBVAddress ar >>= mk . (`MemoryAddr` (floatMemRepr SingleFloatRepr))
F.FPMem64 ar -> getBVAddress ar >>= mk . (`MemoryAddr` (floatMemRepr DoubleFloatRepr))
F.FPMem80 ar -> getBVAddress ar >>= mk . (`MemoryAddr` (floatMemRepr X86_80FloatRepr))
F.ByteReg (F.LowReg8 r) -> mk $ reg_low8 $ X86_GP $ F.Reg64 r
F.ByteReg (F.HighReg8 r) -> mk $ reg_high8 $ X86_GP $ F.Reg64 r
F.ByteReg _ -> error "internal: getSomeBVLocation illegal ByteReg"
F.WordReg r -> mk (reg16Loc r)
F.DWordReg r -> mk (reg32Loc r)
F.QWordReg r -> mk (reg64Loc r)
F.ByteReg r -> mk $ reg8Loc r
F.WordReg r -> mk $ reg16Loc r
F.DWordReg r -> mk $ reg32Loc r
F.QWordReg r -> mk $ reg64Loc r
F.ByteImm _ -> noImm
F.WordImm _ -> noImm
F.DWordImm _ -> noImm
F.QWordImm _ -> noImm
F.ByteSignedImm _ -> noImm
F.WordSignedImm _ -> noImm
F.DWordSignedImm _ -> noImm
F.JumpOffset{} -> fail "Jump Offset is not a location."
where
noImm :: Monad m => m a
@ -244,15 +252,23 @@ getBVLocation l expected = do
Nothing ->
fail $ "Widths aren't equal: " ++ show (typeWidth v) ++ " and " ++ show expected
getImm32 :: F.Imm32 -> X86Generator st ids (BVExpr ids 32)
getImm32 (F.Imm32Concrete w) =
pure $ bvLit n32 (toInteger w)
getImm32 (F.Imm32SymbolOffset sym off) = do
let symExpr = ValueExpr $ SymbolValue Addr64 sym
let offExpr = bvLit n64 (toInteger off)
pure $ bvTrunc' n32 (symExpr .+ offExpr)
-- | Return a bitvector value.
getSomeBVValue :: F.Value -> X86Generator st ids (SomeBV (Expr ids))
getSomeBVValue v =
case v of
F.ByteImm w -> return $ SomeBV $ bvLit n8 $ toInteger w
F.WordImm w -> return $ SomeBV $ bvLit n16 $ toInteger w
F.DWordImm w -> return $ SomeBV $ bvLit n32 $ toInteger w
F.QWordImm w -> return $ SomeBV $ bvLit n64 $ toInteger w
F.JumpOffset _ off -> return $ SomeBV $ bvLit n64 $ toInteger off
F.ByteImm w -> pure $! SomeBV $ bvLit n8 $ toInteger w
F.WordImm w -> pure $! SomeBV $ bvLit n16 $ toInteger w
F.DWordImm i -> SomeBV <$> getImm32 i
F.QWordImm w -> pure $! SomeBV $ bvLit n64 $ toInteger w
F.JumpOffset _ _ -> fail "Jump Offset should not be treated as a BVValue."
_ -> do
SomeBV l <- getSomeBVLocation v
SomeBV <$> get l
@ -284,20 +300,30 @@ getSignExtendedValue v out_w =
F.Mem64 ar -> mk =<< getBV64Addr ar
F.Mem128 ar -> mk =<< getBV128Addr ar
F.Mem256 ar -> mk =<< getBV256Addr ar
F.ByteReg (F.LowReg8 r) -> mk $ reg_low8 $ X86_GP $ F.Reg64 r
F.ByteReg (F.HighReg8 r) -> mk $ reg_high8 $ X86_GP $ F.Reg64 r
F.WordReg r -> mk (reg16Loc r)
F.DWordReg r -> mk (reg32Loc r)
F.QWordReg r -> mk (reg64Loc r)
F.XMMReg r -> mk (xmm_avx r)
F.YMMReg r -> mk (ymm r)
F.ByteImm i -> return $! bvLit out_w (toInteger i)
F.WordImm i -> return $! bvLit out_w (toInteger i)
F.DWordImm i -> return $! bvLit out_w (toInteger i)
F.QWordImm i -> return $! bvLit out_w (toInteger i)
F.ByteImm i
| Just Refl <- testEquality n8 out_w ->
pure $! bvLit n8 (toInteger i)
F.WordImm i
| Just Refl <- testEquality n16 out_w ->
pure $! bvLit n16 (toInteger i)
F.DWordImm (F.Imm32Concrete i)
| Just Refl <- testEquality n32 out_w ->
pure $! bvLit n32 (toInteger i)
F.QWordImm i
| Just Refl <- testEquality n64 out_w ->
pure $! bvLit n64 (toInteger i)
F.ByteSignedImm i -> pure $! bvLit out_w (toInteger i)
F.WordSignedImm i -> pure $! bvLit out_w (toInteger i)
F.DWordSignedImm i -> pure $! bvLit out_w (toInteger i)
F.ByteReg r -> mk $ reg8Loc r
F.WordReg r -> mk $ reg16Loc r
F.DWordReg r -> mk $ reg32Loc r
F.QWordReg r -> mk $ reg64Loc r
_ -> fail $ "getSignExtendedValue given unexpected width: " ++ show v
where
@ -322,14 +348,34 @@ truncateBVValue n (SomeBV v)
| otherwise =
fail $ "Widths isn't >=: " ++ show (typeWidth v) ++ " and " ++ show n
resolveJumpOffset :: F.JumpOffset -> X86Generator s ids (BVExpr ids 64)
resolveJumpOffset (F.FixedOffset off) =
pure $ bvLit n64 (toInteger off)
resolveJumpOffset (F.RelativeOffset symId insOff off) = do
arepr <- memAddrWidth . genMemory <$> getState
let symVal = ValueExpr (SymbolValue arepr symId)
addrOff <- genAddr <$> getState
let relocAddr = relativeAddr (msegSegment addrOff) (msegOffset addrOff + fromIntegral insOff)
pure $ symVal .+ bvLit n64 (toInteger off) .- ValueExpr (RelocatableValue arepr relocAddr)
-- | Return the target of a call or jump instruction.
getCallTarget :: F.Value
-> X86Generator st ids (BVExpr ids 64)
getCallTarget v =
case v of
F.Mem64 ar -> get =<< getBV64Addr ar
F.QWordReg r -> get (reg64Loc r)
F.JumpOffset _ joff -> do
(.+) <$> get rip <*> resolveJumpOffset joff
_ -> fail "Unexpected argument"
-- | Return the target of a call or jump instruction.
getJumpTarget :: F.Value
-> X86Generator st ids (BVExpr ids 64)
getJumpTarget v =
case v of
F.Mem64 ar -> get =<< getBV64Addr ar
F.QWordReg r -> get (reg64Loc r)
F.JumpOffset _ off -> (bvLit n64 (toInteger off) .+) <$> get rip
F.JumpOffset _ joff -> do
(.+) <$> get rip <*> resolveJumpOffset joff
_ -> fail "Unexpected argument"
------------------------------------------------------------------------
@ -350,11 +396,10 @@ getAddrRegOrSegment v =
F.Mem32 ar -> Some . HasRepSize DWordRepVal <$> getBV32Addr ar
F.Mem64 ar -> Some . HasRepSize QWordRepVal <$> getBV64Addr ar
F.ByteReg (F.LowReg8 r) -> pure $ Some $ HasRepSize ByteRepVal $ reg_low8 $ X86_GP $ F.Reg64 r
F.ByteReg (F.HighReg8 r) -> pure $ Some $ HasRepSize ByteRepVal $ reg_high8 $ X86_GP $ F.Reg64 r
F.WordReg r -> pure $ Some $ HasRepSize WordRepVal (reg16Loc r)
F.DWordReg r -> pure $ Some $ HasRepSize DWordRepVal (reg32Loc r)
F.QWordReg r -> pure $ Some $ HasRepSize QWordRepVal (reg64Loc r)
F.ByteReg r -> pure $ Some $ HasRepSize ByteRepVal $ reg8Loc r
F.WordReg r -> pure $ Some $ HasRepSize WordRepVal $ reg16Loc r
F.DWordReg r -> pure $ Some $ HasRepSize DWordRepVal $ reg32Loc r
F.QWordReg r -> pure $ Some $ HasRepSize QWordRepVal $ reg64Loc r
_ -> fail $ "Argument " ++ show v ++ " not supported."
-- | Gets a value that can be pushed.
@ -362,10 +407,10 @@ getAddrRegOrSegment v =
getAddrRegSegmentOrImm :: F.Value -> X86Generator st ids (Some (HasRepSize (Expr ids)))
getAddrRegSegmentOrImm v =
case v of
F.ByteImm w -> return $ Some $ HasRepSize ByteRepVal $ bvLit n8 (toInteger w)
F.WordImm w -> return $ Some $ HasRepSize WordRepVal $ bvLit n16 (toInteger w)
F.DWordImm w -> return $ Some $ HasRepSize DWordRepVal $ bvLit n32 (toInteger w)
F.QWordImm w -> return $ Some $ HasRepSize QWordRepVal $ bvLit n64 (toInteger w)
F.ByteImm w -> pure $ Some $ HasRepSize ByteRepVal $ bvLit n8 (toInteger w)
F.WordImm w -> pure $ Some $ HasRepSize WordRepVal $ bvLit n16 (toInteger w)
F.DWordImm i -> Some . HasRepSize DWordRepVal <$> getImm32 i
F.QWordImm w -> pure $ Some $ HasRepSize QWordRepVal $ bvLit n64 (toInteger w)
_ -> do
Some (HasRepSize rep l) <- getAddrRegOrSegment v
Some . HasRepSize rep <$> get l
@ -384,6 +429,3 @@ readYMMValue :: F.Value -> X86Generator st ids (Expr ids (BVType 256))
readYMMValue (F.YMMReg r) = get (ymm r)
readYMMValue (F.Mem256 a) = readBVAddress a ymmMemRepr
readYMMValue _ = fail "YMM Instruction given unexpected value."

View File

@ -163,8 +163,6 @@ module Data.Macaw.X86.Monad
, even_parity
, fnstcw
, getSegmentBase
, exception
, ExceptionClass(..)
, x87Push
, x87Pop
, bvQuotRem
@ -906,7 +904,7 @@ mux c x y
-- | Construct a literal bit vector. The result is undefined if the
-- literal does not fit withint the given number of bits.
bvLit :: 1 <= n => NatRepr n -> Integer -> Expr ids (BVType n)
bvLit n v = ValueExpr $ mkLit n (toInteger v)
bvLit n v = ValueExpr $ mkLit n v
-- | Add two bitvectors together dropping overflow.
(.+) :: 1 <= n => Expr ids (BVType n) -> Expr ids (BVType n) -> Expr ids (BVType n)
@ -1543,18 +1541,6 @@ infixl 6 .+
infixl 6 .-
infix 4 .=
------------------------------------------------------------------------
-- Monadic definition
data ExceptionClass
= DivideError -- #DE
| FloatingPointError
| SIMDFloatingPointException
| GeneralProtectionException Int
| UndefinedInstructionError -- basically for ud2
-- ^ A general protection exception with the given error code.
-- -- | AlignmentCheck
deriving (Eq, Ord, Show)
------------------------------------------------------------------------
-- Semantics
@ -1822,15 +1808,6 @@ getSegmentBase seg =
_ ->
error $ "X86_64 getSegmentBase " ++ show seg ++ ": unimplemented!"
-- | raises an exception if the predicate is true and the mask is false
exception :: Expr ids BoolType -- mask
-> Expr ids BoolType -- predicate
-> ExceptionClass
-> X86Generator st ids ()
exception m p c =
when_ (boolNot m .&&. p)
(addStmt (PlaceHolderStmt [] $ "Exception " ++ (show c)))
-- FIXME: those should also mutate the underflow/overflow flag and
-- related state.

View File

@ -21,7 +21,6 @@ module Data.Macaw.X86.Semantics
import Control.Monad (when)
import qualified Data.Bits as Bits
import Data.Foldable
import Data.Int
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Parameterized.Classes
@ -29,6 +28,7 @@ import qualified Data.Parameterized.List as P
import Data.Parameterized.NatRepr
import Data.Parameterized.Some
import Data.Proxy
import Data.Word
import qualified Flexdis86 as F
import Data.Macaw.CFG ( MemRepr(..)
@ -291,8 +291,118 @@ def_cqo = defNullary "cqo" $ do
-- FIXME: special segment stuff?
-- FIXME: CR and debug regs?
exec_mov :: Location (Addr ids) (BVType n) -> BVExpr ids n -> X86Generator st ids ()
exec_mov l v = l .= v
def_mov :: InstructionDef
def_mov =
defBinary "mov" $ \_ loc val -> do
case (loc, val) of
(F.ByteReg r, F.ByteReg src) -> do
v <- get $ reg8Loc src
reg8Loc r .= v
(F.ByteReg r, F.ByteImm i) -> do
reg8Loc r .= bvLit n8 (toInteger i)
(F.ByteReg r, F.Mem8 src) -> do
v <- get =<< getBV8Addr src
reg8Loc r .= v
(F.Mem8 a, F.ByteReg src) -> do
l <- getBV8Addr a
v <- get $ reg8Loc src
l .= v
(F.Mem8 a, F.ByteImm i) -> do
l <- getBV8Addr a
l .= bvLit n8 (toInteger i)
(F.WordReg r, F.WordReg src) -> do
v <- get $ reg16Loc src
reg16Loc r .= v
(F.WordReg r, F.WordSignedImm i) -> do
reg16Loc r .= bvLit n16 (toInteger i)
(F.WordReg r, F.WordImm i) -> do
reg16Loc r .= bvLit n16 (toInteger i)
(F.WordReg r, F.Mem16 src) -> do
v <- get =<< getBV16Addr src
reg16Loc r .= v
(F.Mem16 a, F.WordReg src) -> do
l <- getBV16Addr a
v <- get $ reg16Loc src
l .= v
(F.Mem16 a, F.WordSignedImm i) -> do
l <- getBV16Addr a
l .= bvLit n16 (toInteger i)
(F.DWordReg r, F.DWordReg src) -> do
v <- get $ reg32Loc src
reg32Loc r .= v
(F.DWordReg r, F.DWordSignedImm i) -> do
reg32Loc r .= bvLit n32 (toInteger i)
(F.DWordReg r, F.DWordImm i) -> do
(reg32Loc r .=) =<< getImm32 i
(F.DWordReg r, F.Mem32 src) -> do
v <- get =<< getBV32Addr src
reg32Loc r .= v
(F.Mem32 a, F.DWordReg src) -> do
l <- getBV32Addr a
v <- get $ reg32Loc src
l .= v
(F.Mem32 a, F.DWordSignedImm i) -> do
l <- getBV32Addr a
l .= bvLit n32 (toInteger i)
(F.QWordReg r, F.QWordReg src) -> do
v <- get $ reg64Loc src
reg64Loc r .= v
(F.QWordReg r, F.Mem64 src) -> do
v <- get =<< getBV64Addr src
reg64Loc r .= v
(F.QWordReg r, F.QWordImm i) -> do
reg64Loc r .= bvLit n64 (toInteger i)
(F.QWordReg r, F.DWordSignedImm i) -> do
reg64Loc r .= bvLit n64 (toInteger i)
(F.Mem64 a, F.DWordSignedImm i) -> do
l <- getBV64Addr a
l .= bvLit n64 (toInteger i)
(F.Mem64 a, F.QWordReg src) -> do
l <- getBV64Addr a
v <- get $ reg64Loc src
l .= v
(F.Mem16 a, F.SegmentValue s) -> do
v <- get (SegmentReg s)
l <- getBV16Addr a
l .= v
(F.WordReg r, F.SegmentValue s) -> do
v <- get (SegmentReg s)
reg16Loc r .= v
(F.DWordReg r, F.SegmentValue s) -> do
v <- get (SegmentReg s)
reg_low16 (R.X86_GP (F.reg32_reg r)) .= v
(F.QWordReg r, F.SegmentValue s) -> do
v <- get (SegmentReg s)
fullRegister (R.X86_GP r) .= uext' n64 v
(F.SegmentValue s, F.Mem16 a) -> do
v <- get =<< getBV16Addr a
SegmentReg s .= v
(F.SegmentValue s, F.WordReg r) -> do
v <- get (fullRegister (R.X86_GP (F.reg16_reg r)))
SegmentReg s .= bvTrunc' n16 v
(F.SegmentValue s, F.DWordReg r) -> do
v <- get (fullRegister (R.X86_GP (F.reg32_reg r)))
SegmentReg s .= bvTrunc' n16 v
(F.SegmentValue s, F.QWordReg r) -> do
v <- get (fullRegister (R.X86_GP r))
SegmentReg s .= bvTrunc' n16 v
(_, F.ControlReg _) -> do
error "Do not support moving from/to control registers."
(F.ControlReg _, _) -> do
error "Do not support moving from/to control registers."
(_, F.DebugReg _) -> do
error "Do not support moving from/to debug registers."
(F.DebugReg _, _) -> do
error "Do not support moving from/to debug registers."
_ -> do
error $ "Unexpected arguments to mov: " ++ show loc ++ " " ++ show val
regLocation :: NatRepr n -> X86Reg (BVType 64) -> Location addr (BVType n)
regLocation sz
@ -486,7 +596,7 @@ def_idiv = defUnaryV "idiv" $ \d -> do
--
-- This code assumes that we are not running in kernel mode.
def_hlt :: InstructionDef
def_hlt = defNullary "hlt" $ exception false true (GeneralProtectionException 0)
def_hlt = defNullary "hlt" $ addArchTermStmt Hlt
def_inc :: InstructionDef
def_inc = defUnaryLoc "inc" $ \dst -> do
@ -1004,7 +1114,7 @@ def_call = defUnary "call" $ \_ v -> do
old_pc <- getReg R.X86_IP
push addrRepr old_pc
-- Set IP
tgt <- getJumpTarget v
tgt <- getCallTarget v
rip .= tgt
-- | Conditional jumps
@ -1014,9 +1124,8 @@ def_jcc_list =
defUnary mnem $ \_ v -> do
a <- cc
when_ a $ do
old_pc <- getReg R.X86_IP
off <- getBVValue v knownNat
rip .= old_pc .+ off
tgt <- getJumpTarget v
rip .= tgt
def_jmp :: InstructionDef
def_jmp = defUnary "jmp" $ \_ v -> do
@ -2058,7 +2167,7 @@ def_pselect mnem op sz = defBinaryLV mnem $ \l v -> do
-- PEXTRW Extract word
-- | PINSRW Insert word
exec_pinsrw :: Location (Addr ids) XMMType -> BVExpr ids 16 -> Int8 -> X86Generator st ids ()
exec_pinsrw :: Location (Addr ids) XMMType -> BVExpr ids 16 -> Word8 -> X86Generator st ids ()
exec_pinsrw l v off = do
lv <- get l
-- FIXME: is this the right way around?
@ -2598,7 +2707,7 @@ all_instructions =
, def_imul
, def_inc
, def_leave
, defBinaryLV "mov" $ exec_mov
, def_mov
, defUnaryV "mul" $ exec_mul
, def_neg
, defNullary "nop" $ return ()
@ -2623,7 +2732,7 @@ all_instructions =
, def_xadd
, defBinaryLV "xor" exec_xor
, defNullary "ud2" $ exception false true UndefinedInstructionError
, defNullary "ud2" $ addArchTermStmt UD2
-- Primitive instructions
, def_syscall

View File

@ -2,7 +2,6 @@
module Data.Macaw.X86.Semantics.AVX (all_instructions) where
import Data.Word(Word8)
import Data.Int(Int8)
import Control.Monad(forM_)
import Data.Parameterized.NatRepr
@ -53,7 +52,7 @@ avx3 m k = defInstruction m $ \ii ->
avx4 :: String ->
(forall st ids.
F.Value -> F.Value -> F.Value -> Int8 -> X86Generator st ids ()) ->
F.Value -> F.Value -> F.Value -> Word8 -> X86Generator st ids ()) ->
InstructionDef
avx4 m k = defInstruction m $ \ii ->
case F.iiArgs ii of
@ -229,5 +228,3 @@ all_instructions =
, avxInsert "vpinsrq"
]

@ -1 +1 @@
Subproject commit 71c32ec99d503f8aae234b3716aff6c3d217bf50
Subproject commit 497854b1eef4e477a11c808ac21a659dbd757ea5

@ -1 +1 @@
Subproject commit 12f5b922aa4fd16de9901b59253bdcf76421ed65
Subproject commit 3e6a0e87567c7bff8412f451a44bc5b850c3f8ee