Update memory accesses.

This adds endianess anotations and ensure types align with byte bounda.
This commit is contained in:
Joe Hendrix 2017-05-24 15:55:48 -07:00
parent 84e302c41e
commit 735d41e119
No known key found for this signature in database
GPG Key ID: 00F67DE32381DB9F
6 changed files with 120 additions and 77 deletions

View File

@ -572,7 +572,7 @@ uext (SubValue (n :: NatRepr n) av) _ =
LeqProof -> SubValue n av
where
proof :: LeqProof (n + 1) v
proof = leqTrans (leqAdd (LeqProof :: LeqProof (n+1) u) n1) (LeqProof :: LeqProof (u + 1) v)
proof = leqTrans (leqAdd (LeqProof :: LeqProof (n+1) u) knownNat) (LeqProof :: LeqProof (u + 1) v)
uext (StackOffset _ _) _ = TopV
uext (SomeStackOffset _) _ = TopV
uext ReturnAddr _ = TopV
@ -839,7 +839,7 @@ abstractULeq _tp x y = (x, y)
-- AbsBlockStack
data StackEntry w
= forall tp . StackEntry !(TypeRepr tp) !(AbsValue w tp)
= forall tp . StackEntry !(MemRepr tp) !(AbsValue w tp)
instance Eq (StackEntry w) where
StackEntry x_tp x_v == StackEntry y_tp y_v
@ -1181,10 +1181,11 @@ addMemWrite :: ( HasRepr (ArchReg arch) TypeRepr
--
-- Used for pretty printing
-> BVValue arch ids (ArchAddrWidth arch)
-> MemRepr tp
-> Value arch ids tp
-> AbsProcessorState (ArchReg arch) ids
-> AbsProcessorState (ArchReg arch) ids
addMemWrite cur_ip a v r =
addMemWrite cur_ip a memRepr v r =
case (transferValue r a, transferValue r v) of
-- (_,TopV) -> r
-- We overwrite _some_ stack location. An alternative would be to
@ -1197,7 +1198,7 @@ addMemWrite cur_ip a v r =
r & curAbsStack %~ pruneStack
(StackOffset _ s, v_abs) ->
let w = valueByteSize v
e = StackEntry (typeRepr v) v_abs
e = StackEntry memRepr v_abs
stk0 = r^.curAbsStack
-- Delete information about old assignment
stk1 = Set.fold (\o m -> deleteRange o (o+w) m) stk0 s

View File

@ -65,6 +65,8 @@ data ArchitectureInfo arch
= ArchitectureInfo
{ archAddrWidth :: !(AddrWidthRepr (RegAddrWidth (ArchReg arch)))
-- ^ Architecture address width.
, archEndianness :: !Endianness
-- ^ The byte order values are stored in.
, jumpTableEntrySize :: !(MemWord (ArchAddrWidth arch))
-- ^ The size of each entry in a jump table.
, callStackDelta :: !Integer

View File

@ -1,5 +1,5 @@
{-|
Copyright : (c) Galois, Inc 2015-2016
Copyright : (c) Galois, Inc 2015-2017
Maintainer : Joe Hendrix <jhendrix@galois.com>
Defines data types needed to represent control flow graphs from machine code.
@ -44,6 +44,8 @@ module Data.Macaw.CFG
, Assignment(..)
, AssignId(..)
, AssignRhs(..)
, MemRepr(..)
, memReprBytes
-- * Value
, Value(..)
, BVValue
@ -136,7 +138,7 @@ import GHC.TypeLits
import Numeric (showHex)
import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>))
import Data.Macaw.Memory (MemWord, MemWidth, SegmentedAddr(..))
import Data.Macaw.Memory (MemWord, MemWidth, SegmentedAddr(..), Endianness)
import Data.Macaw.Types
-- Note:
@ -762,6 +764,29 @@ data Assignment arch ids tp =
, assignRhs :: !(AssignRhs arch ids tp)
}
-- | The type stored in memory.
--
-- The endianess indicates whether the address stores the most
-- or least significant byte. The following indices either store
-- the next lower or higher bytes.
data MemRepr (tp :: Type) where
BVMemRepr :: NatRepr w -> Endianness -> MemRepr (BVType (8*w))
-- | Return the number of bytes this takes up.
memReprBytes :: MemRepr tp -> Integer
memReprBytes (BVMemRepr x _) = natValue x
instance TestEquality MemRepr where
testEquality (BVMemRepr xw xe) (BVMemRepr yw ye) =
if xe == ye then do
Refl <- testEquality xw yw
Just Refl
else
Nothing
instance HasRepr MemRepr TypeRepr where
typeRepr (BVMemRepr w _) = BVTypeRepr (natMultiply n8 w)
-- | The right hand side of an assignment is an expression that
-- returns a value.
data AssignRhs (arch :: *) ids tp where
@ -776,7 +801,7 @@ data AssignRhs (arch :: *) ids tp where
-- Read memory at given location.
ReadMem :: !(ArchAddrValue arch ids)
-> !(TypeRepr tp)
-> !(MemRepr tp)
-> AssignRhs arch ids tp
-- Call an architecture specific function that returns some result.
@ -792,7 +817,7 @@ instance HasRepr (AssignRhs arch ids) TypeRepr where
case rhs of
EvalApp a -> appType a
SetUndefined w -> BVTypeRepr w
ReadMem _ tp -> tp
ReadMem _ tp -> typeRepr tp
EvalArchFn _ rtp -> rtp
instance ( HasRepr (ArchReg arch) TypeRepr
@ -1129,7 +1154,7 @@ instance ( OrdF r
-- | A statement in our control flow graph language.
data Stmt arch ids
= forall tp . AssignStmt !(Assignment arch ids tp)
| forall tp . WriteMem !(ArchAddrValue arch ids) !(Value arch ids tp)
| forall tp . WriteMem !(ArchAddrValue arch ids) !(MemRepr tp) !(Value arch ids tp)
-- ^ Write to memory at the given location
| PlaceHolderStmt !([Some (Value arch ids)]) !String
| Comment !Text
@ -1138,7 +1163,7 @@ data Stmt arch ids
instance ArchConstraints arch => Pretty (Stmt arch ids) where
pretty (AssignStmt a) = pretty a
pretty (WriteMem a rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs
pretty (WriteMem a _ rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs
pretty (PlaceHolderStmt vals name) = text ("PLACEHOLDER: " ++ name)
<+> parens (hcat $ punctuate comma
$ map (viewSome (ppValue 0)) vals)

View File

@ -66,6 +66,21 @@ import Data.Macaw.Memory
import qualified Data.Macaw.Memory.Permissions as Perm
import Data.Macaw.Types
-- Get the absolute value associated with an address.
transferReadMem :: (OrdF (ArchReg a), ShowF (ArchReg a), MemWidth (RegAddrWidth (ArchReg a)))
=> AbsProcessorState (ArchReg a) ids
-> ArchAddrValue a ids
-> MemRepr tp
-- ^ Information about the memory layout for the value.
-> ArchAbsValue a tp
transferReadMem r a tp
| StackOffset _ s <- transferValue r a
, [o] <- Set.toList s
, Just (StackEntry v_tp v) <- Map.lookup o (r^.curAbsStack)
, Just Refl <- testEquality tp v_tp = v
| otherwise = TopV
-- | Get the abstract domain for the right-hand side of an assignment.
transferRHS :: forall a ids tp
. ( OrdF (ArchReg a)
, ShowF (ArchReg a)
@ -79,13 +94,7 @@ transferRHS info r rhs =
case rhs of
EvalApp app -> transferApp r app
SetUndefined _ -> TopV
ReadMem a tp
| StackOffset _ s <- transferValue r a
, [o] <- Set.toList s
, Just (StackEntry v_tp v) <- Map.lookup o (r^.curAbsStack)
, Just Refl <- testEquality tp v_tp ->
v
| otherwise -> TopV
ReadMem a tp -> transferReadMem r a tp
EvalArchFn f _ -> absEvalArchFn info r f
-- | Merge in the value of the assignment.
@ -337,8 +346,8 @@ transferStmt info stmt =
case stmt of
AssignStmt a -> do
modify $ addAssignment info a
WriteMem addr v -> do
modify $ \r -> addMemWrite (r^.absInitialRegs^.curIP) addr v r
WriteMem addr memRepr v -> do
modify $ \r -> addMemWrite (r^.absInitialRegs^.curIP) addr memRepr v r
_ -> return ()
newtype HexWord = HexWord Word64
@ -578,15 +587,15 @@ recordWriteStmt :: ( HasRepr (ArchReg arch) TypeRepr
, OrdF (ArchReg arch)
, ShowF (ArchReg arch)
)
=> Memory (ArchAddrWidth arch)
=> ArchitectureInfo arch
-> Memory (ArchAddrWidth arch)
-> AbsProcessorState (ArchReg arch) ids
-> Stmt arch ids
-> State (ParseState arch ids) ()
recordWriteStmt mem regs stmt = do
let addrWidth = addrWidthNatRepr $ memAddrWidth mem
recordWriteStmt arch_info mem regs stmt = do
case stmt of
WriteMem _addr v
| Just Refl <- testEquality (typeRepr v) (BVTypeRepr addrWidth) -> do
WriteMem _addr repr v
| Just Refl <- testEquality repr (addrMemRepr arch_info) -> do
let addrs = concretizeAbsCodePointers mem (transferValue regs v)
writtenCodeAddrs %= (addrs ++)
_ -> return ()
@ -612,7 +621,7 @@ identifyCall mem stmts0 s = go (Seq.fromList stmts0)
prev Seq.:> stmt
-- Check for a call statement by determining if the last statement
-- writes an executable address to the stack pointer.
| WriteMem a val <- stmt
| WriteMem a _repr val <- stmt
, Just _ <- testEquality a next_sp
-- Check this is the right length.
, Just Refl <- testEquality (typeRepr next_sp) (typeRepr val)
@ -620,7 +629,6 @@ identifyCall mem stmts0 s = go (Seq.fromList stmts0)
, Just val_a <- asLiteralAddr mem val
-- Check if segment of address is marked as executable.
, Perm.isExecutable (segmentFlags (addrSegment val_a)) ->
Just (prev, val_a)
-- Stop if we hit any architecture specific instructions prior to
-- identifying return address since they may have side effects.
@ -628,7 +636,6 @@ identifyCall mem stmts0 s = go (Seq.fromList stmts0)
-- Otherwise skip over this instruction.
| otherwise -> go prev
-- | This is designed to detect returns from the register state representation.
--
-- It pattern matches on a 'RegState' to detect if it read its instruction
@ -659,6 +666,12 @@ data ParseContext arch ids = ParseContext { pctxMemory :: !(Memory (ArchAddrWi
, pctxBlockMap :: !(Map Word64 (Block arch ids))
}
addrMemRepr :: ArchitectureInfo arch -> MemRepr (BVType (RegAddrWidth (ArchReg arch)))
addrMemRepr arch_info =
case archAddrWidth arch_info of
Addr32 -> BVMemRepr n4 (archEndianness arch_info)
Addr64 -> BVMemRepr n8 (archEndianness arch_info)
-- | This parses a block that ended with a fetch and execute instruction.
parseFetchAndExecute :: forall arch ids
. ArchConstraints arch
@ -683,7 +696,7 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
-- Note that in some cases the call is known not to return, and thus
-- this code will never jump to the return value.
_ | Just (prev_stmts, ret) <- identifyCall mem stmts s' -> do
Fold.mapM_ (recordWriteStmt mem regs') prev_stmts
Fold.mapM_ (recordWriteStmt arch_info mem regs') prev_stmts
let abst = finalAbsBlockState regs' s'
seq abst $ do
-- Merge caller return information
@ -707,7 +720,7 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
_ -> False
nonret_stmts = filter (not . isRetLoad) stmts
mapM_ (recordWriteStmt mem regs') nonret_stmts
mapM_ (recordWriteStmt arch_info mem regs') nonret_stmts
let ip_val = s'^.boundValue ip_reg
case transferValue regs' ip_val of
@ -730,7 +743,7 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
| Just tgt_addr <- asLiteralAddr mem (s'^.boundValue ip_reg)
, tgt_addr /= pctxFunAddr ctx -> do
assert (segmentFlags (addrSegment tgt_addr) `Perm.hasPerm` Perm.execute) $ do
mapM_ (recordWriteStmt mem regs') stmts
mapM_ (recordWriteStmt arch_info mem regs') stmts
-- Merge block state and add intra jump target.
let abst = finalAbsBlockState regs' s'
let abst' = abst & setAbsIP tgt_addr
@ -747,14 +760,14 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
case getJumpTableBounds arch_info regs' base jump_idx of
Left err ->
trace (show src ++ ": Could not compute bounds: " ++ showJumpTableBoundsError err) $ do
mapM_ (recordWriteStmt mem regs') stmts
mapM_ (recordWriteStmt arch_info mem regs') stmts
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = stmts
, pblockState = regs'
, pblockTerm = ClassifyFailure s'
}
Right read_end -> do
mapM_ (recordWriteStmt mem regs') stmts
mapM_ (recordWriteStmt arch_info mem regs') stmts
-- Try to compute jump table bounds
@ -776,7 +789,7 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
return (reverse prev)
resolveJump prev idx = do
let read_addr = base & addrOffset +~ 8 * idx
case readAddr mem LittleEndian read_addr of
case readAddr mem (archEndianness arch_info) read_addr of
Right tgt_addr
| Perm.isReadonly (segmentFlags (addrSegment read_addr)) -> do
let flags = segmentFlags (addrSegment tgt_addr)
@ -795,11 +808,11 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
}
-- Check for tail call (anything where we are right at stack height
| ptrType <- BVTypeRepr (addrWidthNatRepr (archAddrWidth arch_info))
| ptrType <- addrMemRepr arch_info
, sp_val <- s'^.boundValue sp_reg
, ReturnAddr <- transferRHS arch_info regs' (ReadMem sp_val ptrType) -> do
, ReturnAddr <- transferReadMem regs' sp_val ptrType -> do
mapM_ (recordWriteStmt mem regs') stmts
mapM_ (recordWriteStmt arch_info mem regs') stmts
-- Compute fina lstate
let abst = finalAbsBlockState regs' s'
@ -819,7 +832,7 @@ parseFetchAndExecute ctx lbl stmts regs s' = do
-- Block that ends with some unknown
| otherwise -> do
trace ("Could not classify " ++ show lbl) $ do
mapM_ (recordWriteStmt mem regs') stmts
mapM_ (recordWriteStmt arch_info mem regs') stmts
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = stmts
, pblockState = regs'
@ -848,7 +861,7 @@ parseBlocks ctx ((b,regs):rest) = do
case blockTerm b of
Branch c lb rb -> do
let regs' = transferStmts arch_info regs (blockStmts b)
mapM_ (recordWriteStmt mem regs') (blockStmts b)
mapM_ (recordWriteStmt arch_info mem regs') (blockStmts b)
let l = tryLookupBlock "left branch" src block_map lb
let l_regs = refineProcStateBounds c True $ refineProcState c absTrue regs'
@ -874,7 +887,7 @@ parseBlocks ctx ((b,regs):rest) = do
Syscall s' -> do
let regs' = transferStmts arch_info regs (blockStmts b)
mapM_ (recordWriteStmt mem regs') (blockStmts b)
mapM_ (recordWriteStmt arch_info mem regs') (blockStmts b)
let abst = finalAbsBlockState regs' s'
case concretizeAbsCodePointers mem (abst^.absRegState^.curIP) of
[] -> error "Could not identify concrete system call address"

View File

@ -104,6 +104,7 @@ addrWidthNatRepr Addr64 = knownNat
-- | Indicates whether bytes are stored in big or little endian representation.
data Endianness = BigEndian | LittleEndian
deriving (Eq)
------------------------------------------------------------------------
-- Utilities

View File

@ -40,9 +40,6 @@ n1 = knownNat
n4 :: NatRepr 4
n4 = knownNat
n5 :: NatRepr 5
n5 = knownNat
n8 :: NatRepr 8
n8 = knownNat
@ -61,21 +58,49 @@ n80 = knownNat
n128 :: NatRepr 128
n128 = knownNat
------------------------------------------------------------------------
-- Type
data Type
= -- | An array of bits
= -- | A bitvector with the given number of bits.
BVType Nat
-- | 64 bit binary IEE754
| DoubleFloat
-- | 32 bit binary IEE754
| SingleFloat
-- | X86 80-bit extended floats
| X86_80Float
-- | 128 bit binary IEE754
| QuadFloat
-- | 16 bit binary IEE754
| HalfFloat
-- Return number of bytes in the type.
type family TypeBytes (tp :: Type) :: Nat where
TypeBytes (BVType 8) = 1
TypeBytes (BVType 16) = 2
TypeBytes (BVType 32) = 4
TypeBytes (BVType 64) = 8
TypeBytes HalfFloat = 2
TypeBytes SingleFloat = 4
TypeBytes DoubleFloat = 8
TypeBytes QuadFloat = 16
TypeBytes X86_80Float = 10
-- Return number of bits in the type.
type family TypeBits (tp :: Type) :: Nat where
TypeBits (BVType n) = n
TypeBits (BVType n) = n
TypeBits HalfFloat = 16
TypeBits SingleFloat = 32
TypeBits DoubleFloat = 64
TypeBits QuadFloat = 128
TypeBits X86_80Float = 80
type FloatType tp = BVType (8 * TypeBytes tp)
type BVType = 'BVType
type BoolType = BVType 1
type XMMType = BVType 128
-- | A runtime representation of @Type@ for case matching purposes.
data TypeRepr (tp :: Type) where
@ -102,32 +127,16 @@ class KnownType tp where
instance KnownNat n => KnownType (BVType n) where
knownType = BVTypeRepr knownNat
------------------------------------------------------------------------
-- IsLeq
type IsLeq (m :: Nat) (n :: Nat) = (m <= n)
------------------------------------------------------------------------
-- Floating point sizes
-- | This data kind describes the styles of floating-point values understood
-- by recent LLVM bytecode formats. This consist of the standard IEEE 754-2008
-- binary floating point formats, as well as the X86 extended 80-bit format
-- and the double-double format.
data FloatInfo where
DoubleFloat :: FloatInfo -- 64 bit binary IEE754
SingleFloat :: FloatInfo -- 32 bit binary IEE754
X86_80Float :: FloatInfo -- X86 80-bit extended floats
QuadFloat :: FloatInfo -- 128 bit binary IEE754
HalfFloat :: FloatInfo -- 16 bit binary IEE754
type SingleFloat = 'SingleFloat
type DoubleFloat = 'DoubleFloat
type X86_80Float = 'X86_80Float
type QuadFloat = 'QuadFloat
type HalfFloat = 'HalfFloat
data FloatInfoRepr (flt::FloatInfo) where
data FloatInfoRepr (flt::Type) where
DoubleFloatRepr :: FloatInfoRepr DoubleFloat
SingleFloatRepr :: FloatInfoRepr SingleFloat
X86_80FloatRepr :: FloatInfoRepr X86_80Float
@ -166,19 +175,8 @@ instance Pretty (FloatInfoRepr flt) where
pretty HalfFloatRepr = text "half"
type family FloatInfoBits (flt :: FloatInfo) :: Nat where
FloatInfoBits HalfFloat = 16
FloatInfoBits SingleFloat = 32
FloatInfoBits DoubleFloat = 64
FloatInfoBits QuadFloat = 128
FloatInfoBits X86_80Float = 80
type FloatType flt = BVType (FloatInfoBits flt)
-- type instance FloatInfoBits DoubleDoubleFloat =
floatInfoBits :: FloatInfoRepr flt -> NatRepr (FloatInfoBits flt)
floatInfoBits fir =
floatInfoBytes :: FloatInfoRepr flt -> NatRepr (TypeBytes flt)
floatInfoBytes fir =
case fir of
HalfFloatRepr -> knownNat
SingleFloatRepr -> knownNat
@ -186,7 +184,10 @@ floatInfoBits fir =
QuadFloatRepr -> knownNat
X86_80FloatRepr -> knownNat
floatTypeRepr :: FloatInfoRepr flt -> TypeRepr (BVType (FloatInfoBits flt))
floatInfoBits :: FloatInfoRepr flt -> NatRepr (8 * TypeBytes flt)
floatInfoBits fir = natMultiply (knownNat :: NatRepr 8) (floatInfoBytes fir)
floatTypeRepr :: FloatInfoRepr flt -> TypeRepr (BVType (8 * TypeBytes flt))
floatTypeRepr fir = BVTypeRepr (floatInfoBits fir)
------------------------------------------------------------------------