From 77627c391d147738e19aefe854e4eb5c88533785 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Wed, 6 Jun 2018 11:28:26 -0700 Subject: [PATCH] Remove redundant IPAlignment constraint. --- base/macaw-base.cabal | 2 + base/src/Data/Macaw/CFG/App.hs | 11 +- base/src/Data/Macaw/CFG/AssignRhs.hs | 176 ++++++++++++++++++++++ base/src/Data/Macaw/CFG/Core.hs | 185 +----------------------- base/src/Data/Macaw/Discovery.hs | 24 ++- base/src/Data/Macaw/Memory/ElfLoader.hs | 45 +++--- base/src/Data/Macaw/SCFG.hs | 151 +++++++++++++++++++ 7 files changed, 386 insertions(+), 208 deletions(-) create mode 100644 base/src/Data/Macaw/CFG/AssignRhs.hs create mode 100644 base/src/Data/Macaw/SCFG.hs diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index d1ad00bc..3c43a545 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -55,6 +55,7 @@ library Data.Macaw.Architecture.Info Data.Macaw.CFG Data.Macaw.CFG.App + Data.Macaw.CFG.AssignRhs Data.Macaw.CFG.Block Data.Macaw.CFG.BlockLabel Data.Macaw.CFG.Core @@ -70,6 +71,7 @@ library Data.Macaw.Memory.ElfLoader Data.Macaw.Memory.LoadCommon Data.Macaw.Memory.Permissions + Data.Macaw.SCFG Data.Macaw.Types Data.Macaw.Utils.Pretty diff --git a/base/src/Data/Macaw/CFG/App.hs b/base/src/Data/Macaw/CFG/App.hs index fff3fa49..80350fe0 100644 --- a/base/src/Data/Macaw/CFG/App.hs +++ b/base/src/Data/Macaw/CFG/App.hs @@ -61,11 +61,16 @@ data App (f :: Type -> *) (tp :: Type) where ---------------------------------------------------------------------- -- Operations related to concatenating and extending bitvectors. - -- Truncate a bitvector value. + -- | Given a @m@-bit bitvector and a natural number @n@ less than @m@, this returns + -- the bitvector with the @n@ least significant bits. Trunc :: (1 <= n, n+1 <= m) => !(f (BVType m)) -> !(NatRepr n) -> App f (BVType n) - -- Signed extension. + -- | Given a @m@-bit bitvector @x@ and a natural number @n@ greater than @m@, this returns + -- the bitvector with the same @m@ least signficant bits, and where the new bits are + -- the same as the most significant bit in @x@. SExt :: (1 <= m, m+1 <= n, 1 <= n) => f (BVType m) -> NatRepr n -> App f (BVType n) - -- Unsigned extension. + -- | Given a @m@-bit bitvector @x@ and a natural number @n@ greater than @m@, this returns + -- the bitvector with the same @m@ least signficant bits, and where the new bits are + -- all @false@. UExt :: (1 <= m, m+1 <= n, 1 <= n) => f (BVType m) -> NatRepr n -> App f (BVType n) ---------------------------------------------------------------------- diff --git a/base/src/Data/Macaw/CFG/AssignRhs.hs b/base/src/Data/Macaw/CFG/AssignRhs.hs new file mode 100644 index 00000000..3645c756 --- /dev/null +++ b/base/src/Data/Macaw/CFG/AssignRhs.hs @@ -0,0 +1,176 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.Macaw.CFG.AssignRhs + ( AssignRhs(..) + -- * MemRepr + , MemRepr(..) + , memReprBytes + -- * Architecture type families + , RegAddrWidth + , ArchReg + , ArchFn + , ArchStmt + , ArchTermStmt + -- * Synonyms + , RegAddrWord + , ArchAddrWidth + , ArchAddrWord + , ArchSegmentOff + , ArchMemAddr + ) where + +import Data.Macaw.CFG.App +import Data.Macaw.Memory (Endianness(..), MemSegmentOff, MemWord, MemAddr) +import Data.Macaw.Types + +import Data.Monoid +import Data.Parameterized.Classes +import Data.Parameterized.NatRepr +import Data.Parameterized.TraversableFC (FoldableFC(..)) +import Data.Proxy +import Text.PrettyPrint.ANSI.Leijen hiding ((<$>), (<>)) + +-- | Width of register used to store addresses. +type family RegAddrWidth (r :: Type -> *) :: Nat + +-- | A word for the given architecture register type. +type RegAddrWord r = MemWord (RegAddrWidth r) + +-- | Type family for defining what a "register" is for this architecture. +-- +-- Registers include things like the general purpose registers, any flag +-- registers that can be read and written without side effects, +type family ArchReg (arch :: *) :: Type -> * + +-- | A type family for architecture specific functions. +-- +-- These functions may return a value. They may depend on the current state of +-- the heap, but should not affect the processor state. +-- +-- The function may depend on the set of registers defined so far, and the type +-- of the result. +type family ArchFn (arch :: *) :: (Type -> *) -> Type -> * + +-- | A type family for defining architecture-specific statements. +-- +-- The second parameter is used to denote the underlying values in the +-- statements so that we can use ArchStmts with multiple CFGs. +type family ArchStmt (arch :: *) :: (Type -> *) -> * + +-- | A type family for defining architecture-specific statements that +-- may have instruction-specific effects on control-flow and register state. +-- +-- The second type parameter is the ids phantom type used to provide +-- uniqueness of Nonce values that identify assignments. +-- +-- An architecture-specific terminal statement may have side effects and change register +-- values, it may or may not return to the current function. If it does return to the +-- current function, it is assumed to be at most one location, and the block-translator +-- must provide that value at translation time. +type family ArchTermStmt (arch :: *) :: * -> * + +-- | Number of bits in addreses for architecture. +type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) + +-- | A pair containing a segment and valid offset within the segment. +type ArchSegmentOff arch = MemSegmentOff (ArchAddrWidth arch) + +-- | A word for the given architecture bitwidth. +type ArchAddrWord arch = RegAddrWord (ArchReg arch) + +-- | An address for a given architecture. +type ArchMemAddr arch = MemAddr (ArchAddrWidth arch) + +------------------------------------------------------------------------ +-- MemRepr + +-- | The provides information sufficient to read supported types of values from +-- memory such as the number of bytes and endianness. +data MemRepr (tp :: Type) where + -- | Denotes a bitvector with the given number of bytes and endianness. + BVMemRepr :: (1 <= w) => !(NatRepr w) -> !Endianness -> MemRepr (BVType (8*w)) + +instance Pretty (MemRepr tp) where + pretty (BVMemRepr w BigEndian) = text "bvbe" <+> text (show w) + pretty (BVMemRepr w LittleEndian) = text "bvle" <+> text (show w) + +instance Show (MemRepr tp) where + show = show . pretty + +-- | Return the number of bytes this uses in memory. +memReprBytes :: MemRepr tp -> Integer +memReprBytes (BVMemRepr x _) = natValue x + +instance TestEquality MemRepr where + testEquality (BVMemRepr xw xe) (BVMemRepr yw ye) = + if xe == ye then do + Refl <- testEquality xw yw + Just Refl + else + Nothing + +instance OrdF MemRepr where + compareF (BVMemRepr xw xe) (BVMemRepr yw ye) = + joinOrderingF (compareF xw yw) $ + fromOrdering (compare xe ye) + +instance HasRepr MemRepr TypeRepr where + typeRepr (BVMemRepr w _) = + let r = (natMultiply n8 w) + in case leqMulPos (Proxy :: Proxy 8) w of + LeqProof -> BVTypeRepr r + +------------------------------------------------------------------------ +-- AssignRhs + +-- | The right hand side of an assignment is an expression that +-- returns a value. +data AssignRhs (arch :: *) (f :: Type -> *) tp where + -- | An expression that is computed from evaluating subexpressions. + EvalApp :: !(App f tp) + -> AssignRhs arch f tp + + -- | An expression with an undefined value. + SetUndefined :: !(TypeRepr tp) + -> AssignRhs arch f tp + + -- | Read memory at given location. + ReadMem :: !(f (BVType (ArchAddrWidth arch))) + -> !(MemRepr tp) + -> AssignRhs arch f tp + + -- | @CondReadMem tp cond addr v@ reads from memory at the given address if the + -- condition is true and returns the value if it false. + CondReadMem :: !(MemRepr tp) + -> !(f BoolType) + -> !(f (BVType (ArchAddrWidth arch))) + -> !(f tp) + -> AssignRhs arch f tp + + -- Call an architecture specific function that returns some result. + EvalArchFn :: !(ArchFn arch f tp) + -> !(TypeRepr tp) + -> AssignRhs arch f tp + +instance HasRepr (AssignRhs arch f) TypeRepr where + typeRepr rhs = + case rhs of + EvalApp a -> typeRepr a + SetUndefined tp -> tp + ReadMem _ tp -> typeRepr tp + CondReadMem tp _ _ _ -> typeRepr tp + EvalArchFn _ rtp -> rtp + +instance FoldableFC (ArchFn arch) => FoldableFC (AssignRhs arch) where + foldMapFC go v = + case v of + EvalApp a -> foldMapFC go a + SetUndefined _w -> mempty + ReadMem addr _ -> go addr + CondReadMem _ c a d -> go c <> go a <> go d + EvalArchFn f _ -> foldMapFC go f diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index 0f46f764..cbd1f1b2 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -26,11 +26,6 @@ module Data.Macaw.CFG.Core Stmt(..) , Assignment(..) , AssignId(..) - , AssignRhs(..) - , MemRepr(..) - , memReprBytes - , readMemRepr - , readMemReprDyn -- * Value , Value(..) , BVValue @@ -68,13 +63,6 @@ module Data.Macaw.CFG.Core , PrettyRegValue(..) , IsArchFn(..) , IsArchStmt(..) - -- * Architecture type families - , ArchFn - , ArchReg - , ArchStmt - , ArchTermStmt - , RegAddrWord - , RegAddrWidth -- * Utilities , addrWidthTypeRepr -- * RegisterInfo @@ -85,12 +73,9 @@ module Data.Macaw.CFG.Core , refsInApp , refsInAssignRhs -- ** Synonyms - , ArchAddrWidth , ArchAddrValue - , ArchAddrWord - , ArchMemAddr - , ArchSegmentOff , Data.Parameterized.TraversableFC.FoldableFC(..) + , module Data.Macaw.CFG.AssignRhs , module Data.Macaw.Utils.Pretty ) where @@ -100,7 +85,6 @@ import Control.Monad.State.Strict import Data.Bits import Data.Int (Int64) import Data.Maybe (isNothing, catMaybes) -import Data.Monoid import Data.Parameterized.Classes import Data.Parameterized.Map (MapF) import qualified Data.Parameterized.Map as MapF @@ -109,7 +93,6 @@ import Data.Parameterized.Nonce import Data.Parameterized.Some import Data.Parameterized.TraversableF import Data.Parameterized.TraversableFC (FoldableFC(..)) -import Data.Proxy import Data.Set (Set) import qualified Data.Set as Set import Data.Text (Text) @@ -120,6 +103,7 @@ import Text.PrettyPrint.ANSI.Leijen hiding ((<$>), (<>)) import qualified Text.PrettyPrint.ANSI.Leijen as PP import Data.Macaw.CFG.App +import Data.Macaw.CFG.AssignRhs import Data.Macaw.Memory import Data.Macaw.Types import Data.Macaw.Utils.Pretty @@ -190,170 +174,7 @@ instance Show (AssignId ids tp) where show (AssignId n) = show n ------------------------------------------------------------------------ --- Type families for architecture specific components. - --- | Width of register used to store addresses. -type family RegAddrWidth (r :: Type -> *) :: Nat - --- | A word for the given architecture register type. -type RegAddrWord r = MemWord (RegAddrWidth r) - --- | Type family for defining what a "register" is for this architecture. --- --- Registers include things like the general purpose registers, any flag --- registers that can be read and written without side effects, -type family ArchReg (arch :: *) :: Type -> * - --- | A type family for architecture specific functions. --- --- These functions may return a value. They may depend on the current state of --- the heap, but should not affect the processor state. --- --- The function may depend on the set of registers defined so far, and the type --- of the result. -type family ArchFn (arch :: *) :: (Type -> *) -> Type -> * - --- | A type family for defining architecture-specific statements. --- --- The second type parameter is the ids phantom type used to provide --- uniqueness of Nonce values that identify assignments. -type family ArchStmt (arch :: *) :: (Type -> *) -> * - --- | A type family for defining architecture-specific statements that --- may have instruction-specific effects on control-flow and register state. --- --- The second type parameter is the ids phantom type used to provide --- uniqueness of Nonce values that identify assignments. --- --- An architecture-specific terminal statement may have side effects and change register --- values, it may or may not return to the current function. If it does return to the --- current function, it is assumed to be at most one location, and the block-translator --- must provide that value at translation time. -type family ArchTermStmt (arch :: *) :: * -> * - --- | Number of bits in addreses for architecture. -type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) - --- | A word for the given architecture bitwidth. -type ArchAddrWord arch = RegAddrWord (ArchReg arch) - --- | An address for a given architecture. -type ArchMemAddr arch = MemAddr (ArchAddrWidth arch) - --- | A pair containing a segment and valid offset within the segment. -type ArchSegmentOff arch = MemSegmentOff (ArchAddrWidth arch) - ------------------------------------------------------------------------- --- MemRepr - --- | The type stored in memory. --- --- The endianess indicates whether the address stores the most --- or least significant byte. The following indices either store --- the next lower or higher bytes. -data MemRepr (tp :: Type) where - BVMemRepr :: (1 <= w) => !(NatRepr w) -> !Endianness -> MemRepr (BVType (8*w)) - -instance Pretty (MemRepr tp) where - pretty (BVMemRepr w BigEndian) = text "bvbe" <+> text (show w) - pretty (BVMemRepr w LittleEndian) = text "bvle" <+> text (show w) - -instance Show (MemRepr tp) where - show = show . pretty - --- | Return the number of bytes this takes up. -memReprBytes :: MemRepr tp -> Integer -memReprBytes (BVMemRepr x _) = natValue x - --- | Read a word with a dynamically-chosen endianness and size -readMemRepr :: MemWidth w' => Memory w -> MemAddr w -> MemRepr (BVType w') -> Either (MemoryError w) (MemWord w') -readMemRepr mem addr (BVMemRepr size endianness) = do - bs <- readByteString mem addr (fromInteger (natValue size)) - let Just val = addrRead endianness bs - Right val - --- | Like 'readMemRepr', but has a short static list of sizes for which it can --- dispatch the 'MemWidth' constraint. Returns 'Left' for sizes other than 4 or --- 8 bytes. -readMemReprDyn :: Memory w -> MemAddr w -> MemRepr (BVType w') -> Either (MemoryError w) (MemWord w') -readMemReprDyn mem addr repr@(BVMemRepr size _) = case () of - _ | Just Refl <- testEquality size (knownNat :: NatRepr 4) -> readMemRepr mem addr repr - | Just Refl <- testEquality size (knownNat :: NatRepr 8) -> readMemRepr mem addr repr - | otherwise -> Left $ InvalidSize addr size - -instance TestEquality MemRepr where - testEquality (BVMemRepr xw xe) (BVMemRepr yw ye) = - if xe == ye then do - Refl <- testEquality xw yw - Just Refl - else - Nothing - -instance OrdF MemRepr where - compareF (BVMemRepr xw xe) (BVMemRepr yw ye) = - case compareF xw yw of - LTF -> LTF - GTF -> GTF - EQF -> fromOrdering (compare xe ye) - -instance HasRepr MemRepr TypeRepr where - typeRepr (BVMemRepr w _) = - let r = (natMultiply n8 w) - in case leqMulPos (Proxy :: Proxy 8) w of - LeqProof -> BVTypeRepr r - ------------------------------------------------------------------------- --- AssignRhs - --- | The right hand side of an assignment is an expression that --- returns a value. -data AssignRhs (arch :: *) (f :: Type -> *) tp where - -- An expression that is computed from evaluating subexpressions. - EvalApp :: !(App f tp) - -> AssignRhs arch f tp - - -- An expression with an undefined value. - SetUndefined :: !(TypeRepr tp) - -> AssignRhs arch f tp - - -- Read memory at given location. - ReadMem :: !(f (BVType (ArchAddrWidth arch))) - -> !(MemRepr tp) - -> AssignRhs arch f tp - - -- | @CondReadMem tp cond addr v@ reads from memory at the given address if the - -- condition is true and returns the value if it false. - CondReadMem :: !(MemRepr tp) - -> !(f BoolType) - -> !(f (BVType (ArchAddrWidth arch))) - -> !(f tp) - -> AssignRhs arch f tp - - -- Call an architecture specific function that returns some result. - EvalArchFn :: !(ArchFn arch f tp) - -> !(TypeRepr tp) - -> AssignRhs arch f tp - -instance HasRepr (AssignRhs arch f) TypeRepr where - typeRepr rhs = - case rhs of - EvalApp a -> typeRepr a - SetUndefined tp -> tp - ReadMem _ tp -> typeRepr tp - CondReadMem tp _ _ _ -> typeRepr tp - EvalArchFn _ rtp -> rtp - -instance FoldableFC (ArchFn arch) => FoldableFC (AssignRhs arch) where - foldMapFC go v = - case v of - EvalApp a -> foldMapFC go a - SetUndefined _w -> mempty - ReadMem addr _ -> go addr - CondReadMem _ c a d -> go c <> go a <> go d - EvalArchFn f _ -> foldMapFC go f - ------------------------------------------------------------------------- --- Value and Assignment, AssignRhs declarations. +-- Value and Assignment -- | A value at runtime. data Value arch ids tp where diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 30ce13b7..29511cb7 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -428,11 +428,13 @@ jumpTableRead :: JumpTable arch ids -> ArrayRead arch ids jumpTableRead (Absolute r _) = r jumpTableRead (Relative _ r _) = r +{- -- | After reading from the array, the result may be extended to address width; -- if so, this says how. jumpTableExtension :: JumpTable arch ids -> Maybe Extension jumpTableExtension (Absolute _ e) = e jumpTableExtension (Relative _ _ e) = e +-} ensure :: Alternative f => (a -> Bool) -> a -> f a ensure p x = x <$ guard (p x) @@ -678,6 +680,20 @@ identifyCallTargets absState ip = do _ -> def Initial _ -> def +-- | Read an address using the @MemRepr@ for format information, which should be 4 or 8 bytes. +-- Returns 'Left' for sizes other than 4 or 8 bytes. +readMemReprDyn :: Memory w -> MemAddr w -> MemRepr (BVType w') -> Either (MemoryError w) (MemWord w') +readMemReprDyn mem addr (BVMemRepr size endianness) = do + bs <- readByteString mem addr (fromInteger (natValue size)) + case () of + _ | Just Refl <- testEquality size (knownNat :: NatRepr 4) -> do + let Just val = addrRead endianness bs + Right val + | Just Refl <- testEquality size (knownNat :: NatRepr 8) -> do + let Just val = addrRead endianness bs + Right val + | otherwise -> Left $ InvalidSize addr size + -- | This parses a block that ended with a fetch and execute instruction. parseFetchAndExecute :: forall arch ids . ParseContext arch ids @@ -1001,7 +1017,7 @@ transferBlocks src finfo sz block_map = mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) -transfer :: IPAlignment arch => ArchSegmentOff arch -> FunM arch s ids () +transfer :: ArchSegmentOff arch -> FunM arch s ids () transfer addr = do s <- use curFunCtx let ainfo = archInfo s @@ -1057,8 +1073,7 @@ transfer addr = do -- | Loop that repeatedly explore blocks until we have explored blocks -- on the frontier. -analyzeBlocks :: IPAlignment arch - => (ArchSegmentOff arch -> ST s ()) +analyzeBlocks :: (ArchSegmentOff arch -> ST s ()) -- ^ Logging function to call when analyzing a new block. -> FunState arch s ids -> ST s (FunState arch s ids) @@ -1110,8 +1125,7 @@ mkFunInfo fs = -- -- This returns the updated state and the discovered control flow -- graph for this function. -analyzeFunction :: IPAlignment arch - => (ArchSegmentOff arch -> ST s ()) +analyzeFunction :: (ArchSegmentOff arch -> ST s ()) -- ^ Logging function to call when analyzing a new block. -> ArchSegmentOff arch -- ^ The address to explore diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index df911ca9..a1aa9808 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -339,11 +339,11 @@ resolveSymbol (SymbolVector symtab) symIdx = do resolveRelocationAddr :: SymbolVector -- ^ A vector mapping symbol indices to the -- associated symbol information. - -> Elf.RelEntry tp - -- ^ A relocation entry + -> Word32 + -- ^ Index in the symbol table this refers to. -> RelocResolver SymbolIdentifier -resolveRelocationAddr symtab rel = do - sym <- resolveSymbol symtab (Elf.relSym rel) +resolveRelocationAddr symtab symIdx = do + sym <- resolveSymbol symtab symIdx case symbolDef sym of DefinedSymbol{} -> do pure $ SymbolRelocation (symbolName sym) (symbolVersion sym) @@ -358,26 +358,30 @@ resolveRelocationAddr symtab rel = do relaTargetX86_64 :: SomeRelocationResolver 64 relaTargetX86_64 = SomeRelocationResolver $ \symtab rel off -> case Elf.relType rel of --- JHX Note. These have been commented out until we can validate them. --- Elf.R_X86_64_GLOB_DAT -> do --- checkZeroAddend --- TargetSymbol <$> resolveSymbol symtab rel --- Elf.R_X86_64_COPY -> TargetCopy --- Elf.R_X86_64_JUMP_SLOT -> do --- checkZeroAddend --- TargetSymbol <$> resolveSymbol symtab rel + Elf.R_X86_64_JUMP_SLOT -> do + addr <- resolveRelocationAddr symtab (Elf.relSym rel) + pure $ AbsoluteRelocation addr off LittleEndian 8 Elf.R_X86_64_PC32 -> do - addr <- resolveRelocationAddr symtab rel + addr <- resolveRelocationAddr symtab (Elf.relSym rel) pure $ RelativeRelocation addr off LittleEndian 4 Elf.R_X86_64_32 -> do - addr <- resolveRelocationAddr symtab rel + addr <- resolveRelocationAddr symtab (Elf.relSym rel) pure $ AbsoluteRelocation addr off LittleEndian 4 Elf.R_X86_64_64 -> do - addr <- resolveRelocationAddr symtab rel + addr <- resolveRelocationAddr symtab (Elf.relSym rel) pure $ AbsoluteRelocation addr off LittleEndian 8 - -- Jhx Note. These will be needed to support thread local variables. - -- Elf.R_X86_64_TPOFF32 -> undefined - -- Elf.R_X86_64_GOTTPOFF -> undefined + -- R_X86_64_GLOB_DAT are used to update GOT entries with their + -- target address. They are similar to R_x86_64_64 except appear + -- inside dynamically linked executables/libraries, and are often + -- loaded lazily. We just use the eager AbsoluteRelocation here. + Elf.R_X86_64_GLOB_DAT -> do + addr <- resolveRelocationAddr symtab (Elf.relSym rel) + pure $ AbsoluteRelocation addr off LittleEndian 8 + + -- Jhx Note. These will be needed to support thread local variables. + -- Elf.R_X86_64_TPOFF32 -> undefined + -- Elf.R_X86_64_GOTTPOFF -> undefined + tp -> relocError $ RelocationUnsupportedType (show tp) {- @@ -396,6 +400,11 @@ relaTargetARM = SomeRelocationResolver $ \_symtab rel _maddend -> -- TargetSymbol <$> resolveSymbol symtab rel tp -> relocError $ RelocationUnsupportedType (show tp) -} +{- +000000613ff8 003c00000006 R_X86_64_GLOB_DAT 0000000000000000 __gmon_start__ + 0 +-} + + -- | Creates a relocation map from the contents of a dynamic section. withRelocationResolver diff --git a/base/src/Data/Macaw/SCFG.hs b/base/src/Data/Macaw/SCFG.hs new file mode 100644 index 00000000..b78843fd --- /dev/null +++ b/base/src/Data/Macaw/SCFG.hs @@ -0,0 +1,151 @@ +{-| +Copyright : (c) Galois, Inc 2017 +Maintainer : Joe Hendrix + +This exports the "simplifiable CFG" interface. A CFG designed for +optimization. +-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.Macaw.SCFG + ( SCFG(..) + , SCFGBlock(..) + , CallingConvention(..) + , TermStmt(..) + , module Data.Macaw.CFG.App + ) where + +import Data.Macaw.CFG.AssignRhs +import Data.Macaw.CFG.App +import Data.Macaw.Memory (AddrWidthRepr(..)) +import Data.Macaw.Types + +import Data.BinarySymbols +import Data.ByteString.Char8 as BSC +import Data.Map.Strict (Map) +import Data.Parameterized.Map (MapF) +import Data.Parameterized.Nonce +import Data.Parameterized.Some +import Data.Text (Text) +import qualified Data.Vector as V +import Data.Word +import GHC.TypeLits + +newtype BlockIndex = BlockIndex Word64 + +newtype AssignId ids (tp::Type) = AssignId (Nonce ids tp) + +data Value arch ids tp where + BVValue :: (1 <= n) + => !(NatRepr n) + -> !Integer + -> Value arch ids (BVType n) + -- ^ A constant bitvector + -- + -- The integer should be between 0 and 2^n-1. + BoolValue :: !Bool -> Value arch ids BoolType + -- ^ A constant Boolean + RelocatableValue :: !(AddrWidthRepr (ArchAddrWidth arch)) + -> !(ArchMemAddr arch) + -> Value arch ids (BVType (ArchAddrWidth arch)) + -- ^ A memory address + SymbolValue :: !(AddrWidthRepr (ArchAddrWidth arch)) + -> !SymbolIdentifier + -> Value arch ids (BVType (ArchAddrWidth arch)) + -- ^ Reference to a symbol identifier. + -- + -- This appears when dealing with relocations. + AssignedValue :: !(AssignId ids tp) + -> Value arch ids tp + -- ^ Value from an assignment statement. + +type BVValue arch ids w = Value arch ids (BVType w) + +type ArchAddrValue arch ids = BVValue arch ids (ArchAddrWidth arch) + +-- | A statement in our control flow graph language. +data Stmt arch ids where + AssignStmt :: !(AssignId ids tp) + -> !(AssignRhs arch (Value arch ids) tp) + -> Stmt arch ids + WriteMem :: !(ArchAddrValue arch ids) + -> !(MemRepr tp) + -> !(Value arch ids tp) + -> Stmt arch ids + -- ^ This denotes a write to memory, and consists of an address to write to, a `MemRepr` defining + -- how the value should be stored in memory, and the value to be written. + InstructionStart :: !(ArchAddrWord arch) + -> !Text + -> Stmt arch ids + -- ^ The start of an instruction + -- + -- The information includes the offset relative to the start of the block and the + -- disassembler output if available (or empty string if unavailable) + Comment :: !Text -> Stmt arch ids + -- ^ A user-level comment + ExecArchStmt :: !(ArchStmt arch (Value arch ids)) -> Stmt arch ids + -- ^ Execute an architecture specific statement + -- RegCall :: !(RegState (ArchReg arch) (Value arch ids)) + -- -> Stmt arch ids + -- ^ A call statement. + +-- | This is a calling convention that explains how the linear list of +-- arguments should be stored for the ABI. +type family CallingConvention arch + +-- | This term statement is used to describe higher level expressions +-- of how block ending with a a FetchAndExecute statement should be +-- interpreted. +data TermStmt arch ids where + TailCall :: !(CallingConvention arch) + -> !(BVValue arch ids (ArchAddrWidth arch)) + -- /\ IP to call + -> ![Some (Value arch ids)] + -> TermStmt arch ids + -- ^ A call with the current register values and location to return to or 'Nothing' if this is a tail call. + Jump :: !BlockIndex + -> TermStmt arch ids + -- ^ A jump to an explicit address within a function. + LookupTable :: !(BVValue arch ids (ArchAddrWidth arch)) + -> !(V.Vector BlockIndex) + -> TermStmt arch ids + -- ^ A lookup table that branches to one of a vector of addresses. + -- + -- The value contains the index to jump to as an unsigned bitvector, and the + -- possible addresses as a table. If the index (when interpreted as + -- an unsigned number) is larger than the number of entries in the vector, then the + -- result is undefined. + Return :: !(MapF (ArchReg arch) (Value arch ids)) + -> TermStmt arch ids + -- ^ A return with the given registers. + Ite :: !(Value arch ids BoolType) + -> !BlockIndex + -> !BlockIndex + -> TermStmt arch ids + -- ^ An if-then-else + ArchTermStmt :: !(ArchTermStmt arch ids) + -> !(MapF (ArchReg arch) (Value arch ids)) + -> !(MapF (ArchReg arch) (Value arch ids)) + -> !(Maybe BlockIndex) + -> TermStmt arch ids + -- ^ An architecture-specific statement with the registers prior to execution, and + -- the given next control flow address. + + +data SCFGBlock arch ids + = SCFGBlock { blockAddr :: !(ArchSegmentOff arch) + , blockIndex :: !BlockIndex + , blockInputs :: ![Some TypeRepr] + -- ^ Types of inputs to block. + , blockStmt :: ![Stmt arch ids] + , blockTermStmt :: !(TermStmt arch ids) + } + +data SCFG arch ids + = SCFG { cfgAddr :: !(ArchSegmentOff arch) + , cfgName :: !BSC.ByteString + , cfgBlocks :: !(Map (ArchSegmentOff arch, BlockIndex) (SCFGBlock arch ids)) + }