From 0c3c935b7b792a56ced4fb714bfac2177581b303 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Wed, 5 Jul 2017 13:31:21 -0700 Subject: [PATCH] Refactor modules. --- macaw.cabal | 2 + src/Data/Macaw/Architecture/Info.hs | 73 +-- src/Data/Macaw/CFG.hs | 689 +-------------------------- src/Data/Macaw/CFG/App.hs | 3 +- src/Data/Macaw/CFG/Block.hs | 81 ++++ src/Data/Macaw/CFG/Core.hs | 691 ++++++++++++++++++++++++++++ 6 files changed, 784 insertions(+), 755 deletions(-) create mode 100644 src/Data/Macaw/CFG/Block.hs create mode 100644 src/Data/Macaw/CFG/Core.hs diff --git a/macaw.cabal b/macaw.cabal index 0a6c8d02..6d4a31cc 100644 --- a/macaw.cabal +++ b/macaw.cabal @@ -33,6 +33,8 @@ library Data.Macaw.Architecture.Syscall Data.Macaw.CFG Data.Macaw.CFG.App + Data.Macaw.CFG.Block + Data.Macaw.CFG.Core Data.Macaw.CFG.DemandSet Data.Macaw.CFG.Rewriter Data.Macaw.DebugLogging diff --git a/src/Data/Macaw/Architecture/Info.hs b/src/Data/Macaw/Architecture/Info.hs index 676c01c5..0d96681c 100644 --- a/src/Data/Macaw/Architecture/Info.hs +++ b/src/Data/Macaw/Architecture/Info.hs @@ -4,91 +4,24 @@ Maintainer : jhendrix@galois.com This defines the architecture-specific information needed for code discovery. -} -{-# LANGUAGE ConstraintKinds #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} module Data.Macaw.Architecture.Info ( ArchitectureInfo(..) , DisassembleFn , archPostSyscallAbsState -- * Unclassified blocks - , Block(..) - , TermStmt(..) + , module Data.Macaw.CFG.Block ) where import Control.Monad.ST -import Data.Parameterized.Classes import Data.Parameterized.Nonce -import Data.Text (Text) -import qualified Data.Text as Text -import Data.Word -import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) import Data.Macaw.AbsDomain.AbsState as AbsState -import Data.Macaw.CFG +import Data.Macaw.CFG.Block +import Data.Macaw.CFG.Core import Data.Macaw.CFG.DemandSet import Data.Macaw.CFG.Rewriter import Data.Macaw.Memory -import Data.Macaw.Types - ------------------------------------------------------------------------- --- TermStmt - --- A terminal statement in a block -data TermStmt arch ids - -- | Fetch and execute the next instruction from the given processor state. - = FetchAndExecute !(RegState (ArchReg arch) (Value arch ids)) - -- | Branch and execute one block or another. - | Branch !(Value arch ids BoolType) !Word64 !Word64 - -- | The syscall instruction. - -- We model system calls as terminal instructions because from the - -- application perspective, the semantics will depend on the operating - -- system. - | Syscall !(RegState (ArchReg arch) (Value arch ids)) - -- | The block ended prematurely due to an error in instruction - -- decoding or translation. - -- - -- This contains the state of the registers when the translation error - -- occured and the error message recorded. - | TranslateError !(RegState (ArchReg arch) (Value arch ids)) !Text - -instance ( OrdF (ArchReg arch) - , ShowF (ArchReg arch) - ) - => Pretty (TermStmt arch ids) where - pretty (FetchAndExecute s) = - text "fetch_and_execute" <$$> - indent 2 (pretty s) - pretty (Branch c x y) = - text "branch" <+> ppValue 0 c <+> text (show x) <+> text (show y) - pretty (Syscall s) = - text "syscall" <$$> - indent 2 (pretty s) - pretty (TranslateError s msg) = - text "ERROR: " <+> text (Text.unpack msg) <$$> - indent 2 (pretty s) - ------------------------------------------------------------------------- --- Block - --- | The type for code blocks returned by the disassembler. --- --- The discovery process will attempt to map each block to a suitable ParsedBlock. -data Block arch ids - = Block { blockLabel :: !Word64 - -- ^ Index of this block - , blockStmts :: !([Stmt arch ids]) - -- ^ List of statements in the block. - , blockTerm :: !(TermStmt arch ids) - -- ^ The last statement in the block. - } - -instance ArchConstraints arch => Pretty (Block arch ids) where - pretty b = do - text (show (blockLabel b)) PP.<> text ":" <$$> - indent 2 (vcat (pretty <$> blockStmts b) <$$> pretty (blockTerm b)) ------------------------------------------------------------------------ -- ArchitectureInfo diff --git a/src/Data/Macaw/CFG.hs b/src/Data/Macaw/CFG.hs index bbe154f6..00b76dc7 100644 --- a/src/Data/Macaw/CFG.hs +++ b/src/Data/Macaw/CFG.hs @@ -1,694 +1,15 @@ {-| -Copyright : (c) Galois, Inc 2015-2017 +Copyright : (c) Galois, Inc 2017 Maintainer : Joe Hendrix -Defines data types needed to represent values, assignments, and statements from Machine code. - -This is a low-level CFG representation where the entire program is a -single CFG. +This exports the main CFG modules -} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE PolyKinds #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TypeOperators #-} module Data.Macaw.CFG - ( -- * Stmt level declarations - Stmt(..) - , Assignment(..) - , AssignId(..) - , AssignRhs(..) - , MemRepr(..) - , memReprBytes - -- * Value - , Value(..) - , BVValue - , valueAsApp - , valueWidth - , asBaseOffset - , asInt64Constant - , mkLit - , bvValue - , ppValueAssignments - , ppValueAssignmentList - -- * App + ( module Data.Macaw.CFG.Core , module Data.Macaw.CFG.App - -- * RegState - , RegState - , regStateMap - , boundValue - , cmpRegState - , curIP - , mkRegState - , mkRegStateM - , traverseRegsWith - , zipWithRegState - -- * Pretty printing - , ppAssignId - , ppLit - , ppValue - , PrettyF(..) - , ArchConstraints(..) - , PrettyRegValue(..) - -- * Architecture type families - , ArchAddr - , ArchSegmentedAddr - , ArchFn - , ArchReg - , ArchStmt - , RegAddr - , RegAddrWidth - -- * RegisterInfo - , RegisterInfo(..) - , asStackAddrOffset - -- * References - , StmtHasRefs(..) - , FnHasRefs(..) - , refsInValue - , refsInApp - , refsInAssignRhs - -- ** Synonyms - , ArchAddrWidth - , ArchAddrValue , Data.Macaw.Memory.SegmentedAddr ) where -import Control.Exception (assert) -import Control.Lens -import Control.Monad.Identity -import Control.Monad.State.Strict -import Data.Bits -import Data.Int (Int64) -import Data.Maybe (isNothing, catMaybes) -import Data.Parameterized.Classes -import Data.Parameterized.Map (MapF) -import qualified Data.Parameterized.Map as MapF -import Data.Parameterized.NatRepr -import Data.Parameterized.Nonce -import Data.Parameterized.Some -import Data.Parameterized.TraversableF -import Data.Set (Set) -import qualified Data.Set as Set -import Data.Text (Text) -import qualified Data.Text as Text -import GHC.TypeLits -import Numeric (showHex) -import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) - import Data.Macaw.CFG.App -import Data.Macaw.Memory (MemWord, MemWidth, SegmentedAddr(..), Endianness(..)) -import Data.Macaw.Types - --- Note: --- The declarations in this file follow a top-down order, so the top-level --- definitions should be first. - -type Prec = Int - -colonPrec :: Prec -colonPrec = 7 - -plusPrec :: Prec -plusPrec = 6 - --- | Class for pretty printing with a precedence field. -class PrettyPrec v where - prettyPrec :: Int -> v -> Doc - --- | Pretty print over all instances of a type. -class PrettyF (f :: k -> *) where - prettyF :: f tp -> Doc - --- | Pretty print a document with parens if condition is true -parenIf :: Bool -> Doc -> Doc -parenIf True d = parens d -parenIf False d = d - -bracketsep :: [Doc] -> Doc -bracketsep [] = text "{}" -bracketsep (h:l) = vcat $ - [text "{" <+> h] - ++ fmap (text "," <+>) l - ++ [text "}"] - ------------------------------------------------------------------------- --- AssignId - --- | An 'AssignId' is a unique identifier used to identify assignment --- statements, in a manner similar to SSA (single static assignment) --- form. 'AssignId's are typed, and also include a type variable @ids@ --- that intuitively denotes the set of identifiers from which they are --- drawn. -newtype AssignId (ids :: *) (tp :: Type) = AssignId (Nonce ids tp) - -ppAssignId :: AssignId ids tp -> Doc -ppAssignId (AssignId w) = text ("r" ++ show (indexValue w)) - -instance Eq (AssignId ids tp) where - AssignId id1 == AssignId id2 = id1 == id2 - -instance TestEquality (AssignId ids) where - testEquality (AssignId id1) (AssignId id2) = testEquality id1 id2 - -instance OrdF (AssignId ids) where - compareF (AssignId id1) (AssignId id2) = compareF id1 id2 - -instance ShowF (AssignId ids) where - showF (AssignId n) = show n - -instance Show (AssignId ids tp) where - show (AssignId n) = show n - ------------------------------------------------------------------------- --- Type families for architecture specific components. - --- | Width of register used to store addresses. -type family RegAddrWidth (r :: Type -> *) :: Nat - --- | The value used to store -type RegAddr r = MemWord (RegAddrWidth r) - --- | Type family for defining what a "register" is for this architecture. --- --- Registers include things like the general purpose registers, any flag --- registers that can be read and written without side effects, -type family ArchReg (arch :: *) :: Type -> * - --- | A type family for architecture specific functions. --- --- These function may return a value. They may depend on the current state of --- the heap, but should not affect the processor state. --- --- The function may depend on the set of registers defined so far, and the type --- of the result. -type family ArchFn (arch :: *) :: * -> Type -> * - --- | A type family for defining architecture-specific statements. --- --- The second type parameter is the ids phantom type used to provide --- uniqueness of Nonce values that identify assignments. --- -type family ArchStmt (arch :: *) :: * -> * - --- | The type to use for addresses on the architecutre. -type ArchAddr arch = RegAddr (ArchReg arch) - --- | Number of bits in addreses for architecture. -type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) - --- | A segmented addr for a given architecture. -type ArchSegmentedAddr arch = SegmentedAddr (ArchAddrWidth arch) - - ------------------------------------------------------------------------- --- Value, Assignment, AssignRhs declarations. - --- | A value at runtime. -data Value arch ids tp - = forall n - . (tp ~ BVType n) - => BVValue !(NatRepr n) !Integer - -- ^ A constant bitvector - | ( tp ~ BVType (ArchAddrWidth arch)) - => RelocatableValue !(NatRepr (ArchAddrWidth arch)) !(ArchSegmentedAddr arch) - -- ^ A given memory address. - | AssignedValue !(Assignment arch ids tp) - -- ^ Value from an assignment statement. - | Initial !(ArchReg arch tp) - -- ^ Represents the value assigned to the register when the block started. - -type BVValue arch ids w = Value arch ids (BVType w) - --- | A address value for a specific architecture -type ArchAddrValue arch ids = BVValue arch ids (ArchAddrWidth arch) - --- | An assignment consists of a unique location identifier and a right- --- hand side that returns a value. -data Assignment arch ids tp = - Assignment { assignId :: !(AssignId ids tp) - , assignRhs :: !(AssignRhs arch ids tp) - } - --- | The type stored in memory. --- --- The endianess indicates whether the address stores the most --- or least significant byte. The following indices either store --- the next lower or higher bytes. -data MemRepr (tp :: Type) where - BVMemRepr :: !(NatRepr w) -> !Endianness -> MemRepr (BVType (8*w)) - -instance Pretty (MemRepr tp) where - pretty (BVMemRepr w BigEndian) = text "bvbe" <+> text (show w) - pretty (BVMemRepr w LittleEndian) = text "bvle" <+> text (show w) - --- | Return the number of bytes this takes up. -memReprBytes :: MemRepr tp -> Integer -memReprBytes (BVMemRepr x _) = natValue x - -instance TestEquality MemRepr where - testEquality (BVMemRepr xw xe) (BVMemRepr yw ye) = - if xe == ye then do - Refl <- testEquality xw yw - Just Refl - else - Nothing - -instance HasRepr MemRepr TypeRepr where - typeRepr (BVMemRepr w _) = BVTypeRepr (natMultiply n8 w) - --- | The right hand side of an assignment is an expression that --- returns a value. -data AssignRhs (arch :: *) ids tp where - -- An expression that is computed from evaluating subexpressions. - EvalApp :: !(App (Value arch ids) tp) - -> AssignRhs arch ids tp - - -- An expression with an undefined value. - SetUndefined :: (tp ~ BVType n) - => !(NatRepr n) -- Width of undefined value. - -> AssignRhs arch ids tp - - -- Read memory at given location. - ReadMem :: !(ArchAddrValue arch ids) - -> !(MemRepr tp) - -> AssignRhs arch ids tp - - -- Call an architecture specific function that returns some result. - EvalArchFn :: !(ArchFn arch ids tp) - -> !(TypeRepr tp) - -> AssignRhs arch ids tp - ------------------------------------------------------------------------- --- Type operations on assignment AssignRhs, and Value - -instance HasRepr (AssignRhs arch ids) TypeRepr where - typeRepr rhs = - case rhs of - EvalApp a -> typeRepr a - SetUndefined w -> BVTypeRepr w - ReadMem _ tp -> typeRepr tp - EvalArchFn _ rtp -> rtp - -instance ( HasRepr (ArchReg arch) TypeRepr - ) - => HasRepr (Value arch ids) TypeRepr where - - typeRepr (BVValue w _) = BVTypeRepr w - typeRepr (RelocatableValue w _) = BVTypeRepr w - typeRepr (AssignedValue a) = typeRepr (assignRhs a) - typeRepr (Initial r) = typeRepr r - -valueWidth :: ( HasRepr (ArchReg arch) TypeRepr - ) - => Value arch ids (BVType n) -> NatRepr n -valueWidth v = - case typeRepr v of - BVTypeRepr n -> n - ------------------------------------------------------------------------- --- Value equality - -instance OrdF (ArchReg arch) - => OrdF (Value arch ids) where - compareF (BVValue wx vx) (BVValue wy vy) = - case compareF wx wy of - LTF -> LTF - EQF -> fromOrdering (compare vx vy) - GTF -> GTF - compareF BVValue{} _ = LTF - compareF _ BVValue{} = GTF - - compareF (RelocatableValue _ x) (RelocatableValue _ y) = - fromOrdering (compare x y) - compareF RelocatableValue{} _ = LTF - compareF _ RelocatableValue{} = GTF - - compareF (AssignedValue x) (AssignedValue y) = - compareF (assignId x) (assignId y) - compareF AssignedValue{} _ = LTF - compareF _ AssignedValue{} = GTF - - compareF (Initial r) (Initial r') = - case compareF r r' of - LTF -> LTF - EQF -> EQF - GTF -> GTF - -instance OrdF (ArchReg arch) - => TestEquality (Value arch ids) where - testEquality x y = orderingF_refl (compareF x y) - -instance OrdF (ArchReg arch) - => Eq (Value arch ids tp) where - x == y = isJust (testEquality x y) - -instance OrdF (ArchReg arch) - => Ord (Value arch ids tp) where - compare x y = toOrdering (compareF x y) - -instance OrdF (ArchReg arch) - => EqF (Value arch ids) where - eqF = (==) - ------------------------------------------------------------------------- --- Value operations - -mkLit :: NatRepr n -> Integer -> Value arch ids (BVType n) -mkLit n v = BVValue n (v .&. mask) - where mask = maxUnsigned n - -bvValue :: KnownNat n => Integer -> Value arch ids (BVType n) -bvValue i = mkLit knownNat i - -valueAsApp :: Value arch ids tp -> Maybe (App (Value arch ids) tp) -valueAsApp (AssignedValue (Assignment _ (EvalApp a))) = Just a -valueAsApp _ = Nothing - -asInt64Constant :: Value arch ids (BVType 64) -> Maybe Int64 -asInt64Constant (BVValue _ o) = Just (fromInteger o) -asInt64Constant _ = Nothing - -asBaseOffset :: Value arch ids (BVType w) -> (Value arch ids (BVType w), Integer) -asBaseOffset x - | Just (BVAdd _ x_base (BVValue _ x_off)) <- valueAsApp x = (x_base, x_off) - | otherwise = (x,0) - ------------------------------------------------------------------------- --- RegState - --- | This represents the state of the processor registers. -newtype RegState (r :: k -> *) (f :: k -> *) = RegState (MapF.MapF r f) - -deriving instance (OrdF r, EqF f) => Eq (RegState r f) - -deriving instance FunctorF (RegState r) -deriving instance FoldableF (RegState r) - -instance TraversableF (RegState r) where - traverseF f (RegState m) = RegState <$> traverseF f m - --- | Return underlying map of register state. -regStateMap :: RegState r f -> MapF.MapF r f -regStateMap (RegState m) = m - --- | Traverse the register state with the name of each register and value. -traverseRegsWith :: Applicative m - => (forall tp. r tp -> f tp -> m (g tp)) - -> RegState r f - -> m (RegState r g) -traverseRegsWith f (RegState m) = RegState <$> MapF.traverseWithKey f m - --- | Get a register out of the state. -boundValue :: forall r f tp - . OrdF r - => r tp - -> Simple Lens (RegState r f) (f tp) -boundValue r = lens getter setter - where getter (RegState m) = - case MapF.lookup r m of - Just v -> v - Nothing -> error "internal error in boundValue given unexpected reg" - setter (RegState m) v = RegState (MapF.insert r v m) - - --- | Compares if two register states are equal. -cmpRegState :: OrdF r - => (forall u. f u -> g u -> Bool) - -- ^ Function for checking if two values are equal. - -> RegState r f - -> RegState r g - -> Bool -cmpRegState p (RegState x) (RegState y) = go (MapF.toList x) (MapF.toList y) - where go [] [] = True - go [] (_:_) = False - go (_:_) [] = False - go (MapF.Pair xk xv:xr) (MapF.Pair yk yv:yr) = - case testEquality xk yk of - Nothing -> False - Just Refl -> p xv yv && go xr yr - ------------------------------------------------------------------------- --- RegisterInfo - --- | This class provides access to information about registers. -class ( OrdF r - , ShowF r - , MemWidth (RegAddrWidth r) - , HasRepr r TypeRepr - ) => RegisterInfo r where - - -- | List of all arch registers. - archRegs :: [Some r] - - -- | The stack pointer register - sp_reg :: r (BVType (RegAddrWidth r)) - - -- | The instruction pointer register - ip_reg :: r (BVType (RegAddrWidth r)) - - -- | The register used to store system call numbers. - syscall_num_reg :: r (BVType (RegAddrWidth r)) - - -- | Registers used for passing system call arguments - syscallArgumentRegs :: [r (BVType (RegAddrWidth r))] - - --- The value of the current instruction pointer. -curIP :: RegisterInfo r - => Simple Lens (RegState r f) (f (BVType (RegAddrWidth r))) -curIP = boundValue ip_reg - -mkRegStateM :: (RegisterInfo r, Applicative m) - => (forall tp . r tp -> m (f tp)) - -> m (RegState r f) -mkRegStateM f = RegState . MapF.fromList <$> traverse g archRegs - where g (Some r) = MapF.Pair r <$> f r - --- Create a pure register state -mkRegState :: RegisterInfo r -- AbsRegState r - => (forall tp . r tp -> f tp) - -> RegState r f -mkRegState f = runIdentity (mkRegStateM (return . f)) - -zipWithRegState :: RegisterInfo r - => (forall u. f u -> g u -> h u) - -> RegState r f - -> RegState r g - -> RegState r h -zipWithRegState f x y = mkRegState (\r -> f (x ^. boundValue r) (y ^. boundValue r)) - --- | Returns a offset if the value is an offset of the stack. -asStackAddrOffset :: RegisterInfo (ArchReg arch) - => Value arch ids tp - -> Maybe (BVValue arch ids (ArchAddrWidth arch)) -asStackAddrOffset addr - | Just (BVAdd _ (Initial base) offset) <- valueAsApp addr - , Just Refl <- testEquality base sp_reg = do - Just offset - | Initial base <- addr - , Just Refl <- testEquality base sp_reg = - case typeRepr base of - BVTypeRepr w -> Just (BVValue w 0) - | otherwise = - Nothing - - ------------------------------------------------------------------------- --- Pretty print Assign, AssignRhs, Value operations - -ppLit :: NatRepr n -> Integer -> Doc -ppLit w i - | i >= 0 = text ("0x" ++ showHex i "") <+> text "::" <+> brackets (text (show w)) - | otherwise = error "ppLit given negative value" - --- | Pretty print a value. -ppValue :: ShowF (ArchReg arch) => Prec -> Value arch ids tp -> Doc -ppValue p (BVValue w i) = assert (i >= 0) $ parenIf (p > colonPrec) $ ppLit w i -ppValue p (RelocatableValue _ a) = parenIf (p > plusPrec) $ text (show a) -ppValue _ (AssignedValue a) = ppAssignId (assignId a) -ppValue _ (Initial r) = text (showF r) PP.<> text "_0" - -instance ShowF (ArchReg arch) => PrettyPrec (Value arch ids tp) where - prettyPrec = ppValue - -instance ShowF (ArchReg arch) => Pretty (Value arch ids tp) where - pretty = ppValue 0 - -instance ShowF (ArchReg arch) => Show (Value arch ids tp) where - show = show . pretty - -class ( RegisterInfo (ArchReg arch) - , PrettyF (ArchStmt arch) - ) => ArchConstraints arch where - - -- | A function for pretty printing an archFn of a given type. - ppArchFn :: Applicative m - => (forall u . Value arch ids u -> m Doc) - -- ^ Function for pretty printing vlaue. - -> ArchFn arch ids tp - -> m Doc - --- | Pretty print an assignment right-hand side using operations parameterized --- over an application to allow side effects. -ppAssignRhs :: (Applicative m, ArchConstraints arch) - => (forall u . Value arch ids u -> m Doc) - -- ^ Function for pretty printing value. - -> AssignRhs arch ids tp - -> m Doc -ppAssignRhs pp (EvalApp a) = ppAppA pp a -ppAssignRhs _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w)) -ppAssignRhs pp (ReadMem a repr) = - (\d -> text "read_mem" <+> d <+> PP.parens (pretty repr)) <$> pp a -ppAssignRhs pp (EvalArchFn f _) = ppArchFn pp f - -instance ArchConstraints arch => Pretty (AssignRhs arch ids tp) where - pretty = runIdentity . ppAssignRhs (Identity . ppValue 10) - -instance ArchConstraints arch => Pretty (Assignment arch ids tp) where - pretty (Assignment lhs rhs) = ppAssignId lhs <+> text ":=" <+> pretty rhs - ------------------------------------------------------------------------- --- Pretty print a value assignment - --- | Helper type to wrap up a 'Doc' with a dummy type argument; used to put --- 'Doc's into heterogenous maps in the below -newtype DocF (a :: Type) = DocF Doc - --- | This pretty prints a value's representation while saving the pretty --- printed repreentation of subvalues in a map. -collectValueRep :: forall arch ids tp - . ArchConstraints arch - => Prec - -> Value arch ids tp - -> State (MapF (AssignId ids) DocF) Doc -collectValueRep _ (AssignedValue a :: Value arch ids tp) = do - let lhs = assignId a - mr <- gets $ MapF.lookup lhs - when (isNothing mr) $ do - let ppVal :: forall u . Value arch ids u -> - State (MapF (AssignId ids) DocF) Doc - ppVal = collectValueRep 10 - rhs <- ppAssignRhs ppVal (assignRhs a) - let d = ppAssignId lhs <+> text ":=" <+> rhs - modify $ MapF.insert lhs (DocF d) - return () - return $! ppAssignId lhs -collectValueRep p v = return $ ppValue p v - --- | This pretty prints all the history used to create a value. -ppValueAssignments' :: State (MapF (AssignId ids) DocF) Doc -> Doc -ppValueAssignments' m = - case MapF.elems bindings of - [] -> rhs - (Some (DocF h):r) -> - let first = text "let" PP.<+> h - f (Some (DocF b)) = text " " PP.<> b - in vcat (first:fmap f r) <$$> - text " in" PP.<+> rhs - where (rhs, bindings) = runState m MapF.empty - --- | This pretty prints all the history used to create a value. -ppValueAssignments :: ArchConstraints arch => Value arch ids tp -> Doc -ppValueAssignments v = ppValueAssignments' (collectValueRep 0 v) - -ppValueAssignmentList :: ArchConstraints arch => [Value arch ids tp] -> Doc -ppValueAssignmentList vals = - ppValueAssignments' $ do - docs <- mapM (collectValueRep 0) vals - return $ brackets $ hcat (punctuate comma docs) - ------------------------------------------------------------------------- --- Pretty printing RegState - --- | This class provides a way of optionally pretty printing the contents --- of a register or omitting them. -class PrettyRegValue r (f :: Type -> *) where - -- | ppValueEq should return a doc if the contents of the given register - -- should be printed, and Nothing if the contents should be ignored. - ppValueEq :: r tp -> f tp -> Maybe Doc - -instance ( PrettyRegValue r f - ) - => Pretty (RegState r f) where - pretty (RegState m) = bracketsep $ catMaybes (f <$> MapF.toList m) - where f :: MapF.Pair r f -> Maybe Doc - f (MapF.Pair r v) = ppValueEq r v - -instance ( PrettyRegValue r f - ) - => Show (RegState r f) where - show s = show (pretty s) - -instance ( OrdF r - , ShowF r - , r ~ ArchReg arch - ) - => PrettyRegValue r (Value arch ids) where - ppValueEq r (Initial r') - | Just _ <- testEquality r r' = Nothing - ppValueEq r v - | otherwise = Just $ text (showF r) <+> text "=" <+> pretty v - ------------------------------------------------------------------------- --- Stmt - --- | A statement in our control flow graph language. -data Stmt arch ids - = forall tp . AssignStmt !(Assignment arch ids tp) - | forall tp . WriteMem !(ArchAddrValue arch ids) !(MemRepr tp) !(Value arch ids tp) - -- ^ This denotes a write to memory, and consists of an address to write to, a `MemRepr` defining - -- how the value should be stored in memory, and the value to be written. - | PlaceHolderStmt !([Some (Value arch ids)]) !String - | Comment !Text - | ExecArchStmt !(ArchStmt arch ids) - -- ^ Execute an architecture specific statement - -instance ArchConstraints arch => Pretty (Stmt arch ids) where - pretty (AssignStmt a) = pretty a - pretty (WriteMem a _ rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs - pretty (PlaceHolderStmt vals name) = text ("PLACEHOLDER: " ++ name) - <+> parens (hcat $ punctuate comma - $ map (viewSome (ppValue 0)) vals) - pretty (Comment s) = text $ "# " ++ Text.unpack s - pretty (ExecArchStmt s) = prettyF s - - -instance ArchConstraints arch => Show (Stmt arch ids) where - show = show . pretty - ------------------------------------------------------------------------- --- References - --- | Return refernces in a stmt type. -class StmtHasRefs f where - refsInStmt :: f ids -> Set (Some (AssignId ids)) - --- | Return refernces in a function type. -class FnHasRefs (f :: * -> Type -> *) where - refsInFn :: f ids tp -> Set (Some (AssignId ids)) - -refsInValue :: Value arch ids tp -> Set (Some (AssignId ids)) -refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v) -refsInValue _ = Set.empty - -refsInApp :: App (Value arch ids) tp -> Set (Some (AssignId ids)) -refsInApp app = foldApp refsInValue app - -refsInAssignRhs :: FnHasRefs (ArchFn arch) - => AssignRhs arch ids tp - -> Set (Some (AssignId ids)) -refsInAssignRhs rhs = - case rhs of - EvalApp v -> refsInApp v - SetUndefined _ -> Set.empty - ReadMem v _ -> refsInValue v - EvalArchFn f _ -> refsInFn f +import Data.Macaw.CFG.Core +import Data.Macaw.Memory diff --git a/src/Data/Macaw/CFG/App.hs b/src/Data/Macaw/CFG/App.hs index 845d83e2..4f628634 100644 --- a/src/Data/Macaw/CFG/App.hs +++ b/src/Data/Macaw/CFG/App.hs @@ -48,7 +48,8 @@ import Data.Macaw.Types -- | App defines builtin operations on values. data App (f :: Type -> *) (tp :: Type) where - Mux :: !(NatRepr n) + Mux :: (1 <= n) + => !(NatRepr n) -> !(f BoolType) -> !(f (BVType n)) -> !(f (BVType n)) diff --git a/src/Data/Macaw/CFG/Block.hs b/src/Data/Macaw/CFG/Block.hs new file mode 100644 index 00000000..30248b9e --- /dev/null +++ b/src/Data/Macaw/CFG/Block.hs @@ -0,0 +1,81 @@ +{-| +Copyright : (c) Galois, Inc 2017 +Maintainer : Joe Hendrix + +This exports the pre-clasisification term statement and block data +types. +-} +{-# LANGUAGE FlexibleContexts #-} +module Data.Macaw.CFG.Block + ( Block(..) + , TermStmt(..) + ) where + +import Data.Parameterized.Classes +import Data.Text (Text) +import qualified Data.Text as Text +import Data.Word +import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) + +import Data.Macaw.CFG.Core +import Data.Macaw.Types + +------------------------------------------------------------------------ +-- TermStmt + +-- | A terminal statement in a block +-- +-- This is the unclassified definition that is generated directly from +-- the architecture specific disassembler. +data TermStmt arch ids + -- | Fetch and execute the next instruction from the given processor state. + = FetchAndExecute !(RegState (ArchReg arch) (Value arch ids)) + -- | Branch and execute one block or another. + | Branch !(Value arch ids BoolType) !Word64 !Word64 + -- | The syscall instruction. + -- We model system calls as terminal instructions because from the + -- application perspective, the semantics will depend on the operating + -- system. + | Syscall !(RegState (ArchReg arch) (Value arch ids)) + -- | The block ended prematurely due to an error in instruction + -- decoding or translation. + -- + -- This contains the state of the registers when the translation error + -- occured and the error message recorded. + | TranslateError !(RegState (ArchReg arch) (Value arch ids)) !Text + +instance ( OrdF (ArchReg arch) + , ShowF (ArchReg arch) + ) + => Pretty (TermStmt arch ids) where + pretty (FetchAndExecute s) = + text "fetch_and_execute" <$$> + indent 2 (pretty s) + pretty (Branch c x y) = + text "branch" <+> ppValue 0 c <+> text (show x) <+> text (show y) + pretty (Syscall s) = + text "syscall" <$$> + indent 2 (pretty s) + pretty (TranslateError s msg) = + text "ERROR: " <+> text (Text.unpack msg) <$$> + indent 2 (pretty s) + +------------------------------------------------------------------------ +-- Block + +-- | The type for code blocks returned by the disassembler. +-- +-- The discovery process will attempt to map each block to a suitable ParsedBlock. +data Block arch ids + = Block { blockLabel :: !Word64 + -- ^ Index of this block + , blockStmts :: !([Stmt arch ids]) + -- ^ List of statements in the block. + , blockTerm :: !(TermStmt arch ids) + -- ^ The last statement in the block. + } + +instance ArchConstraints arch => Pretty (Block arch ids) where + pretty b = do + text (show (blockLabel b)) PP.<> text ":" <$$> + indent 2 (vcat (pretty <$> blockStmts b) <$$> pretty (blockTerm b)) diff --git a/src/Data/Macaw/CFG/Core.hs b/src/Data/Macaw/CFG/Core.hs new file mode 100644 index 00000000..f1c07306 --- /dev/null +++ b/src/Data/Macaw/CFG/Core.hs @@ -0,0 +1,691 @@ +{-| +Copyright : (c) Galois, Inc 2015-2017 +Maintainer : Joe Hendrix + +Defines data types needed to represent values, assignments, and statements from Machine code. + +This is a low-level CFG representation where the entire program is a +single CFG. +-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.Macaw.CFG.Core + ( -- * Stmt level declarations + Stmt(..) + , Assignment(..) + , AssignId(..) + , AssignRhs(..) + , MemRepr(..) + , memReprBytes + -- * Value + , Value(..) + , BVValue + , valueAsApp + , valueWidth + , asBaseOffset + , asInt64Constant + , mkLit + , bvValue + , ppValueAssignments + , ppValueAssignmentList + -- * RegState + , RegState + , regStateMap + , boundValue + , cmpRegState + , curIP + , mkRegState + , mkRegStateM + , traverseRegsWith + , zipWithRegState + -- * Pretty printing + , ppAssignId + , ppLit + , ppValue + , PrettyF(..) + , ArchConstraints(..) + , PrettyRegValue(..) + -- * Architecture type families + , ArchAddr + , ArchSegmentedAddr + , ArchFn + , ArchReg + , ArchStmt + , RegAddr + , RegAddrWidth + -- * RegisterInfo + , RegisterInfo(..) + , asStackAddrOffset + -- * References + , StmtHasRefs(..) + , FnHasRefs(..) + , refsInValue + , refsInApp + , refsInAssignRhs + -- ** Synonyms + , ArchAddrWidth + , ArchAddrValue + ) where + +import Control.Exception (assert) +import Control.Lens +import Control.Monad.Identity +import Control.Monad.State.Strict +import Data.Bits +import Data.Int (Int64) +import Data.Maybe (isNothing, catMaybes) +import Data.Parameterized.Classes +import Data.Parameterized.Map (MapF) +import qualified Data.Parameterized.Map as MapF +import Data.Parameterized.NatRepr +import Data.Parameterized.Nonce +import Data.Parameterized.Some +import Data.Parameterized.TraversableF +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as Text +import GHC.TypeLits +import Numeric (showHex) +import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) + +import Data.Macaw.CFG.App +import Data.Macaw.Memory (MemWord, MemWidth, SegmentedAddr(..), Endianness(..)) +import Data.Macaw.Types + +-- Note: +-- The declarations in this file follow a top-down order, so the top-level +-- definitions should be first. + +type Prec = Int + +colonPrec :: Prec +colonPrec = 7 + +plusPrec :: Prec +plusPrec = 6 + +-- | Class for pretty printing with a precedence field. +class PrettyPrec v where + prettyPrec :: Int -> v -> Doc + +-- | Pretty print over all instances of a type. +class PrettyF (f :: k -> *) where + prettyF :: f tp -> Doc + +-- | Pretty print a document with parens if condition is true +parenIf :: Bool -> Doc -> Doc +parenIf True d = parens d +parenIf False d = d + +bracketsep :: [Doc] -> Doc +bracketsep [] = text "{}" +bracketsep (h:l) = vcat $ + [text "{" <+> h] + ++ fmap (text "," <+>) l + ++ [text "}"] + +------------------------------------------------------------------------ +-- AssignId + +-- | An 'AssignId' is a unique identifier used to identify assignment +-- statements, in a manner similar to SSA (single static assignment) +-- form. 'AssignId's are typed, and also include a type variable @ids@ +-- that intuitively denotes the set of identifiers from which they are +-- drawn. +newtype AssignId (ids :: *) (tp :: Type) = AssignId (Nonce ids tp) + +ppAssignId :: AssignId ids tp -> Doc +ppAssignId (AssignId w) = text ("r" ++ show (indexValue w)) + +instance Eq (AssignId ids tp) where + AssignId id1 == AssignId id2 = id1 == id2 + +instance TestEquality (AssignId ids) where + testEquality (AssignId id1) (AssignId id2) = testEquality id1 id2 + +instance OrdF (AssignId ids) where + compareF (AssignId id1) (AssignId id2) = compareF id1 id2 + +instance ShowF (AssignId ids) where + showF (AssignId n) = show n + +instance Show (AssignId ids tp) where + show (AssignId n) = show n + +------------------------------------------------------------------------ +-- Type families for architecture specific components. + +-- | Width of register used to store addresses. +type family RegAddrWidth (r :: Type -> *) :: Nat + +-- | The value used to store +type RegAddr r = MemWord (RegAddrWidth r) + +-- | Type family for defining what a "register" is for this architecture. +-- +-- Registers include things like the general purpose registers, any flag +-- registers that can be read and written without side effects, +type family ArchReg (arch :: *) :: Type -> * + +-- | A type family for architecture specific functions. +-- +-- These function may return a value. They may depend on the current state of +-- the heap, but should not affect the processor state. +-- +-- The function may depend on the set of registers defined so far, and the type +-- of the result. +type family ArchFn (arch :: *) :: * -> Type -> * + +-- | A type family for defining architecture-specific statements. +-- +-- The second type parameter is the ids phantom type used to provide +-- uniqueness of Nonce values that identify assignments. +-- +type family ArchStmt (arch :: *) :: * -> * + +-- | The type to use for addresses on the architecutre. +type ArchAddr arch = RegAddr (ArchReg arch) + +-- | Number of bits in addreses for architecture. +type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) + +-- | A segmented addr for a given architecture. +type ArchSegmentedAddr arch = SegmentedAddr (ArchAddrWidth arch) + + +------------------------------------------------------------------------ +-- Value, Assignment, AssignRhs declarations. + +-- | A value at runtime. +data Value arch ids tp + = forall n + . (tp ~ BVType n) + => BVValue !(NatRepr n) !Integer + -- ^ A constant bitvector + | ( tp ~ BVType (ArchAddrWidth arch)) + => RelocatableValue !(NatRepr (ArchAddrWidth arch)) !(ArchSegmentedAddr arch) + -- ^ A given memory address. + | AssignedValue !(Assignment arch ids tp) + -- ^ Value from an assignment statement. + | Initial !(ArchReg arch tp) + -- ^ Represents the value assigned to the register when the block started. + +type BVValue arch ids w = Value arch ids (BVType w) + +-- | A address value for a specific architecture +type ArchAddrValue arch ids = BVValue arch ids (ArchAddrWidth arch) + +-- | An assignment consists of a unique location identifier and a right- +-- hand side that returns a value. +data Assignment arch ids tp = + Assignment { assignId :: !(AssignId ids tp) + , assignRhs :: !(AssignRhs arch ids tp) + } + +-- | The type stored in memory. +-- +-- The endianess indicates whether the address stores the most +-- or least significant byte. The following indices either store +-- the next lower or higher bytes. +data MemRepr (tp :: Type) where + BVMemRepr :: !(NatRepr w) -> !Endianness -> MemRepr (BVType (8*w)) + +instance Pretty (MemRepr tp) where + pretty (BVMemRepr w BigEndian) = text "bvbe" <+> text (show w) + pretty (BVMemRepr w LittleEndian) = text "bvle" <+> text (show w) + +-- | Return the number of bytes this takes up. +memReprBytes :: MemRepr tp -> Integer +memReprBytes (BVMemRepr x _) = natValue x + +instance TestEquality MemRepr where + testEquality (BVMemRepr xw xe) (BVMemRepr yw ye) = + if xe == ye then do + Refl <- testEquality xw yw + Just Refl + else + Nothing + +instance HasRepr MemRepr TypeRepr where + typeRepr (BVMemRepr w _) = BVTypeRepr (natMultiply n8 w) + +-- | The right hand side of an assignment is an expression that +-- returns a value. +data AssignRhs (arch :: *) ids tp where + -- An expression that is computed from evaluating subexpressions. + EvalApp :: !(App (Value arch ids) tp) + -> AssignRhs arch ids tp + + -- An expression with an undefined value. + SetUndefined :: (tp ~ BVType n) + => !(NatRepr n) -- Width of undefined value. + -> AssignRhs arch ids tp + + -- Read memory at given location. + ReadMem :: !(ArchAddrValue arch ids) + -> !(MemRepr tp) + -> AssignRhs arch ids tp + + -- Call an architecture specific function that returns some result. + EvalArchFn :: !(ArchFn arch ids tp) + -> !(TypeRepr tp) + -> AssignRhs arch ids tp + +------------------------------------------------------------------------ +-- Type operations on assignment AssignRhs, and Value + +instance HasRepr (AssignRhs arch ids) TypeRepr where + typeRepr rhs = + case rhs of + EvalApp a -> typeRepr a + SetUndefined w -> BVTypeRepr w + ReadMem _ tp -> typeRepr tp + EvalArchFn _ rtp -> rtp + +instance ( HasRepr (ArchReg arch) TypeRepr + ) + => HasRepr (Value arch ids) TypeRepr where + + typeRepr (BVValue w _) = BVTypeRepr w + typeRepr (RelocatableValue w _) = BVTypeRepr w + typeRepr (AssignedValue a) = typeRepr (assignRhs a) + typeRepr (Initial r) = typeRepr r + +valueWidth :: ( HasRepr (ArchReg arch) TypeRepr + ) + => Value arch ids (BVType n) -> NatRepr n +valueWidth v = + case typeRepr v of + BVTypeRepr n -> n + +------------------------------------------------------------------------ +-- Value equality + +instance OrdF (ArchReg arch) + => OrdF (Value arch ids) where + compareF (BVValue wx vx) (BVValue wy vy) = + case compareF wx wy of + LTF -> LTF + EQF -> fromOrdering (compare vx vy) + GTF -> GTF + compareF BVValue{} _ = LTF + compareF _ BVValue{} = GTF + + compareF (RelocatableValue _ x) (RelocatableValue _ y) = + fromOrdering (compare x y) + compareF RelocatableValue{} _ = LTF + compareF _ RelocatableValue{} = GTF + + compareF (AssignedValue x) (AssignedValue y) = + compareF (assignId x) (assignId y) + compareF AssignedValue{} _ = LTF + compareF _ AssignedValue{} = GTF + + compareF (Initial r) (Initial r') = + case compareF r r' of + LTF -> LTF + EQF -> EQF + GTF -> GTF + +instance OrdF (ArchReg arch) + => TestEquality (Value arch ids) where + testEquality x y = orderingF_refl (compareF x y) + +instance OrdF (ArchReg arch) + => Eq (Value arch ids tp) where + x == y = isJust (testEquality x y) + +instance OrdF (ArchReg arch) + => Ord (Value arch ids tp) where + compare x y = toOrdering (compareF x y) + +instance OrdF (ArchReg arch) + => EqF (Value arch ids) where + eqF = (==) + +------------------------------------------------------------------------ +-- Value operations + +mkLit :: NatRepr n -> Integer -> Value arch ids (BVType n) +mkLit n v = BVValue n (v .&. mask) + where mask = maxUnsigned n + +bvValue :: KnownNat n => Integer -> Value arch ids (BVType n) +bvValue i = mkLit knownNat i + +valueAsApp :: Value arch ids tp -> Maybe (App (Value arch ids) tp) +valueAsApp (AssignedValue (Assignment _ (EvalApp a))) = Just a +valueAsApp _ = Nothing + +asInt64Constant :: Value arch ids (BVType 64) -> Maybe Int64 +asInt64Constant (BVValue _ o) = Just (fromInteger o) +asInt64Constant _ = Nothing + +asBaseOffset :: Value arch ids (BVType w) -> (Value arch ids (BVType w), Integer) +asBaseOffset x + | Just (BVAdd _ x_base (BVValue _ x_off)) <- valueAsApp x = (x_base, x_off) + | otherwise = (x,0) + +------------------------------------------------------------------------ +-- RegState + +-- | This represents the state of the processor registers. +newtype RegState (r :: k -> *) (f :: k -> *) = RegState (MapF.MapF r f) + +deriving instance (OrdF r, EqF f) => Eq (RegState r f) + +deriving instance FunctorF (RegState r) +deriving instance FoldableF (RegState r) + +instance TraversableF (RegState r) where + traverseF f (RegState m) = RegState <$> traverseF f m + +-- | Return underlying map of register state. +regStateMap :: RegState r f -> MapF.MapF r f +regStateMap (RegState m) = m + +-- | Traverse the register state with the name of each register and value. +traverseRegsWith :: Applicative m + => (forall tp. r tp -> f tp -> m (g tp)) + -> RegState r f + -> m (RegState r g) +traverseRegsWith f (RegState m) = RegState <$> MapF.traverseWithKey f m + +-- | Get a register out of the state. +boundValue :: forall r f tp + . OrdF r + => r tp + -> Simple Lens (RegState r f) (f tp) +boundValue r = lens getter setter + where getter (RegState m) = + case MapF.lookup r m of + Just v -> v + Nothing -> error "internal error in boundValue given unexpected reg" + setter (RegState m) v = RegState (MapF.insert r v m) + + +-- | Compares if two register states are equal. +cmpRegState :: OrdF r + => (forall u. f u -> g u -> Bool) + -- ^ Function for checking if two values are equal. + -> RegState r f + -> RegState r g + -> Bool +cmpRegState p (RegState x) (RegState y) = go (MapF.toList x) (MapF.toList y) + where go [] [] = True + go [] (_:_) = False + go (_:_) [] = False + go (MapF.Pair xk xv:xr) (MapF.Pair yk yv:yr) = + case testEquality xk yk of + Nothing -> False + Just Refl -> p xv yv && go xr yr + +------------------------------------------------------------------------ +-- RegisterInfo + +-- | This class provides access to information about registers. +class ( OrdF r + , ShowF r + , MemWidth (RegAddrWidth r) + , HasRepr r TypeRepr + ) => RegisterInfo r where + + -- | List of all arch registers. + archRegs :: [Some r] + + -- | The stack pointer register + sp_reg :: r (BVType (RegAddrWidth r)) + + -- | The instruction pointer register + ip_reg :: r (BVType (RegAddrWidth r)) + + -- | The register used to store system call numbers. + syscall_num_reg :: r (BVType (RegAddrWidth r)) + + -- | Registers used for passing system call arguments + syscallArgumentRegs :: [r (BVType (RegAddrWidth r))] + + +-- The value of the current instruction pointer. +curIP :: RegisterInfo r + => Simple Lens (RegState r f) (f (BVType (RegAddrWidth r))) +curIP = boundValue ip_reg + +mkRegStateM :: (RegisterInfo r, Applicative m) + => (forall tp . r tp -> m (f tp)) + -> m (RegState r f) +mkRegStateM f = RegState . MapF.fromList <$> traverse g archRegs + where g (Some r) = MapF.Pair r <$> f r + +-- Create a pure register state +mkRegState :: RegisterInfo r -- AbsRegState r + => (forall tp . r tp -> f tp) + -> RegState r f +mkRegState f = runIdentity (mkRegStateM (return . f)) + +zipWithRegState :: RegisterInfo r + => (forall u. f u -> g u -> h u) + -> RegState r f + -> RegState r g + -> RegState r h +zipWithRegState f x y = mkRegState (\r -> f (x ^. boundValue r) (y ^. boundValue r)) + +-- | Returns a offset if the value is an offset of the stack. +asStackAddrOffset :: RegisterInfo (ArchReg arch) + => Value arch ids tp + -> Maybe (BVValue arch ids (ArchAddrWidth arch)) +asStackAddrOffset addr + | Just (BVAdd _ (Initial base) offset) <- valueAsApp addr + , Just Refl <- testEquality base sp_reg = do + Just offset + | Initial base <- addr + , Just Refl <- testEquality base sp_reg = + case typeRepr base of + BVTypeRepr w -> Just (BVValue w 0) + | otherwise = + Nothing + + +------------------------------------------------------------------------ +-- Pretty print Assign, AssignRhs, Value operations + +ppLit :: NatRepr n -> Integer -> Doc +ppLit w i + | i >= 0 = text ("0x" ++ showHex i "") <+> text "::" <+> brackets (text (show w)) + | otherwise = error "ppLit given negative value" + +-- | Pretty print a value. +ppValue :: ShowF (ArchReg arch) => Prec -> Value arch ids tp -> Doc +ppValue p (BVValue w i) = assert (i >= 0) $ parenIf (p > colonPrec) $ ppLit w i +ppValue p (RelocatableValue _ a) = parenIf (p > plusPrec) $ text (show a) +ppValue _ (AssignedValue a) = ppAssignId (assignId a) +ppValue _ (Initial r) = text (showF r) PP.<> text "_0" + +instance ShowF (ArchReg arch) => PrettyPrec (Value arch ids tp) where + prettyPrec = ppValue + +instance ShowF (ArchReg arch) => Pretty (Value arch ids tp) where + pretty = ppValue 0 + +instance ShowF (ArchReg arch) => Show (Value arch ids tp) where + show = show . pretty + +class ( RegisterInfo (ArchReg arch) + , PrettyF (ArchStmt arch) + ) => ArchConstraints arch where + + -- | A function for pretty printing an archFn of a given type. + ppArchFn :: Applicative m + => (forall u . Value arch ids u -> m Doc) + -- ^ Function for pretty printing vlaue. + -> ArchFn arch ids tp + -> m Doc + +-- | Pretty print an assignment right-hand side using operations parameterized +-- over an application to allow side effects. +ppAssignRhs :: (Applicative m, ArchConstraints arch) + => (forall u . Value arch ids u -> m Doc) + -- ^ Function for pretty printing value. + -> AssignRhs arch ids tp + -> m Doc +ppAssignRhs pp (EvalApp a) = ppAppA pp a +ppAssignRhs _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w)) +ppAssignRhs pp (ReadMem a repr) = + (\d -> text "read_mem" <+> d <+> PP.parens (pretty repr)) <$> pp a +ppAssignRhs pp (EvalArchFn f _) = ppArchFn pp f + +instance ArchConstraints arch => Pretty (AssignRhs arch ids tp) where + pretty = runIdentity . ppAssignRhs (Identity . ppValue 10) + +instance ArchConstraints arch => Pretty (Assignment arch ids tp) where + pretty (Assignment lhs rhs) = ppAssignId lhs <+> text ":=" <+> pretty rhs + +------------------------------------------------------------------------ +-- Pretty print a value assignment + +-- | Helper type to wrap up a 'Doc' with a dummy type argument; used to put +-- 'Doc's into heterogenous maps in the below +newtype DocF (a :: Type) = DocF Doc + +-- | This pretty prints a value's representation while saving the pretty +-- printed repreentation of subvalues in a map. +collectValueRep :: forall arch ids tp + . ArchConstraints arch + => Prec + -> Value arch ids tp + -> State (MapF (AssignId ids) DocF) Doc +collectValueRep _ (AssignedValue a :: Value arch ids tp) = do + let lhs = assignId a + mr <- gets $ MapF.lookup lhs + when (isNothing mr) $ do + let ppVal :: forall u . Value arch ids u -> + State (MapF (AssignId ids) DocF) Doc + ppVal = collectValueRep 10 + rhs <- ppAssignRhs ppVal (assignRhs a) + let d = ppAssignId lhs <+> text ":=" <+> rhs + modify $ MapF.insert lhs (DocF d) + return () + return $! ppAssignId lhs +collectValueRep p v = return $ ppValue p v + +-- | This pretty prints all the history used to create a value. +ppValueAssignments' :: State (MapF (AssignId ids) DocF) Doc -> Doc +ppValueAssignments' m = + case MapF.elems bindings of + [] -> rhs + (Some (DocF h):r) -> + let first = text "let" PP.<+> h + f (Some (DocF b)) = text " " PP.<> b + in vcat (first:fmap f r) <$$> + text " in" PP.<+> rhs + where (rhs, bindings) = runState m MapF.empty + +-- | This pretty prints all the history used to create a value. +ppValueAssignments :: ArchConstraints arch => Value arch ids tp -> Doc +ppValueAssignments v = ppValueAssignments' (collectValueRep 0 v) + +ppValueAssignmentList :: ArchConstraints arch => [Value arch ids tp] -> Doc +ppValueAssignmentList vals = + ppValueAssignments' $ do + docs <- mapM (collectValueRep 0) vals + return $ brackets $ hcat (punctuate comma docs) + +------------------------------------------------------------------------ +-- Pretty printing RegState + +-- | This class provides a way of optionally pretty printing the contents +-- of a register or omitting them. +class PrettyRegValue r (f :: Type -> *) where + -- | ppValueEq should return a doc if the contents of the given register + -- should be printed, and Nothing if the contents should be ignored. + ppValueEq :: r tp -> f tp -> Maybe Doc + +instance ( PrettyRegValue r f + ) + => Pretty (RegState r f) where + pretty (RegState m) = bracketsep $ catMaybes (f <$> MapF.toList m) + where f :: MapF.Pair r f -> Maybe Doc + f (MapF.Pair r v) = ppValueEq r v + +instance ( PrettyRegValue r f + ) + => Show (RegState r f) where + show s = show (pretty s) + +instance ( OrdF r + , ShowF r + , r ~ ArchReg arch + ) + => PrettyRegValue r (Value arch ids) where + ppValueEq r (Initial r') + | Just _ <- testEquality r r' = Nothing + ppValueEq r v + | otherwise = Just $ text (showF r) <+> text "=" <+> pretty v + +------------------------------------------------------------------------ +-- Stmt + +-- | A statement in our control flow graph language. +data Stmt arch ids + = forall tp . AssignStmt !(Assignment arch ids tp) + | forall tp . WriteMem !(ArchAddrValue arch ids) !(MemRepr tp) !(Value arch ids tp) + -- ^ This denotes a write to memory, and consists of an address to write to, a `MemRepr` defining + -- how the value should be stored in memory, and the value to be written. + | PlaceHolderStmt !([Some (Value arch ids)]) !String + | Comment !Text + | ExecArchStmt !(ArchStmt arch ids) + -- ^ Execute an architecture specific statement + +instance ArchConstraints arch => Pretty (Stmt arch ids) where + pretty (AssignStmt a) = pretty a + pretty (WriteMem a _ rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs + pretty (PlaceHolderStmt vals name) = text ("PLACEHOLDER: " ++ name) + <+> parens (hcat $ punctuate comma + $ map (viewSome (ppValue 0)) vals) + pretty (Comment s) = text $ "# " ++ Text.unpack s + pretty (ExecArchStmt s) = prettyF s + + +instance ArchConstraints arch => Show (Stmt arch ids) where + show = show . pretty + +------------------------------------------------------------------------ +-- References + +-- | Return refernces in a stmt type. +class StmtHasRefs f where + refsInStmt :: f ids -> Set (Some (AssignId ids)) + +-- | Return refernces in a function type. +class FnHasRefs (f :: * -> Type -> *) where + refsInFn :: f ids tp -> Set (Some (AssignId ids)) + +refsInValue :: Value arch ids tp -> Set (Some (AssignId ids)) +refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v) +refsInValue _ = Set.empty + +refsInApp :: App (Value arch ids) tp -> Set (Some (AssignId ids)) +refsInApp app = foldApp refsInValue app + +refsInAssignRhs :: FnHasRefs (ArchFn arch) + => AssignRhs arch ids tp + -> Set (Some (AssignId ids)) +refsInAssignRhs rhs = + case rhs of + EvalApp v -> refsInApp v + SetUndefined _ -> Set.empty + ReadMem v _ -> refsInValue v + EvalArchFn f _ -> refsInFn f