From b8c3e65389da8bc192e486fe21cc01ba08c952b0 Mon Sep 17 00:00:00 2001 From: Tristan Ravitch Date: Mon, 6 Apr 2020 15:56:43 -0700 Subject: [PATCH 1/2] Add a test with a call --- macaw-aarch32/tests/arm/test-direct-call-a32.exe | Bin 0 -> 1332 bytes .../tests/arm/test-direct-call-a32.mcw.expected | 10 ++++++++++ .../tests/arm/test-just-exit-t32.mcw.expected | 4 ---- 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100755 macaw-aarch32/tests/arm/test-direct-call-a32.exe create mode 100644 macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected delete mode 100644 macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected diff --git a/macaw-aarch32/tests/arm/test-direct-call-a32.exe b/macaw-aarch32/tests/arm/test-direct-call-a32.exe new file mode 100755 index 0000000000000000000000000000000000000000..62c6b9a175ab8cbb5f158fba510d057f71601789 GIT binary patch literal 1332 zcma)6&rj4)5T4g|u>=l4P!I?Bd?ZYyj8i`jjf;K@V| zn)oCB1H}Ck_TW*^UOW&_oJ?43A{t{nc=E7*)AnIS4^A?j`DVU(@Ab{J4`!}k5kim> zjmC)4Bw`b=?{qvgN==YO26CNbhWUukOhFzDYbp$iFdxM<#I-PL;Mo!9*?$$PfEAR6 z&&qMun}PIT{t~Y5fikBb%14{Mu}H_>`yZbFI5q8>qd(`j_VxV9M{kX0CZOiCR`>Q? z%cQQrbcO@X*=+S#F9UKO21Jcr+J4e9np+=7+vj$}b6(Bt8>lf8H3Bs{xS!kYFI;16 zUu)6hH=iix4mw!RYV47<8KkJO_ci3vw{U&%c-`6Q>5JCbVzFM4bt`Y@>})1q4^eJB z!dA}7=Ch77NmJu^Dc5(4WfCXgrq17@4&4-;qR;3TN5z0XD2|IEZ8W+}F#Z(2PZmA4 zI|n%GgX)JK-rnz8J-pQ_8dSf10fG0wxP_v88*;AH(AFrQEH=MV93 zAL5hrYck`NtA0slO77~aU81y0X^)nvP$<@Fg_?A$vQUVTU-97I%_WoY<8YP=zUN98 zrh@UdYxmcrTZEO>$gU`_vMzmlxl*@__42BhDSKo~e?yXuEy*seudVqNps9s9+m*6f zF4l2Soc(`g7{nt`o(P^7{ua|vCEO1d8_LtfqsI5X1N#)JMDe&ECX~NNzJLDqRgSsJ z=l<||4hJF3<6bD?9P4E$*1D(SFA(HeP{JG*w$W6FkX|FO1m$<@g7UunzvzVW*_4a` T+))5lt=9`b$=d)}_y_M7kE^mj literal 0 HcmV?d00001 diff --git a/macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected b/macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected new file mode 100644 index 00000000..cef3b170 --- /dev/null +++ b/macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected @@ -0,0 +1,10 @@ +R { funcs = [ (0x100d8, [(0x100d8, 48)]) + , (0x1010c, [ (0x1010c, 32) + , (0x1012c, 12) + ]) + , (0x10138, [ (0x10138, 28) + , (0x10154, 4) + ]) + ] + , ignoreBlocks = [0x10158] + } diff --git a/macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected b/macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected deleted file mode 100644 index 9d030188..00000000 --- a/macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected +++ /dev/null @@ -1,4 +0,0 @@ -R { funcs = [ (0x8001, [(0x8001, 6), (0x8007,10)]) - ] - , ignoreBlocks = [] - } From 5e5c90c993a9e0c3ecfa7bc611189ea2345e7fd3 Mon Sep 17 00:00:00 2001 From: Tristan Ravitch Date: Mon, 6 Apr 2020 17:56:22 -0700 Subject: [PATCH 2/2] Updates to the simplifier and call recognition --- macaw-aarch32/macaw-aarch32.cabal | 1 + .../src/Data/Macaw/ARM/Disassemble.hs | 31 +++++-------- macaw-aarch32/src/Data/Macaw/ARM/Identify.hs | 13 ++++-- .../src/Data/Macaw/ARM/Semantics/TH.hs | 44 +++++++++++-------- macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs | 24 ++++++++++ 5 files changed, 72 insertions(+), 41 deletions(-) diff --git a/macaw-aarch32/macaw-aarch32.cabal b/macaw-aarch32/macaw-aarch32.cabal index 3cf3d761..a1f40995 100644 --- a/macaw-aarch32/macaw-aarch32.cabal +++ b/macaw-aarch32/macaw-aarch32.cabal @@ -30,6 +30,7 @@ library , ansi-wl-pprint , asl-translator , bytestring + , bv-sized , cereal , containers , crucible diff --git a/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs b/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs index 91d1bbef..de389fdb 100644 --- a/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs +++ b/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs @@ -76,7 +76,6 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import Data.Macaw.ARM.ARMReg import Data.Macaw.ARM.Arch() -import Data.Macaw.AbsDomain.AbsState as MA import Data.Macaw.CFG import Data.Macaw.CFG.Block import qualified Data.Macaw.CFG.Core as MC @@ -93,7 +92,6 @@ import qualified Dismantle.ARM.T32 as ThumbD import Text.Printf ( printf ) import qualified SemMC.Architecture.AArch32 as ARM -import Debug.Trace data InstructionSet = A32I ARMD.Instruction | T32I ThumbD.Instruction deriving (Eq, Show) @@ -185,8 +183,8 @@ disassembleBlock lookupSemantics gs curPCAddr blockOff maxOffset = do Right (i, bytesRead) -> do -- FIXME: Set PSTATE.T based on whether instruction is A32I or T32I -- traceM ("II: " ++ show i) - let curPC = MM.segmentOffAddr seg off - nextPCOffset = off + bytesRead + -- let curPC = MM.segmentOffAddr seg off + let nextPCOffset = off + bytesRead nextPC = MM.segmentOffAddr seg nextPCOffset nextPCVal :: Value ARM.AArch32 ids (BVType 32) = MC.RelocatableValue (MM.addrWidthRepr curPCAddr) nextPC -- curPCVal :: Value ARM.AArch32 ids (BVType 32) = MC.RelocatableValue (MM.addrWidthRepr curPCAddr) curPC @@ -241,18 +239,17 @@ readInstruction :: (MM.MemWidth w) -> Either (ARMMemoryError w) (InstructionSet, MM.MemWord w) readInstruction addr = do let seg = MM.segoffSegment addr - segRelAddrRaw = MM.segoffAddr addr - -- Addresses specified in ARM instructions have the low bit - -- clear, but Thumb (T32) target addresses have the low bit sit. - -- This is only manifested in the instruction addresses: the - -- actual PC for fetching instructions clears the low bit to - -- generate aligned memory accesses. - loBit = MM.addrOffset segRelAddrRaw .&. 1 - segRelAddr = segRelAddrRaw { addrOffset = MM.addrOffset segRelAddrRaw `xor` loBit } + let segRelAddr = MM.segoffAddr addr + -- Addresses specified in ARM instructions have the low bit + -- clear, but Thumb (T32) target addresses have the low bit sit. + -- This is only manifested in the instruction addresses: the + -- actual PC for fetching instructions clears the low bit to + -- generate aligned memory accesses. + let alignedMsegOff = MM.clearSegmentOffLeastBit addr if MM.segmentFlags seg `MMP.hasPerm` MMP.execute then do - let ao = addrOffset segRelAddr - alignedMsegOff <- liftMaybe (ARMInvalidInstructionAddress seg ao) (MM.resolveSegmentOff seg ao) + -- traceM ("Orig addr = " ++ show addr ++ " modified to " ++ show ao) + -- alignedMsegOff <- liftMaybe (ARMInvalidInstructionAddress seg ao) (MM.resolveSegmentOff seg ao) contents <- liftMemError $ MM.segoffContentsAfter alignedMsegOff case contents of [] -> ET.throwError $ ARMMemoryError (MM.AccessViolation segRelAddr) @@ -276,12 +273,6 @@ readInstruction addr = do Nothing -> ET.throwError $ ARMInvalidInstruction segRelAddr contents else ET.throwError $ ARMMemoryError (MM.PermissionsError segRelAddr) -liftMaybe :: ARMMemoryError w -> Maybe a -> Either (ARMMemoryError w) a -liftMaybe err ma = - case ma of - Just a -> Right a - Nothing -> Left err - liftMemError :: Either (MM.MemoryError w) a -> Either (ARMMemoryError w) a liftMemError e = case e of diff --git a/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs b/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs index 1d3eb424..ea200f24 100644 --- a/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs +++ b/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs @@ -10,7 +10,7 @@ module Data.Macaw.ARM.Identify ) where import Control.Lens ( (^.) ) -import Data.Macaw.ARM.Arch +import qualified Data.Macaw.ARM.ARMReg as AR import Data.Macaw.AbsDomain.AbsState ( AbsProcessorState , AbsValue(..) , transferValue @@ -18,6 +18,7 @@ import Data.Macaw.AbsDomain.AbsState ( AbsProcessorState ) import qualified Data.Macaw.CFG as MC import qualified Data.Macaw.Memory as MM +import qualified Data.Macaw.SemMC.Simplify as MSS import qualified Data.Macaw.Types as MT import Data.Semigroup import qualified Data.Sequence as Seq @@ -42,8 +43,14 @@ identifyCall :: MM.Memory (MC.ArchAddrWidth ARM.AArch32) -> Seq.Seq (MC.Stmt ARM.AArch32 ids) -> MC.RegState (MC.ArchReg ARM.AArch32) (MC.Value ARM.AArch32 ids) -> Maybe (Seq.Seq (MC.Stmt ARM.AArch32 ids), MC.ArchSegmentOff ARM.AArch32) -identifyCall _mem _stmts0 _rs = Nothing -- KWQ: for now, nothing is identified as a call - +identifyCall mem stmts0 rs + | not (null stmts0) + , MC.RelocatableValue {} <- rs ^. MC.boundValue AR.arm_LR + , Just retVal <- MSS.simplifyValue (rs ^. MC.boundValue AR.arm_LR) + , Just retAddrVal <- MC.valueAsMemAddr retVal + , Just retAddr <- MM.asSegmentOff mem retAddrVal = + Just (stmts0, retAddr) + | otherwise = Nothing -- | Intended to identify a return statement. -- diff --git a/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs b/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs index 4d050261..e9e71790 100644 --- a/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs +++ b/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs @@ -21,36 +21,26 @@ module Data.Macaw.ARM.Semantics.TH import Control.Monad (void) import qualified Control.Monad.Except as E -import qualified Data.Functor.Const as C +import qualified Data.Bits as B +import qualified Data.BitVector.Sized as BVS import Data.List (isPrefixOf) import Data.Macaw.ARM.ARMReg import Data.Macaw.ARM.Arch import qualified Data.Macaw.CFG as M import qualified Data.Macaw.SemMC.Generator as G -import qualified Data.Macaw.SemMC.Operands as O -import Data.Macaw.SemMC.TH ( addEltTH, appToExprTH, evalNonceAppTH, evalBoundVar, natReprTH, symFnName, asName ) +import Data.Macaw.SemMC.TH ( addEltTH, appToExprTH, evalNonceAppTH, evalBoundVar, natReprTH, symFnName ) import Data.Macaw.SemMC.TH.Monad import qualified Data.Macaw.Types as M import qualified Data.Parameterized.Context as Ctx import Data.Parameterized.Classes -import qualified Data.Parameterized.List as L -import qualified Data.Parameterized.Map as Map import Data.Parameterized.NatRepr -import Data.Parameterized.Some ( Some(..) ) -import qualified Data.Parameterized.TraversableFC as FC -import Data.Proxy ( Proxy(..) ) -import GHC.TypeLits import Language.Haskell.TH import qualified SemMC.Architecture.AArch32 as ARM import qualified SemMC.Architecture.ARM.Opcodes as ARM -import qualified SemMC.Architecture as A -import qualified SemMC.Architecture.ARM.Location as Loc -import qualified SemMC.Architecture.Location as L import qualified What4.BaseTypes as WT import qualified What4.Expr.Builder as WB import qualified Language.ASL.Globals as ASL -import qualified Language.ASL.Globals.Definitions as ASL loadSemantics :: IO (ARM.ASLSemantics) loadSemantics = ARM.loadSemantics (ARM.ASLSemanticsOpts { ARM.aslOptTrimRegs = True}) @@ -128,12 +118,12 @@ armNonceAppEval bvi nonceApp = case args of Ctx.Empty Ctx.:> assert Ctx.:> bv -> Just $ do assertTH <- addEltTH M.LittleEndian bvi assert - bv <- addEltTH M.LittleEndian bvi bv + bvElt <- addEltTH M.LittleEndian bvi bv liftQ [| case $(return assertTH) of - M.BoolValue True -> return $(return bv) + M.BoolValue True -> return $(return bvElt) M.BoolValue False -> E.throwError (G.GeneratorMessage $ "Bitvector length assertion failed!") -- FIXME: THIS SHOULD THROW AN ERROR - _ -> return $(return bv) + _ -> return $(return bvElt) -- nm -> E.throwError (G.GeneratorMessage $ "Bitvector length assertion failed: ") |] _ -> fail "Invalid call to assertBV" @@ -241,6 +231,25 @@ concreteIte v t f = case v of M.CValue (M.BoolCValue b) -> if b then t else f _ -> E.throwError (G.GeneratorMessage $ "concreteIte: requires concrete value" <> show (M.ppValueAssignments v)) +-- | A smart constructor for division +-- +-- The smart constructor recognizes divisions that can be converted into shifts. +-- We convert the operation to a shift if the divisior is a power of two. +sdiv :: (1 <= n) + => NatRepr n + -> M.Value ARM.AArch32 ids (M.BVType n) + -> M.Value ARM.AArch32 ids (M.BVType n) + -> G.Generator ARM.AArch32 ids s (G.Expr ARM.AArch32 ids (M.BVType n)) +sdiv repr dividend divisor = + case divisor of + M.BVValue nr val + | bv <- BVS.bitVector' repr val + , BVS.bvPopCount bv == 1 -> + withKnownNat repr $ + let app = M.BVSar nr dividend (M.BVValue nr (fromIntegral (B.countTrailingZeros bv))) + in (G.ValueExpr . M.AssignedValue) <$> G.addAssignment (M.EvalApp app) + _ -> addArchAssignment (SDiv repr dividend divisor) + armAppEvaluator :: M.Endianness -> BoundVarInterpretations ARM.AArch32 t fs -> WB.App (WB.Expr t) ctp @@ -255,8 +264,7 @@ armAppEvaluator endianness interps elt = WB.BVSdiv w bv1 bv2 -> return $ do e1 <- addEltTH endianness interps bv1 e2 <- addEltTH endianness interps bv2 - liftQ [| addArchAssignment (SDiv $(natReprTH w) $(return e1) $(return e2)) - |] + liftQ [| sdiv $(natReprTH w) $(return e1) $(return e2) |] WB.BVUrem w bv1 bv2 -> return $ do e1 <- addEltTH endianness interps bv1 e2 <- addEltTH endianness interps bv2 diff --git a/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs b/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs index 8cd5dcef..0574cd90 100644 --- a/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs +++ b/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs @@ -52,6 +52,7 @@ simplifyApp a = BVOr sz l r -> binopbv (.|.) sz l r BVShl sz l r -> binopbv (\l' r' -> shiftL l' (fromIntegral r')) sz l r BVShr sz l r -> binopbv (\l' r' -> shiftR l' (fromIntegral r')) sz l r + BVSar sz l r -> binopbv (\l' r' -> shiftR (toSigned sz l') (fromIntegral (toSigned sz r'))) sz l r BVAdd _ l (BVValue _ 0) -> Just l BVAdd _ (BVValue _ 0) r -> Just r BVAdd rep l@(BVValue {}) r@(RelocatableValue {}) -> @@ -66,10 +67,17 @@ simplifyApp a = BVMul rep l r -> binopbv (*) rep l r SExt (BVValue u n) sz -> Just (BVValue sz (toUnsigned sz (toSigned u n))) UExt (BVValue _ n) sz -> Just (mkLit sz n) + UExt (RelocatableValue _arep addr) sz -> do + memword <- MM.asAbsoluteAddr addr + return $ mkLit sz (fromIntegral memword) Trunc (BVValue _ x) sz -> Just (mkLit sz x) Eq l r -> boolop (==) l r BVComplement sz x -> unop complement sz x + BVSignedLe v1 v2 -> signedRelOp (<=) v1 v2 + BVSignedLt v1 v2 -> signedRelOp (<) v1 v2 + BVUnsignedLe v1 v2 -> unsignedRelOp (<=) v1 v2 + BVUnsignedLt v1 v2 -> unsignedRelOp (<) v1 v2 _ -> Nothing where unop :: forall n . (tp ~ BVType n) @@ -97,3 +105,19 @@ simplifyApp a = binopbv f sz (BVValue _ l) (BVValue _ r) = Just (mkLit sz (f l r)) binopbv _ _ _ _ = Nothing + signedRelOp :: forall n + . (Integer -> Integer -> Bool) + -> Value arch ids (BVType n) + -> Value arch ids (BVType n) + -> Maybe (Value arch ids BoolType) + signedRelOp op (BVValue r1 v1) (BVValue _ v2) = + Just (BoolValue (op (toSigned r1 v1) (toSigned r1 v2))) + signedRelOp _ _ _ = Nothing + unsignedRelOp :: forall n + . (Integer -> Integer -> Bool) + -> Value arch ids (BVType n) + -> Value arch ids (BVType n) + -> Maybe (Value arch ids BoolType) + unsignedRelOp op (BVValue r1 v1) (BVValue _ v2) = + Just (BoolValue (op (toUnsigned r1 v1) (toUnsigned r1 v2))) + unsignedRelOp _ _ _ = Nothing