diff --git a/macaw-aarch32/macaw-aarch32.cabal b/macaw-aarch32/macaw-aarch32.cabal index 3cf3d761..a1f40995 100644 --- a/macaw-aarch32/macaw-aarch32.cabal +++ b/macaw-aarch32/macaw-aarch32.cabal @@ -30,6 +30,7 @@ library , ansi-wl-pprint , asl-translator , bytestring + , bv-sized , cereal , containers , crucible diff --git a/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs b/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs index 91d1bbef..de389fdb 100644 --- a/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs +++ b/macaw-aarch32/src/Data/Macaw/ARM/Disassemble.hs @@ -76,7 +76,6 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import Data.Macaw.ARM.ARMReg import Data.Macaw.ARM.Arch() -import Data.Macaw.AbsDomain.AbsState as MA import Data.Macaw.CFG import Data.Macaw.CFG.Block import qualified Data.Macaw.CFG.Core as MC @@ -93,7 +92,6 @@ import qualified Dismantle.ARM.T32 as ThumbD import Text.Printf ( printf ) import qualified SemMC.Architecture.AArch32 as ARM -import Debug.Trace data InstructionSet = A32I ARMD.Instruction | T32I ThumbD.Instruction deriving (Eq, Show) @@ -185,8 +183,8 @@ disassembleBlock lookupSemantics gs curPCAddr blockOff maxOffset = do Right (i, bytesRead) -> do -- FIXME: Set PSTATE.T based on whether instruction is A32I or T32I -- traceM ("II: " ++ show i) - let curPC = MM.segmentOffAddr seg off - nextPCOffset = off + bytesRead + -- let curPC = MM.segmentOffAddr seg off + let nextPCOffset = off + bytesRead nextPC = MM.segmentOffAddr seg nextPCOffset nextPCVal :: Value ARM.AArch32 ids (BVType 32) = MC.RelocatableValue (MM.addrWidthRepr curPCAddr) nextPC -- curPCVal :: Value ARM.AArch32 ids (BVType 32) = MC.RelocatableValue (MM.addrWidthRepr curPCAddr) curPC @@ -241,18 +239,17 @@ readInstruction :: (MM.MemWidth w) -> Either (ARMMemoryError w) (InstructionSet, MM.MemWord w) readInstruction addr = do let seg = MM.segoffSegment addr - segRelAddrRaw = MM.segoffAddr addr - -- Addresses specified in ARM instructions have the low bit - -- clear, but Thumb (T32) target addresses have the low bit sit. - -- This is only manifested in the instruction addresses: the - -- actual PC for fetching instructions clears the low bit to - -- generate aligned memory accesses. - loBit = MM.addrOffset segRelAddrRaw .&. 1 - segRelAddr = segRelAddrRaw { addrOffset = MM.addrOffset segRelAddrRaw `xor` loBit } + let segRelAddr = MM.segoffAddr addr + -- Addresses specified in ARM instructions have the low bit + -- clear, but Thumb (T32) target addresses have the low bit sit. + -- This is only manifested in the instruction addresses: the + -- actual PC for fetching instructions clears the low bit to + -- generate aligned memory accesses. + let alignedMsegOff = MM.clearSegmentOffLeastBit addr if MM.segmentFlags seg `MMP.hasPerm` MMP.execute then do - let ao = addrOffset segRelAddr - alignedMsegOff <- liftMaybe (ARMInvalidInstructionAddress seg ao) (MM.resolveSegmentOff seg ao) + -- traceM ("Orig addr = " ++ show addr ++ " modified to " ++ show ao) + -- alignedMsegOff <- liftMaybe (ARMInvalidInstructionAddress seg ao) (MM.resolveSegmentOff seg ao) contents <- liftMemError $ MM.segoffContentsAfter alignedMsegOff case contents of [] -> ET.throwError $ ARMMemoryError (MM.AccessViolation segRelAddr) @@ -276,12 +273,6 @@ readInstruction addr = do Nothing -> ET.throwError $ ARMInvalidInstruction segRelAddr contents else ET.throwError $ ARMMemoryError (MM.PermissionsError segRelAddr) -liftMaybe :: ARMMemoryError w -> Maybe a -> Either (ARMMemoryError w) a -liftMaybe err ma = - case ma of - Just a -> Right a - Nothing -> Left err - liftMemError :: Either (MM.MemoryError w) a -> Either (ARMMemoryError w) a liftMemError e = case e of diff --git a/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs b/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs index 1d3eb424..ea200f24 100644 --- a/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs +++ b/macaw-aarch32/src/Data/Macaw/ARM/Identify.hs @@ -10,7 +10,7 @@ module Data.Macaw.ARM.Identify ) where import Control.Lens ( (^.) ) -import Data.Macaw.ARM.Arch +import qualified Data.Macaw.ARM.ARMReg as AR import Data.Macaw.AbsDomain.AbsState ( AbsProcessorState , AbsValue(..) , transferValue @@ -18,6 +18,7 @@ import Data.Macaw.AbsDomain.AbsState ( AbsProcessorState ) import qualified Data.Macaw.CFG as MC import qualified Data.Macaw.Memory as MM +import qualified Data.Macaw.SemMC.Simplify as MSS import qualified Data.Macaw.Types as MT import Data.Semigroup import qualified Data.Sequence as Seq @@ -42,8 +43,14 @@ identifyCall :: MM.Memory (MC.ArchAddrWidth ARM.AArch32) -> Seq.Seq (MC.Stmt ARM.AArch32 ids) -> MC.RegState (MC.ArchReg ARM.AArch32) (MC.Value ARM.AArch32 ids) -> Maybe (Seq.Seq (MC.Stmt ARM.AArch32 ids), MC.ArchSegmentOff ARM.AArch32) -identifyCall _mem _stmts0 _rs = Nothing -- KWQ: for now, nothing is identified as a call - +identifyCall mem stmts0 rs + | not (null stmts0) + , MC.RelocatableValue {} <- rs ^. MC.boundValue AR.arm_LR + , Just retVal <- MSS.simplifyValue (rs ^. MC.boundValue AR.arm_LR) + , Just retAddrVal <- MC.valueAsMemAddr retVal + , Just retAddr <- MM.asSegmentOff mem retAddrVal = + Just (stmts0, retAddr) + | otherwise = Nothing -- | Intended to identify a return statement. -- diff --git a/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs b/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs index d1a8de3a..551fe777 100644 --- a/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs +++ b/macaw-aarch32/src/Data/Macaw/ARM/Semantics/TH.hs @@ -25,14 +25,15 @@ import qualified Control.Monad.Except as E import qualified Control.Monad.State.Strict as St import qualified Data.Map as Map import qualified Data.Functor.Const as C +import qualified Data.Bits as B +import qualified Data.BitVector.Sized as BVS import Data.List (isPrefixOf) import Data.Macaw.SemMC.TH.Monad import Data.Macaw.ARM.ARMReg import Data.Macaw.ARM.Arch import qualified Data.Macaw.CFG as M import qualified Data.Macaw.SemMC.Generator as G -import qualified Data.Macaw.SemMC.Operands as O -import Data.Macaw.SemMC.TH ( addEltTH, appToExprTH, evalNonceAppTH, evalBoundVar, natReprTH, symFnName, asName ) +import Data.Macaw.SemMC.TH ( addEltTH, appToExprTH, evalNonceAppTH, evalBoundVar, natReprTH, symFnName ) import Data.Macaw.SemMC.TH.Monad import qualified Data.Macaw.Types as M import qualified Data.Parameterized.Context as Ctx @@ -44,13 +45,11 @@ import Data.Parameterized.Some ( Some(..) ) import qualified Data.Parameterized.TraversableFC as FC import Data.Proxy ( Proxy(..) ) import GHC.TypeLits as TL +import Data.Parameterized.NatRepr import Language.Haskell.TH import Language.Haskell.TH.Syntax import qualified SemMC.Architecture.AArch32 as ARM import qualified SemMC.Architecture.ARM.Opcodes as ARM -import qualified SemMC.Architecture as A -import qualified SemMC.Architecture.ARM.Location as Loc -import qualified SemMC.Architecture.Location as L import qualified What4.BaseTypes as WT import qualified What4.Expr.Builder as WB @@ -58,7 +57,6 @@ import qualified Data.Kind as Kind import Numeric.Natural import qualified Language.ASL.Globals as ASL -import qualified Language.ASL.Globals.Definitions as ASL loadSemantics :: IO (ARM.ASLSemantics) loadSemantics = ARM.loadSemantics (ARM.ASLSemanticsOpts { ARM.aslOptTrimRegs = True}) @@ -155,12 +153,12 @@ armNonceAppEval bvi nonceApp = case args of Ctx.Empty Ctx.:> assert Ctx.:> bv -> Just $ do assertTH <- addEltTH M.LittleEndian bvi assert - bv <- addEltTH M.LittleEndian bvi bv + bvElt <- addEltTH M.LittleEndian bvi bv liftQ [| case $(return assertTH) of - M.BoolValue True -> return $(return bv) + M.BoolValue True -> return $(return bvElt) M.BoolValue False -> E.throwError (G.GeneratorMessage $ "Bitvector length assertion failed!") -- FIXME: THIS SHOULD THROW AN ERROR - _ -> return $(return bv) + _ -> return $(return bvElt) -- nm -> E.throwError (G.GeneratorMessage $ "Bitvector length assertion failed: ") |] _ -> fail "Invalid call to assertBV" @@ -314,6 +312,25 @@ concreteIte v t f = case v of M.CValue (M.BoolCValue b) -> if b then t else f _ -> E.throwError (G.GeneratorMessage $ "concreteIte: requires concrete value" <> show (M.ppValueAssignments v)) +-- | A smart constructor for division +-- +-- The smart constructor recognizes divisions that can be converted into shifts. +-- We convert the operation to a shift if the divisior is a power of two. +sdiv :: (1 <= n) + => NatRepr n + -> M.Value ARM.AArch32 ids (M.BVType n) + -> M.Value ARM.AArch32 ids (M.BVType n) + -> G.Generator ARM.AArch32 ids s (G.Expr ARM.AArch32 ids (M.BVType n)) +sdiv repr dividend divisor = + case divisor of + M.BVValue nr val + | bv <- BVS.bitVector' repr val + , BVS.bvPopCount bv == 1 -> + withKnownNat repr $ + let app = M.BVSar nr dividend (M.BVValue nr (fromIntegral (B.countTrailingZeros bv))) + in (G.ValueExpr . M.AssignedValue) <$> G.addAssignment (M.EvalApp app) + _ -> addArchAssignment (SDiv repr dividend divisor) + armAppEvaluator :: M.Endianness -> BoundVarInterpretations ARM.AArch32 t fs -> WB.App (WB.Expr t) ctp @@ -328,8 +345,7 @@ armAppEvaluator endianness interps elt = WB.BVSdiv w bv1 bv2 -> return $ do e1 <- addEltTH endianness interps bv1 e2 <- addEltTH endianness interps bv2 - liftQ [| addArchAssignment (SDiv $(natReprTH w) $(return e1) $(return e2)) - |] + liftQ [| sdiv $(natReprTH w) $(return e1) $(return e2) |] WB.BVUrem w bv1 bv2 -> return $ do e1 <- addEltTH endianness interps bv1 e2 <- addEltTH endianness interps bv2 diff --git a/macaw-aarch32/tests/arm/test-direct-call-a32.exe b/macaw-aarch32/tests/arm/test-direct-call-a32.exe new file mode 100755 index 00000000..62c6b9a1 Binary files /dev/null and b/macaw-aarch32/tests/arm/test-direct-call-a32.exe differ diff --git a/macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected b/macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected new file mode 100644 index 00000000..cef3b170 --- /dev/null +++ b/macaw-aarch32/tests/arm/test-direct-call-a32.mcw.expected @@ -0,0 +1,10 @@ +R { funcs = [ (0x100d8, [(0x100d8, 48)]) + , (0x1010c, [ (0x1010c, 32) + , (0x1012c, 12) + ]) + , (0x10138, [ (0x10138, 28) + , (0x10154, 4) + ]) + ] + , ignoreBlocks = [0x10158] + } diff --git a/macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected b/macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected deleted file mode 100644 index 9d030188..00000000 --- a/macaw-aarch32/tests/arm/test-just-exit-t32.mcw.expected +++ /dev/null @@ -1,4 +0,0 @@ -R { funcs = [ (0x8001, [(0x8001, 6), (0x8007,10)]) - ] - , ignoreBlocks = [] - } diff --git a/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs b/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs index 8cd5dcef..0574cd90 100644 --- a/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs +++ b/macaw-semmc/src/Data/Macaw/SemMC/Simplify.hs @@ -52,6 +52,7 @@ simplifyApp a = BVOr sz l r -> binopbv (.|.) sz l r BVShl sz l r -> binopbv (\l' r' -> shiftL l' (fromIntegral r')) sz l r BVShr sz l r -> binopbv (\l' r' -> shiftR l' (fromIntegral r')) sz l r + BVSar sz l r -> binopbv (\l' r' -> shiftR (toSigned sz l') (fromIntegral (toSigned sz r'))) sz l r BVAdd _ l (BVValue _ 0) -> Just l BVAdd _ (BVValue _ 0) r -> Just r BVAdd rep l@(BVValue {}) r@(RelocatableValue {}) -> @@ -66,10 +67,17 @@ simplifyApp a = BVMul rep l r -> binopbv (*) rep l r SExt (BVValue u n) sz -> Just (BVValue sz (toUnsigned sz (toSigned u n))) UExt (BVValue _ n) sz -> Just (mkLit sz n) + UExt (RelocatableValue _arep addr) sz -> do + memword <- MM.asAbsoluteAddr addr + return $ mkLit sz (fromIntegral memword) Trunc (BVValue _ x) sz -> Just (mkLit sz x) Eq l r -> boolop (==) l r BVComplement sz x -> unop complement sz x + BVSignedLe v1 v2 -> signedRelOp (<=) v1 v2 + BVSignedLt v1 v2 -> signedRelOp (<) v1 v2 + BVUnsignedLe v1 v2 -> unsignedRelOp (<=) v1 v2 + BVUnsignedLt v1 v2 -> unsignedRelOp (<) v1 v2 _ -> Nothing where unop :: forall n . (tp ~ BVType n) @@ -97,3 +105,19 @@ simplifyApp a = binopbv f sz (BVValue _ l) (BVValue _ r) = Just (mkLit sz (f l r)) binopbv _ _ _ _ = Nothing + signedRelOp :: forall n + . (Integer -> Integer -> Bool) + -> Value arch ids (BVType n) + -> Value arch ids (BVType n) + -> Maybe (Value arch ids BoolType) + signedRelOp op (BVValue r1 v1) (BVValue _ v2) = + Just (BoolValue (op (toSigned r1 v1) (toSigned r1 v2))) + signedRelOp _ _ _ = Nothing + unsignedRelOp :: forall n + . (Integer -> Integer -> Bool) + -> Value arch ids (BVType n) + -> Value arch ids (BVType n) + -> Maybe (Value arch ids BoolType) + unsignedRelOp op (BVValue r1 v1) (BVValue _ v2) = + Just (BoolValue (op (toUnsigned r1 v1) (toUnsigned r1 v2))) + unsignedRelOp _ _ _ = Nothing