diff --git a/macaw-arm/macaw-arm.cabal b/macaw-arm/macaw-arm.cabal index 7d4d3adc..e88abf01 100644 --- a/macaw-arm/macaw-arm.cabal +++ b/macaw-arm/macaw-arm.cabal @@ -13,11 +13,24 @@ extra-source-files: ChangeLog.md cabal-version: >=1.10 library - -- exposed-modules: + exposed-modules: Data.Macaw.ARM + , Data.Macaw.ARM.ARMReg + , Data.Macaw.ARM.BinaryFormat.ELF + , Data.Macaw.ARM.Semantics.ARMSemantics -- other-modules: -- other-extensions: build-depends: base >=4.10 && <4.11 - build-tools: arm-none-eabi-gcc + , ansi-wl-pprint + , bytestring + , cereal + , containers + , elf-edit + , lens + , macaw-base + , macaw-semmc + , semmc-arm + , vector + -- build-tools: arm-none-eabi-gcc hs-source-dirs: src default-language: Haskell2010 ghc-options: -Wall @@ -32,6 +45,7 @@ test-suite macaw-arm-tests , MismatchTests , Shared build-depends: base >=4.10 && <4.11 + , ansi-wl-pprint , binary , bytestring , containers @@ -43,5 +57,6 @@ test-suite macaw-arm-tests , macaw-base , macaw-arm , parameterized-utils + , semmc-arm , tasty , tasty-hunit diff --git a/macaw-arm/src/Data/Macaw/ARM.hs b/macaw-arm/src/Data/Macaw/ARM.hs new file mode 100644 index 00000000..6aad7853 --- /dev/null +++ b/macaw-arm/src/Data/Macaw/ARM.hs @@ -0,0 +1,58 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeApplications #-} + +-- {-# LANGUAGE FlexibleContexts #-} +-- {-# LANGUAGE ScopedTypeVariables #-} +-- {-# LANGUAGE RankNTypes #-} +-- {-# LANGUAGE GADTs #-} +-- {-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module Data.Macaw.ARM + ( -- * Macaw configurations + arm_linux_info, + -- * Type-level tags + ARM, + -- * ELF support + -- tocBaseForELF + -- tocEntryAddrsForELF + ) + where + + +import qualified Data.Macaw.ARM.Semantics.ARMSemantics as ARMSem +import qualified Data.Macaw.AbsDomain.AbsState as MA +import qualified Data.Macaw.Architecture.Info as MI +import Data.Macaw.CFG ( ArchSegmentOff ) +import qualified Data.Macaw.Memory as MM +import Data.Macaw.Types ( BVType ) +import qualified SemMC.ARM as ARM +import Data.Proxy ( Proxy(..) ) +import Data.Macaw.ARM.ARMReg + + +-- | The type tag for ARM (32-bit) +type ARM = ARM.ARM + + +-- arm_linux_info :: (ArchSegmentOff ARM.ARM -> Maybe (MA.AbsValue 32 (BVType 32))) -> MI.ArchitectureInfo ARM.ARM +arm_linux_info :: MI.ArchitectureInfo ARM.ARM +arm_linux_info = + MI.ArchitectureInfo { MI.withArchConstraints = undefined -- id -- \x -> x + , MI.archAddrWidth = MM.Addr32 + , MI.archEndianness = MM.LittleEndian + , MI.jumpTableEntrySize = undefined -- jumpTableEntrySize proxy + , MI.disassembleFn = undefined -- disassembleFn proxy ARMSem.execInstruction + , MI.mkInitialAbsState = undefined -- mkInitialAbsState proxy tocMap + , MI.absEvalArchFn = undefined -- absEvalArchFn proxy + , MI.absEvalArchStmt = undefined -- absEvalArchStmt proxy + , MI.postCallAbsState = undefined -- postCallAbsState proxy + , MI.identifyCall = undefined -- identifyCall proxy + , MI.identifyReturn = undefined -- identifyReturn proxy + , MI.rewriteArchFn = undefined -- rewritePrimFn + , MI.rewriteArchStmt = undefined -- rewriteStmt + , MI.rewriteArchTermStmt = undefined -- rewriteTermStmt + , MI.archDemandContext = undefined -- archDemandContext proxy + , MI.postArchTermStmtAbsState = undefined -- postARMTermStmtAbsState (preserveRegAcrossSyscall proxy) + } + where + proxy = Proxy @ARM.ARM diff --git a/macaw-arm/src/Data/Macaw/ARM/ARMReg.hs b/macaw-arm/src/Data/Macaw/ARM/ARMReg.hs new file mode 100644 index 00000000..797e76d7 --- /dev/null +++ b/macaw-arm/src/Data/Macaw/ARM/ARMReg.hs @@ -0,0 +1,40 @@ +-- | Defines the register types for ARM, along with some helpers + +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeOperators #-} + +module Data.Macaw.ARM.ARMReg + ( ARMReg(..) + ) + where + +import GHC.TypeLits +import qualified Data.Macaw.CFG as MC +import qualified SemMC.ARM as ARM +import Data.Macaw.Types ( BVType ) + +data ARMReg tp where + ARM_IP :: (w ~ MC.RegAddrWidth ARMReg, 1 <= w) => ARMReg (BVType w) + +deriving instance Eq (ARMReg tp) +deriving instance Ord (ARMReg tp) + +instance Show (ARMReg tp) where + show r = case r of + ARM_IP -> "ip" + +-- instance ShowF ARMReg where +-- showF = show + +type instance MC.ArchReg ARM.ARM = ARMReg +type instance MC.RegAddrWidth ARMReg = 32 diff --git a/macaw-arm/src/Data/Macaw/ARM/BinaryFormat/ELF.hs b/macaw-arm/src/Data/Macaw/ARM/BinaryFormat/ELF.hs new file mode 100644 index 00000000..3d288bdf --- /dev/null +++ b/macaw-arm/src/Data/Macaw/ARM/BinaryFormat/ELF.hs @@ -0,0 +1,88 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE AllowAmbiguousTypes #-} + +module Data.Macaw.ARM.BinaryFormat.ELF + ( parseELFInfo + , getELFSymbols + -- , tocBaseForELF + -- , tocEntryAddrsForElf + ) + where + +import Data.List (intercalate) +import Data.Vector (toList) +import Data.Bits +import Text.PrettyPrint.ANSI.Leijen +import Control.Lens -- ((^.), (^..), filtered, over, folded, foldMapOf, to) +import Control.Monad ( replicateM, unless ) +import qualified Data.ByteString.Char8 as C8 +import qualified Data.ElfEdit as E +import qualified Data.Macaw.AbsDomain.AbsState as MA +import qualified Data.Macaw.CFG as MC +import qualified Data.Macaw.Memory as MM +import Data.Macaw.Types ( BVType ) +import qualified Data.Map.Strict as M +import Data.Proxy ( Proxy(..) ) +import qualified Data.Serialize.Get as G +import qualified Data.Set as S +import GHC.TypeLits ( KnownNat, natVal ) + + +parseFunctionDescriptors :: (MM.MemWidth (MC.RegAddrWidth (MC.ArchReg ppc))) + => proxy ppc + -> Int + -> G.Get (M.Map (MM.MemAddr (MC.RegAddrWidth (MC.ArchReg ppc))) (MA.AbsValue (MC.RegAddrWidth (MC.ArchReg ppc)) (BVType (MC.RegAddrWidth (MC.ArchReg ppc))))) +parseFunctionDescriptors _ ptrSize = do + let recordBytes = (3 * ptrSize) `div` 8 + let recordParser = + case ptrSize of + 32 -> getFunctionDescriptor G.getWord32be + 64 -> getFunctionDescriptor G.getWord64be + _ -> error ("Invalid pointer size: " ++ show ptrSize) + totalBytes <- G.remaining + unless (totalBytes `mod` recordBytes == 0) $ do + fail "The .opd section is not divisible by the record size" + funcDescs <- replicateM (totalBytes `div` recordBytes) recordParser + return (M.fromList funcDescs) + +getFunctionDescriptor :: (Integral a, MM.MemWidth w) + => G.Get a + -> G.Get (MM.MemAddr w, MA.AbsValue w (BVType w)) +getFunctionDescriptor ptrParser = do + entryAddr <- ptrParser + tocAddr <- ptrParser + _ <- ptrParser + let mso = MM.absoluteAddr (fromIntegral entryAddr) + return (mso, MA.FinSet (S.singleton (fromIntegral tocAddr))) + + +parseELFInfo :: forall arm proxy + . (KnownNat (MC.RegAddrWidth (MC.ArchReg arm)), MM.MemWidth (MC.RegAddrWidth (MC.ArchReg arm))) + => proxy arm + -> E.Elf (MC.RegAddrWidth (MC.ArchReg arm)) + -> Either String (M.Map + (MM.MemAddr (MC.RegAddrWidth (MC.ArchReg arm))) + (MA.AbsValue (MC.RegAddrWidth (MC.ArchReg arm)) (BVType (MC.RegAddrWidth (MC.ArchReg arm))))) +parseELFInfo proxy e = + let secnames = e^..E.elfSections.to (C8.unpack . E.elfSectionName) + in Left $ intercalate ", " secnames + -- Left $ e^..E.elfSections . foldMapOf folded (C8.unpack . E.elfSectionName) + -- case E.findSectionByName (C8.pack ".opd") e of + -- [sec] -> + -- G.runGet (parseFunctionDescriptors proxy (fromIntegral ptrSize)) (E.elfSectionData sec) + -- _ -> error "Could not find .opd section" + -- where + -- ptrSize = natVal (Proxy @(MC.RegAddrWidth (MC.ArchReg arm))) + +-- getELFSymbols :: E.Elf (MC.RegAddrWidth (MC.ArchReg arm)) -> String +getELFSymbols :: (Show (E.ElfWordType w), Data.Bits.Bits (E.ElfWordType w), Integral (E.ElfWordType w)) => E.Elf w -> Doc +getELFSymbols elf = + let dummy = 1 + -- symtab :: E.ElfSymbolTableEntry (E.ElfWordType (MC.RegAddrWidth (MC.ArchReg arm))) + -- symtab :: E.ElfSymbolTableEntry (E.ElfWordType w) + symtab = elf^.to E.elfSymtab + ps = fmap (E.ppSymbolTableEntries . toList . E.elfSymbolTableEntries) symtab + -- x = elf^.(E.elfSymtab).to (show . E.ppSymbolTableEntries) + in vsep ps -- intercalate ", and " ps diff --git a/macaw-arm/src/Data/Macaw/ARM/Semantics/ARMSemantics.hs b/macaw-arm/src/Data/Macaw/ARM/Semantics/ARMSemantics.hs new file mode 100644 index 00000000..6697a6c6 --- /dev/null +++ b/macaw-arm/src/Data/Macaw/ARM/Semantics/ARMSemantics.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE DataKinds #-} + +module Data.Macaw.ARM.Semantics.ARMSemantics + ( execInstruction + ) + where + +import qualified Data.Macaw.CFG as MC +import qualified Data.Macaw.Types as MT +import SemMC.ARM ( ARM, Instruction ) +import Data.Macaw.SemMC.Generator ( Generator ) +-- import SemMC.Architecture.ARM.Opcodes ( allSemantics, allOpcodeInfo ) + +execInstruction :: MC.Value ARM ids (MT.BVType 32) -> Instruction -> Maybe (Generator ARM ids s ()) +execInstruction = undefined +-- execInstruction = $(genExecInstruction (Proxy @ARM) (locToRegTH (Proxy @ARM)) armNonceAppEval armAppEvaluator 'armInstructionMatcher allSemantics allOpcodeInfo) diff --git a/macaw-arm/tests/ARMTests.hs b/macaw-arm/tests/ARMTests.hs index ebd1a2bd..3fdf8dd4 100644 --- a/macaw-arm/tests/ARMTests.hs +++ b/macaw-arm/tests/ARMTests.hs @@ -1,43 +1,49 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeApplications #-} -{-# LANGUAGE GADTs #-} -module ARMTests ( - armAsmTests - ) where +{-# LANGUAGE TypeFamilies #-} -import Control.Lens ( (^.) ) +module ARMTests + ( armAsmTests + ) + where + + +import Control.Lens ( (^.), to ) +import Control.Monad.Catch (throwM, Exception) import qualified Data.ElfEdit as E import qualified Data.Foldable as F +import Data.List (intercalate) +import qualified Data.Macaw.ARM as RO +import Data.Macaw.ARM.ARMReg +import qualified Data.Macaw.ARM.BinaryFormat.ELF as ARMELF +import qualified Data.Macaw.AbsDomain.AbsState as MA import qualified Data.Macaw.Discovery as MD -import Data.Parameterized.NatRepr import qualified Data.Macaw.Discovery.State as MD import qualified Data.Macaw.Memory as MM +import Data.Macaw.Types ( BVType ) import qualified Data.Map as M import Data.Maybe ( fromJust, mapMaybe ) import Data.Monoid +import Data.Parameterized.NatRepr import qualified Data.Parameterized.Some as PU import Data.Proxy ( Proxy(..) ) import qualified Data.Set as S +import Data.Typeable ( Typeable ) import Data.Word ( Word64 ) +import Debug.Trace (trace) +import qualified SemMC.ARM as ARM +import Shared import System.FilePath ( dropExtension, replaceExtension ) import qualified Test.Tasty as T import qualified Test.Tasty.HUnit as T +import Text.PrettyPrint.ANSI.Leijen ( putDoc ) import Text.Printf ( PrintfArg, printf ) import Text.Read ( readMaybe ) --- import qualified Data.Macaw.PPC as RO --- import qualified Data.Macaw.PPC.BinaryFormat.ELF as E + +-- import qualified Data.Macaw.PPC.BinaryFormat.ELF as E -- KWQ: replacement should be complete -- import qualified SemMC.Architecture.PPC64 as PPC64 -import Data.List (intercalate) -import Debug.Trace (trace) - -import Shared - armAsmTests :: [FilePath] -> T.TestTree armAsmTests = T.testGroup "ARM" . map mkTest @@ -51,7 +57,7 @@ instance (Read a) => Read (Hex a) where readsPrec i s = [ (Hex a, s') | (a, s') <- readsPrec i s ] -- | The type of expected results for test cases -data ExpectedResult = +data ExpectedResultFileData = R { funcs :: [(Hex Word64, [(Hex Word64, Word64)])] -- ^ The first element of the pair is the address of entry point -- of the function. The list is a list of the addresses of the @@ -64,75 +70,61 @@ data ExpectedResult = } deriving (Read, Show, Eq) +type ExpectedResult = (M.Map (Hex Word64) (S.Set (Hex Word64, Word64)), + S.Set (Hex Word64)) + +data ExpectedException = BadExpectedFile String + deriving (Typeable, Show) + +instance Exception ExpectedException + + +getExpected :: FilePath -> IO ExpectedResult +getExpected expectedFilename = do + expectedString <- readFile expectedFilename + case readMaybe expectedString of + -- Above: Read in the ExpectedResultFileData from the contents of the file + -- Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) + Nothing -> throwM $ BadExpectedFile ("Invalid expected spec: " ++ show expectedString) + Just er -> + let expectedEntries = M.fromList [ (entry, S.fromList starts) | (entry, starts) <- funcs er ] + -- expectedEntries maps function entry points to the set of block starts + -- within the function. + ignoredBlocks = S.fromList (ignoreBlocks er) + in return (expectedEntries, ignoredBlocks) + + -- | Read in a test case from disk and output a test tree. mkTest :: FilePath -> T.TestTree -mkTest fp = T.testCase fp $ withELF exeFilename (testDiscovery fp) +mkTest fp = T.testCase fp $ do x <- getExpected fp + withELF exeFilename $ testDiscovery x where asmFilename = dropExtension fp exeFilename = replaceExtension asmFilename "exe" -showSegments :: (MM.MemWidth w) => MM.Memory w -> String -showSegments mem = intercalate "\n" $ map show (MM.memSegments mem) -testDiscovery :: FilePath -> E.Elf w -> IO () -testDiscovery fname elf = +testDiscovery :: ExpectedResult -> E.Elf w -> IO () +testDiscovery expres elf = case E.elfClass elf of - E.ELFCLASS32 -> testDiscovery32 fname elf + E.ELFCLASS32 -> testDiscovery32 expres elf E.ELFCLASS64 -> error "testDiscovery64 TBD" - -- | Run a test over a given expected result filename and the ELF file -- associated with it -testDiscovery32 :: FilePath -> E.Elf 32 -> IO () -testDiscovery32 expectedFilename elf = +testDiscovery32 :: ExpectedResult -> E.Elf 32 -> IO () +testDiscovery32 (funcblocks, ignored) elf = withMemory MM.Addr32 elf $ \mem -> do - let Just entryPoint = trace (showSegments mem) $ MM.asSegmentOff mem epinfo - epinfo = let e' = findEntryPoint elf mem in trace ("epinfo: " <> show e') e' - putStrLn $ "entryPoint:\n ep:\n e: " <> show entryPoint - T.assertBool ("step 1: " <> show entryPoint) $ "foo" == show entryPoint - -- tocBase = RO.tocBaseForELF (Proxy @PPC64.PPC) elf - -- otherEntryAddrs :: [MM.MemAddr 64] - -- otherEntryAddrs = E.tocEntryAddrsForElf (Proxy @PPC64.PPC) elf - -- otherEntries = mapMaybe (MM.asSegmentOff mem) otherEntryAddrs - -- di = MD.cfgFromAddrs (RO.ppc64_linux_info tocBase) mem MD.emptySymbolAddrMap (entryPoint:otherEntries) [] - -- expectedString <- readFile expectedFilename - -- case readMaybe expectedString of - -- -- Above: Read in the ExpectedResult from the contents of the file - -- Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) - -- Just er -> do - -- let expectedEntries = M.fromList [ (entry, S.fromList starts) | (entry, starts) <- funcs er ] - -- -- expectedEntries maps function entry points to the set of block starts - -- -- within the function. - -- ignoredBlocks = S.fromList (ignoreBlocks er) - -- allFoundBlockAddrs :: S.Set Word64 - -- allFoundBlockAddrs = - -- S.fromList [ fromIntegral (fromJust (MM.asAbsoluteAddr (MM.relativeSegmentAddr (MD.pblockAddr pbr)))) - -- | PU.Some dfi <- M.elems (di ^. MD.funInfo) - -- , pbr <- M.elems (dfi ^. MD.parsedBlocks) - -- ] - -- -- Test that all discovered blocks were expected (and verify their sizes) - -- F.forM_ (M.elems (di ^. MD.funInfo)) $ \(PU.Some dfi) -> do - -- let actualEntry = fromIntegral (fromJust (MM.asAbsoluteAddr (MM.relativeSegmentAddr (MD.discoveredFunAddr dfi)))) - -- actualBlockStarts = S.fromList [ (baddr, bsize) - -- | pbr <- M.elems (dfi ^. MD.parsedBlocks) - -- , trace ("Parsed Block: " ++ show pbr) True - -- , let baddr = fromIntegral (fromJust (MM.asAbsoluteAddr (MM.relativeSegmentAddr (MD.pblockAddr pbr)))) - -- , let bsize = fromIntegral (MD.blockSize pbr) - -- ] - -- case (S.member actualEntry ignoredBlocks, M.lookup actualEntry expectedEntries) of - -- (True, _) -> return () - -- (_, Nothing) -> T.assertFailure (printf "Unexpected block start: 0x%x" actualEntry) - -- (_, Just expectedBlockStarts) -> - -- T.assertEqual (printf "Block starts for 0x%x" actualEntry) expectedBlockStarts (actualBlockStarts `removeIgnored` ignoredBlocks) + let Just entryPoint = MM.asSegmentOff mem epinfo + epinfo = findEntryPoint elf mem + putStrLn $ "entryPoint: " <> show entryPoint - -- -- Test that all expected blocks were discovered - -- F.forM_ (funcs er) $ \(_funcAddr, blockAddrs) -> - -- F.forM_ blockAddrs $ \(blockAddr@(Hex addr), _) -> do - -- T.assertBool ("Missing block address: " ++ show blockAddr) (S.member addr allFoundBlockAddrs) + let mbAddrs :: Either String (M.Map + (MM.MemAddr 32) + (MA.AbsValue 32 (BVType 32))) + mbAddrs = ARMELF.parseELFInfo (Proxy @ARM.ARM) elf -removeIgnored :: (Ord b, Ord a) => S.Set (a, b) -> S.Set a -> S.Set (a, b) -removeIgnored actualBlockStarts ignoredBlocks = - let removeIfPresent v@(addr, _) acc = if S.member addr ignoredBlocks - then S.delete v acc - else acc - in F.foldr removeIfPresent actualBlockStarts actualBlockStarts + putStrLn $ "sections = " <> show mbAddrs <> "\n" + putStrLn $ "symbols = " + putDoc $ ARMELF.getELFSymbols elf + + T.assertBool ("sections = " <> show mbAddrs) False diff --git a/macaw-arm/tests/arm/test-just-exit.s.expected b/macaw-arm/tests/arm/test-just-exit.s.expected index 6fb31646..f86f16e4 100644 --- a/macaw-arm/tests/arm/test-just-exit.s.expected +++ b/macaw-arm/tests/arm/test-just-exit.s.expected @@ -1,38 +1,4 @@ - .cpu arm7tdmi - .eabi_attribute 20, 1 - .eabi_attribute 21, 1 - .eabi_attribute 23, 3 - .eabi_attribute 24, 1 - .eabi_attribute 25, 1 - .eabi_attribute 26, 1 - .eabi_attribute 30, 6 - .eabi_attribute 34, 0 - .eabi_attribute 18, 4 - .file "test-just-exit.c" - .text - .align 2 - .global _start - .syntax unified - .arm - .fpu softvfp - .type _start, %function -_start: - @ Function supports interworking. - @ args = 0, pretend = 0, frame = 0 - @ frame_needed = 1, uses_anonymous_args = 0 - @ link register save eliminated. - str fp, [sp, #-4]! - add fp, sp, #0 - .syntax divided -@ 4 "test-just-exit.c" 1 - svc #0 -@ 0 "" 2 - .arm - .syntax unified - nop - add sp, fp, #0 - @ sp needed - ldr fp, [sp], #4 - bx lr - .size _start, .-_start - .ident "GCC: (GNU Tools for ARM Embedded Processors 6-2017-q2-update) 6.3.1 20170620 (release) [ARM/embedded-6-branch revision 249437]" +R { funcs = [(0x801c, [(0x801c, 20)]) + ] + , ignoreBlocks = [0x803c] + }