diff --git a/base/src/Data/Macaw/CFG/App.hs b/base/src/Data/Macaw/CFG/App.hs index 64ff0a9d..6702508d 100644 --- a/base/src/Data/Macaw/CFG/App.hs +++ b/base/src/Data/Macaw/CFG/App.hs @@ -82,6 +82,9 @@ data App (f :: Type -> *) (tp :: Type) where -- Multiply two numbers BVMul :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + -- Divide two numbers and get the remainder (i.e. mod) + BVUrem :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + -- Unsigned less than or equal. BVUnsignedLe :: (1 <= n) => !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType @@ -261,6 +264,7 @@ ppAppA pp a0 = BVSub _ x y -> sexprA "bv_sub" [ pp x, pp y ] BVSbb _ x y b -> sexprA "bv_sbb" [ pp x, pp y, pp b ] BVMul _ x y -> sexprA "bv_mul" [ pp x, pp y ] + BVUrem _ x y -> sexprA "bv_urem" [ pp x, pp y ] BVUnsignedLt x y -> sexprA "bv_ult" [ pp x, pp y ] BVUnsignedLe x y -> sexprA "bv_ule" [ pp x, pp y ] BVSignedLt x y -> sexprA "bv_slt" [ pp x, pp y ] @@ -307,11 +311,12 @@ instance HasRepr (App f) TypeRepr where NotApp{} -> knownRepr XorApp{} -> knownRepr - BVAdd w _ _ -> BVTypeRepr w - BVAdc w _ _ _ -> BVTypeRepr w - BVSub w _ _ -> BVTypeRepr w - BVSbb w _ _ _ -> BVTypeRepr w - BVMul w _ _ -> BVTypeRepr w + BVAdd w _ _ -> BVTypeRepr w + BVAdc w _ _ _ -> BVTypeRepr w + BVSub w _ _ -> BVTypeRepr w + BVSbb w _ _ _ -> BVTypeRepr w + BVMul w _ _ -> BVTypeRepr w + BVUrem w _ _ -> BVTypeRepr w BVUnsignedLt{} -> knownRepr BVUnsignedLe{} -> knownRepr diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index b0b7f65f..de34b7a5 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -608,7 +608,13 @@ ppLit w i -- | Pretty print a value. ppValue :: RegisterInfo (ArchReg arch) => Prec -> Value arch ids tp -> Doc ppValue _ (BoolValue b) = text $ if b then "true" else "false" -ppValue p (BVValue w i) = assert (i >= 0) $ parenIf (p > colonPrec) $ ppLit w i +ppValue p (BVValue w i) + | i >= 0 = parenIf (p > colonPrec) $ ppLit w i + | otherwise = + -- TODO: We may want to report an error here. + parenIf (p > colonPrec) $ + text (show i) <+> text "::" <+> brackets (text (show w)) + ppValue p (RelocatableValue _ a) = parenIf (p > plusPrec) $ text (show a) ppValue _ (AssignedValue a) = ppAssignId (assignId a) ppValue _ (Initial r) = text (showF r) PP.<> text "_0" diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 4300d982..f4e29b5b 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -39,6 +39,12 @@ module Data.Macaw.Discovery , Data.Macaw.Discovery.analyzeFunction , Data.Macaw.Discovery.exploreMemPointers , Data.Macaw.Discovery.analyzeDiscoveredFunctions + -- * Top level utilities + , Data.Macaw.Discovery.completeDiscoveryState + , DiscoveryOptions(..) + , defaultDiscoveryOptions + , DiscoveryEvent(..) + , discoveryLogFn -- * DiscoveryFunInfo , State.DiscoveryFunInfo , State.discoveredFunAddr @@ -73,6 +79,8 @@ import qualified Data.Set as Set import qualified Data.Text as Text import qualified Data.Vector as V import Data.Word +import GHC.IO (ioToST, stToIO) +import System.IO import Debug.Trace @@ -298,12 +306,12 @@ foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v }) -- | Add a block to the current function blocks. If this overlaps with an -- existing block, split them so that there's no overlap. -addFunBlock :: - MemWidth (RegAddrWidth (ArchReg arch)) => - ArchSegmentOff arch -> - ParsedBlock arch ids -> - FunState arch s ids -> - FunState arch s ids +addFunBlock + :: MemWidth (RegAddrWidth (ArchReg arch)) + => ArchSegmentOff arch + -> ParsedBlock arch ids + -> FunState arch s ids + -> FunState arch s ids addFunBlock segment block s = case Map.lookupLT segment (s ^. curFunBlocks) of Just (bSegment, bBlock) -- very sneaky way to check that they are in the same segment (a @@ -350,7 +358,8 @@ liftST = FunM . lift -- | Joins in the new abstract state and returns the locations for -- which the new state is changed. -mergeIntraJump :: ArchSegmentOff arch +mergeIntraJump :: MemWidth (ArchAddrWidth arch) + => ArchSegmentOff arch -- ^ Source label that we are jumping from. -> AbsBlockState (ArchReg arch) -- ^ The state of the system after jumping to new block. @@ -358,6 +367,7 @@ mergeIntraJump :: ArchSegmentOff arch -- ^ Address we are trying to reach. -> FunM arch s ids () mergeIntraJump src ab tgt = do +-- trace ("mergeIntraJump " ++ show src ++ " " ++ show tgt) $ do info <- uses curFunCtx archInfo withArchConstraints info $ do when (not (absStackHasReturnAddr ab)) $ do @@ -1006,7 +1016,7 @@ cfgFromAddrs, cfgFromAddrsTrustFns :: -> Memory (ArchAddrWidth arch) -- ^ Memory to use when decoding instructions. -> AddrSymMap (ArchAddrWidth arch) - -- ^ Ma1p from addresses to the associated symbol name. + -- ^ Map from addresses to the associated symbol name. -> [ArchSegmentOff arch] -- ^ Initial function entry points. -> [(ArchSegmentOff arch, ArchSegmentOff arch)] @@ -1031,3 +1041,133 @@ cfgFromAddrsWorker initial_state init_addrs mem_words = & analyzeDiscoveredFunctions & exploreMemPointers mem_words & analyzeDiscoveredFunctions + +------------------------------------------------------------------------ +-- Resolve functions with logging + +resolveFuns :: MemWidth (RegAddrWidth (ArchReg arch)) + => (ArchSegmentOff arch -> CodeAddrReason (ArchAddrWidth arch) -> ST s Bool) + -- ^ Callback for discovered functions + -- + -- Should return true if we should analyze the function and false otherwise. + -> (ArchSegmentOff arch -> ArchSegmentOff arch -> ST s ()) + -- ^ Callback for logging blocks discovered within function + -- Arguments include the address of function and address of block. + -> DiscoveryState arch + -> ST s (DiscoveryState arch) +resolveFuns analyzeFun analyzeBlock info = seq info $ + case Map.minViewWithKey (info^.unexploredFunctions) of + Nothing -> pure info + Just ((addr, rsn), rest) -> do + p <- analyzeFun addr rsn + if p then do + (info',_) <- analyzeFunction (analyzeBlock addr) addr rsn info + resolveFuns analyzeFun analyzeBlock info' + else + resolveFuns analyzeFun analyzeBlock (info & unexploredFunctions .~ rest) + +------------------------------------------------------------------------ +-- Top-level discovery + +-- | Options controlling 'completeDiscoveryState'. +data DiscoveryOptions + = DiscoveryOptions { exploreFunctionSymbols :: !Bool + -- ^ If @True@, 'completeDiscoveryState' + -- should automatically explore all addresses + -- in the address-to-symbol map. + , exploreCodeAddrInMem :: !Bool + -- ^ If @True@, 'completeDiscoveryState' will + -- explore all potential code addresses in + -- memory after exploring other potnetial + -- functions. + , logAtAnalyzeFunction :: !Bool + -- ^ Print a message each time we apply + -- discovery analysis to a new function. + , logAtAnalyzeBlock :: !Bool + -- ^ Print a message each time we analyze a + -- block within a function. + } + +defaultDiscoveryOptions :: DiscoveryOptions +defaultDiscoveryOptions = + DiscoveryOptions { exploreFunctionSymbols = True + , exploreCodeAddrInMem = False + , logAtAnalyzeFunction = True + , logAtAnalyzeBlock = False + } + +ppSymbol :: MemWidth w => MemSegmentOff w -> AddrSymMap w -> String +ppSymbol addr sym_map = + case Map.lookup addr sym_map of + Just fnName -> show addr ++ " (" ++ BSC.unpack fnName ++ ")" + Nothing -> show addr + +-- | Event for logging function +data DiscoveryEvent w + = AnalyzeFunction !(MemSegmentOff w) + | AnalyzeBlock !(MemSegmentOff w) + +{-# DEPRECATED discoveryLogFn "02/17/2018 Stop using this" #-} + +-- | Print out discovery event using options and address to symbol map. +discoveryLogFn :: MemWidth w + => DiscoveryOptions + -> AddrSymMap w + -> DiscoveryEvent w + -> ST RealWorld () +discoveryLogFn disOpt symMap (AnalyzeFunction addr) = ioToST $ do + when (logAtAnalyzeFunction disOpt) $ do + hPutStrLn stderr $ "Analyzing function: " ++ ppSymbol addr symMap + hFlush stderr +discoveryLogFn disOpt _ (AnalyzeBlock addr) = ioToST $ do + when (logAtAnalyzeBlock disOpt) $ do + hPutStrLn stderr $ " Analyzing block: " ++ show addr + + hFlush stderr + +-- | Explore until we have found all functions we can. +-- +-- This function is intended to make it easy to explore functions, and +-- can be controlled via 'DiscoveryOptions'. +completeDiscoveryState :: forall arch + . ArchitectureInfo arch + -> DiscoveryOptions + -- ^ Options controlling discovery + -> Memory (ArchAddrWidth arch) + -- ^ Memory state used for static code discovery. + -> [MemSegmentOff (ArchAddrWidth arch)] + -- ^ Initial entry points to explore + -> AddrSymMap (ArchAddrWidth arch) + -- ^ The map from addresses to symbols + -> (ArchSegmentOff arch -> Bool) + -- ^ Predicate to check if we should explore a function + -- + -- Return true to explore all functions. + -> IO (DiscoveryState arch) +completeDiscoveryState ainfo disOpt mem initEntries symMap funPred = stToIO $ withArchConstraints ainfo $ do + let initState + = emptyDiscoveryState mem symMap ainfo + & markAddrsAsFunction InitAddr initEntries + -- Add symbol table entries to discovery state if requested + let postSymState + | exploreFunctionSymbols disOpt = + initState & markAddrsAsFunction InitAddr (Map.keys symMap) + | otherwise = initState + let analyzeFn addr _rsn = ioToST $ do + let b = funPred addr + when (b && logAtAnalyzeFunction disOpt) $ do + hPutStrLn stderr $ "Analyzing function: " ++ ppSymbol addr symMap + hFlush stderr + pure $! b + let analyzeBlock _ addr = ioToST $ do + when (logAtAnalyzeBlock disOpt) $ do + hPutStrLn stderr $ " Analyzing block: " ++ show addr + hFlush stderr + -- Discover functions + postPhase1Discovery <- resolveFuns analyzeFn analyzeBlock postSymState + -- Discovery functions from memory + if exploreCodeAddrInMem disOpt then do + let mem_contents = withArchConstraints ainfo $ memAsAddrPairs mem LittleEndian + resolveFuns analyzeFn analyzeBlock $ postPhase1Discovery & exploreMemPointers mem_contents + else + return postPhase1Discovery diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index 9945160e..cc33b36a 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -320,13 +320,13 @@ emptyDiscoveryState mem symbols info = } -- | Map each jump table start to the address just after the end. -globalDataMap :: Simple Lens (DiscoveryState arch) - (Map (ArchMemAddr arch) (GlobalDataInfo (ArchMemAddr arch))) +globalDataMap + :: Simple Lens (DiscoveryState arch) (Map (ArchMemAddr arch) (GlobalDataInfo (ArchMemAddr arch))) globalDataMap = lens _globalDataMap (\s v -> s { _globalDataMap = v }) -- | List of functions to explore next. -unexploredFunctions :: Simple Lens (DiscoveryState arch) - (Map (ArchSegmentOff arch) (CodeAddrReason (ArchAddrWidth arch))) +unexploredFunctions + :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (CodeAddrReason (ArchAddrWidth arch))) unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunctions = v }) -- | Get information for specific functions diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index b2711ee3..1105f15d 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -69,6 +69,7 @@ module Data.Macaw.Memory , memAsAddrPairs -- * Symbols , SymbolRef(..) + , SymbolVisibility(..) , SymbolVersion(..) -- * General purposes addrs , MemAddr @@ -287,6 +288,9 @@ instance MemWidth w => Integral (MemWord w) where where (q,r) = x `quotRem` y toInteger (MemWord x) = toInteger x +instance MemWidth w => Bounded (MemWord w) where + minBound = 0 + maxBound = MemWord (addrWidthMod (Proxy :: Proxy w)) instance MemWidth 32 where addrWidthRepr _ = Addr32 @@ -325,9 +329,19 @@ data SymbolVersion = SymbolVersion { symbolVersionFile :: !BS.ByteString , symbolVersionName :: !BS.ByteString } +-- | Information about the visibility of a symbol within a binary. +data SymbolVisibility + = LocalSymbol + -- ^ Th symbol is only visible within the module + | GlobalSymbol + -- ^ The symbol is globally visible to all modules + | VersionedSymbol !SymbolVersion + -- ^ The symbol is visible with the specific version associated + + -- | The name of a symbol along with optional version information. data SymbolRef = SymbolRef { symbolName :: !BS.ByteString - , symbolVersion :: !(Maybe SymbolVersion) + , symbolVisibility :: !SymbolVisibility } -- | Defines a portion of a segment. @@ -352,37 +366,6 @@ instance Show (SegmentRange w) where showList [] = id showList (h : r) = showsPrec 10 h . showList r -data DropError - = DropUnexpectedRelocation - | DropInvalidAddr - -dropErrorAsMemError :: MemAddr w -> DropError -> MemoryError w -dropErrorAsMemError a DropUnexpectedRelocation = UnexpectedRelocation a -dropErrorAsMemError a DropInvalidAddr = InvalidAddr a - --- | Given a contiguous list of segment ranges and a number of bytes to drop, this --- returns the remaining segment ranges or throws an error. -dropSegmentRangeListBytes :: forall w - . MemWidth w - => [SegmentRange w] - -> Int - -> Either DropError [SegmentRange w] -dropSegmentRangeListBytes ranges 0 = Right ranges -dropSegmentRangeListBytes (ByteRegion bs : rest) cnt = do - let sz = BS.length bs - if sz > cnt then - Right $ ByteRegion (BS.drop cnt bs) : rest - else - dropSegmentRangeListBytes rest (cnt - sz) -dropSegmentRangeListBytes (SymbolicRef _:rest) cnt = do - let sz = addrSize (error "rangeSize nat evaluated" :: NatRepr w) - if sz > cnt then - Left DropUnexpectedRelocation - else - dropSegmentRangeListBytes rest (cnt - sz) -dropSegmentRangeListBytes [] _ = - Left DropInvalidAddr - ------------------------------------------------------------------------ -- SegmentContents @@ -441,7 +424,7 @@ data MemSegment w -- -- N.B. 0 indicates a fixed base address of zero. , segmentOffset :: !(MemWord w) - -- ^ Offset of segment to base + -- ^ Offset of segment relative to segmentBase , segmentFlags :: !Perm.Flags -- ^ Permisison flags , segmentContents :: !(SegmentContents w) @@ -450,10 +433,11 @@ data MemSegment w } -- | Create a memory segment with the given values. -memSegment :: MemWidth w +memSegment :: forall w + . MemWidth w => RegionIndex -- ^ Index of base (0=absolute address) - -> MemWord w + -> Integer -- ^ Offset of segment -> Perm.Flags -- ^ Flags if defined @@ -462,11 +446,11 @@ memSegment :: MemWidth w -> MemSegment w memSegment base off flags contentsl -- Check for overflow in contents end - | off + contentsSize contents < off = + | off + toInteger (contentsSize contents) > toInteger (maxBound :: MemWord w) = error "Contents two large for base." | otherwise = MemSegment { segmentBase = base - , segmentOffset = off + , segmentOffset = fromInteger off , segmentFlags = flags , segmentContents = contents } @@ -599,11 +583,12 @@ resolveSegmentOff seg off -- | Return the absolute address associated with the segment offset pair (if any) msegAddr :: MemWidth w => MemSegmentOff w -> Maybe (MemWord w) -msegAddr (MemSegmentOff seg off) = - if segmentBase seg == 0 then - Just (segmentOffset seg + off) - else - Nothing +msegAddr mseg = do + let seg = msegSegment mseg + in if segmentBase seg == 0 then + Just (segmentOffset seg + msegOffset mseg) + else + Nothing -- | Clear the least-significant bit of an segment offset. clearSegmentOffLeastBit :: MemWidth w => MemSegmentOff w -> MemSegmentOff w @@ -680,13 +665,13 @@ data MemAddr w -- | Given an absolute address, this returns a segment and offset into the segment. absoluteAddr :: MemWord w -> MemAddr w -absoluteAddr = MemAddr 0 +absoluteAddr o = MemAddr { addrBase = 0, addrOffset = o } -- | Construct an address relative to an existing memory segment. relativeAddr :: MemWidth w => MemSegment w -> MemWord w -> MemAddr w -relativeAddr seg off = MemAddr (segmentBase seg) (segmentOffset seg + off) +relativeAddr seg off = MemAddr { addrBase = segmentBase seg, addrOffset = segmentOffset seg + off } --- | Return the address associated with a memory segment. +-- | Convert the segment offset to an address. relativeSegmentAddr :: MemWidth w => MemSegmentOff w -> MemAddr w relativeSegmentAddr (MemSegmentOff seg off) = relativeAddr seg off @@ -735,6 +720,41 @@ instance MemWidth w => Pretty (MemAddr w) where -- | Maps code addresses to the associated symbol name if any. type AddrSymMap w = Map.Map (MemSegmentOff w) BSC.ByteString +------------------------------------------------------------------------ +-- DropError + +-- | An error that occured when droping byes. +data DropError + = DropUnexpectedRelocation + | DropInvalidAddr + +dropErrorAsMemError :: MemAddr w -> DropError -> MemoryError w +dropErrorAsMemError a DropUnexpectedRelocation = UnexpectedRelocation a +dropErrorAsMemError a DropInvalidAddr = InvalidAddr a + +-- | Given a contiguous list of segment ranges and a number of bytes to drop, this +-- returns the remaining segment ranges or throws an error. +dropSegmentRangeListBytes :: forall w + . MemWidth w + => [SegmentRange w] + -> Int + -> Either DropError [SegmentRange w] +dropSegmentRangeListBytes ranges 0 = Right ranges +dropSegmentRangeListBytes (ByteRegion bs : rest) cnt = do + let sz = BS.length bs + if sz > cnt then + Right $ ByteRegion (BS.drop cnt bs) : rest + else + dropSegmentRangeListBytes rest (cnt - sz) +dropSegmentRangeListBytes (SymbolicRef _:rest) cnt = do + let sz = addrSize (error "rangeSize nat evaluated" :: NatRepr w) + if sz > cnt then + Left DropUnexpectedRelocation + else + dropSegmentRangeListBytes rest (cnt - sz) +dropSegmentRangeListBytes [] _ = + Left DropInvalidAddr + ------------------------------------------------------------------------ -- MemoryError diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index 5e329a4d..89cd3f29 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -68,6 +68,7 @@ import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe import qualified Data.Vector as V +import Numeric (showHex) import Data.Macaw.Memory import Data.Macaw.Memory.LoadCommon @@ -114,6 +115,19 @@ flagsForSectionFlags f = where flagIf :: ElfSectionFlags w -> Perm.Flags -> Perm.Flags flagIf ef pf = if f `Elf.hasPermissions` ef then pf else Perm.none +------------------------------------------------------------------------ +-- RegionAdjust + +-- | This captures how to translate addresses in the Elf file to +-- regions in the memory object. +data RegionAdjust + = RegionAdjust { regionIndex :: !RegionIndex + -- ^ Region index for new segments + , regionOffset :: !Integer + -- ^ Offset from region to automatically add to + -- segment/sections during loading. + } + ------------------------------------------------------------------------ -- Loading by segment @@ -157,7 +171,7 @@ padBSSData incBSS dta sz -- | Return a memory segment for elf segment if it loadable. memSegmentForElfSegment :: (MemWidth w, Integral (ElfWordType w)) - => RegionIndex -- ^ Index for segment + => RegionAdjust -- ^ Index for segment -> IncludeBSS -- ^ Flag to control wheter we include BSS -> L.ByteString -- ^ Complete contents of Elf file. @@ -166,14 +180,16 @@ memSegmentForElfSegment :: (MemWidth w, Integral (ElfWordType w)) -> Elf.Phdr w -- ^ Program header entry -> MemSegment w -memSegmentForElfSegment regIdx incBSS contents relocMap phdr = mseg +memSegmentForElfSegment regAdj incBSS contents relocMap phdr = mseg where seg = Elf.phdrSegment phdr dta = sliceL (Elf.phdrFileRange phdr) contents sz = fromIntegral $ Elf.phdrMemSize phdr fixedData = padBSSData incBSS dta sz - addr = fromIntegral $ elfSegmentVirtAddr seg + addr = regionOffset regAdj + toInteger (elfSegmentVirtAddr seg) flags = flagsForSegmentFlags (elfSegmentFlags seg) - mseg = memSegment regIdx addr flags (byteSegments relocMap addr fixedData) + segContents = byteSegments relocMap (fromInteger addr) fixedData + mseg = memSegment (regionIndex regAdj) addr flags segContents + -- | Create memory segment from elf section. -- @@ -196,8 +212,7 @@ memSegmentForElfSection regIdx incBSS s ------------------------------------------------------------------------ -- MemLoader -data MemLoaderState w = MLS { mlsRegionIndex :: !RegionIndex - -- ^ Region index for new segments +data MemLoaderState w = MLS { mlsRegionAdjust :: !RegionAdjust , mlsIncludeBSS :: !Bool -- ^ Flag whether to include BSS , _mlsMemory :: !(Memory w) @@ -216,9 +231,9 @@ memLoaderPair mls = (mls^.mlsIndexMap, mls^.mlsMemory) type MemLoader w = StateT (MemLoaderState w) (Except String) -runMemLoader :: RegionIndex -> Bool -> Memory w -> MemLoader w () -> Either String (SectionIndexMap w, Memory w) -runMemLoader regIdx incBSS mem m = fmap memLoaderPair $ runExcept $ execStateT m s - where s = MLS { mlsRegionIndex = regIdx +runMemLoader :: RegionAdjust -> Bool -> Memory w -> MemLoader w () -> Either String (SectionIndexMap w, Memory w) +runMemLoader regAdj incBSS mem m = fmap memLoaderPair $ runExcept $ execStateT m s + where s = MLS { mlsRegionAdjust = regAdj , mlsIncludeBSS = incBSS , _mlsMemory = mem , _mlsIndexMap = Map.empty @@ -250,7 +265,11 @@ mkSymbolVersion ver = SymbolVersion { symbolVersionFile = Elf.verFile ver mkSymbolRef :: Elf.VersionedSymbol tp -> SymbolRef mkSymbolRef (sym, mverId) = SymbolRef { symbolName = Elf.steName sym - , symbolVersion = mkSymbolVersion <$> mverId + , symbolVisibility = + case mverId of + Elf.VersionLocal -> LocalSymbol + Elf.VersionGlobal -> GlobalSymbol + Elf.VersionSpecific verId -> VersionedSymbol (mkSymbolVersion verId) } ------------------------------------------------------------------------ @@ -273,13 +292,13 @@ relaSymbol symtab rel = Nothing -> Left $ "Could not find symbol at index " ++ show (Elf.r_sym rel) ++ "." Just sym -> Right sym +-- | Creates a map that forwards addresses to be relocated to their appropriate target. +type RelaTargetFn tp = V.Vector SymbolRef -> Elf.RelaEntry tp -> Either String (Maybe SymbolRef) + -- | Given a relocation entry, this returns either @Left msg@ if the relocation -- cannot be resolved, @Right Nothing@ if -relaTarget :: V.Vector SymbolRef - -- ^ Get c - -> Elf.RelaEntry Elf.X86_64_RelocationType - -> Either String (Maybe SymbolRef) -relaTarget symtab rel = +relaTargetX86_64 :: RelaTargetFn Elf.X86_64_RelocationType +relaTargetX86_64 symtab rel = case Elf.r_type rel of Elf.R_X86_64_GLOB_DAT -> do checkZeroAddend rel @@ -290,13 +309,28 @@ relaTarget symtab rel = Just <$> relaSymbol symtab rel tp -> Left $ "Do not yet support relocation type: " ++ show tp -relocEntry :: V.Vector SymbolRef - -> Elf.RelaEntry Elf.X86_64_RelocationType - -> Either String (Maybe (MemWord 64, SymbolRef)) -relocEntry symtab rel = fmap (fmap f) $ relaTarget symtab rel - where f :: SymbolRef -> (MemWord 64, SymbolRef) - f tgt = (memWord (Elf.r_offset rel), tgt) +relaTargetARM :: RelaTargetFn Elf.ARM_RelocationType +relaTargetARM symtab rel = + case Elf.r_type rel of + Elf.R_ARM_GLOB_DAT -> do + checkZeroAddend rel + Just <$> relaSymbol symtab rel + Elf.R_ARM_COPY -> Right Nothing + Elf.R_ARM_JUMP_SLOT -> do + checkZeroAddend rel + Just <$> relaSymbol symtab rel + tp -> Left $ "Do not yet support relocation type: " ++ show tp +--(Elf.IsRelocationType tp, MemWidth (Elf.RelocationWidth tp), Integral (Elf.RelocationWord tp)) +-- => +-- | Creates a map that forwards addresses to be relocated to their appropriate target. +relocEntry :: (MemWidth (Elf.RelocationWidth tp), Integral (Elf.RelocationWord tp)) + => RelaTargetFn tp + -> V.Vector SymbolRef + -> Elf.RelaEntry tp + -> Either String (Maybe (MemWord (Elf.RelocationWidth tp), SymbolRef)) +relocEntry relaTarget symtab rel = fmap (fmap f) $ relaTarget symtab rel + where f tgt = (memWord (fromIntegral (Elf.r_offset rel)), tgt) -- Given a list returns a map mapping keys to their associated values, or -- a key that appears in multiple elements. @@ -308,33 +342,46 @@ mapFromListUnique = foldlM f Map.empty Just _ -> Left k -- | Creates a map that forwards addresses to be relocated to their appropriate target. -mkRelocMap :: V.Vector SymbolRef - -> [Elf.RelaEntry Elf.X86_64_RelocationType] - -> Either String (RelocMap (MemWord 64)) -mkRelocMap symtab l = do - mentries <- traverse (relocEntry symtab) l +mkRelocMap :: ( Elf.IsRelocationType tp + , MemWidth (Elf.RelocationWidth tp) + , Integral (Elf.RelocationWord tp) + ) + => RelaTargetFn tp + -> V.Vector SymbolRef + -> [Elf.RelaEntry tp] + -> Either String (RelocMap (MemWord (Elf.RelocationWidth tp))) +mkRelocMap relaTarget symtab l = do + mentries <- traverse (relocEntry relaTarget symtab) l let errMsg w = show w ++ " appears in multiple relocations." case mapFromListUnique $ catMaybes mentries of Left dup -> Left (errMsg dup) Right v -> Right v -- | Creates a relocation map from the contents of a dynamic section. -relocMapOfDynamic :: Elf.ElfHeader w +relocMapOfDynamic :: forall w + . (MemWidth w, Integral (ElfWordType w)) + => Elf.ElfHeader w -> Elf.VirtAddrMap w -> L.ByteString -- ^ Contents of .dynamic section -> MemLoader w (RelocMap (MemWord w)) relocMapOfDynamic hdr virtMap dynContents = case (Elf.headerClass hdr, Elf.headerMachine hdr) of - (Elf.ELFCLASS64, Elf.EM_X86_64) -> do - dynSection <- either (throwError . show) pure $ - Elf.dynamicEntries (Elf.headerData hdr) Elf.ELFCLASS64 virtMap dynContents - relocs <- either (throwError . show) pure $ - Elf.dynRelocations (dynSection :: Elf.DynamicSection Elf.X86_64_RelocationType) - syms <- either (throwError . show) pure $ - Elf.dynSymTable dynSection - either throwError pure $ - mkRelocMap (mkSymbolRef <$> syms) relocs + (Elf.ELFCLASS64, Elf.EM_X86_64) -> go relaTargetX86_64 + (Elf.ELFCLASS32, Elf.EM_ARM) -> go relaTargetARM (_,mach) -> throwError $ "Dynamic libraries are not supported on " ++ show mach ++ "." + where go :: forall tp + . (Elf.IsRelocationType tp, w ~ Elf.RelocationWidth tp) + => RelaTargetFn tp + -> MemLoader (Elf.RelocationWidth tp) (RelocMap (MemWord (Elf.RelocationWidth tp))) + go relaTarget = do + dynSection <- either (throwError . show) pure $ + Elf.dynamicEntries (Elf.headerData hdr) (Elf.headerClass hdr) virtMap dynContents + relocs <- either (throwError . show) pure $ + Elf.dynRelocations dynSection + syms <- either (throwError . show) pure $ + Elf.dynSymTable dynSection + either throwError pure $ + mkRelocMap relaTarget (mkSymbolRef <$> syms) relocs ------------------------------------------------------------------------ -- Elf segment loading @@ -353,11 +400,11 @@ insertElfSegment :: ElfFileSectionMap (ElfWordType w) -> Elf.Phdr w -> MemLoader w () insertElfSegment shdrMap contents relocMap phdr = do - regIdx <- gets mlsRegionIndex + regAdj <- gets mlsRegionAdjust incBSS <- gets mlsIncludeBSS w <- uses mlsMemory memAddrWidth reprConstraints w $ do - let seg = memSegmentForElfSegment regIdx incBSS contents relocMap phdr + let seg = memSegmentForElfSegment regAdj incBSS contents relocMap phdr let seg_idx = elfSegmentIndex (Elf.phdrSegment phdr) loadMemSegment ("Segment " ++ show seg_idx) seg let phdr_offset = Elf.fromFileOffset (Elf.phdrFileStart phdr) @@ -413,13 +460,14 @@ memoryForElfSegments e = do insertElfSection :: ElfSection (ElfWordType w) -> MemLoader w () insertElfSection sec = do - regIdx <- gets mlsRegionIndex + regAdj <- mlsRegionAdjust <$> get incBSS <- gets mlsIncludeBSS w <- uses mlsMemory memAddrWidth reprConstraints w $ do -- Check if we should load section let doLoad = elfSectionFlags sec `Elf.hasPermissions` Elf.shf_alloc && elfSectionName sec /= ".eh_frame" + let regIdx = regionIndex regAdj case memSegmentForElfSection regIdx incBSS sec of Just seg | doLoad -> do loadMemSegment ("Section " ++ BSC.unpack (elfSectionName sec) ++ " " ++ show (Elf.elfSectionSize sec)) seg @@ -473,8 +521,10 @@ memoryForElf :: LoadOptions -> Elf w -> Either String (SectionIndexMap w, Memory w) memoryForElf opt e = do - let regIdx = adjustedLoadRegionIndex e opt - runMemLoader regIdx (includeBSS opt) (emptyMemory (elfAddrWidth (elfClass e))) $ do + let regAdj = RegionAdjust { regionIndex = adjustedLoadRegionIndex e opt + , regionOffset = loadRegionBaseOffset opt + } + runMemLoader regAdj (includeBSS opt) (emptyMemory (elfAddrWidth (elfClass e))) $ do case adjustedLoadStyle e opt of LoadBySection -> memoryForElfSections e LoadBySegment -> memoryForElfSegments e @@ -559,12 +609,17 @@ resolveElfFuncSymbols mem secMap e = -- initElfDiscoveryInfo -- | Return the segment offset of the elf file entry point or fail if undefined. -getElfEntry :: Memory w -> Elf w -> Either String (MemSegmentOff w) -getElfEntry mem e = addrWidthClass (memAddrWidth mem) $ do +getElfEntry :: LoadOptions -> Memory w -> Elf w -> ([String], Maybe (MemSegmentOff w)) +getElfEntry loadOpts mem e = addrWidthClass (memAddrWidth mem) $ do Elf.elfClassInstances (Elf.elfClass e) $ do - case resolveAbsoluteAddr mem (fromIntegral (Elf.elfEntry e)) of - Nothing -> Left "Could not resolve entry" - Just v -> Right v + let regIdx = adjustedLoadRegionIndex e loadOpts + let adjAddr = loadRegionBaseOffset loadOpts + toInteger (Elf.elfEntry e) + case resolveAddr mem regIdx (fromInteger adjAddr) of + Nothing -> + ( ["Could not resolve entry point: " ++ showHex (Elf.elfEntry e) ""] + , Nothing + ) + Just v -> ([], Just v) -- | This interprets the Elf file to construct the initial memory, -- entry points, and functions symbols. @@ -592,14 +647,14 @@ initElfDiscoveryInfo loadOpts e = do pure (show <$> symErrs, mem, Nothing, funcSymbols) Elf.ET_EXEC -> do (secMap, mem) <- memoryForElf loadOpts e - entry <- getElfEntry mem e + let (entryWarn, mentry) = getElfEntry loadOpts mem e let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e - Right (show <$> symErrs, mem, Just entry, funcSymbols) + Right (entryWarn ++ fmap show symErrs, mem, mentry, funcSymbols) Elf.ET_DYN -> do (secMap, mem) <- memoryForElf loadOpts e - entry <- getElfEntry mem e + let (entryWarn, mentry) = getElfEntry loadOpts mem e let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e - pure (show <$> symErrs, mem, Just entry, funcSymbols) + pure (entryWarn ++ fmap show symErrs, mem, mentry, funcSymbols) Elf.ET_CORE -> do Left $ "Reopt does not support loading core files." tp -> do diff --git a/base/src/Data/Macaw/Memory/LoadCommon.hs b/base/src/Data/Macaw/Memory/LoadCommon.hs index 6e8c4648..a61b69f7 100644 --- a/base/src/Data/Macaw/Memory/LoadCommon.hs +++ b/base/src/Data/Macaw/Memory/LoadCommon.hs @@ -6,6 +6,7 @@ Common datatypes for creating a memory from a binary file. -} module Data.Macaw.Memory.LoadCommon ( LoadOptions(..) + , defaultLoadOptions , LoadStyle(..) ) where @@ -34,6 +35,12 @@ data LoadOptions -- -- If 'Nothing' then static executables have region index 0 and other -- files have region index 1. + , loadRegionBaseOffset :: !Integer + -- ^ Increment to automatically add to segment/section memory offsets + -- when loading. + -- + -- This defaults to '0', and is primarily intended to allow loading + -- relocatable files at specific hard-coded offsets. , loadStyleOverride :: !(Maybe LoadStyle) -- ^ Controls whether to load by section or segment -- @@ -41,3 +48,12 @@ data LoadOptions , includeBSS :: !Bool -- ^ Include data not backed by file when creating memory segments. } + +-- | Default options for loading +defaultLoadOptions :: LoadOptions +defaultLoadOptions = + LoadOptions { loadRegionIndex = Nothing + , loadRegionBaseOffset = 0 + , loadStyleOverride = Nothing + , includeBSS = False + } diff --git a/x86/tests/submodules/elf-edit b/x86/tests/submodules/elf-edit index b3f95f1d..8c4fe6e4 160000 --- a/x86/tests/submodules/elf-edit +++ b/x86/tests/submodules/elf-edit @@ -1 +1 @@ -Subproject commit b3f95f1da846bb6f6b44ed5f033d07b6fb1759a6 +Subproject commit 8c4fe6e4e625d5c98b31dd79a31a5b391f7a738f