diff --git a/base/src/Data/Macaw/Architecture/Info.hs b/base/src/Data/Macaw/Architecture/Info.hs index 422a650d..ba274b1e 100644 --- a/base/src/Data/Macaw/Architecture/Info.hs +++ b/base/src/Data/Macaw/Architecture/Info.hs @@ -9,7 +9,6 @@ This defines the architecture-specific information needed for code discovery. module Data.Macaw.Architecture.Info ( ArchitectureInfo(..) , DisassembleFn - , archPostSyscallAbsState -- * Unclassified blocks , module Data.Macaw.CFG.Block ) where @@ -64,9 +63,6 @@ data ArchitectureInfo arch -- ^ The size of each entry in a jump table. , disassembleFn :: !(DisassembleFn arch) -- ^ Function for disasembling a block. - , preserveRegAcrossSyscall :: !(forall tp . ArchReg arch tp -> Bool) - - -- ^ Return true if architecture register should be preserved across a system call. , mkInitialAbsState :: !(Memory (RegAddrWidth (ArchReg arch)) -> ArchSegmentOff arch -> AbsBlockState (ArchReg arch)) @@ -120,16 +116,16 @@ data ArchitectureInfo arch -> Rewriter arch src tgt (ArchTermStmt arch tgt)) -- ^ This rewrites an architecture specific statement , archDemandContext :: !(forall ids . DemandContext arch ids) + -- ^ Provides architecture-specific information for computing which arguments must be + -- evaluated when evaluating a statement. + , postArchTermStmtAbsState :: !(forall ids + -- The abstract state when block terminates. + . AbsBlockState (ArchReg arch) + -- The architecture-specific statement + -> ArchTermStmt arch ids + -- The IP we are going to next. + -> ArchSegmentOff arch + -> AbsBlockState (ArchReg arch)) + -- ^ Returns the abstract state after executing an architecture-specific + -- terminal statement. } - - --- | Return state post call -archPostSyscallAbsState :: ArchitectureInfo arch - -- ^ Architecture information - -> AbsBlockState (ArchReg arch) - -> ArchSegmentOff arch - -> AbsBlockState (ArchReg arch) -archPostSyscallAbsState info = withArchConstraints info $ AbsState.absEvalCall params - where params = CallParams { postCallStackDelta = 0 - , preserveReg = preserveRegAcrossSyscall info - } diff --git a/base/src/Data/Macaw/CFG/App.hs b/base/src/Data/Macaw/CFG/App.hs index 14689778..ad9138eb 100644 --- a/base/src/Data/Macaw/CFG/App.hs +++ b/base/src/Data/Macaw/CFG/App.hs @@ -142,29 +142,36 @@ data App (f :: Type -> *) (tp :: Type) where -> !(f BoolType) -> App f BoolType + -- | This returns the number of true bits in the input. + PopCount :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) + -- Return true if value contains even number of true bits. - EvenParity :: !(f (BVType 8)) -> App f BoolType + --EvenParity :: !(f (BVType 8)) -> App f BoolType -- Reverse the bytes in a bitvector expression. - ReverseBytes :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) + ReverseBytes :: (1 <= n) => !(NatRepr n) -> !(f (BVType (8*n))) -> App f (BVType (8*n)) - -- bsf "bit scan forward" returns the index of the least-significant - -- bit that is 1. Undefined if value is zero. - -- All bits at indices less than return value must be unset. + -- | bsf "bit scan forward" returns the index of the + -- least-significant bit that is 1. An equivalent way of stating + -- this is that it returns the number "trailing" zero-bits. This + -- returns n if the value is zero. Bsf :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) - -- bsr "bit scan reverse" returns the index of the most-significant - -- bit that is 1. Undefined if value is zero. - -- All bits at indices greater than return value must be unset. + -- | bsf "bit scan forward" returns the index of the + -- most-significant bit that is 1. An equivalent way of stating + -- this is that it returns the number "least" zero-bits. This + -- returns n if the value is zero. Bsr :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) ---------------------------------------------------------------------- -- Floating point operations + -- | Return true if floating point value is a "quiet" NaN. FPIsQNaN :: !(FloatInfoRepr flt) -> !(f (FloatType flt)) -> App f BoolType + -- | Return true if floating point value is a "signaling" NaN. FPIsSNaN :: !(FloatInfoRepr flt) -> !(f (FloatType flt)) -> App f BoolType @@ -337,7 +344,7 @@ ppAppA pp a0 = BVShr _ x y -> sexprA "bv_shr" [ pp x, pp y ] BVSar _ x y -> sexprA "bv_sar" [ pp x, pp y ] Eq x y -> sexprA "eq" [ pp x, pp y ] - EvenParity x -> sexprA "even_parity" [ pp x ] + PopCount _ x -> sexprA "popcount" [ pp x ] ReverseBytes _ x -> sexprA "reverse_bytes" [ pp x ] UadcOverflows _ x y c -> sexprA "uadc_overflows" [ pp x, pp y, pp c ] SadcOverflows _ x y c -> sexprA "sadc_overflows" [ pp x, pp y, pp c ] @@ -397,14 +404,17 @@ instance HasRepr (App f) TypeRepr where BVShr w _ _ -> BVTypeRepr w BVSar w _ _ -> BVTypeRepr w Eq _ _ -> knownType - EvenParity _ -> knownType - ReverseBytes w _ -> BVTypeRepr w - UadcOverflows{} -> knownType + + UadcOverflows{} -> knownType SadcOverflows{} -> knownType UsbbOverflows{} -> knownType SsbbOverflows{} -> knownType + PopCount w _ -> BVTypeRepr w + ReverseBytes w _ -> + case leqMulCongr (LeqProof :: LeqProof 1 8) (leqProof (knownNat :: NatRepr 1) w) of + LeqProof -> BVTypeRepr (natMultiply (knownNat :: NatRepr 8) w) Bsf w _ -> BVTypeRepr w Bsr w _ -> BVTypeRepr w diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index 4f38fc45..7674a8f9 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -206,6 +206,13 @@ type family ArchStmt (arch :: *) :: * -> * -- -- The second type parameter is the ids phantom type used to provide -- uniqueness of Nonce values that identify assignments. +-- +-- The architecture-specific terminal statement may have side effects, but is +-- assumed to jump to the location specified as the current instruction-pointer +-- after executing. This location is assumed to be in the current calling context. +-- +-- NOTE: Due to the restrictions on ArchTermStmt control-flow, we may +-- want to remove ArchTermStmt entirely and replace with ArchStmt. type family ArchTermStmt (arch :: *) :: * -> * -- | Number of bits in addreses for architecture. diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 88fb22d8..923bb63e 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -780,7 +780,7 @@ parseBlock ctx b regs = do case concretizeAbsCodePointers mem (abst^.absRegState^.curIP) of [addr] -> do -- Merge system call result with possible next IPs. - let post = archPostSyscallAbsState arch_info abst addr + let post = postArchTermStmtAbsState arch_info abst ts addr intraJumpTargets %= ((addr, post):) pure $! StatementList { stmtsIdent = idx diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index 1653bd26..2fc5dc33 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -44,6 +44,7 @@ module Data.Macaw.Memory , ppMemSegment , segmentSize , SegmentRange(..) + , dropSegmentRangeListBytes -- * MemWord , MemWord , MemWidth(..) @@ -58,6 +59,7 @@ module Data.Macaw.Memory , msegAddr , incSegmentOff , diffSegmentOff + , clearSegmentOffLeastBit , memAsAddrPairs -- * Symbols , SymbolRef(..) @@ -340,6 +342,30 @@ instance Show (SegmentRange w) where showList [] = id showList (h : r) = showsPrec 10 h . showList r +-- | Given a contiguous list of segment ranges and a number of bytes to drop, this +-- returns the remaining segment ranges or throws an error. +dropSegmentRangeListBytes :: forall w + . MemWidth w + => MemAddr w + -> [SegmentRange w] + -> Int + -> Either (MemoryError w) [SegmentRange w] +dropSegmentRangeListBytes _ ranges 0 = Right ranges +dropSegmentRangeListBytes addr (ByteRegion bs : rest) cnt = do + let sz = BS.length bs + if sz > cnt then + Right $ ByteRegion (BS.drop cnt bs) : rest + else + dropSegmentRangeListBytes (incAddr (toInteger sz) addr) rest (cnt - sz) +dropSegmentRangeListBytes addr (SymbolicRef _:rest) cnt = do + let sz = addrSize (error "rangeSize nat evaluated" :: NatRepr w) + if sz > cnt then + Left (UnexpectedRelocation addr) + else + dropSegmentRangeListBytes (incAddr (toInteger sz) addr) rest (cnt - sz) +dropSegmentRangeListBytes addr [] _ = + Left (InvalidAddr addr) + ------------------------------------------------------------------------ -- SegmentContents @@ -574,6 +600,10 @@ resolveSegmentOff seg off msegAddr :: MemWidth w => MemSegmentOff w -> Maybe (MemWord w) msegAddr (MemSegmentOff seg off) = (+ off) <$> segmentBase seg +-- | Clear the least-significant bit of an segment offset. +clearSegmentOffLeastBit :: MemWidth w => MemSegmentOff w -> MemSegmentOff w +clearSegmentOffLeastBit (MemSegmentOff seg off) = MemSegmentOff seg (off .&. complement 1) + -- | Increment a segment offset by a given amount. -- -- Returns 'Nothing' if the result would be out of range. diff --git a/x86/src/Data/Macaw/X86.hs b/x86/src/Data/Macaw/X86.hs index 02b0613d..c35d27df 100644 --- a/x86/src/Data/Macaw/X86.hs +++ b/x86/src/Data/Macaw/X86.hs @@ -509,13 +509,7 @@ instance S.IsValue (Expr ids) where -- Default case | otherwise = app (UExt e0 w) - even_parity x - | Just xv <- asBVLit x = - let go 8 r = r - go i r = go (i+1) $! (xv `testBit` i /= r) - in S.boolValue (go 0 True) - | otherwise = app $ EvenParity x - reverse_bytes x = app $ ReverseBytes (typeWidth x) x + reverse_bytes w x = app $ ReverseBytes w x uadc_overflows x y c | Just 0 <- asBVLit y, Just False <- asBoolLit c = S.false @@ -737,35 +731,45 @@ modGenState m = X86G $ ContT $ \c -> ReaderT $ \s -> let (a,s') = runState m s in runReaderT (c a) s' -modState :: State (RegState X86Reg (Value X86_64 ids)) a -> X86Generator st_s ids a -modState m = modGenState $ do - s <- use curX86State - let (r,s') = runState m s - curX86State .= s' - return r +getState :: X86Generator st_s ids (GenState st_s ids) +getState = X86G ask + +-- | Return the value associated with the given register. +getReg :: X86Reg tp -> X86Generator st_s ids (Value X86_64 ids tp) +getReg r = view (curX86State . boundValue r) <$> getState + +-- | Set the value associated with the given register. +setReg :: X86Reg tp -> Value X86_64 ids tp -> X86Generator st_s ids () +setReg r v = modGenState $ curX86State . boundValue r .= v -- | Create a new assignment identifier newAssignID :: X86Generator st_s ids (AssignId ids tp) newAssignID = do - gs <- X86G $ ask + gs <- getState liftM AssignId $ X86G $ lift $ lift $ lift $ freshNonce $ assignIdGen gs +-- | Add a statement to the list of statements. addStmt :: Stmt X86_64 ids -> X86Generator st_s ids () addStmt stmt = seq stmt $ modGenState $ blockState . pBlockStmts %= (Seq.|> stmt) - addAssignment :: AssignRhs X86_64 ids tp -> X86Generator st_s ids (Assignment X86_64 ids tp) addAssignment rhs = do l <- newAssignID let a = Assignment l rhs addStmt $ AssignStmt a - return a + pure $! a -addArchFn :: X86PrimFn (Value X86_64 ids) tp - -> X86Generator st_s ids (Assignment X86_64 ids tp) -addArchFn fn = addAssignment (EvalArchFn fn (typeRepr fn)) +evalAssignRhs :: AssignRhs X86_64 ids tp + -> X86Generator st_s ids (Expr ids tp) +evalAssignRhs rhs = + ValueExpr . AssignedValue <$> addAssignment rhs + +-- | Evaluate an architecture-specific function and return the resulting expr. +evalArchFn :: X86PrimFn (Value X86_64 ids) tp + -> X86Generator st_s ids (Expr ids tp) +evalArchFn f = evalAssignRhs (EvalArchFn f (typeRepr f)) -- | This function does a top-level constant propagation/constant reduction. -- We assume that the leaf nodes have also been propagated (i.e., we only operate @@ -875,7 +879,7 @@ type ImpLocation ids tp = S.Location (AddrExpr ids) tp getX87Top :: X86Generator st_s ids Int getX87Top = do - top_val <- modState $ use $ boundValue X87_TopReg + top_val <- getReg X87_TopReg case top_val of -- Validate that i is less than top and top + BVValue _ (fromInteger -> topv) -> @@ -890,18 +894,18 @@ getX87Offset i = do return $! topv + i readLoc :: X86PrimLoc tp -> X86Generator st_s ids (Expr ids tp) -readLoc l = ValueExpr . AssignedValue <$> addArchFn (ReadLoc l) +readLoc l = evalArchFn (ReadLoc l) getLoc :: ImpLocation ids tp -> X86Generator st_s ids (Expr ids tp) getLoc (l0 :: ImpLocation ids tp) = case l0 of S.MemoryAddr w tp -> do addr <- eval w - ValueExpr . AssignedValue <$> addAssignment (ReadMem addr tp) + evalAssignRhs (ReadMem addr tp) S.ControlReg _ -> - fail $ "Do not support writing to control registers." + fail $ "Do not support reading control registers." S.DebugReg _ -> - fail $ "Do not support writing to debug registers." + fail $ "Do not support reading debug registers." S.SegmentReg s | s == F.FS -> readLoc FS | s == F.GS -> readLoc GS @@ -911,16 +915,14 @@ getLoc (l0 :: ImpLocation ids tp) = S.X87ControlReg r -> readLoc (X87_ControlLoc r) S.FullRegister r -> do - modState $ ValueExpr <$> use (boundValue r) + ValueExpr <$> getReg r S.Register (rv :: S.RegisterView m b n) -> do let r = S.registerViewReg rv - modState $ S.registerViewRead rv . ValueExpr <$> use (boundValue r) + S.registerViewRead rv . ValueExpr <$> getReg r -- TODO S.X87StackRegister i -> do idx <- getX87Offset i - e <- modState $ use $ boundValue (X87_FPUReg (F.mmxReg (fromIntegral idx))) - -- TODO: Check tag register is assigned. - return $! ValueExpr e + ValueExpr <$> getReg (X87_FPUReg (F.mmxReg (fromIntegral idx))) addArchStmt :: X86Stmt (Value X86_64 ids) -> X86Generator st_s ids () addArchStmt s = addStmt $ ExecArchStmt (X86Stmt s) @@ -954,19 +956,19 @@ setLoc loc v = S.X87ControlReg r -> addWriteLoc (X87_ControlLoc r) v S.FullRegister r -> do - modState $ boundValue r .= v + setReg r v S.Register (rv :: S.RegisterView m b n) -> do let r = S.registerViewReg rv - v0 <- modState $ ValueExpr <$> use (boundValue r) + v0 <- ValueExpr <$> getReg r v1 <- eval $ S.registerViewWrite rv v0 (ValueExpr v) - modState $ boundValue r .= v1 + setReg r v1 S.X87StackRegister i -> do off <- getX87Offset i - modState $ boundValue (X87_FPUReg (F.mmxReg (fromIntegral off))) .= v + setReg (X87_FPUReg (F.mmxReg (fromIntegral off))) v instance S.Semantics (X86Generator st_s ids) where make_undefined tp = - ValueExpr . AssignedValue <$> addAssignment (SetUndefined tp) + evalAssignRhs (SetUndefined tp) -- Get value of a location. get = getLoc @@ -1039,8 +1041,7 @@ instance S.Semantics (X86Generator st_s ids) where is_reverse_v <- eval is_reverse src_v <- eval src dest_v <- eval dest - ValueExpr . AssignedValue - <$> addArchFn (MemCmp sz count_v src_v dest_v is_reverse_v) + evalArchFn (MemCmp sz count_v src_v dest_v is_reverse_v) memset count val dest dfl = do count_v <- eval count @@ -1055,13 +1056,17 @@ instance S.Semantics (X86Generator st_s ids) where count_v <- eval count is_reverse_v <- eval is_reverse case is_reverse_v of - BoolValue False -> do - ValueExpr . AssignedValue <$> addArchFn (RepnzScas sz val_v buf_v count_v) - _ -> do + BoolValue False -> + evalArchFn (RepnzScas sz val_v buf_v count_v) + _ -> fail $ "Unsupported rep_scas value " ++ show is_reverse_v rep_scas False _is_reverse _sz _val _buf _count = do fail $ "Semantics only currently supports finding elements." + even_parity v = do + val_v <- eval v + evalArchFn (EvenParity val_v) + primitive S.Syscall = do shiftX86GCont $ \_ s0 -> do -- Get last block. @@ -1075,54 +1080,49 @@ instance S.Semantics (X86Generator st_s ids) where } primitive S.CPUID = do - rax_val <- modState $ use $ boundValue RAX + rax_val <- getReg RAX eax_val <- eval (S.bvTrunc' n32 (ValueExpr rax_val)) -- Call CPUID and get a 128-bit value back. - res <- ValueExpr . AssignedValue <$> addArchFn (CPUID eax_val) + res <- evalArchFn (CPUID eax_val) S.eax S..= S.bvTrunc n32 res S.ebx S..= S.bvTrunc n32 (res `S.bvShr` bvLit n128 32) S.ecx S..= S.bvTrunc n32 (res `S.bvShr` bvLit n128 64) S.edx S..= S.bvTrunc n32 (res `S.bvShr` bvLit n128 96) primitive S.RDTSC = do - res <- ValueExpr . AssignedValue <$> addArchFn RDTSC + res <- evalArchFn RDTSC S.edx S..= S.bvTrunc n32 (res `S.bvShr` bvLit n64 32) S.eax S..= S.bvTrunc n32 res primitive S.XGetBV = do ecx_val <- eval =<< S.get S.ecx - res <- ValueExpr . AssignedValue <$> addArchFn (XGetBV ecx_val) + res <- evalArchFn (XGetBV ecx_val) S.edx S..= S.bvTrunc n32 (res `S.bvShr` bvLit n64 32) S.eax S..= S.bvTrunc n32 res - fnstcw addr = do - addr_val <- eval addr - addArchStmt $ StoreX87Control addr_val + fnstcw addr = + addArchStmt =<< StoreX87Control <$> eval addr - pshufb w x y = do - x_val <- eval x - y_val <- eval y - ValueExpr . AssignedValue <$> addArchFn (PShufb w x_val y_val) + pshufb w x y = + evalArchFn =<< PShufb w <$> eval x <*> eval y getSegmentBase seg = case seg of - F.FS -> ValueExpr . AssignedValue <$> addArchFn ReadFSBase - F.GS -> ValueExpr . AssignedValue <$> addArchFn ReadGSBase + F.FS -> evalArchFn ReadFSBase + F.GS -> evalArchFn ReadGSBase _ -> error $ "X86_64 getSegmentBase " ++ show seg ++ ": unimplemented!" bvQuotRem rep n d = do nv <- eval n dv <- eval d - q <- ValueExpr . AssignedValue <$> addArchFn (X86Div rep nv dv) - r <- ValueExpr . AssignedValue <$> addArchFn (X86Rem rep nv dv) - pure (q,r) + (,) <$> evalArchFn (X86Div rep nv dv) + <*> evalArchFn (X86Rem rep nv dv) bvSignedQuotRem rep n d = do nv <- eval n dv <- eval d - q <- ValueExpr . AssignedValue <$> addArchFn (X86IDiv rep nv dv) - r <- ValueExpr . AssignedValue <$> addArchFn (X86IRem rep nv dv) - pure (q,r) + (,) <$> evalArchFn (X86IDiv rep nv dv) + <*> evalArchFn (X86IRem rep nv dv) -- exception :: Value m BoolType -- mask -- -> Value m BoolType -- predicate @@ -1136,18 +1136,16 @@ instance S.Semantics (X86Generator st_s ids) where v <- eval e topv <- getX87Top let new_top = fromIntegral $ (topv - 1) .&. 0x7 - modState $ do - -- TODO: Update tagWords - -- Store value at new top - boundValue (X87_FPUReg (F.mmxReg new_top)) .= v - -- Update top - boundValue X87_TopReg .= BVValue knownNat (toInteger new_top) + -- TODO: Update tagWords + -- Store value at new top + setReg (X87_FPUReg (F.mmxReg new_top)) v + -- Update top + setReg X87_TopReg (BVValue knownNat (toInteger new_top)) x87Pop = do topv <- getX87Top let new_top = (topv + 1) .&. 0x7 - modState $ do - -- Update top - boundValue X87_TopReg .= BVValue knownNat (toInteger new_top) + -- Update top + setReg X87_TopReg (BVValue knownNat (toInteger new_top)) initGenState :: NonceGenerator (ST st_s) ids -> MemSegmentOff 64 @@ -1190,38 +1188,43 @@ instance Show X86TranslateError where ++ Text.unpack msg where addr = show (transErrorAddr err) +returnWithError :: GenState st_s ids + -> MemSegmentOff 64 + -> X86TranslateErrorReason + -> ST st_s (BlockSeq ids, MemWord 64, Maybe X86TranslateError) +returnWithError gs curIPAddr rsn = + let err = X86TranslateError curIPAddr rsn + term = (`TranslateError` Text.pack (show err)) + b = finishBlock' (gs^.blockState) term + res = seq b $ gs^.blockSeq & frontierBlocks %~ (Seq.|> b) + in return (res, msegOffset curIPAddr, Just err) + -- | Disassemble block, returning list of blocks read so far, ending PC, and an optional error. -- and ending PC. disassembleBlockImpl :: forall st_s ids - . Memory 64 - -> GenState st_s ids + . GenState st_s ids -- ^ State information for disassembling. -> MemSegmentOff 64 -- ^ Address to disassemble -> MemWord 64 -- ^ Maximum offset for this addr. + -> [SegmentRange 64] + -- ^ List of contents to read next. -> ST st_s (BlockSeq ids, MemWord 64, Maybe X86TranslateError) -disassembleBlockImpl mem gs curIPAddr max_offset = do +disassembleBlockImpl gs curIPAddr max_offset contents = do let seg = msegSegment curIPAddr let off = msegOffset curIPAddr - let returnWithError rsn = - let err = X86TranslateError curIPAddr rsn - term = (`TranslateError` Text.pack (show err)) - b = finishBlock' (gs^.blockState) term - res = seq b $ gs^.blockSeq & frontierBlocks %~ (Seq.|> b) - in return (res, off, Just err) - case readInstruction mem curIPAddr of + case readInstruction' curIPAddr contents of Left msg -> do - returnWithError (DecodeError msg) + returnWithError gs curIPAddr (DecodeError msg) Right (i, next_ip_off) -> do let next_ip :: MemAddr 64 next_ip = relativeAddr seg next_ip_off let next_ip_val :: BVValue X86_64 ids 64 next_ip_val = RelocatableValue n64 next_ip - case execInstruction (ValueExpr next_ip_val) i of Nothing -> do - returnWithError (UnsupportedInstruction i) + returnWithError gs curIPAddr (UnsupportedInstruction i) Just exec -> do gsr <- runExceptT $ runX86Generator (\() s -> pure (mkGenResult s)) gs $ do @@ -1234,23 +1237,30 @@ disassembleBlockImpl mem gs curIPAddr max_offset = do exec case gsr of Left msg -> do - returnWithError (ExecInstructionError i msg) + returnWithError gs curIPAddr (ExecInstructionError i msg) Right res -> do case resState res of - -- If next ip is exactly the next_ip_val then keep running. + -- If IP after interpretation is the next_ip, there are no blocks, and we + -- haven't crossed max_offset, then keep running. Just p_b | Seq.null (resBlockSeq res ^. frontierBlocks) - , v <- p_b^.(pBlockState . curIP) - , v == next_ip_val + , RelocatableValue _ v <- p_b^.(pBlockState . curIP) + , v == next_ip -- Check to see if we should continue , next_ip_off < max_offset - , Just next_ip_segaddr <- asSegmentOff mem next_ip -> do + , Just next_ip_segaddr <- resolveSegmentOff seg next_ip_off -> do let gs2 = GenState { assignIdGen = assignIdGen gs , _blockSeq = resBlockSeq res , _blockState = p_b , genAddr = next_ip_segaddr } - disassembleBlockImpl mem gs2 next_ip_segaddr max_offset + case dropSegmentRangeListBytes (relativeSegmentAddr curIPAddr) + contents + (fromIntegral (next_ip_off - off)) of + Left msg -> do + returnWithError gs curIPAddr (DecodeError msg) + Right contents' -> + disassembleBlockImpl gs2 next_ip_segaddr max_offset contents' _ -> do let gs3 = finishBlock FetchAndExecute res return (gs3, next_ip_off, Nothing) @@ -1268,10 +1278,15 @@ disassembleBlock mem nonce_gen loc max_size = do let addr = loc_ip loc let gs = initGenState nonce_gen addr (initX86State loc) let sz = msegOffset addr + max_size - (gs', next_ip_off, maybeError) <- disassembleBlockImpl mem gs addr sz + (gs', next_ip_off, maybeError) <- + case addrContentsAfter mem (relativeSegmentAddr addr) of + Left msg -> + returnWithError gs addr (DecodeError msg) + Right contents -> + disassembleBlockImpl gs addr sz contents assert (next_ip_off > msegOffset addr) $ do let block_sz = next_ip_off - msegOffset addr - pure $ (Fold.toList (gs'^.frontierBlocks), block_sz, maybeError) + pure (Fold.toList (gs'^.frontierBlocks), block_sz, maybeError) -- | The abstract state for a function begining at a given address. initialX86AbsState :: MemSegmentOff 64 -> AbsBlockState X86Reg @@ -1301,6 +1316,7 @@ transferAbsValue :: AbsProcessorState X86Reg ids -> AbsValue 64 tp transferAbsValue r f = case f of + EvenParity _ -> TopV ReadLoc _ -> TopV ReadFSBase -> TopV ReadGSBase -> TopV @@ -1351,7 +1367,12 @@ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do } let gs = initGenState nonce_gen addr (initX86State loc) let off = msegOffset addr - (gs', next_ip_off, maybeError) <- lift $ disassembleBlockImpl mem gs addr (off + max_size) + (gs', next_ip_off, maybeError) <- lift $ + case addrContentsAfter mem (relativeSegmentAddr addr) of + Left msg -> + returnWithError gs addr (DecodeError msg) + Right contents -> do + disassembleBlockImpl gs addr (off + max_size) contents assert (next_ip_off > off) $ do let sz = next_ip_off - off let blocks = Fold.toList (gs'^.frontierBlocks) @@ -1461,6 +1482,21 @@ x86DemandContext = , archFnHasSideEffects = x86PrimFnHasSideEffects } + +postX86TermStmtAbsState :: (forall tp . X86Reg tp -> Bool) + -> AbsBlockState X86Reg + -> X86TermStmt ids + -> MemSegmentOff 64 + -> AbsBlockState X86Reg +postX86TermStmtAbsState preservePred s tstmt nextIP = + case tstmt of + X86Syscall -> + let params = CallParams { postCallStackDelta = 0 + , preserveReg = preservePred + } + in absEvalCall params s nextIP + + -- | Common architecture information for X86_64 x86_64_info :: (forall tp . X86Reg tp -> Bool) -- ^ Function that returns true if we should preserve a register across a system call. @@ -1471,7 +1507,6 @@ x86_64_info preservePred = , archEndianness = LittleEndian , jumpTableEntrySize = 8 , disassembleFn = disassembleBlockFromAbsState - , preserveRegAcrossSyscall = preservePred , mkInitialAbsState = \_ -> initialX86AbsState , absEvalArchFn = transferAbsValue , absEvalArchStmt = \s _ -> s @@ -1482,6 +1517,7 @@ x86_64_info preservePred = , rewriteArchStmt = rewriteX86Stmt , rewriteArchTermStmt = rewriteX86TermStmt , archDemandContext = x86DemandContext + , postArchTermStmtAbsState = postX86TermStmtAbsState preservePred } -- | Architecture information for X86_64 on FreeBSD. diff --git a/x86/src/Data/Macaw/X86/ArchTypes.hs b/x86/src/Data/Macaw/X86/ArchTypes.hs index 0b74eb61..6c663cd9 100644 --- a/x86/src/Data/Macaw/X86/ArchTypes.hs +++ b/x86/src/Data/Macaw/X86/ArchTypes.hs @@ -95,7 +95,9 @@ instance Pretty (X86PrimLoc tp) where -- | Defines primitive functions in the X86 format. data X86PrimFn f tp - = ReadLoc !(X86PrimLoc tp) + = (tp ~ BoolType) => EvenParity (f (BVType 8)) + -- ^ Return true if least-significant bit has even number of bits set. + | ReadLoc !(X86PrimLoc tp) -- ^ Read from a primitive X86 location | (tp ~ BVType 64) => ReadFSBase -- ^ Read the 'FS' base address @@ -186,6 +188,7 @@ data X86PrimFn f tp instance HasRepr (X86PrimFn f) TypeRepr where typeRepr f = case f of + EvenParity{} -> knownType ReadLoc loc -> typeRepr loc ReadFSBase -> knownType ReadGSBase -> knownType @@ -210,6 +213,7 @@ instance FoldableFC X86PrimFn where instance TraversableFC X86PrimFn where traverseFC go f = case f of + EvenParity x -> EvenParity <$> go x ReadLoc l -> pure (ReadLoc l) ReadFSBase -> pure ReadFSBase ReadGSBase -> pure ReadGSBase @@ -230,6 +234,7 @@ instance TraversableFC X86PrimFn where instance IsArchFn X86PrimFn where ppArchFn pp f = case f of + EvenParity x -> sexprA "even_parity" [ pp x ] ReadLoc loc -> pure $ pretty loc ReadFSBase -> pure $ text "fs.base" ReadGSBase -> pure $ text "gs.base" @@ -253,20 +258,21 @@ instance IsArchFn X86PrimFn where x86PrimFnHasSideEffects :: X86PrimFn f tp -> Bool x86PrimFnHasSideEffects f = case f of - ReadLoc{} -> False - ReadFSBase -> False - ReadGSBase -> False - CPUID{} -> False - RDTSC -> False - XGetBV{} -> False - PShufb{} -> False - MemCmp{} -> False - RepnzScas{} -> True - MMXExtend{} -> False - X86IDiv{} -> True -- To be conservative we treat the divide errors as side effects. - X86IRem{} -> True -- /\ .. - X86Div{} -> True -- /\ .. - X86Rem{} -> True -- /\ .. + EvenParity{} -> False + ReadLoc{} -> False + ReadFSBase -> False + ReadGSBase -> False + CPUID{} -> False + RDTSC -> False + XGetBV{} -> False + PShufb{} -> False + MemCmp{} -> False + RepnzScas{} -> True + MMXExtend{} -> False + X86IDiv{} -> True -- To be conservative we treat the divide errors as side effects. + X86IRem{} -> True -- /\ .. + X86Div{} -> True -- /\ .. + X86Rem{} -> True -- /\ .. ------------------------------------------------------------------------ -- X86Stmt @@ -344,6 +350,10 @@ rewriteX86PrimFn :: X86PrimFn (Value X86_64 src) tp -> Rewriter X86_64 src tgt (Value X86_64 tgt tp) rewriteX86PrimFn f = case f of + EvenParity (BVValue _ xv) -> do + let go 8 r = r + go i r = go (i+1) $! (xv `testBit` i /= r) + pure $ BoolValue (go 0 True) MMXExtend e -> do tgtExpr <- rewriteValue e case tgtExpr of diff --git a/x86/src/Data/Macaw/X86/Flexdis.hs b/x86/src/Data/Macaw/X86/Flexdis.hs index 09429c65..b55008e9 100644 --- a/x86/src/Data/Macaw/X86/Flexdis.hs +++ b/x86/src/Data/Macaw/X86/Flexdis.hs @@ -13,6 +13,7 @@ module Data.Macaw.X86.Flexdis ( MemoryByteReader , runMemoryByteReader , readInstruction + , readInstruction' ) where import Control.Monad.Except @@ -78,6 +79,22 @@ instance MemWidth w => Monad (MemoryByteReader w) where addr <- MBR $ gets msAddr throwError $ UserMemoryError addr msg +-- | Run a memory byte reader starting from the given offset and offset for next. +runMemoryByteReader' :: MemSegmentOff w -- ^ Starting segment + -> [SegmentRange w] -- ^ Data to read next. + -> MemoryByteReader w a -- ^ Byte reader to read values from. + -> Either (MemoryError w) (a, MemWord w) +runMemoryByteReader' addr contents (MBR m) = do + let ms0 = MS { msSegment = msegSegment addr + , msStart = msegOffset addr + , msPrev = emptyPrevData + , msOffset = msegOffset addr + , msNext = contents + } + case runState (runExceptT m) ms0 of + (Left e, _) -> Left e + (Right v, ms) -> Right (v, msOffset ms) + -- | Create a memory stream pointing to given address, and return pair whose -- first element is the value read or an error, and whose second element is -- the address of the next value to read. @@ -89,22 +106,14 @@ runMemoryByteReader :: Memory w -> MemSegmentOff w -- ^ Starting segment -> MemoryByteReader w a -- ^ Byte reader to read values from. -> Either (MemoryError w) (a, MemWord w) -runMemoryByteReader mem reqPerm addr (MBR m) = do +runMemoryByteReader mem reqPerm addr m = do addrWidthClass (memAddrWidth mem) $ do let seg = msegSegment addr if not (segmentFlags seg `Perm.hasPerm` reqPerm) then Left $ PermissionsError (relativeSegmentAddr addr) else do contents <- addrContentsAfter mem (relativeSegmentAddr addr) - let ms0 = MS { msSegment = seg - , msStart = msegOffset addr - , msPrev = emptyPrevData - , msOffset = msegOffset addr - , msNext = contents - } - case runState (runExceptT m) ms0 of - (Left e, _) -> Left e - (Right v, ms) -> Right (v, msOffset ms) + runMemoryByteReader' addr contents m instance MemWidth w => ByteReader (MemoryByteReader w) where readByte = do @@ -133,11 +142,26 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where ------------------------------------------------------------------------ -- readInstruction + +-- | Read instruction at a given memory address. +readInstruction' :: MemSegmentOff 64 + -- ^ Address to read from. + -> [SegmentRange 64] -- ^ Data to read next. + -> Either (MemoryError 64) + (Flexdis.InstructionInstance, MemWord 64) +readInstruction' addr contents = do + let seg = msegSegment addr + if not (segmentFlags seg `Perm.hasPerm` Perm.execute) then + Left $ PermissionsError (relativeSegmentAddr addr) + else do + runMemoryByteReader' addr contents Flexdis.disassembleInstruction + -- | Read instruction at a given memory address. readInstruction :: Memory 64 -> MemSegmentOff 64 -- ^ Address to read from. -> Either (MemoryError 64) (Flexdis.InstructionInstance, MemWord 64) -readInstruction mem addr = runMemoryByteReader mem Perm.execute addr m - where m = Flexdis.disassembleInstruction +readInstruction mem addr = do + readInstruction' addr + =<< addrContentsAfter mem (relativeSegmentAddr addr) diff --git a/x86/src/Data/Macaw/X86/Monad.hs b/x86/src/Data/Macaw/X86/Monad.hs index 82bd4503..aaca9143 100644 --- a/x86/src/Data/Macaw/X86/Monad.hs +++ b/x86/src/Data/Macaw/X86/Monad.hs @@ -997,12 +997,9 @@ class IsValue (v :: Type -> *) where least_byte :: forall n . (8 <= n) => v (BVType n) -> v (BVType 8) least_byte = bvTrunc knownNat - -- | Return true if value contains an even number of true bits. - even_parity :: v (BVType 8) -> v BoolType - -- | Reverse the bytes in a bitvector expression. -- The parameter n should be a multiple of 8. - reverse_bytes :: (1 <= n) => v (BVType n) -> v (BVType n) + reverse_bytes :: (1 <= n) => NatRepr n -> v (BVType (8*n)) -> v (BVType (8*n)) -- | Return true expression is signed add overflows. See -- @sadc_overflows@ for definition. @@ -1432,6 +1429,8 @@ class ( Applicative m -- ^ Maximum number of elementes to compare -> m (Value m (BVType 64)) + -- | Return true if value contains an even number of true bits. + even_parity :: Value m (BVType 8) -> m (Value m BoolType) -- | execute a primitive instruction. primitive :: Primitive -> m () diff --git a/x86/src/Data/Macaw/X86/Semantics.hs b/x86/src/Data/Macaw/X86/Semantics.hs index e0112eef..b543a603 100644 --- a/x86/src/Data/Macaw/X86/Semantics.hs +++ b/x86/src/Data/Macaw/X86/Semantics.hs @@ -72,7 +72,7 @@ set_result_flags :: IsLocationBV m n => Value m (BVType n) -> m () set_result_flags res = do sf_loc .= msb res zf_loc .= is_zero res - pf_loc .= even_parity (least_byte res) + (pf_loc .=) =<< even_parity (least_byte res) -- | Assign value to location and update corresponding flags. set_result_value :: IsLocationBV m n => MLocation m (BVType n) -> Value m (BVType n) -> m () @@ -712,7 +712,8 @@ exec_sh lw l val val_setter cf_setter of_setter = do -- Set result flags modify sf_loc $ mux isNonzero (msb res) modify zf_loc $ mux isNonzero (is_zero res) - modify pf_loc $ mux isNonzero (even_parity (least_byte res)) + p <- even_parity (least_byte res) + modify pf_loc $ mux isNonzero p modify l $ mux isNonzero res def_sh :: String