From 26b11c2025a4a4e6583d9daacd2ead3a4d4415e2 Mon Sep 17 00:00:00 2001 From: Lars Jellema Date: Wed, 18 Sep 2019 13:47:36 +0200 Subject: [PATCH] Store NVD in a SQLite database --- nixpkgs-update.nix | 15 +-- package.yaml | 1 + src/CVE.hs | 227 ++++++++++++++++++++++++++++++++------------- src/NVD.hs | 88 ++++++++++++++---- src/Utils.hs | 32 ++++++- 5 files changed, 273 insertions(+), 90 deletions(-) diff --git a/nixpkgs-update.nix b/nixpkgs-update.nix index 9647b89..88d9dd0 100644 --- a/nixpkgs-update.nix +++ b/nixpkgs-update.nix @@ -2,9 +2,9 @@ , cryptohash-sha256, directory, doctest, errors, filepath, github , hex, hpack, http-conduit, iso8601-time, lifted-base, mtl , neat-interpolation, optparse-applicative, parsec, parsers -, polysemy, regex-applicative-text, shelly, stdenv -, template-haskell, text, time, transformers, typed-process, unix -, vector, xdg-basedir, zlib +, polysemy, regex-applicative-text, sqlite-simple, stdenv +, template-haskell, temporary, text, time, transformers +, typed-process, unix, vector, xdg-basedir, zlib }: mkDerivation { pname = "nixpkgs-update"; @@ -17,16 +17,17 @@ mkDerivation { aeson base bytestring conduit containers cryptohash-sha256 directory errors filepath github hex http-conduit iso8601-time lifted-base mtl neat-interpolation optparse-applicative parsec - parsers polysemy regex-applicative-text shelly template-haskell - text time transformers typed-process unix vector xdg-basedir zlib + parsers polysemy regex-applicative-text sqlite-simple + template-haskell temporary text time transformers typed-process + unix vector xdg-basedir zlib ]; testHaskellDepends = [ aeson base bytestring conduit containers cryptohash-sha256 directory doctest errors filepath github hex http-conduit iso8601-time lifted-base mtl neat-interpolation optparse-applicative parsec parsers polysemy regex-applicative-text - shelly template-haskell text time transformers typed-process unix - vector xdg-basedir zlib + sqlite-simple template-haskell temporary text time transformers + typed-process unix vector xdg-basedir zlib ]; prePatch = "hpack"; homepage = "https://github.com/ryantm/nixpkgs-update#readme"; diff --git a/package.yaml b/package.yaml index 2201260..d51f25b 100644 --- a/package.yaml +++ b/package.yaml @@ -53,6 +53,7 @@ dependencies: - polysemy - polysemy-plugin - regex-applicative-text + - sqlite-simple - template-haskell - text - time >= 1.8 && < 1.10 diff --git a/src/CVE.hs b/src/CVE.hs index cbec680..c432c58 100644 --- a/src/CVE.hs +++ b/src/CVE.hs @@ -4,7 +4,8 @@ module CVE ( parseFeed - , CVE + , CVE(..) + , cveMatcherList ) where import OurPrelude @@ -12,30 +13,38 @@ import OurPrelude import Data.Aeson ( FromJSON , Object + , (.!=) , (.:) , (.:!) , eitherDecode , parseJSON , withObject - , withText ) -import Data.Aeson.Types (Parser) -import Data.Bifunctor (bimap) -import qualified Data.ByteString.Lazy.Char8 as BSL +import Data.Aeson.Types (Parser, prependFailure) +import Data.Bifunctor (bimap, second) +import Data.List (intercalate) import Data.Map (Map) -import qualified Data.Map as M -import qualified Data.Text as T +import Data.Set (Set) import Data.Text.Read (decimal) import Data.Time.Clock (UTCTime) +import Database.SQLite.Simple (FromRow, SQLData, ToRow, field, fromRow, toRow) + import Utils (Boundary(..), ProductID, VersionMatcher(..)) +import qualified Data.ByteString.Lazy.Char8 as BSL +import qualified Data.Map as M +import qualified Data.Set as S +import qualified Data.Text as T + +type CVEID = Text + data CVE = CVE - { cveID :: Text + { cveID :: CVEID , cveYear :: Int , cveSubID :: Int - , cveAffects :: Map ProductID [VersionMatcher] - , cveDescriptions :: Text + , cveMatchers :: Map ProductID (Set VersionMatcher) + , cveDescription :: Text , cveCPEs :: [CPE] , cvePublished :: UTCTime , cveLastModified :: UTCTime @@ -44,22 +53,71 @@ data CVE = data CPE = CPE - { cpePart :: Text - , cpeVendor :: Text - , cpeProduct :: Text - , cpeVersion :: Text - , cpeUpdate :: Text - , cpeEdition :: Text - , cpeLanguage :: Text - , cpeSoftwareEdition :: Text - , cpeTargetSoftware :: Text - , cpeTargetHardware :: Text - , cpeOther :: Text + { cpeVulnerable :: Bool + , cpePart :: Maybe Text + , cpeVendor :: Maybe Text + , cpeProduct :: Maybe Text + , cpeVersion :: Maybe Text + , cpeUpdate :: Maybe Text + , cpeEdition :: Maybe Text + , cpeLanguage :: Maybe Text + , cpeSoftwareEdition :: Maybe Text + , cpeTargetSoftware :: Maybe Text + , cpeTargetHardware :: Maybe Text + , cpeOther :: Maybe Text + , cpeMatcher :: Maybe VersionMatcher } - deriving (Show) --- type CVEID = Text ---type CVEIndex = Map ProductID [(VersionMatcher, [CVEID])] +data MatcherRow = + MatcherRow CVEID ProductID VersionMatcher + +instance ToRow MatcherRow where + toRow (MatcherRow cveID productID matcher) = toRow (cveID, productID, matcher) + +instance FromRow MatcherRow where + fromRow = MatcherRow <$> field <*> field <*> field + +cveMatcherList :: CVE -> [MatcherRow] +cveMatcherList CVE {cveID, cveMatchers} = do + (productID, matchers) <- M.assocs cveMatchers + matcher <- S.elems matchers + return $ MatcherRow cveID productID matcher + +instance Show CPE where + show CPE { cpePart + , cpeVendor + , cpeProduct + , cpeVersion + , cpeUpdate + , cpeEdition + , cpeLanguage + , cpeSoftwareEdition + , cpeTargetSoftware + , cpeTargetHardware + , cpeOther + , cpeMatcher + } = + "CPE {" <> + (intercalate ", " . concat) + [ cpeField "part" cpePart + , cpeField "vendor" cpeVendor + , cpeField "product" cpeProduct + , cpeField "version" cpeVersion + , cpeField "update" cpeUpdate + , cpeField "edition" cpeEdition + , cpeField "language" cpeLanguage + , cpeField "softwareEdition" cpeSoftwareEdition + , cpeField "targetSoftware" cpeTargetSoftware + , cpeField "targetHardware" cpeTargetHardware + , cpeField "other" cpeOther + , cpeField "matcher" cpeMatcher + ] <> + "}" + where + cpeField :: Show a => String -> Maybe a -> [String] + cpeField _ Nothing = [] + cpeField name (Just value) = [name <> " = " <> show value] + eitherToParser :: Either String a -> Parser a eitherToParser (Left e) = fail e eitherToParser (Right a) = pure a @@ -73,8 +131,8 @@ cveIDNumbers rest0 = do guard $ T.null rest4 pure (year, subid) -parseDescriptions :: Object -> Parser Text -parseDescriptions o = do +parseDescription :: Object -> Parser Text +parseDescription o = do dData <- o .: "description_data" descriptions <- fmap concat $ @@ -86,28 +144,24 @@ parseDescriptions o = do case lang of "en" -> [value] _ -> [] - case descriptions of - [d] -> pure d - -- [] -> pure "" - _ -> fail "multiple english descriptions" + pure $ T.intercalate "\n\n" descriptions --- foobar :: (FromJSON a) => [a] -> (a -> Parser [b]) -> Parser [b] --- foobar l f = fmap concat $ sequence $ map f l -parseAffects :: Object -> Parser (Map ProductID [VersionMatcher]) +parseAffects :: Object -> Parser (Map ProductID (Set VersionMatcher)) parseAffects o = do vendor <- o .: "vendor" vendorData <- vendor .: "vendor_data" - fmap (M.fromListWith (<>) . concat) $ + fmap (M.fromListWith S.union . concat) $ sequence $ flip map vendorData $ \v -> do - productPart <- v .: "product" - productData <- productPart .: "product_data" + product_ <- v .: "product" + productData <- product_ .: "product_data" sequence $ flip map productData $ \p -> do productID <- p .: "product_name" version <- p .: "version" versionData <- version .: "version_data" matchers <- + fmap S.fromList $ sequence $ flip map versionData $ \ver -> do value <- ver .: "version_value" @@ -118,31 +172,86 @@ parseAffects o = do _ -> fail $ "unknown version comparator: " <> show affected pure (productID, matchers) +-- TODO: We ignore update, edition and softwareEdition for now, but they might +-- be relevant. +cpeToMatcher :: CPE -> Map ProductID (Set VersionMatcher) +cpeToMatcher CPE {cpeProduct = Just p, cpeVersion = Just v, cpeMatcher = Just m} = + M.singleton p $ S.fromList [FuzzyMatcher v, m] +cpeToMatcher CPE {cpeProduct = Just p, cpeVersion = Just v} = + M.singleton p $ S.fromList [FuzzyMatcher v] +cpeToMatcher CPE {cpeProduct = Just p, cpeMatcher = Just m} = + M.singleton p $ S.fromList [m] +cpeToMatcher _ = M.empty + +cpeMatchers :: [CPE] -> Map ProductID (Set VersionMatcher) +cpeMatchers = M.unionsWith S.union . map cpeToMatcher + instance FromJSON CVE where parseJSON = withObject "CVE" $ \o -> do cve <- o .: "cve" - cfgs <- o .: "configurations" - cveCPEs <- parseConfigurations cfgs meta <- cve .: "CVE_data_meta" cveID <- meta .: "ID" - cvePublished <- o .: "publishedDate" - cveLastModified <- o .: "lastModifiedDate" - (cveYear, cveSubID) <- eitherToParser $ cveIDNumbers cveID - description <- cve .: "description" - cveDescriptions <- parseDescriptions description - affects <- cve .: "affects" - cveAffects <- parseAffects affects - pure CVE {..} + prependFailure (T.unpack cveID <> ": ") $ do + cfgs <- o .: "configurations" + cveCPEs <- parseConfigurations cfgs + cvePublished <- o .: "publishedDate" + cveLastModified <- o .: "lastModifiedDate" + (cveYear, cveSubID) <- eitherToParser $ cveIDNumbers cveID + description <- cve .: "description" + cveDescription <- parseDescription description + affects <- cve .: "affects" + affectMatchers <- parseAffects affects + let cveMatchers = + M.unionWith S.union affectMatchers (cpeMatchers cveCPEs) + pure CVE {..} -splitCPE :: Text -> [Text] -splitCPE = map (T.replace "\a" ":") . T.splitOn ":" . T.replace "\\:" "\a" +instance ToRow CVE where + toRow CVE {cveID, cveDescription, cvePublished, cveLastModified} = + toRow (cveID, cveDescription, cvePublished, cveLastModified) + +instance FromRow CVE where + fromRow = do + cveID <- field + cveDescription <- field + cvePublished <- field + cveLastModified <- field + pure CVE {..} + +splitCPE :: Text -> [Maybe Text] +splitCPE = + map (toMaybe . T.replace "\a" ":") . T.splitOn ":" . T.replace "\\:" "\a" + where + toMaybe "*" = Nothing + toMaybe x = Just x instance FromJSON CPE where parseJSON = - withText "CPE" $ \t -> do + withObject "CPE" $ \o -> do + t <- o .: "cpe23Uri" + cpeVulnerable <- o .: "vulnerable" + vStartIncluding <- o .:! "versionStartIncluding" + vEndIncluding <- o .:! "versionEndIncluding" + vStartExcluding <- o .:! "versionStartExcluding" + vEndExcluding <- o .:! "versionEndExcluding" + startBoundary <- + case (vStartIncluding, vStartExcluding) of + (Nothing, Nothing) -> pure Unbounded + (Just start, Nothing) -> pure (Including start) + (Nothing, Just start) -> pure (Including start) + (Just _, Just _) -> fail "multiple starts" + endBoundary <- + case (vEndIncluding, vEndExcluding) of + (Nothing, Nothing) -> pure Unbounded + (Just end, Nothing) -> pure (Including end) + (Nothing, Just end) -> pure (Including end) + (Just _, Just _) -> fail "multiple ends" + let cpeMatcher = + case (startBoundary, endBoundary) of + (Unbounded, Unbounded) -> Nothing + (start, end) -> Just (RangeMatcher start end) case splitCPE t of - ["cpe", "2.3", cpePart, cpeVendor, cpeProduct, cpeVersion, cpeUpdate, cpeEdition, cpeLanguage, cpeSoftwareEdition, cpeTargetSoftware, cpeTargetHardware, cpeOther] -> + [Just "cpe", Just "2.3", cpePart, cpeVendor, cpeProduct, cpeVersion, cpeUpdate, cpeEdition, cpeLanguage, cpeSoftwareEdition, cpeTargetSoftware, cpeTargetHardware, cpeOther] -> pure CPE {..} _ -> fail $ "unparsable CPE: " <> T.unpack t @@ -151,9 +260,9 @@ guardAttr object attribute expected = do actual <- object .: attribute unless (actual == expected) $ fail $ - "unexpected " <> - T.unpack attribute <> - ", expected " <> show expected <> ", got " <> show actual + "unexpected " <> T.unpack attribute <> ", expected " <> show expected <> + ", got " <> + show actual -- TODO: Determine how nodes work exactly. What does AND mean and what does -- vulnerable: false mean? For now, we assume everything with vulnerable: true @@ -165,16 +274,8 @@ parseNode node = do parseNode' :: (Maybe [Object]) -> Object -> Parser [CPE] parseNode' Nothing node = do - matches <- node .: "cpe_match" - fmap concat $ - sequence $ - flip map matches $ \match -> do - vulnerable <- match .: "vulnerable" - cpe <- match .: "cpe23Uri" - pure $ - if vulnerable - then [cpe] - else [] + matches <- node .:! "cpe_match" .!= [] + pure $ filter cpeVulnerable matches parseNode' (Just children) _ = do fmap concat $ sequence $ map parseNode children diff --git a/src/NVD.hs b/src/NVD.hs index af8b086..e0ddc74 100644 --- a/src/NVD.hs +++ b/src/NVD.hs @@ -4,7 +4,7 @@ module NVD where import OurPrelude -import CVE (CVE, parseFeed) +import CVE (CVE(..), cveMatcherList, parseFeed) import Codec.Compression.GZip (decompress) import Control.Exception (ioError, try) import Crypto.Hash.SHA256 (hashlazy) @@ -22,6 +22,7 @@ import Data.Time.Clock , utctDay ) import Data.Time.ISO8601 (parseISO8601) +import Database.SQLite.Simple as DB import Network.HTTP.Conduit (simpleHttp) import System.Directory ( XdgDirectory(..) @@ -49,6 +50,36 @@ type MaxAge = NominalDiffTime data Meta = Meta Timestamp Checksum +withDB :: (DB.Connection -> IO a) -> IO a +withDB action = do + cacheDir <- liftIO $ getXdgDirectory XdgCache "nixpkgs-update/nvd" + createDirectoryIfMissing True cacheDir + DB.withConnection (cacheDir "db.sqlite3") $ \conn -> do + execute_ conn $ + Query $ + T.unlines + [ "CREATE TABLE IF NOT EXISTS cves (" + , " cve_id text PRIMARY KEY," + , " description text," + , " published text," + , " modified text)" + ] + execute_ conn $ + Query $ + T.unlines + [ "CREATE TABLE IF NOT EXISTS matchers (" + , " cve_id text," + , " product_id text," + , " matcher text)" + ] + execute_ conn $ + Query $ + T.unlines + [ "CREATE INDEX IF NOT EXISTS matchers_by_product_id" + , "ON matchers(product_id)" + ] + action conn + feedURL :: FeedID -> Extension -> String feedURL feed ext = "https://nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-" <> feed <> ext @@ -84,23 +115,43 @@ getMeta feed = do either throwText pure $ parseMeta raw getCVEs :: (ProductID, Version) -> IO [CVE] -getCVEs (_product, _) = do +getCVEs (product, version) = do years <- allYears - _feeds <- sequence $ map (cacheFeed (7 * nominalDay)) years + feeds <- sequence $ map (cacheFeed (7 * nominalDay)) years return [] updateVulnDB :: IO () -updateVulnDB - -- This will be enough to develop with. - = do - feed <- cacheFeed (99 * nominalDay) "2019" - putStrLn $ "loading data" - parsed <- either throwText pure $ parseFeed feed - print $ head parsed - -- putStrLn $ "checking feed cache" +updateVulnDB = + withDB $ \conn -> do + putStrLn $ "checking feed cache" -- years <- allYears -- feeds <- sequence $ map (cacheFeed (7 * nominalDay)) years - return () + feeds <- sequence $ map (cacheFeed (99 * nominalDay)) ["2019"] + putStrLn $ "loading data" + parsed <- sequence $ map (either throwText pure . parseFeed) feeds + let cves = take 10 $ head parsed + executeMany + conn + (Query $ + T.unlines + [ "REPLACE INTO cves(cve_id, description, published, modified)" + , "VALUES (?, ?, ?, ?)" + ]) + cves + executeMany + conn + (Query $ T.unlines ["DELETE FROM matchers", "WHERE cve_id = ?"]) + (map (Only . cveID) cves) + executeMany + conn + (Query $ + T.unlines + [ "INSERT INTO matchers(cve_id, product_id, matcher)" + , "VALUES (?, ?, ?)" + ]) + (concatMap cveMatcherList cves) + print $ head $ head parsed + return () getCacheFile :: MonadIO m => FeedID -> m FilePath getCacheFile feed = do @@ -112,22 +163,23 @@ cacheFeed :: MonadIO m => MaxAge -> FeedID -> m BSL.ByteString cacheFeed maxAge feed = do cacheFile <- getCacheFile feed cacheTime <- liftIO $ try $ getModificationTime cacheFile - Meta newestTime expectedChecksum <- getMeta feed + currentTime <- liftIO getCurrentTime let needsUpdate = case cacheTime of Left (_ :: IOError) -> True - Right t -> diffUTCTime newestTime t > maxAge + Right t -> diffUTCTime currentTime t > maxAge if needsUpdate then do - liftIO $ putStrLn $ "updating feed: " <> feed + liftIO $ putStrLn $ "updating feed " <> feed + Meta _ expectedChecksum <- getMeta feed compressed <- simpleHttp $ feedURL feed ".json.gz" let raw = decompress compressed let actualChecksum = BSL.fromStrict $ hashlazy raw when (actualChecksum /= expectedChecksum) $ throwString $ - "wrong hash, expected: " <> - BSL.unpack (hex expectedChecksum) <> - " got: " <> BSL.unpack (hex actualChecksum) + "wrong hash, expected: " <> BSL.unpack (hex expectedChecksum) <> + " got: " <> + BSL.unpack (hex actualChecksum) liftIO $ BSL.writeFile cacheFile raw return raw else do diff --git a/src/Utils.hs b/src/Utils.hs index e4613ce..1e84e45 100644 --- a/src/Utils.hs +++ b/src/Utils.hs @@ -26,6 +26,16 @@ import OurPrelude import Data.Bits ((.|.)) import qualified Data.Text as T +import Database.SQLite.Simple (ResultError(..), SQLData(..)) +import Database.SQLite.Simple.FromField + ( FieldParser + , FromField + , fromField + , returnError + ) +import Database.SQLite.Simple.Internal (Field(..)) +import Database.SQLite.Simple.Ok (Ok(..)) +import Database.SQLite.Simple.ToField (ToField, toField) import System.Directory (doesDirectoryExist, setCurrentDirectory) import System.Environment.XDG.BaseDir import System.Posix.Directory (createDirectory) @@ -41,6 +51,8 @@ import System.Posix.Files import System.Posix.Temp (mkdtemp) import System.Posix.Types (FileMode) import qualified System.Process.Typed +import Text.Read (readEither) +import Type.Reflection (Typeable) default (T.Text) @@ -55,7 +67,7 @@ data Boundary a = Unbounded | Including a | Excluding a - deriving (Eq, Ord, Show) + deriving (Eq, Ord, Show, Read) -- | The Ord instance is used to sort lists of matchers in order to compare them -- as a set, it is not useful for comparing versions. @@ -63,7 +75,23 @@ data VersionMatcher = ExactMatcher Version | FuzzyMatcher Version | RangeMatcher (Boundary Version) (Boundary Version) - deriving (Eq, Ord, Show) + deriving (Eq, Ord, Show, Read) + +readField :: (Read a, Typeable a) => FieldParser a +readField f@(Field (SQLText t) _) = + case readEither (T.unpack t) of + Right x -> Ok x + Left e -> returnError ConversionFailed f $ "read error: " <> e +readField f = returnError ConversionFailed f "expecting SQLText column type" + +showField :: Show a => a -> SQLData +showField = toField . show + +instance FromField VersionMatcher where + fromField = readField + +instance ToField VersionMatcher where + toField = showField data Options = Options