Store NVD in a SQLite database

This commit is contained in:
Lars Jellema 2019-09-18 13:47:36 +02:00
parent 3950aa3f36
commit 26b11c2025
No known key found for this signature in database
GPG Key ID: 563A03936D48B4BC
5 changed files with 273 additions and 90 deletions

View File

@ -2,9 +2,9 @@
, cryptohash-sha256, directory, doctest, errors, filepath, github
, hex, hpack, http-conduit, iso8601-time, lifted-base, mtl
, neat-interpolation, optparse-applicative, parsec, parsers
, polysemy, regex-applicative-text, shelly, stdenv
, template-haskell, text, time, transformers, typed-process, unix
, vector, xdg-basedir, zlib
, polysemy, regex-applicative-text, sqlite-simple, stdenv
, template-haskell, temporary, text, time, transformers
, typed-process, unix, vector, xdg-basedir, zlib
}:
mkDerivation {
pname = "nixpkgs-update";
@ -17,16 +17,17 @@ mkDerivation {
aeson base bytestring conduit containers cryptohash-sha256
directory errors filepath github hex http-conduit iso8601-time
lifted-base mtl neat-interpolation optparse-applicative parsec
parsers polysemy regex-applicative-text shelly template-haskell
text time transformers typed-process unix vector xdg-basedir zlib
parsers polysemy regex-applicative-text sqlite-simple
template-haskell temporary text time transformers typed-process
unix vector xdg-basedir zlib
];
testHaskellDepends = [
aeson base bytestring conduit containers cryptohash-sha256
directory doctest errors filepath github hex http-conduit
iso8601-time lifted-base mtl neat-interpolation
optparse-applicative parsec parsers polysemy regex-applicative-text
shelly template-haskell text time transformers typed-process unix
vector xdg-basedir zlib
sqlite-simple template-haskell temporary text time transformers
typed-process unix vector xdg-basedir zlib
];
prePatch = "hpack";
homepage = "https://github.com/ryantm/nixpkgs-update#readme";

View File

@ -53,6 +53,7 @@ dependencies:
- polysemy
- polysemy-plugin
- regex-applicative-text
- sqlite-simple
- template-haskell
- text
- time >= 1.8 && < 1.10

View File

@ -4,7 +4,8 @@
module CVE
( parseFeed
, CVE
, CVE(..)
, cveMatcherList
) where
import OurPrelude
@ -12,30 +13,38 @@ import OurPrelude
import Data.Aeson
( FromJSON
, Object
, (.!=)
, (.:)
, (.:!)
, eitherDecode
, parseJSON
, withObject
, withText
)
import Data.Aeson.Types (Parser)
import Data.Bifunctor (bimap)
import qualified Data.ByteString.Lazy.Char8 as BSL
import Data.Aeson.Types (Parser, prependFailure)
import Data.Bifunctor (bimap, second)
import Data.List (intercalate)
import Data.Map (Map)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Set (Set)
import Data.Text.Read (decimal)
import Data.Time.Clock (UTCTime)
import Database.SQLite.Simple (FromRow, SQLData, ToRow, field, fromRow, toRow)
import Utils (Boundary(..), ProductID, VersionMatcher(..))
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
type CVEID = Text
data CVE =
CVE
{ cveID :: Text
{ cveID :: CVEID
, cveYear :: Int
, cveSubID :: Int
, cveAffects :: Map ProductID [VersionMatcher]
, cveDescriptions :: Text
, cveMatchers :: Map ProductID (Set VersionMatcher)
, cveDescription :: Text
, cveCPEs :: [CPE]
, cvePublished :: UTCTime
, cveLastModified :: UTCTime
@ -44,22 +53,71 @@ data CVE =
data CPE =
CPE
{ cpePart :: Text
, cpeVendor :: Text
, cpeProduct :: Text
, cpeVersion :: Text
, cpeUpdate :: Text
, cpeEdition :: Text
, cpeLanguage :: Text
, cpeSoftwareEdition :: Text
, cpeTargetSoftware :: Text
, cpeTargetHardware :: Text
, cpeOther :: Text
{ cpeVulnerable :: Bool
, cpePart :: Maybe Text
, cpeVendor :: Maybe Text
, cpeProduct :: Maybe Text
, cpeVersion :: Maybe Text
, cpeUpdate :: Maybe Text
, cpeEdition :: Maybe Text
, cpeLanguage :: Maybe Text
, cpeSoftwareEdition :: Maybe Text
, cpeTargetSoftware :: Maybe Text
, cpeTargetHardware :: Maybe Text
, cpeOther :: Maybe Text
, cpeMatcher :: Maybe VersionMatcher
}
deriving (Show)
-- type CVEID = Text
--type CVEIndex = Map ProductID [(VersionMatcher, [CVEID])]
data MatcherRow =
MatcherRow CVEID ProductID VersionMatcher
instance ToRow MatcherRow where
toRow (MatcherRow cveID productID matcher) = toRow (cveID, productID, matcher)
instance FromRow MatcherRow where
fromRow = MatcherRow <$> field <*> field <*> field
cveMatcherList :: CVE -> [MatcherRow]
cveMatcherList CVE {cveID, cveMatchers} = do
(productID, matchers) <- M.assocs cveMatchers
matcher <- S.elems matchers
return $ MatcherRow cveID productID matcher
instance Show CPE where
show CPE { cpePart
, cpeVendor
, cpeProduct
, cpeVersion
, cpeUpdate
, cpeEdition
, cpeLanguage
, cpeSoftwareEdition
, cpeTargetSoftware
, cpeTargetHardware
, cpeOther
, cpeMatcher
} =
"CPE {" <>
(intercalate ", " . concat)
[ cpeField "part" cpePart
, cpeField "vendor" cpeVendor
, cpeField "product" cpeProduct
, cpeField "version" cpeVersion
, cpeField "update" cpeUpdate
, cpeField "edition" cpeEdition
, cpeField "language" cpeLanguage
, cpeField "softwareEdition" cpeSoftwareEdition
, cpeField "targetSoftware" cpeTargetSoftware
, cpeField "targetHardware" cpeTargetHardware
, cpeField "other" cpeOther
, cpeField "matcher" cpeMatcher
] <>
"}"
where
cpeField :: Show a => String -> Maybe a -> [String]
cpeField _ Nothing = []
cpeField name (Just value) = [name <> " = " <> show value]
eitherToParser :: Either String a -> Parser a
eitherToParser (Left e) = fail e
eitherToParser (Right a) = pure a
@ -73,8 +131,8 @@ cveIDNumbers rest0 = do
guard $ T.null rest4
pure (year, subid)
parseDescriptions :: Object -> Parser Text
parseDescriptions o = do
parseDescription :: Object -> Parser Text
parseDescription o = do
dData <- o .: "description_data"
descriptions <-
fmap concat $
@ -86,28 +144,24 @@ parseDescriptions o = do
case lang of
"en" -> [value]
_ -> []
case descriptions of
[d] -> pure d
-- [] -> pure ""
_ -> fail "multiple english descriptions"
pure $ T.intercalate "\n\n" descriptions
-- foobar :: (FromJSON a) => [a] -> (a -> Parser [b]) -> Parser [b]
-- foobar l f = fmap concat $ sequence $ map f l
parseAffects :: Object -> Parser (Map ProductID [VersionMatcher])
parseAffects :: Object -> Parser (Map ProductID (Set VersionMatcher))
parseAffects o = do
vendor <- o .: "vendor"
vendorData <- vendor .: "vendor_data"
fmap (M.fromListWith (<>) . concat) $
fmap (M.fromListWith S.union . concat) $
sequence $
flip map vendorData $ \v -> do
productPart <- v .: "product"
productData <- productPart .: "product_data"
product_ <- v .: "product"
productData <- product_ .: "product_data"
sequence $
flip map productData $ \p -> do
productID <- p .: "product_name"
version <- p .: "version"
versionData <- version .: "version_data"
matchers <-
fmap S.fromList $
sequence $
flip map versionData $ \ver -> do
value <- ver .: "version_value"
@ -118,31 +172,86 @@ parseAffects o = do
_ -> fail $ "unknown version comparator: " <> show affected
pure (productID, matchers)
-- TODO: We ignore update, edition and softwareEdition for now, but they might
-- be relevant.
cpeToMatcher :: CPE -> Map ProductID (Set VersionMatcher)
cpeToMatcher CPE {cpeProduct = Just p, cpeVersion = Just v, cpeMatcher = Just m} =
M.singleton p $ S.fromList [FuzzyMatcher v, m]
cpeToMatcher CPE {cpeProduct = Just p, cpeVersion = Just v} =
M.singleton p $ S.fromList [FuzzyMatcher v]
cpeToMatcher CPE {cpeProduct = Just p, cpeMatcher = Just m} =
M.singleton p $ S.fromList [m]
cpeToMatcher _ = M.empty
cpeMatchers :: [CPE] -> Map ProductID (Set VersionMatcher)
cpeMatchers = M.unionsWith S.union . map cpeToMatcher
instance FromJSON CVE where
parseJSON =
withObject "CVE" $ \o -> do
cve <- o .: "cve"
cfgs <- o .: "configurations"
cveCPEs <- parseConfigurations cfgs
meta <- cve .: "CVE_data_meta"
cveID <- meta .: "ID"
cvePublished <- o .: "publishedDate"
cveLastModified <- o .: "lastModifiedDate"
(cveYear, cveSubID) <- eitherToParser $ cveIDNumbers cveID
description <- cve .: "description"
cveDescriptions <- parseDescriptions description
affects <- cve .: "affects"
cveAffects <- parseAffects affects
pure CVE {..}
prependFailure (T.unpack cveID <> ": ") $ do
cfgs <- o .: "configurations"
cveCPEs <- parseConfigurations cfgs
cvePublished <- o .: "publishedDate"
cveLastModified <- o .: "lastModifiedDate"
(cveYear, cveSubID) <- eitherToParser $ cveIDNumbers cveID
description <- cve .: "description"
cveDescription <- parseDescription description
affects <- cve .: "affects"
affectMatchers <- parseAffects affects
let cveMatchers =
M.unionWith S.union affectMatchers (cpeMatchers cveCPEs)
pure CVE {..}
splitCPE :: Text -> [Text]
splitCPE = map (T.replace "\a" ":") . T.splitOn ":" . T.replace "\\:" "\a"
instance ToRow CVE where
toRow CVE {cveID, cveDescription, cvePublished, cveLastModified} =
toRow (cveID, cveDescription, cvePublished, cveLastModified)
instance FromRow CVE where
fromRow = do
cveID <- field
cveDescription <- field
cvePublished <- field
cveLastModified <- field
pure CVE {..}
splitCPE :: Text -> [Maybe Text]
splitCPE =
map (toMaybe . T.replace "\a" ":") . T.splitOn ":" . T.replace "\\:" "\a"
where
toMaybe "*" = Nothing
toMaybe x = Just x
instance FromJSON CPE where
parseJSON =
withText "CPE" $ \t -> do
withObject "CPE" $ \o -> do
t <- o .: "cpe23Uri"
cpeVulnerable <- o .: "vulnerable"
vStartIncluding <- o .:! "versionStartIncluding"
vEndIncluding <- o .:! "versionEndIncluding"
vStartExcluding <- o .:! "versionStartExcluding"
vEndExcluding <- o .:! "versionEndExcluding"
startBoundary <-
case (vStartIncluding, vStartExcluding) of
(Nothing, Nothing) -> pure Unbounded
(Just start, Nothing) -> pure (Including start)
(Nothing, Just start) -> pure (Including start)
(Just _, Just _) -> fail "multiple starts"
endBoundary <-
case (vEndIncluding, vEndExcluding) of
(Nothing, Nothing) -> pure Unbounded
(Just end, Nothing) -> pure (Including end)
(Nothing, Just end) -> pure (Including end)
(Just _, Just _) -> fail "multiple ends"
let cpeMatcher =
case (startBoundary, endBoundary) of
(Unbounded, Unbounded) -> Nothing
(start, end) -> Just (RangeMatcher start end)
case splitCPE t of
["cpe", "2.3", cpePart, cpeVendor, cpeProduct, cpeVersion, cpeUpdate, cpeEdition, cpeLanguage, cpeSoftwareEdition, cpeTargetSoftware, cpeTargetHardware, cpeOther] ->
[Just "cpe", Just "2.3", cpePart, cpeVendor, cpeProduct, cpeVersion, cpeUpdate, cpeEdition, cpeLanguage, cpeSoftwareEdition, cpeTargetSoftware, cpeTargetHardware, cpeOther] ->
pure CPE {..}
_ -> fail $ "unparsable CPE: " <> T.unpack t
@ -151,9 +260,9 @@ guardAttr object attribute expected = do
actual <- object .: attribute
unless (actual == expected) $
fail $
"unexpected " <>
T.unpack attribute <>
", expected " <> show expected <> ", got " <> show actual
"unexpected " <> T.unpack attribute <> ", expected " <> show expected <>
", got " <>
show actual
-- TODO: Determine how nodes work exactly. What does AND mean and what does
-- vulnerable: false mean? For now, we assume everything with vulnerable: true
@ -165,16 +274,8 @@ parseNode node = do
parseNode' :: (Maybe [Object]) -> Object -> Parser [CPE]
parseNode' Nothing node = do
matches <- node .: "cpe_match"
fmap concat $
sequence $
flip map matches $ \match -> do
vulnerable <- match .: "vulnerable"
cpe <- match .: "cpe23Uri"
pure $
if vulnerable
then [cpe]
else []
matches <- node .:! "cpe_match" .!= []
pure $ filter cpeVulnerable matches
parseNode' (Just children) _ = do
fmap concat $ sequence $ map parseNode children

View File

@ -4,7 +4,7 @@ module NVD where
import OurPrelude
import CVE (CVE, parseFeed)
import CVE (CVE(..), cveMatcherList, parseFeed)
import Codec.Compression.GZip (decompress)
import Control.Exception (ioError, try)
import Crypto.Hash.SHA256 (hashlazy)
@ -22,6 +22,7 @@ import Data.Time.Clock
, utctDay
)
import Data.Time.ISO8601 (parseISO8601)
import Database.SQLite.Simple as DB
import Network.HTTP.Conduit (simpleHttp)
import System.Directory
( XdgDirectory(..)
@ -49,6 +50,36 @@ type MaxAge = NominalDiffTime
data Meta =
Meta Timestamp Checksum
withDB :: (DB.Connection -> IO a) -> IO a
withDB action = do
cacheDir <- liftIO $ getXdgDirectory XdgCache "nixpkgs-update/nvd"
createDirectoryIfMissing True cacheDir
DB.withConnection (cacheDir </> "db.sqlite3") $ \conn -> do
execute_ conn $
Query $
T.unlines
[ "CREATE TABLE IF NOT EXISTS cves ("
, " cve_id text PRIMARY KEY,"
, " description text,"
, " published text,"
, " modified text)"
]
execute_ conn $
Query $
T.unlines
[ "CREATE TABLE IF NOT EXISTS matchers ("
, " cve_id text,"
, " product_id text,"
, " matcher text)"
]
execute_ conn $
Query $
T.unlines
[ "CREATE INDEX IF NOT EXISTS matchers_by_product_id"
, "ON matchers(product_id)"
]
action conn
feedURL :: FeedID -> Extension -> String
feedURL feed ext =
"https://nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-" <> feed <> ext
@ -84,23 +115,43 @@ getMeta feed = do
either throwText pure $ parseMeta raw
getCVEs :: (ProductID, Version) -> IO [CVE]
getCVEs (_product, _) = do
getCVEs (product, version) = do
years <- allYears
_feeds <- sequence $ map (cacheFeed (7 * nominalDay)) years
feeds <- sequence $ map (cacheFeed (7 * nominalDay)) years
return []
updateVulnDB :: IO ()
updateVulnDB
-- This will be enough to develop with.
= do
feed <- cacheFeed (99 * nominalDay) "2019"
putStrLn $ "loading data"
parsed <- either throwText pure $ parseFeed feed
print $ head parsed
-- putStrLn $ "checking feed cache"
updateVulnDB =
withDB $ \conn -> do
putStrLn $ "checking feed cache"
-- years <- allYears
-- feeds <- sequence $ map (cacheFeed (7 * nominalDay)) years
return ()
feeds <- sequence $ map (cacheFeed (99 * nominalDay)) ["2019"]
putStrLn $ "loading data"
parsed <- sequence $ map (either throwText pure . parseFeed) feeds
let cves = take 10 $ head parsed
executeMany
conn
(Query $
T.unlines
[ "REPLACE INTO cves(cve_id, description, published, modified)"
, "VALUES (?, ?, ?, ?)"
])
cves
executeMany
conn
(Query $ T.unlines ["DELETE FROM matchers", "WHERE cve_id = ?"])
(map (Only . cveID) cves)
executeMany
conn
(Query $
T.unlines
[ "INSERT INTO matchers(cve_id, product_id, matcher)"
, "VALUES (?, ?, ?)"
])
(concatMap cveMatcherList cves)
print $ head $ head parsed
return ()
getCacheFile :: MonadIO m => FeedID -> m FilePath
getCacheFile feed = do
@ -112,22 +163,23 @@ cacheFeed :: MonadIO m => MaxAge -> FeedID -> m BSL.ByteString
cacheFeed maxAge feed = do
cacheFile <- getCacheFile feed
cacheTime <- liftIO $ try $ getModificationTime cacheFile
Meta newestTime expectedChecksum <- getMeta feed
currentTime <- liftIO getCurrentTime
let needsUpdate =
case cacheTime of
Left (_ :: IOError) -> True
Right t -> diffUTCTime newestTime t > maxAge
Right t -> diffUTCTime currentTime t > maxAge
if needsUpdate
then do
liftIO $ putStrLn $ "updating feed: " <> feed
liftIO $ putStrLn $ "updating feed " <> feed
Meta _ expectedChecksum <- getMeta feed
compressed <- simpleHttp $ feedURL feed ".json.gz"
let raw = decompress compressed
let actualChecksum = BSL.fromStrict $ hashlazy raw
when (actualChecksum /= expectedChecksum) $
throwString $
"wrong hash, expected: " <>
BSL.unpack (hex expectedChecksum) <>
" got: " <> BSL.unpack (hex actualChecksum)
"wrong hash, expected: " <> BSL.unpack (hex expectedChecksum) <>
" got: " <>
BSL.unpack (hex actualChecksum)
liftIO $ BSL.writeFile cacheFile raw
return raw
else do

View File

@ -26,6 +26,16 @@ import OurPrelude
import Data.Bits ((.|.))
import qualified Data.Text as T
import Database.SQLite.Simple (ResultError(..), SQLData(..))
import Database.SQLite.Simple.FromField
( FieldParser
, FromField
, fromField
, returnError
)
import Database.SQLite.Simple.Internal (Field(..))
import Database.SQLite.Simple.Ok (Ok(..))
import Database.SQLite.Simple.ToField (ToField, toField)
import System.Directory (doesDirectoryExist, setCurrentDirectory)
import System.Environment.XDG.BaseDir
import System.Posix.Directory (createDirectory)
@ -41,6 +51,8 @@ import System.Posix.Files
import System.Posix.Temp (mkdtemp)
import System.Posix.Types (FileMode)
import qualified System.Process.Typed
import Text.Read (readEither)
import Type.Reflection (Typeable)
default (T.Text)
@ -55,7 +67,7 @@ data Boundary a
= Unbounded
| Including a
| Excluding a
deriving (Eq, Ord, Show)
deriving (Eq, Ord, Show, Read)
-- | The Ord instance is used to sort lists of matchers in order to compare them
-- as a set, it is not useful for comparing versions.
@ -63,7 +75,23 @@ data VersionMatcher
= ExactMatcher Version
| FuzzyMatcher Version
| RangeMatcher (Boundary Version) (Boundary Version)
deriving (Eq, Ord, Show)
deriving (Eq, Ord, Show, Read)
readField :: (Read a, Typeable a) => FieldParser a
readField f@(Field (SQLText t) _) =
case readEither (T.unpack t) of
Right x -> Ok x
Left e -> returnError ConversionFailed f $ "read error: " <> e
readField f = returnError ConversionFailed f "expecting SQLText column type"
showField :: Show a => a -> SQLData
showField = toField . show
instance FromField VersionMatcher where
fromField = readField
instance ToField VersionMatcher where
toField = showField
data Options =
Options