Remove feed caching and clean up queries

This commit is contained in:
Lars Jellema 2019-10-02 17:02:12 +02:00
parent 93437182d4
commit 490d1ed162
No known key found for this signature in database
GPG Key ID: 563A03936D48B4BC

View File

@ -25,8 +25,7 @@ import Data.List (group)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time.Calendar (toGregorian) import Data.Time.Calendar (toGregorian)
import Data.Time.Clock import Data.Time.Clock
( NominalDiffTime ( UTCTime
, UTCTime
, diffUTCTime , diffUTCTime
, getCurrentTime , getCurrentTime
, nominalDay , nominalDay
@ -48,11 +47,10 @@ import Network.HTTP.Conduit (simpleHttp)
import System.Directory import System.Directory
( XdgDirectory(..) ( XdgDirectory(..)
, createDirectoryIfMissing , createDirectoryIfMissing
, getModificationTime
, getXdgDirectory , getXdgDirectory
, removeFile , removeFile
) )
import System.FilePath ((<.>), (</>)) import System.FilePath ((</>))
import System.IO.Error (userError) import System.IO.Error (userError)
import Utils (ProductID, Version) import Utils (ProductID, Version)
import Version (matchVersion) import Version (matchVersion)
@ -66,8 +64,6 @@ type Timestamp = UTCTime
type Checksum = BSL.ByteString type Checksum = BSL.ByteString
type MaxAge = NominalDiffTime
type DBVersion = Int type DBVersion = Int
data Meta = data Meta =
@ -95,7 +91,7 @@ withDB action = do
markUpdated :: Connection -> IO () markUpdated :: Connection -> IO ()
markUpdated conn = do markUpdated conn = do
now <- getCurrentTime now <- getCurrentTime
execute conn (Query $ T.unlines ["UPDATE meta", "SET last_update = ?"]) [now] execute conn "UPDATE meta SET last_update = ?" [now]
-- | Rebuild the entire database, redownloading all data. -- | Rebuild the entire database, redownloading all data.
rebuildDB :: IO () rebuildDB :: IO ()
@ -103,13 +99,10 @@ rebuildDB = do
dbPath <- getDBPath dbPath <- getDBPath
removeFile dbPath removeFile dbPath
withConnection dbPath $ \conn -> do withConnection dbPath $ \conn -> do
execute_ conn $ execute_ conn "CREATE TABLE meta (db_version int, last_update text)"
Query $
T.unlines
["CREATE TABLE meta (", " db_version int,", " last_update text)"]
execute execute
conn conn
(Query $ T.unlines ["INSERT INTO meta", "VALUES (?, ?)"]) "INSERT INTO meta VALUES (?, ?)"
(softwareVersion, "1970-01-01 00:00:00" :: Text) (softwareVersion, "1970-01-01 00:00:00" :: Text)
execute_ conn $ execute_ conn $
Query $ Query $
@ -129,12 +122,9 @@ rebuildDB = do
, " matcher text," , " matcher text,"
, " UNIQUE(cve_id, product_id, matcher))" , " UNIQUE(cve_id, product_id, matcher))"
] ]
execute_ conn $ execute_ conn "CREATE INDEX matchers_by_product_id ON matchers(product_id)"
Query $
T.unlines
["CREATE INDEX matchers_by_product_id", "ON matchers(product_id)"]
years <- allYears years <- allYears
forM_ years $ downloadFeed conn (7.5 * nominalDay) forM_ years $ updateFeed conn
markUpdated conn markUpdated conn
feedURL :: FeedID -> Extension -> String feedURL :: FeedID -> Extension -> String
@ -223,7 +213,7 @@ putCVEs conn cves =
cves cves
executeMany executeMany
conn conn
(Query $ T.unlines ["DELETE FROM matchers", "WHERE cve_id = ?"]) "DELETE FROM matchers WHERE cve_id = ?"
(map (Only . cveID) cves) (map (Only . cveID) cves)
executeMany executeMany
conn conn
@ -255,13 +245,9 @@ needsRebuild = do
dbVersion /= softwareVersion dbVersion /= softwareVersion
-- | Download a feed and store it in the database. -- | Download a feed and store it in the database.
downloadFeed :: Connection -> MaxAge -> FeedID -> IO () updateFeed :: Connection -> FeedID -> IO ()
downloadFeed conn maxAge feedID updateFeed conn feedID = do
-- TODO: Because the database may need to be rebuilt frequently during json <- downloadFeed feedID
-- development, we cache the json in files to avoid redownloading. After
-- development is done, it can be downloaded directly without caching.
= do
json <- cacheFeedInFile maxAge feedID
parsed <- either throwText pure $ parseFeed json parsed <- either throwText pure $ parseFeed json
putCVEs conn parsed putCVEs conn parsed
@ -272,37 +258,25 @@ withVulnDB action = do
rebuild <- needsRebuild rebuild <- needsRebuild
when rebuild rebuildDB when rebuild rebuildDB
withDB $ \conn -> do withDB $ \conn -> do
unless rebuild $ do (_, lastUpdate) <- withDB getDBMeta
downloadFeed conn (0.25 * nominalDay) "modified" currentTime <- getCurrentTime
when (diffUTCTime currentTime lastUpdate > (0.25 * nominalDay)) $ do
updateFeed conn "modified"
markUpdated conn markUpdated conn
action conn action conn
-- | Update a feed if it's older than a maximum age and return the contents as -- | Download a feed, verify its checksum and return the decompressed contents
-- ByteString. -- as a ByteString.
cacheFeedInFile :: MaxAge -> FeedID -> IO BSL.ByteString downloadFeed :: FeedID -> IO BSL.ByteString
cacheFeedInFile maxAge feed = do downloadFeed feed = do
cacheDir <- getXdgDirectory XdgCache "nixpkgs-update/nvd" putStrLn $ "updating feed " <> feed
createDirectoryIfMissing True cacheDir Meta _ expectedChecksum <- getMeta feed
let cacheFile = cacheDir </> feed <.> "json" compressed <- simpleHttp $ feedURL feed ".json.gz"
cacheTime <- try $ getModificationTime cacheFile let raw = decompress compressed
currentTime <- getCurrentTime let actualChecksum = BSL.fromStrict $ hashlazy raw
let needsUpdate = when (actualChecksum /= expectedChecksum) $
case cacheTime of throwString $
Left (_ :: IOError) -> True "wrong hash, expected: " <>
Right t -> diffUTCTime currentTime t > maxAge BSL.unpack (hex expectedChecksum) <>
if needsUpdate " got: " <> BSL.unpack (hex actualChecksum)
then do return raw
putStrLn $ "updating feed " <> feed
Meta _ expectedChecksum <- getMeta feed
compressed <- simpleHttp $ feedURL feed ".json.gz"
let raw = decompress compressed
let actualChecksum = BSL.fromStrict $ hashlazy raw
when (actualChecksum /= expectedChecksum) $
throwString $
"wrong hash, expected: " <> BSL.unpack (hex expectedChecksum) <>
" got: " <>
BSL.unpack (hex actualChecksum)
BSL.writeFile cacheFile raw
return raw
else do
BSL.readFile cacheFile