1
1
mirror of https://github.com/aelve/guide.git synced 2024-12-01 20:54:08 +03:00

Broke the parsing of tar archive in two stages. At first create a map of versions by paths in tar archive. And then load only the latest versions

This commit is contained in:
Boris M. Yartsev 2017-06-04 04:18:10 +03:00
parent 46538b5ca9
commit e8751fa399
3 changed files with 154 additions and 80 deletions

View File

@ -23,9 +23,11 @@ library
, split , split
, http-types , http-types
, bytestring , bytestring
, Cabal
, http-client , http-client
, filepath , filepath
, http-client-tls , http-client-tls
, utf8-string
, pureMD5 , pureMD5
, aeson , aeson
, text , text

View File

@ -19,7 +19,7 @@ module REPL ( {-
processCycle, processCycle,
ProcessBuilderInfo (..) ProcessBuilderInfo (..)
) where ) where
import qualified Codec.Archive.Tar.Index as TI -- import qualified Codec.Archive.Tar.Index as TI
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Char as DC import qualified Data.Char as DC
import qualified Data.List as DL import qualified Data.List as DL
@ -81,12 +81,15 @@ buildCommand pbi = processCommand
| chk "unzipclone" = unzipArchive (archiveClone pbi) (tarClone pbi) -- unzips the downloaded gzip archive | chk "unzipclone" = unzipArchive (archiveClone pbi) (tarClone pbi) -- unzips the downloaded gzip archive
| chk "unzip" = unzipArchive (archive pbi) (tar pbi) -- unzips the downloaded gzip archive | chk "unzip" = unzipArchive (archive pbi) (tar pbi) -- unzips the downloaded gzip archive
| chk "cleanclone" = undefined
| chk "clean" = undefined
| chk "tarparsepreclone" = showPreMap (tarClone pbi) 50 -- loads the tar clone information in the memory
| chk "tarparsepre" = showPreMap (tar pbi) 50 -- loads the tar information in the memory
| chk "tarparseclone" = showMap (tarClone pbi) 50 -- loads the tar clone information in the memory | chk "tarparseclone" = showMap (tarClone pbi) 50 -- loads the tar clone information in the memory
| chk "tarparse" = showMap (tar pbi) 50 -- loads the tar information in the memory | chk "tarparse" = showMap (tar pbi) 50 -- loads the tar information in the memory
| chk "tarshowclone" = showTarContents (tarClone pbi)
| chk "tarshow" = showTarContents (tar pbi)
| chk "compare" = showArchiveCompare (archive pbi) (archiveClone pbi) | chk "compare" = showArchiveCompare (archive pbi) (archiveClone pbi)
| chk "updatecut" = performArchiveCutUpdate (snapshotURL pbi) (archiveURL pbi) | chk "updatecut" = performArchiveCutUpdate (snapshotURL pbi) (archiveURL pbi)
@ -103,9 +106,6 @@ buildCommand pbi = processCommand
where pc = map DC.toLower command where pc = map DC.toLower command
chk val = DL.isPrefixOf val pc chk val = DL.isPrefixOf val pc
showFirstDirEntries :: TI.TarIndex -> Int -> IO ()
showFirstDirEntries index count = mapM_ print $ take count (getEntries index)
-- Displays the snapshot of the file -- Displays the snapshot of the file
showFileSnapshot :: FilePath -> IO() showFileSnapshot :: FilePath -> IO()
showFileSnapshot file = do showFileSnapshot file = do
@ -139,46 +139,54 @@ copyArchive archive1 archive2 = do
showMap :: FilePath -> Int -> IO() showMap :: FilePath -> Int -> IO()
showMap path count = do showMap path count = do
putStrLn $ "Displaying " ++ (show count) ++ " entries for " ++ path putStrLn $ "Displaying " ++ (show count) ++ " entries for " ++ path
tarIndexE <- loadTarIndex path tar <- loadTar path
case tarIndexE of mapM_ (print.snd) $ take count $ M.toList $ buildHackageMap tar (buildPreHackageMap tar)
Left error -> putStrLn "Whoa. Error loading tar"
Right index -> mapM_ (print.snd) $ take count $ M.toList $ buildHackageMap index showPreMap :: FilePath -> Int -> IO()
showPreMap path count = do
putStrLn $ "Pre displaying " ++ (show count) ++ " entries for " ++ path
tar <- loadTar path
mapM_ print $ take count $ M.toList $ buildPreHackageMap tar
showDiffMap :: FilePath -> FilePath -> IO () showDiffMap :: FilePath -> FilePath -> IO ()
showDiffMap newTarFile oldTarFile = do showDiffMap newTarFile oldTarFile = do
putStrLn $ "Displaying difference between " ++ newTarFile ++ " and " ++ oldTarFile putStrLn $ "Displaying difference between " ++ newTarFile ++ " and " ++ oldTarFile
newTarIndexE <- loadTarIndex newTarFile newTar <- loadTar newTarFile
oldTarIndexE <- loadTarIndex oldTarFile oldTar <- loadTar oldTarFile
let newMapE = buildHackageMap <$> newTarIndexE let newMap = buildHackageMap newTar (buildPreHackageMap newTar)
let oldMapE = buildHackageMap <$> oldTarIndexE let oldMap = buildHackageMap oldTar (buildPreHackageMap oldTar)
let diffMapE = buildDifferenceMap <$> oldMapE <*> newMapE let diffMap = buildDifferenceMap oldMap newMap
case diffMapE of mapM_ (print.snd) $ M.toList diffMap
Right m -> mapM_ (print.snd) $ M.toList m
Left _ -> print "Error creating the indexes"
showHelp :: ProcessBuilderInfo -> IO() showHelp :: ProcessBuilderInfo -> IO()
showHelp pbi = do showHelp pbi = do
putStrLn "Available commands: " putStrLn "Available commands: "
putStrLn $ "check - downloads the json length and md5 hash from " ++ (snapshotURL pbi) ++ putStrLn $ "check - downloads the json length and md5 hash from " ++ (snapshotURL pbi) ++
", and compares it with local " ++ (archive pbi) ", and compares it with local " ++ arch
putStrLn $ "checkclone - same for " ++ (archiveClone pbi) putStrLn $ "checkclone - same for " ++ archC
putStrLn $ "file - displays the current " ++ (archive pbi) ++ " length and md5 hash" putStrLn $ "file - displays the current " ++ arch ++ " length and md5 hash"
putStrLn $ "fileclone - same for " ++ (archiveClone pbi) ++ " file" putStrLn $ "fileclone - same for " ++ archC ++ " file"
putStrLn $ "copyorig - copy the " ++ (archive pbi) ++ " to " ++ (archiveClone pbi) putStrLn $ "copyorig - copy the " ++ arch ++ " to " ++ archC
putStrLn $ "cut size - cuts the size bytes from the end of the " ++ (archive pbi) ++ " , for update command" putStrLn $ "cut size - cuts the size bytes from the end of the " ++ arch ++ " , for update command"
putStrLn $ "cutclone size - cuts the size bytes from the end of the 01-index.tar.gz, for update command" putStrLn $ "cutclone size - cuts the size bytes from the end of the 01-index.tar.gz, for update command"
putStrLn $ "unzip - unzips the " ++ (archive pbi) ++ " in the " ++ (tar pbi) ++ " file" putStrLn $ "unzip - unzips the " ++ arch ++ " in the " ++ (tar pbi) ++ " file"
putStrLn $ "unzipclone - unzips the " ++ (archiveClone pbi) ++ " in the " ++ (tarClone pbi) ++ " file" putStrLn $ "unzipclone - unzips the " ++ archC ++ " in the " ++ (tarClone pbi) ++ " file"
putStrLn $ "compare - compares the " ++ (archive pbi) ++ " with " ++ (archiveClone pbi) putStrLn $ "clean - deletes the " ++ arch ++ " and " ++ (tar pbi)
putStrLn $ "cleanclone - deletes the " ++ archC ++ " and " ++ (tarClone pbi)
putStrLn $ "compare - compares the " ++ arch ++ " with " ++ archC
putStrLn $ "tarparse - loads the map of entries from " ++ (tar pbi) ++ " and displays it" putStrLn $ "tarparse - loads the map of entries from " ++ (tar pbi) ++ " and displays it"
putStrLn $ "tarparseclone - same for " ++ (tarClone pbi) putStrLn $ "tarparseclone - same for " ++ (tarClone pbi)
putStrLn $ "tarshow - show sample contents from " ++ (tar pbi) putStrLn $ "tarparsepre - loads the premap of entries from " ++ (tar pbi) ++ " and displays it"
putStrLn $ "tarshowclone - show sample contents from " ++ (tarClone pbi) putStrLn $ "tarparsepreclone - same for " ++ (tarClone pbi)
putStrLn $ "tarcmp - compares the entries of " ++ (tar pbi) ++ " and " ++ (tarClone pbi) putStrLn $ "tarcmp - compares the entries of " ++ (tar pbi) ++ " and " ++ (tarClone pbi)
putStrLn $ "update - updates the current " ++ (archive pbi) ++ " from " ++ (archiveURL pbi) putStrLn $ "update - updates the current " ++ arch ++ " from " ++ (archiveURL pbi)
putStrLn $ "updatecut size - cuts the size from " ++ (archive pbi) ++ " and then updates" putStrLn $ "updatecut size - cuts the size from " ++ arch ++ " and then updates"
putStrLn "exit - exits this repl" putStrLn "exit - exits this repl"
where
arch = archive pbi
archC = archiveClone pbi
showArchiveCompare :: FilePath -> FilePath -> IO() showArchiveCompare :: FilePath -> FilePath -> IO()
showArchiveCompare archive1 archive2= do showArchiveCompare archive1 archive2= do
@ -186,15 +194,6 @@ showArchiveCompare archive1 archive2= do
putStrLn $ "Compare result " ++ archive1 ++ " " ++ archive2 ++ " " ++ (show val) putStrLn $ "Compare result " ++ archive1 ++ " " ++ archive2 ++ " " ++ (show val)
showTarContents :: FilePath -> IO()
showTarContents archive = do
putStrLn $ "Displaying the tar indices" ++ " for " ++ archive
tarIndexE <- loadTarIndex archive
case tarIndexE of
Left error -> putStrLn "Whoa. Error loading tar"
Right index -> showFirstDirEntries index 100
exitREPL :: IO() exitREPL :: IO()
exitREPL = putStrLn "Finished working with hackage REPL" >> exitSuccess exitREPL = putStrLn "Finished working with hackage REPL" >> exitSuccess

View File

@ -1,28 +1,43 @@
module TarUtil (getEntries, {-# LANGUAGE OverloadedStrings #-}
loadTarIndex,
module TarUtil (
buildDifferenceMap,
buildHackageMap, buildHackageMap,
buildDifferenceMap buildPreHackageMap,
loadTar
) where ) where
import qualified Codec.Archive.Tar.Index as TI
import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar as Tar
import qualified Data.List.Split as SPLT import qualified Data.List.Split as SPLT
import qualified Data.Char as DC import qualified Data.Char as DC
import qualified Data.List as DL import qualified Data.List as DL
import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy as BL
import qualified Data.Version as DV import qualified Data.Version as DV
import qualified Distribution.PackageDescription.Parse as CP
import qualified Data.Map.Strict as Map import qualified Distribution.Package as DP
import System.FilePath.Posix(hasTrailingPathSeparator)
import Control.Monad(guard)
import qualified Text.ParserCombinators.ReadP as RP import qualified Text.ParserCombinators.ReadP as RP
import qualified Distribution.PackageDescription as DPD
import qualified Distribution.PackageDescription.Parse as DPDP
import qualified Data.Map.Strict as M
import qualified Control.Exception as X
import Data.Maybe
import Debug.Trace
import Control.Monad(guard)
import qualified Data.ByteString.Lazy.UTF8 as UTFC
import System.FilePath.Posix(hasTrailingPathSeparator)
-- The record for each of the package from hackage -- The record for each of the package from hackage
-- TODO - add another information about the packages -- TODO - add another information about the packages
data HackagePackage = HP { data HackagePackage = HP {
-- packageData :: HHPathData
name :: String, name :: String,
version :: DV.Version version :: DV.Version,
author :: String
} deriving (Eq, Show) } deriving (Eq, Show)
-- The status of the package between two updates -- The status of the package between two updates
@ -30,57 +45,115 @@ data HackageUpdate = Added | Removed | Updated deriving (Eq, Show)
-- The map of all the hackage packages with name as the key and HackagePackage -- The map of all the hackage packages with name as the key and HackagePackage
-- as the value -- as the value
type HackageMap = Map.Map String HackagePackage type HackageMap = M.Map String HackagePackage
type PreHackageMap = M.Map String DV.Version
-- The map, that shows, which packages have change since the last update -- The map, that shows, which packages have change since the last update
type HackageUpdateMap = Map.Map String (HackageUpdate, HackagePackage) type HackageUpdateMap = M.Map String (HackageUpdate, HackagePackage)
-- This is the data that is extracted from the path to cabal file
-- Like, when program parses "safeio/0.0.2.0/safeio.cabal"
-- It gets the version 0.0.2.0 and safeio package name. Also checks, xxx and yy match in
-- "xxx/version/yyy.cabal
type HPPathData = (String, DV.Version)
-- Parses the file path of the cabal file to get version and package name -- Parses the file path of the cabal file to get version and package name
parseCabalFilePath :: RP.ReadP HackagePackage parseCabalFilePath :: RP.ReadP HPPathData
parseCabalFilePath = do parseCabalFilePath = do
package <- RP.munch1 DC.isLetter package <- RP.munch1 DC.isLetter
RP.char '/' RP.char '/'
version <- DV.parseVersion version <- DV.parseVersion
RP.char '/' RP.char '/'
name <- RP.munch1 DC.isLetter name <- RP.munch1 (\l -> DC.isLetter l || l == '-')
guard (name == package) guard (name == package)
suff <- RP.string ".cabal" suff <- RP.string ".cabal"
RP.eof RP.eof
pure $ HP { name = package, version = version} pure $ (package, version)
{-
-- Update map of the packages with the hackage package -- Update map of the packages with the hackage package
-- Update when, the version of package is newer than version of package in the -- Update when, the version of package is newer than version of package in the
-- map -- map
updateMap :: HackagePackage -> HackageMap -> HackageMap updateMap :: HackagePackage -> HackageMap -> HackageMap
updateMap hp map = case Map.lookup (name hp) map of updateMap hp map = case M.lookup (name hp) map of
Just oldHp -> if (version hp) > (version oldHp) then updatedMap Just oldHp -> if (version hp) > (version oldHp) then updatedMap
else map else map
Nothing -> updatedMap Nothing -> updatedMap
where updatedMap = Map.insert (name hp) hp map where updatedMap = M.insert (name hp) hp map
-}
getEntries :: TI.TarIndex -> [HackagePackage]
getEntries index = map fst $ map head $ filter (not.null) $ map (goodParse.parse.getPath) entries
where entries = TI.toList index
getPath = fst
parse = RP.readP_to_S parseCabalFilePath
goodParse = filter (null.snd)
loadTarIndex :: FilePath -> IO (Either Tar.FormatError TI.TarIndex)
loadTarIndex file = do
content <- BL.readFile file
return $ TI.build $ Tar.read content
-- convert tarindex to list, then apply parser combinator, throw out all
-- empty parsingresults and then take the first successfull parsing result
buildHackageMap :: TI.TarIndex -> HackageMap
buildHackageMap index = foldr updateMap Map.empty (getEntries index)
buildDifferenceMap :: HackageMap -> HackageMap -> HackageUpdateMap buildDifferenceMap :: HackageMap -> HackageMap -> HackageUpdateMap
buildDifferenceMap oldMap newMap = foldr Map.union Map.empty [deletedMap, addedMap, updatedMap] buildDifferenceMap oldMap newMap = foldr M.union M.empty [deletedMap, addedMap, updatedMap]
where where
deletedMap = Map.map ((,) Removed) $ Map.difference oldMap newMap deletedMap = M.map ((,) Removed) $ M.difference oldMap newMap
addedMap = Map.map ((,) Added) $ Map.difference newMap oldMap addedMap = M.map ((,) Added) $ M.difference newMap oldMap
updatedMap' = Map.intersection newMap oldMap updatedMap' = M.intersection newMap oldMap
updatedMap = Map.map ((,) Updated) $ Map.differenceWith diff updatedMap' oldMap updatedMap = M.map ((,) Updated) $ M.differenceWith diff updatedMap' oldMap
diff newpack oldpack = if (newpack /= oldpack) then Just newpack else Nothing diff newpack oldpack = if (newpack /= oldpack) then Just newpack else Nothing
createPackage :: DPD.PackageDescription -> HackagePackage
createPackage pd = HP { name = nm, version = ver, author = auth }
where
pkg = DPD.package pd
nm = DP.unPackageName (DP.pkgName pkg)
ver = DP.pkgVersion pkg
auth = DPD.author pd
parsePath :: FilePath -> Maybe HPPathData
parsePath path = case RP.readP_to_S parseCabalFilePath path of
[(pd, _)] -> Just pd
_ -> Nothing
parsePackageDescription :: Tar.EntryContent -> Maybe DPD.PackageDescription
parsePackageDescription (Tar.NormalFile content _) =
case (DPDP.parsePackageDescription (UTFC.toString content)) of
DPDP.ParseOk _ pd -> Just (DPD.packageDescription pd)
DPDP.ParseFailed _ -> Nothing
parsePackageDescription _ = Nothing
parsePackage :: Tar.Entry -> Maybe HackagePackage
parsePackage entry = do
(path, version) <- parsePath $ Tar.entryPath entry
pd <- parsePackageDescription $ Tar.entryContent entry
return $ createPackage pd
updatePreMap :: HPPathData -> PreHackageMap -> PreHackageMap
updatePreMap (name, version) map = case M.lookup name map of
Just oldVersion -> if version > oldVersion then updatedMap
else map
Nothing -> updatedMap
where updatedMap = M.insert name version map
buildPreHackageMap :: Tar.Entries Tar.FormatError -> PreHackageMap
buildPreHackageMap (Tar.Next entry entries) =
case parsePath $ Tar.entryPath entry of
Just hp -> updatePreMap hp map
Nothing -> map
where map = buildPreHackageMap entries
buildPreHackageMap Tar.Done = M.empty
buildPrehackageMap (Tar.Fail e) = X.throw e
buildHackageMap :: Tar.Entries Tar.FormatError -> PreHackageMap -> HackageMap
buildHackageMap (Tar.Next entry entries) premap =
case update of
Just hp -> M.insert (name hp) hp map
Nothing -> map
where map = buildHackageMap entries premap
path = Tar.entryPath entry
update :: Maybe HackagePackage
update = do
(name, version) <- parsePath path
preversion <- M.lookup name premap
if (preversion == version) then parsePackage entry
else Nothing
buildHackageMap Tar.Done _ = M.empty
buildHackageMap (Tar.Fail e) _ = X.throw e
loadTar :: FilePath -> IO (Tar.Entries Tar.FormatError)
loadTar file = do
content <- BL.readFile file
return $ Tar.read content