1
1
mirror of https://github.com/aelve/guide.git synced 2024-11-25 18:56:52 +03:00

Broke the parsing of tar archive in two stages. At first create a map of versions by paths in tar archive. And then load only the latest versions

This commit is contained in:
Boris M. Yartsev 2017-06-04 04:18:10 +03:00
parent 46538b5ca9
commit e8751fa399
3 changed files with 154 additions and 80 deletions

View File

@ -23,9 +23,11 @@ library
, split
, http-types
, bytestring
, Cabal
, http-client
, filepath
, http-client-tls
, utf8-string
, pureMD5
, aeson
, text

View File

@ -19,7 +19,7 @@ module REPL ( {-
processCycle,
ProcessBuilderInfo (..)
) where
import qualified Codec.Archive.Tar.Index as TI
-- import qualified Codec.Archive.Tar.Index as TI
import qualified Data.Map.Strict as M
import qualified Data.Char as DC
import qualified Data.List as DL
@ -81,12 +81,15 @@ buildCommand pbi = processCommand
| chk "unzipclone" = unzipArchive (archiveClone pbi) (tarClone pbi) -- unzips the downloaded gzip archive
| chk "unzip" = unzipArchive (archive pbi) (tar pbi) -- unzips the downloaded gzip archive
| chk "cleanclone" = undefined
| chk "clean" = undefined
| chk "tarparsepreclone" = showPreMap (tarClone pbi) 50 -- loads the tar clone information in the memory
| chk "tarparsepre" = showPreMap (tar pbi) 50 -- loads the tar information in the memory
| chk "tarparseclone" = showMap (tarClone pbi) 50 -- loads the tar clone information in the memory
| chk "tarparse" = showMap (tar pbi) 50 -- loads the tar information in the memory
| chk "tarshowclone" = showTarContents (tarClone pbi)
| chk "tarshow" = showTarContents (tar pbi)
| chk "compare" = showArchiveCompare (archive pbi) (archiveClone pbi)
| chk "updatecut" = performArchiveCutUpdate (snapshotURL pbi) (archiveURL pbi)
@ -103,9 +106,6 @@ buildCommand pbi = processCommand
where pc = map DC.toLower command
chk val = DL.isPrefixOf val pc
showFirstDirEntries :: TI.TarIndex -> Int -> IO ()
showFirstDirEntries index count = mapM_ print $ take count (getEntries index)
-- Displays the snapshot of the file
showFileSnapshot :: FilePath -> IO()
showFileSnapshot file = do
@ -139,46 +139,54 @@ copyArchive archive1 archive2 = do
showMap :: FilePath -> Int -> IO()
showMap path count = do
putStrLn $ "Displaying " ++ (show count) ++ " entries for " ++ path
tarIndexE <- loadTarIndex path
case tarIndexE of
Left error -> putStrLn "Whoa. Error loading tar"
Right index -> mapM_ (print.snd) $ take count $ M.toList $ buildHackageMap index
tar <- loadTar path
mapM_ (print.snd) $ take count $ M.toList $ buildHackageMap tar (buildPreHackageMap tar)
showPreMap :: FilePath -> Int -> IO()
showPreMap path count = do
putStrLn $ "Pre displaying " ++ (show count) ++ " entries for " ++ path
tar <- loadTar path
mapM_ print $ take count $ M.toList $ buildPreHackageMap tar
showDiffMap :: FilePath -> FilePath -> IO ()
showDiffMap newTarFile oldTarFile = do
putStrLn $ "Displaying difference between " ++ newTarFile ++ " and " ++ oldTarFile
newTarIndexE <- loadTarIndex newTarFile
oldTarIndexE <- loadTarIndex oldTarFile
let newMapE = buildHackageMap <$> newTarIndexE
let oldMapE = buildHackageMap <$> oldTarIndexE
let diffMapE = buildDifferenceMap <$> oldMapE <*> newMapE
case diffMapE of
Right m -> mapM_ (print.snd) $ M.toList m
Left _ -> print "Error creating the indexes"
newTar <- loadTar newTarFile
oldTar <- loadTar oldTarFile
let newMap = buildHackageMap newTar (buildPreHackageMap newTar)
let oldMap = buildHackageMap oldTar (buildPreHackageMap oldTar)
let diffMap = buildDifferenceMap oldMap newMap
mapM_ (print.snd) $ M.toList diffMap
showHelp :: ProcessBuilderInfo -> IO()
showHelp pbi = do
putStrLn "Available commands: "
putStrLn $ "check - downloads the json length and md5 hash from " ++ (snapshotURL pbi) ++
", and compares it with local " ++ (archive pbi)
putStrLn $ "checkclone - same for " ++ (archiveClone pbi)
putStrLn $ "file - displays the current " ++ (archive pbi) ++ " length and md5 hash"
putStrLn $ "fileclone - same for " ++ (archiveClone pbi) ++ " file"
putStrLn $ "copyorig - copy the " ++ (archive pbi) ++ " to " ++ (archiveClone pbi)
putStrLn $ "cut size - cuts the size bytes from the end of the " ++ (archive pbi) ++ " , for update command"
", and compares it with local " ++ arch
putStrLn $ "checkclone - same for " ++ archC
putStrLn $ "file - displays the current " ++ arch ++ " length and md5 hash"
putStrLn $ "fileclone - same for " ++ archC ++ " file"
putStrLn $ "copyorig - copy the " ++ arch ++ " to " ++ archC
putStrLn $ "cut size - cuts the size bytes from the end of the " ++ arch ++ " , for update command"
putStrLn $ "cutclone size - cuts the size bytes from the end of the 01-index.tar.gz, for update command"
putStrLn $ "unzip - unzips the " ++ (archive pbi) ++ " in the " ++ (tar pbi) ++ " file"
putStrLn $ "unzipclone - unzips the " ++ (archiveClone pbi) ++ " in the " ++ (tarClone pbi) ++ " file"
putStrLn $ "compare - compares the " ++ (archive pbi) ++ " with " ++ (archiveClone pbi)
putStrLn $ "unzip - unzips the " ++ arch ++ " in the " ++ (tar pbi) ++ " file"
putStrLn $ "unzipclone - unzips the " ++ archC ++ " in the " ++ (tarClone pbi) ++ " file"
putStrLn $ "clean - deletes the " ++ arch ++ " and " ++ (tar pbi)
putStrLn $ "cleanclone - deletes the " ++ archC ++ " and " ++ (tarClone pbi)
putStrLn $ "compare - compares the " ++ arch ++ " with " ++ archC
putStrLn $ "tarparse - loads the map of entries from " ++ (tar pbi) ++ " and displays it"
putStrLn $ "tarparseclone - same for " ++ (tarClone pbi)
putStrLn $ "tarshow - show sample contents from " ++ (tar pbi)
putStrLn $ "tarshowclone - show sample contents from " ++ (tarClone pbi)
putStrLn $ "tarparsepre - loads the premap of entries from " ++ (tar pbi) ++ " and displays it"
putStrLn $ "tarparsepreclone - same for " ++ (tarClone pbi)
putStrLn $ "tarcmp - compares the entries of " ++ (tar pbi) ++ " and " ++ (tarClone pbi)
putStrLn $ "update - updates the current " ++ (archive pbi) ++ " from " ++ (archiveURL pbi)
putStrLn $ "updatecut size - cuts the size from " ++ (archive pbi) ++ " and then updates"
putStrLn $ "update - updates the current " ++ arch ++ " from " ++ (archiveURL pbi)
putStrLn $ "updatecut size - cuts the size from " ++ arch ++ " and then updates"
putStrLn "exit - exits this repl"
where
arch = archive pbi
archC = archiveClone pbi
showArchiveCompare :: FilePath -> FilePath -> IO()
showArchiveCompare archive1 archive2= do
@ -186,15 +194,6 @@ showArchiveCompare archive1 archive2= do
putStrLn $ "Compare result " ++ archive1 ++ " " ++ archive2 ++ " " ++ (show val)
showTarContents :: FilePath -> IO()
showTarContents archive = do
putStrLn $ "Displaying the tar indices" ++ " for " ++ archive
tarIndexE <- loadTarIndex archive
case tarIndexE of
Left error -> putStrLn "Whoa. Error loading tar"
Right index -> showFirstDirEntries index 100
exitREPL :: IO()
exitREPL = putStrLn "Finished working with hackage REPL" >> exitSuccess

View File

@ -1,28 +1,43 @@
module TarUtil (getEntries,
loadTarIndex,
{-# LANGUAGE OverloadedStrings #-}
module TarUtil (
buildDifferenceMap,
buildHackageMap,
buildDifferenceMap
buildPreHackageMap,
loadTar
) where
import qualified Codec.Archive.Tar.Index as TI
import qualified Codec.Archive.Tar as Tar
import qualified Data.List.Split as SPLT
import qualified Data.Char as DC
import qualified Data.List as DL
import qualified Data.ByteString.Lazy as BL
import qualified Data.Version as DV
import qualified Data.Map.Strict as Map
import System.FilePath.Posix(hasTrailingPathSeparator)
import Control.Monad(guard)
import qualified Distribution.PackageDescription.Parse as CP
import qualified Distribution.Package as DP
import qualified Text.ParserCombinators.ReadP as RP
import qualified Distribution.PackageDescription as DPD
import qualified Distribution.PackageDescription.Parse as DPDP
import qualified Data.Map.Strict as M
import qualified Control.Exception as X
import Data.Maybe
import Debug.Trace
import Control.Monad(guard)
import qualified Data.ByteString.Lazy.UTF8 as UTFC
import System.FilePath.Posix(hasTrailingPathSeparator)
-- The record for each of the package from hackage
-- TODO - add another information about the packages
data HackagePackage = HP {
-- packageData :: HHPathData
name :: String,
version :: DV.Version
version :: DV.Version,
author :: String
} deriving (Eq, Show)
-- The status of the package between two updates
@ -30,57 +45,115 @@ data HackageUpdate = Added | Removed | Updated deriving (Eq, Show)
-- The map of all the hackage packages with name as the key and HackagePackage
-- as the value
type HackageMap = Map.Map String HackagePackage
type HackageMap = M.Map String HackagePackage
type PreHackageMap = M.Map String DV.Version
-- The map, that shows, which packages have change since the last update
type HackageUpdateMap = Map.Map String (HackageUpdate, HackagePackage)
type HackageUpdateMap = M.Map String (HackageUpdate, HackagePackage)
-- This is the data that is extracted from the path to cabal file
-- Like, when program parses "safeio/0.0.2.0/safeio.cabal"
-- It gets the version 0.0.2.0 and safeio package name. Also checks, xxx and yy match in
-- "xxx/version/yyy.cabal
type HPPathData = (String, DV.Version)
-- Parses the file path of the cabal file to get version and package name
parseCabalFilePath :: RP.ReadP HackagePackage
parseCabalFilePath :: RP.ReadP HPPathData
parseCabalFilePath = do
package <- RP.munch1 DC.isLetter
RP.char '/'
version <- DV.parseVersion
RP.char '/'
name <- RP.munch1 DC.isLetter
name <- RP.munch1 (\l -> DC.isLetter l || l == '-')
guard (name == package)
suff <- RP.string ".cabal"
RP.eof
pure $ HP { name = package, version = version}
pure $ (package, version)
{-
-- Update map of the packages with the hackage package
-- Update when, the version of package is newer than version of package in the
-- map
updateMap :: HackagePackage -> HackageMap -> HackageMap
updateMap hp map = case Map.lookup (name hp) map of
updateMap hp map = case M.lookup (name hp) map of
Just oldHp -> if (version hp) > (version oldHp) then updatedMap
else map
Nothing -> updatedMap
where updatedMap = Map.insert (name hp) hp map
getEntries :: TI.TarIndex -> [HackagePackage]
getEntries index = map fst $ map head $ filter (not.null) $ map (goodParse.parse.getPath) entries
where entries = TI.toList index
getPath = fst
parse = RP.readP_to_S parseCabalFilePath
goodParse = filter (null.snd)
loadTarIndex :: FilePath -> IO (Either Tar.FormatError TI.TarIndex)
loadTarIndex file = do
content <- BL.readFile file
return $ TI.build $ Tar.read content
-- convert tarindex to list, then apply parser combinator, throw out all
-- empty parsingresults and then take the first successfull parsing result
buildHackageMap :: TI.TarIndex -> HackageMap
buildHackageMap index = foldr updateMap Map.empty (getEntries index)
where updatedMap = M.insert (name hp) hp map
-}
buildDifferenceMap :: HackageMap -> HackageMap -> HackageUpdateMap
buildDifferenceMap oldMap newMap = foldr Map.union Map.empty [deletedMap, addedMap, updatedMap]
buildDifferenceMap oldMap newMap = foldr M.union M.empty [deletedMap, addedMap, updatedMap]
where
deletedMap = Map.map ((,) Removed) $ Map.difference oldMap newMap
addedMap = Map.map ((,) Added) $ Map.difference newMap oldMap
updatedMap' = Map.intersection newMap oldMap
updatedMap = Map.map ((,) Updated) $ Map.differenceWith diff updatedMap' oldMap
deletedMap = M.map ((,) Removed) $ M.difference oldMap newMap
addedMap = M.map ((,) Added) $ M.difference newMap oldMap
updatedMap' = M.intersection newMap oldMap
updatedMap = M.map ((,) Updated) $ M.differenceWith diff updatedMap' oldMap
diff newpack oldpack = if (newpack /= oldpack) then Just newpack else Nothing
createPackage :: DPD.PackageDescription -> HackagePackage
createPackage pd = HP { name = nm, version = ver, author = auth }
where
pkg = DPD.package pd
nm = DP.unPackageName (DP.pkgName pkg)
ver = DP.pkgVersion pkg
auth = DPD.author pd
parsePath :: FilePath -> Maybe HPPathData
parsePath path = case RP.readP_to_S parseCabalFilePath path of
[(pd, _)] -> Just pd
_ -> Nothing
parsePackageDescription :: Tar.EntryContent -> Maybe DPD.PackageDescription
parsePackageDescription (Tar.NormalFile content _) =
case (DPDP.parsePackageDescription (UTFC.toString content)) of
DPDP.ParseOk _ pd -> Just (DPD.packageDescription pd)
DPDP.ParseFailed _ -> Nothing
parsePackageDescription _ = Nothing
parsePackage :: Tar.Entry -> Maybe HackagePackage
parsePackage entry = do
(path, version) <- parsePath $ Tar.entryPath entry
pd <- parsePackageDescription $ Tar.entryContent entry
return $ createPackage pd
updatePreMap :: HPPathData -> PreHackageMap -> PreHackageMap
updatePreMap (name, version) map = case M.lookup name map of
Just oldVersion -> if version > oldVersion then updatedMap
else map
Nothing -> updatedMap
where updatedMap = M.insert name version map
buildPreHackageMap :: Tar.Entries Tar.FormatError -> PreHackageMap
buildPreHackageMap (Tar.Next entry entries) =
case parsePath $ Tar.entryPath entry of
Just hp -> updatePreMap hp map
Nothing -> map
where map = buildPreHackageMap entries
buildPreHackageMap Tar.Done = M.empty
buildPrehackageMap (Tar.Fail e) = X.throw e
buildHackageMap :: Tar.Entries Tar.FormatError -> PreHackageMap -> HackageMap
buildHackageMap (Tar.Next entry entries) premap =
case update of
Just hp -> M.insert (name hp) hp map
Nothing -> map
where map = buildHackageMap entries premap
path = Tar.entryPath entry
update :: Maybe HackagePackage
update = do
(name, version) <- parsePath path
preversion <- M.lookup name premap
if (preversion == version) then parsePackage entry
else Nothing
buildHackageMap Tar.Done _ = M.empty
buildHackageMap (Tar.Fail e) _ = X.throw e
loadTar :: FilePath -> IO (Tar.Entries Tar.FormatError)
loadTar file = do
content <- BL.readFile file
return $ Tar.read content