Speed up dependency information chasing (#2444)

Comparing FilePaths is really slow so by mapping them to Ints, we can
speed up dependency chasing significantly.

We might want to switch to doing some kind of global hash consing of
file paths at the Shake level but for now, this seems like a nice
improvement while not being too invasive.

This is roughly an ~8s speedup on my testcase.
This commit is contained in:
Moritz Kiefer 2019-08-07 16:51:38 +02:00 committed by Gary Verhaegen
parent 6e0a519178
commit cc97e1e1be
2 changed files with 159 additions and 64 deletions

View File

@ -36,10 +36,13 @@ import Development.IDE.Import.FindImports
import Development.IDE.Core.FileStore
import Development.IDE.Types.Diagnostics
import Development.IDE.Types.Location
import Data.Coerce
import Data.Either.Extra
import Data.Maybe
import Data.Foldable
import qualified Data.Map.Strict as Map
import qualified Data.IntMap.Strict as IntMap
import qualified Data.IntSet as IntSet
import Data.List
import qualified Data.Set as Set
import qualified Data.Text as T
import Development.IDE.GHC.Error
@ -203,40 +206,62 @@ getLocatedImportsRule =
-- | Given a target file path, construct the raw dependency results by following
-- imports recursively.
rawDependencyInformation :: NormalizedFilePath -> ExceptT [FileDiagnostic] Action RawDependencyInformation
rawDependencyInformation f = go (Set.singleton f) Map.empty
where go fs !modGraph =
case Set.minView fs of
Nothing -> pure $ RawDependencyInformation modGraph
Just (f, fs) -> do
importsOrErr <- lift $ use GetLocatedImports f
case importsOrErr of
Nothing ->
let modGraph' = Map.insert f (Left ModuleParseError) modGraph
in go fs modGraph'
Just (modImports, pkgImports) -> do
let newFiles = Set.fromList (mapMaybe snd modImports) Set.\\ Map.keysSet modGraph
modGraph' = Map.insert f (Right $ ModuleImports modImports pkgImports) modGraph
go (newFiles `Set.union` fs) modGraph'
rawDependencyInformation :: NormalizedFilePath -> Action RawDependencyInformation
rawDependencyInformation f = do
let (initialId, initialMap) = getPathId f emptyPathIdMap
go (IntSet.singleton $ getFilePathId initialId)
(RawDependencyInformation IntMap.empty initialMap)
where
go fs rawDepInfo =
case IntSet.minView fs of
-- Queue is empty
Nothing -> pure rawDepInfo
-- Pop f from the queue and process it
Just (f, fs) -> do
let fId = FilePathId f
importsOrErr <- use GetLocatedImports $ idToPath (rawPathIdMap rawDepInfo) fId
case importsOrErr of
Nothing ->
-- File doesnt parse
let rawDepInfo' = insertImport fId (Left ModuleParseError) rawDepInfo
in go fs rawDepInfo'
Just (modImports, pkgImports) -> do
let f :: PathIdMap -> (a, Maybe NormalizedFilePath) -> (PathIdMap, (a, Maybe FilePathId))
f pathMap (imp, mbPath) = case mbPath of
Nothing -> (pathMap, (imp, Nothing))
Just path ->
let (pathId, pathMap') = getPathId path pathMap
in (pathMap', (imp, Just pathId))
-- Convert paths in imports to ids and update the path map
let (pathIdMap, modImports') = mapAccumL f (rawPathIdMap rawDepInfo) modImports
-- Files that we havent seen before are added to the queue.
let newFiles =
IntSet.fromList (coerce $ mapMaybe snd modImports')
IntSet.\\ IntMap.keysSet (rawImports rawDepInfo)
let rawDepInfo' = insertImport fId (Right $ ModuleImports modImports' pkgImports) rawDepInfo
go (newFiles `IntSet.union` fs) (rawDepInfo' { rawPathIdMap = pathIdMap })
getDependencyInformationRule :: Rules ()
getDependencyInformationRule =
define $ \GetDependencyInformation file -> fmap toIdeResult $ runExceptT $ do
define $ \GetDependencyInformation file -> do
rawDepInfo <- rawDependencyInformation file
pure $ processDependencyInformation rawDepInfo
pure ([], Just $ processDependencyInformation rawDepInfo)
reportImportCyclesRule :: Rules ()
reportImportCyclesRule =
define $ \ReportImportCycles file -> fmap (\errs -> if null errs then ([], Just ()) else (errs, Nothing)) $ do
DependencyInformation{..} <- use_ GetDependencyInformation file
case Map.lookup file depErrorNodes of
let fileId = pathToId depPathIdMap file
case IntMap.lookup (getFilePathId fileId) depErrorNodes of
Nothing -> pure []
Just errs -> do
let cycles = mapMaybe (cycleErrorInFile file) (toList errs)
let cycles = mapMaybe (cycleErrorInFile fileId) (toList errs)
-- Convert cycles of files into cycles of module names
forM cycles $ \(imp, files) -> do
modNames <- mapM getModuleName files
pure $ toDiag imp modNames
modNames <- forM files $ \fileId -> do
let file = idToPath depPathIdMap fileId
getModuleName file
pure $ toDiag imp $ sort modNames
where cycleErrorInFile f (PartOfCycle imp fs)
| f `elem` fs = Just (imp, fs)
cycleErrorInFile _ _ = Nothing
@ -261,7 +286,7 @@ getDependenciesRule :: Rules ()
getDependenciesRule =
define $ \GetDependencies file -> do
depInfo@DependencyInformation{..} <- use_ GetDependencyInformation file
let allFiles = Map.keys depModuleDeps <> Map.keys depErrorNodes
let allFiles = reachableModules depInfo
_ <- uses_ ReportImportCycles allFiles
return ([], transitiveDeps depInfo file)

View File

@ -8,21 +8,36 @@ module Development.IDE.Import.DependencyInformation
, NodeError(..)
, ModuleParseError(..)
, TransitiveDependencies(..)
, FilePathId(..)
, PathIdMap
, emptyPathIdMap
, getPathId
, insertImport
, pathToId
, idToPath
, reachableModules
, processDependencyInformation
, transitiveDeps
) where
import Control.DeepSeq
import Data.Bifunctor
import Data.Coerce
import Data.List
import Development.IDE.GHC.Orphans()
import Data.Either
import Data.Graph
import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.IntMap (IntMap)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.IntMap.Lazy as IntMapLazy
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Data.Map (Map)
import qualified Data.Map.Strict as MS
import qualified Data.Map.Lazy as ML
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
@ -37,32 +52,78 @@ import Module
-- | The imports for a given module.
data ModuleImports = ModuleImports
{ moduleImports :: ![(Located ModuleName, Maybe NormalizedFilePath)]
{ moduleImports :: ![(Located ModuleName, Maybe FilePathId)]
-- ^ Imports of a module in the current package and the file path of
-- that module on disk (if we found it)
, packageImports :: !(Set InstalledUnitId)
-- ^ Transitive package dependencies unioned for all imports.
}
-- | For processing dependency information, we need lots of maps and sets
-- of filepaths. Comparing Strings is really slow, so we work with IntMap/IntSet
-- instead and only convert at the edges
-- and
newtype FilePathId = FilePathId { getFilePathId :: Int }
deriving (Show, NFData, Eq, Ord)
data PathIdMap = PathIdMap
{ idToPathMap :: !(IntMap NormalizedFilePath)
, pathToIdMap :: !(Map NormalizedFilePath FilePathId)
}
deriving (Show, Generic)
instance NFData PathIdMap
emptyPathIdMap :: PathIdMap
emptyPathIdMap = PathIdMap IntMap.empty MS.empty
getPathId :: NormalizedFilePath -> PathIdMap -> (FilePathId, PathIdMap)
getPathId path m@PathIdMap{..} =
case MS.lookup path pathToIdMap of
Nothing ->
let !newId = FilePathId $ MS.size pathToIdMap
in (newId, insertPathId path newId m)
Just id -> (id, m)
insertPathId :: NormalizedFilePath -> FilePathId -> PathIdMap -> PathIdMap
insertPathId path id PathIdMap{..} =
PathIdMap (IntMap.insert (getFilePathId id) path idToPathMap) (MS.insert path id pathToIdMap)
insertImport :: FilePathId -> Either ModuleParseError ModuleImports -> RawDependencyInformation -> RawDependencyInformation
insertImport (FilePathId k) v rawDepInfo = rawDepInfo { rawImports = IntMap.insert k v (rawImports rawDepInfo) }
pathToId :: PathIdMap -> NormalizedFilePath -> FilePathId
pathToId PathIdMap{pathToIdMap} path = pathToIdMap MS.! path
idToPath :: PathIdMap -> FilePathId -> NormalizedFilePath
idToPath PathIdMap{idToPathMap} (FilePathId id) = idToPathMap IntMap.! id
-- | Unprocessed results that we find by following imports recursively.
newtype RawDependencyInformation = RawDependencyInformation
{ getRawDeps :: Map NormalizedFilePath (Either ModuleParseError ModuleImports)
data RawDependencyInformation = RawDependencyInformation
{ rawImports :: !(IntMap (Either ModuleParseError ModuleImports))
, rawPathIdMap :: !PathIdMap
}
pkgDependencies :: RawDependencyInformation -> Map NormalizedFilePath (Set InstalledUnitId)
pkgDependencies (RawDependencyInformation m) = MS.map (either (const Set.empty) packageImports) m
pkgDependencies :: RawDependencyInformation -> IntMap (Set InstalledUnitId)
pkgDependencies RawDependencyInformation{..} =
IntMap.map (either (const Set.empty) packageImports) rawImports
data DependencyInformation =
DependencyInformation
{ depErrorNodes :: Map NormalizedFilePath (NonEmpty NodeError)
{ depErrorNodes :: !(IntMap (NonEmpty NodeError))
-- ^ Nodes that cannot be processed correctly.
, depModuleDeps :: Map NormalizedFilePath (Set NormalizedFilePath)
, depModuleDeps :: !(IntMap IntSet)
-- ^ For a non-error node, this contains the set of module immediate dependencies
-- in the same package.
, depPkgDeps :: Map NormalizedFilePath (Set InstalledUnitId)
, depPkgDeps :: !(IntMap (Set InstalledUnitId))
-- ^ For a non-error node, this contains the set of immediate pkg deps.
, depPathIdMap :: !PathIdMap
} deriving (Show, Generic)
reachableModules :: DependencyInformation -> [NormalizedFilePath]
reachableModules DependencyInformation{..} =
map (idToPath depPathIdMap . FilePathId) $ IntMap.keys depErrorNodes <> IntMap.keys depModuleDeps
instance NFData DependencyInformation
-- | This does not contain the actual parse error as that is already reported by GetParsedModule.
@ -79,7 +140,7 @@ instance NFData LocateError
-- | An error attached to a node in the dependency graph.
data NodeError
= PartOfCycle (Located ModuleName) [NormalizedFilePath]
= PartOfCycle (Located ModuleName) [FilePathId]
-- ^ This module is part of an import cycle. The module name corresponds
-- to the import that enters the cycle starting from this module.
-- The list of filepaths represents the elements
@ -104,12 +165,12 @@ instance NFData NodeError where
-- `ErrorNode`. Otherwise it is a `SuccessNode`.
data NodeResult
= ErrorNode (NonEmpty NodeError)
| SuccessNode [(Located ModuleName, NormalizedFilePath)]
| SuccessNode [(Located ModuleName, FilePathId)]
deriving Show
partitionNodeResults
:: [(a, NodeResult)]
-> ([(a, NonEmpty NodeError)], [(a, [(Located ModuleName, NormalizedFilePath)])])
-> ([(a, NonEmpty NodeError)], [(a, [(Located ModuleName, FilePathId)])])
partitionNodeResults = partitionEithers . map f
where f (a, ErrorNode errs) = Left (a, errs)
f (a, SuccessNode imps) = Right (a, imps)
@ -121,40 +182,41 @@ instance Semigroup NodeResult where
SuccessNode a <> SuccessNode _ = SuccessNode a
processDependencyInformation :: RawDependencyInformation -> DependencyInformation
processDependencyInformation rawResults =
processDependencyInformation rawDepInfo@RawDependencyInformation{..} =
DependencyInformation
{ depErrorNodes = MS.fromList errorNodes
{ depErrorNodes = IntMap.fromList errorNodes
, depModuleDeps = moduleDeps
, depPkgDeps = pkgDependencies rawResults
, depPkgDeps = pkgDependencies rawDepInfo
, depPathIdMap = rawPathIdMap
}
where resultGraph = buildResultGraph rawResults
(errorNodes, successNodes) = partitionNodeResults $ MS.toList resultGraph
successEdges :: [(NormalizedFilePath, NormalizedFilePath, [NormalizedFilePath])]
where resultGraph = buildResultGraph rawImports
(errorNodes, successNodes) = partitionNodeResults $ IntMap.toList resultGraph
successEdges :: [(FilePathId, FilePathId, [FilePathId])]
successEdges =
map (\(file, imports) -> (file, file, map snd imports)) successNodes
map (\(file, imports) -> (FilePathId file, FilePathId file, map snd imports)) successNodes
moduleDeps =
MS.fromList $ map (\(_, v, vs) -> (v, Set.fromList vs)) successEdges
IntMap.fromList $ map (\(_, FilePathId v, vs) -> (v, IntSet.fromList $ coerce vs)) successEdges
-- | Given a dependency graph, buildResultGraph detects and propagates errors in that graph as follows:
-- 1. Mark each node that is part of an import cycle as an error node.
-- 2. Mark each node that has a parse error as an error node.
-- 3. Mark each node whose immediate children could not be located as an error.
-- 4. Recursively propagate errors to parents if they are not already error nodes.
buildResultGraph :: RawDependencyInformation -> Map NormalizedFilePath NodeResult
buildResultGraph :: IntMap (Either ModuleParseError ModuleImports) -> IntMap NodeResult
buildResultGraph g = propagatedErrors
where
sccs = stronglyConnComp (graphEdges g)
(_, cycles) = partitionSCC sccs
cycleErrors :: Map NormalizedFilePath NodeResult
cycleErrors = MS.unionsWith (<>) $ map errorsForCycle cycles
errorsForCycle :: [NormalizedFilePath] -> Map NormalizedFilePath NodeResult
cycleErrors :: IntMap NodeResult
cycleErrors = IntMap.unionsWith (<>) $ map errorsForCycle cycles
errorsForCycle :: [FilePathId] -> IntMap NodeResult
errorsForCycle files =
MS.fromListWith (<>) (concatMap (cycleErrorsForFile files) files)
cycleErrorsForFile :: [NormalizedFilePath] -> NormalizedFilePath -> [(NormalizedFilePath,NodeResult)]
IntMap.fromListWith (<>) $ coerce $ concatMap (cycleErrorsForFile files) files
cycleErrorsForFile :: [FilePathId] -> FilePathId -> [(FilePathId,NodeResult)]
cycleErrorsForFile cycle f =
let entryPoints = mapMaybe (findImport f) cycle
in map (\imp -> (f, ErrorNode (PartOfCycle imp cycle :| []))) entryPoints
otherErrors = MS.map otherErrorsForFile (getRawDeps g)
otherErrors = IntMap.map otherErrorsForFile g
otherErrorsForFile :: Either ModuleParseError ModuleImports -> NodeResult
otherErrorsForFile (Left err) = ErrorNode (ParseError err :| [])
otherErrorsForFile (Right ModuleImports{moduleImports}) =
@ -165,32 +227,32 @@ buildResultGraph g = propagatedErrors
Nothing -> SuccessNode imports'
Just errs' -> ErrorNode (NonEmpty.map FailedToLocateImport errs')
unpropagatedErrors = MS.unionWith (<>) cycleErrors otherErrors
unpropagatedErrors = IntMap.unionWith (<>) cycleErrors otherErrors
-- The recursion here is fine since we use a lazy map and
-- we only recurse on SuccessNodes. In particular, we do not recurse
-- on nodes that are part of a cycle as they are already marked as
-- error nodes.
propagatedErrors =
ML.map propagate unpropagatedErrors
IntMapLazy.map propagate unpropagatedErrors
propagate :: NodeResult -> NodeResult
propagate n@(ErrorNode _) = n
propagate n@(SuccessNode imps) =
let results = map (\(imp, dep) -> (imp, propagatedErrors MS.! dep)) imps
let results = map (\(imp, FilePathId dep) -> (imp, propagatedErrors IntMap.! dep)) imps
(errs, _) = partitionNodeResults results
in case nonEmpty errs of
Nothing -> n
Just errs' -> ErrorNode (NonEmpty.map (ParentOfErrorNode . fst) errs')
findImport :: NormalizedFilePath -> NormalizedFilePath -> Maybe (Located ModuleName)
findImport file importedFile =
case getRawDeps g MS.! file of
findImport :: FilePathId -> FilePathId -> Maybe (Located ModuleName)
findImport (FilePathId file) importedFile =
case g IntMap.! file of
Left _ -> error "Tried to call findImport on a module with a parse error"
Right ModuleImports{moduleImports} ->
fmap fst $ find (\(_, resolvedImp) -> resolvedImp == Just importedFile) moduleImports
graphEdges :: RawDependencyInformation -> [(NormalizedFilePath, NormalizedFilePath, [NormalizedFilePath])]
graphEdges :: IntMap (Either ModuleParseError ModuleImports) -> [(FilePathId, FilePathId, [FilePathId])]
graphEdges g =
map (\(k, v) -> (k, k, deps v)) $ MS.toList $ getRawDeps g
where deps :: Either e ModuleImports -> [NormalizedFilePath]
map (\(k, v) -> (FilePathId k, FilePathId k, deps v)) $ IntMap.toList g
where deps :: Either e ModuleImports -> [FilePathId]
deps (Left _) = []
deps (Right ModuleImports{moduleImports}) = mapMaybe snd moduleImports
@ -200,12 +262,20 @@ partitionSCC (AcyclicSCC x:rest) = first (x:) $ partitionSCC rest
partitionSCC [] = ([], [])
transitiveDeps :: DependencyInformation -> NormalizedFilePath -> Maybe TransitiveDependencies
transitiveDeps DependencyInformation{..} f = do
reachableVs <- Set.delete f . Set.fromList . map (fst3 . fromVertex) . reachable g <$> toVertex f
let transitiveModuleDeps = filter (\v -> v `Set.member` reachableVs) $ map (fst3 . fromVertex) vs
let transitivePkgDeps = Set.toList $ Set.unions $ map (\f -> MS.findWithDefault Set.empty f depPkgDeps) (f : transitiveModuleDeps)
transitiveDeps DependencyInformation{..} file = do
let !fileId = pathToId depPathIdMap file
reachableVs <-
IntSet.delete (getFilePathId fileId) .
IntSet.fromList . map (fst3 . fromVertex) .
reachable g <$> toVertex (getFilePathId fileId)
let transitiveModuleDepIds = filter (\v -> v `IntSet.member` reachableVs) $ map (fst3 . fromVertex) vs
let transitivePkgDeps =
Set.toList $ Set.unions $
map (\f -> IntMap.findWithDefault Set.empty f depPkgDeps) $
getFilePathId fileId : transitiveModuleDepIds
let transitiveModuleDeps = map (idToPath depPathIdMap . FilePathId) transitiveModuleDepIds
pure TransitiveDependencies {..}
where (g, fromVertex, toVertex) = graphFromEdges (map (\(f, fs) -> (f, f, Set.toList fs)) $ MS.toList depModuleDeps)
where (g, fromVertex, toVertex) = graphFromEdges (map (\(f, fs) -> (f, f, IntSet.toList fs)) $ IntMap.toList depModuleDeps)
vs = topSort g
data TransitiveDependencies = TransitiveDependencies