From cc97e1e1be4598b2f0911d143d637bc7c4669720 Mon Sep 17 00:00:00 2001 From: Moritz Kiefer Date: Wed, 7 Aug 2019 16:51:38 +0200 Subject: [PATCH] Speed up dependency information chasing (#2444) Comparing FilePaths is really slow so by mapping them to Ints, we can speed up dependency chasing significantly. We might want to switch to doing some kind of global hash consing of file paths at the Shake level but for now, this seems like a nice improvement while not being too invasive. This is roughly an ~8s speedup on my testcase. --- src/Development/IDE/Core/Rules.hs | 71 +++++--- .../IDE/Import/DependencyInformation.hs | 152 +++++++++++++----- 2 files changed, 159 insertions(+), 64 deletions(-) diff --git a/src/Development/IDE/Core/Rules.hs b/src/Development/IDE/Core/Rules.hs index 2535a143..347003a6 100644 --- a/src/Development/IDE/Core/Rules.hs +++ b/src/Development/IDE/Core/Rules.hs @@ -36,10 +36,13 @@ import Development.IDE.Import.FindImports import Development.IDE.Core.FileStore import Development.IDE.Types.Diagnostics import Development.IDE.Types.Location +import Data.Coerce import Data.Either.Extra import Data.Maybe import Data.Foldable -import qualified Data.Map.Strict as Map +import qualified Data.IntMap.Strict as IntMap +import qualified Data.IntSet as IntSet +import Data.List import qualified Data.Set as Set import qualified Data.Text as T import Development.IDE.GHC.Error @@ -203,40 +206,62 @@ getLocatedImportsRule = -- | Given a target file path, construct the raw dependency results by following -- imports recursively. -rawDependencyInformation :: NormalizedFilePath -> ExceptT [FileDiagnostic] Action RawDependencyInformation -rawDependencyInformation f = go (Set.singleton f) Map.empty - where go fs !modGraph = - case Set.minView fs of - Nothing -> pure $ RawDependencyInformation modGraph - Just (f, fs) -> do - importsOrErr <- lift $ use GetLocatedImports f - case importsOrErr of - Nothing -> - let modGraph' = Map.insert f (Left ModuleParseError) modGraph - in go fs modGraph' - Just (modImports, pkgImports) -> do - let newFiles = Set.fromList (mapMaybe snd modImports) Set.\\ Map.keysSet modGraph - modGraph' = Map.insert f (Right $ ModuleImports modImports pkgImports) modGraph - go (newFiles `Set.union` fs) modGraph' +rawDependencyInformation :: NormalizedFilePath -> Action RawDependencyInformation +rawDependencyInformation f = do + let (initialId, initialMap) = getPathId f emptyPathIdMap + go (IntSet.singleton $ getFilePathId initialId) + (RawDependencyInformation IntMap.empty initialMap) + where + go fs rawDepInfo = + case IntSet.minView fs of + -- Queue is empty + Nothing -> pure rawDepInfo + -- Pop f from the queue and process it + Just (f, fs) -> do + let fId = FilePathId f + importsOrErr <- use GetLocatedImports $ idToPath (rawPathIdMap rawDepInfo) fId + case importsOrErr of + Nothing -> + -- File doesn’t parse + let rawDepInfo' = insertImport fId (Left ModuleParseError) rawDepInfo + in go fs rawDepInfo' + Just (modImports, pkgImports) -> do + let f :: PathIdMap -> (a, Maybe NormalizedFilePath) -> (PathIdMap, (a, Maybe FilePathId)) + f pathMap (imp, mbPath) = case mbPath of + Nothing -> (pathMap, (imp, Nothing)) + Just path -> + let (pathId, pathMap') = getPathId path pathMap + in (pathMap', (imp, Just pathId)) + -- Convert paths in imports to ids and update the path map + let (pathIdMap, modImports') = mapAccumL f (rawPathIdMap rawDepInfo) modImports + -- Files that we haven’t seen before are added to the queue. + let newFiles = + IntSet.fromList (coerce $ mapMaybe snd modImports') + IntSet.\\ IntMap.keysSet (rawImports rawDepInfo) + let rawDepInfo' = insertImport fId (Right $ ModuleImports modImports' pkgImports) rawDepInfo + go (newFiles `IntSet.union` fs) (rawDepInfo' { rawPathIdMap = pathIdMap }) getDependencyInformationRule :: Rules () getDependencyInformationRule = - define $ \GetDependencyInformation file -> fmap toIdeResult $ runExceptT $ do + define $ \GetDependencyInformation file -> do rawDepInfo <- rawDependencyInformation file - pure $ processDependencyInformation rawDepInfo + pure ([], Just $ processDependencyInformation rawDepInfo) reportImportCyclesRule :: Rules () reportImportCyclesRule = define $ \ReportImportCycles file -> fmap (\errs -> if null errs then ([], Just ()) else (errs, Nothing)) $ do DependencyInformation{..} <- use_ GetDependencyInformation file - case Map.lookup file depErrorNodes of + let fileId = pathToId depPathIdMap file + case IntMap.lookup (getFilePathId fileId) depErrorNodes of Nothing -> pure [] Just errs -> do - let cycles = mapMaybe (cycleErrorInFile file) (toList errs) + let cycles = mapMaybe (cycleErrorInFile fileId) (toList errs) -- Convert cycles of files into cycles of module names forM cycles $ \(imp, files) -> do - modNames <- mapM getModuleName files - pure $ toDiag imp modNames + modNames <- forM files $ \fileId -> do + let file = idToPath depPathIdMap fileId + getModuleName file + pure $ toDiag imp $ sort modNames where cycleErrorInFile f (PartOfCycle imp fs) | f `elem` fs = Just (imp, fs) cycleErrorInFile _ _ = Nothing @@ -261,7 +286,7 @@ getDependenciesRule :: Rules () getDependenciesRule = define $ \GetDependencies file -> do depInfo@DependencyInformation{..} <- use_ GetDependencyInformation file - let allFiles = Map.keys depModuleDeps <> Map.keys depErrorNodes + let allFiles = reachableModules depInfo _ <- uses_ ReportImportCycles allFiles return ([], transitiveDeps depInfo file) diff --git a/src/Development/IDE/Import/DependencyInformation.hs b/src/Development/IDE/Import/DependencyInformation.hs index 224811d6..c3781e9f 100644 --- a/src/Development/IDE/Import/DependencyInformation.hs +++ b/src/Development/IDE/Import/DependencyInformation.hs @@ -8,21 +8,36 @@ module Development.IDE.Import.DependencyInformation , NodeError(..) , ModuleParseError(..) , TransitiveDependencies(..) + , FilePathId(..) + + , PathIdMap + , emptyPathIdMap + , getPathId + , insertImport + , pathToId + , idToPath + , reachableModules + , processDependencyInformation , transitiveDeps ) where import Control.DeepSeq import Data.Bifunctor +import Data.Coerce import Data.List import Development.IDE.GHC.Orphans() import Data.Either import Data.Graph import Data.List.NonEmpty (NonEmpty(..), nonEmpty) import qualified Data.List.NonEmpty as NonEmpty +import Data.IntMap (IntMap) +import qualified Data.IntMap.Strict as IntMap +import qualified Data.IntMap.Lazy as IntMapLazy +import Data.IntSet (IntSet) +import qualified Data.IntSet as IntSet import Data.Map (Map) import qualified Data.Map.Strict as MS -import qualified Data.Map.Lazy as ML import Data.Maybe import Data.Set (Set) import qualified Data.Set as Set @@ -37,32 +52,78 @@ import Module -- | The imports for a given module. data ModuleImports = ModuleImports - { moduleImports :: ![(Located ModuleName, Maybe NormalizedFilePath)] + { moduleImports :: ![(Located ModuleName, Maybe FilePathId)] -- ^ Imports of a module in the current package and the file path of -- that module on disk (if we found it) , packageImports :: !(Set InstalledUnitId) -- ^ Transitive package dependencies unioned for all imports. } +-- | For processing dependency information, we need lots of maps and sets +-- of filepaths. Comparing Strings is really slow, so we work with IntMap/IntSet +-- instead and only convert at the edges +-- and +newtype FilePathId = FilePathId { getFilePathId :: Int } + deriving (Show, NFData, Eq, Ord) + +data PathIdMap = PathIdMap + { idToPathMap :: !(IntMap NormalizedFilePath) + , pathToIdMap :: !(Map NormalizedFilePath FilePathId) + } + deriving (Show, Generic) + +instance NFData PathIdMap + +emptyPathIdMap :: PathIdMap +emptyPathIdMap = PathIdMap IntMap.empty MS.empty + +getPathId :: NormalizedFilePath -> PathIdMap -> (FilePathId, PathIdMap) +getPathId path m@PathIdMap{..} = + case MS.lookup path pathToIdMap of + Nothing -> + let !newId = FilePathId $ MS.size pathToIdMap + in (newId, insertPathId path newId m) + Just id -> (id, m) + +insertPathId :: NormalizedFilePath -> FilePathId -> PathIdMap -> PathIdMap +insertPathId path id PathIdMap{..} = + PathIdMap (IntMap.insert (getFilePathId id) path idToPathMap) (MS.insert path id pathToIdMap) + +insertImport :: FilePathId -> Either ModuleParseError ModuleImports -> RawDependencyInformation -> RawDependencyInformation +insertImport (FilePathId k) v rawDepInfo = rawDepInfo { rawImports = IntMap.insert k v (rawImports rawDepInfo) } + +pathToId :: PathIdMap -> NormalizedFilePath -> FilePathId +pathToId PathIdMap{pathToIdMap} path = pathToIdMap MS.! path + +idToPath :: PathIdMap -> FilePathId -> NormalizedFilePath +idToPath PathIdMap{idToPathMap} (FilePathId id) = idToPathMap IntMap.! id + -- | Unprocessed results that we find by following imports recursively. -newtype RawDependencyInformation = RawDependencyInformation - { getRawDeps :: Map NormalizedFilePath (Either ModuleParseError ModuleImports) +data RawDependencyInformation = RawDependencyInformation + { rawImports :: !(IntMap (Either ModuleParseError ModuleImports)) + , rawPathIdMap :: !PathIdMap } -pkgDependencies :: RawDependencyInformation -> Map NormalizedFilePath (Set InstalledUnitId) -pkgDependencies (RawDependencyInformation m) = MS.map (either (const Set.empty) packageImports) m +pkgDependencies :: RawDependencyInformation -> IntMap (Set InstalledUnitId) +pkgDependencies RawDependencyInformation{..} = + IntMap.map (either (const Set.empty) packageImports) rawImports data DependencyInformation = DependencyInformation - { depErrorNodes :: Map NormalizedFilePath (NonEmpty NodeError) + { depErrorNodes :: !(IntMap (NonEmpty NodeError)) -- ^ Nodes that cannot be processed correctly. - , depModuleDeps :: Map NormalizedFilePath (Set NormalizedFilePath) + , depModuleDeps :: !(IntMap IntSet) -- ^ For a non-error node, this contains the set of module immediate dependencies -- in the same package. - , depPkgDeps :: Map NormalizedFilePath (Set InstalledUnitId) + , depPkgDeps :: !(IntMap (Set InstalledUnitId)) -- ^ For a non-error node, this contains the set of immediate pkg deps. + , depPathIdMap :: !PathIdMap } deriving (Show, Generic) +reachableModules :: DependencyInformation -> [NormalizedFilePath] +reachableModules DependencyInformation{..} = + map (idToPath depPathIdMap . FilePathId) $ IntMap.keys depErrorNodes <> IntMap.keys depModuleDeps + instance NFData DependencyInformation -- | This does not contain the actual parse error as that is already reported by GetParsedModule. @@ -79,7 +140,7 @@ instance NFData LocateError -- | An error attached to a node in the dependency graph. data NodeError - = PartOfCycle (Located ModuleName) [NormalizedFilePath] + = PartOfCycle (Located ModuleName) [FilePathId] -- ^ This module is part of an import cycle. The module name corresponds -- to the import that enters the cycle starting from this module. -- The list of filepaths represents the elements @@ -104,12 +165,12 @@ instance NFData NodeError where -- `ErrorNode`. Otherwise it is a `SuccessNode`. data NodeResult = ErrorNode (NonEmpty NodeError) - | SuccessNode [(Located ModuleName, NormalizedFilePath)] + | SuccessNode [(Located ModuleName, FilePathId)] deriving Show partitionNodeResults :: [(a, NodeResult)] - -> ([(a, NonEmpty NodeError)], [(a, [(Located ModuleName, NormalizedFilePath)])]) + -> ([(a, NonEmpty NodeError)], [(a, [(Located ModuleName, FilePathId)])]) partitionNodeResults = partitionEithers . map f where f (a, ErrorNode errs) = Left (a, errs) f (a, SuccessNode imps) = Right (a, imps) @@ -121,40 +182,41 @@ instance Semigroup NodeResult where SuccessNode a <> SuccessNode _ = SuccessNode a processDependencyInformation :: RawDependencyInformation -> DependencyInformation -processDependencyInformation rawResults = +processDependencyInformation rawDepInfo@RawDependencyInformation{..} = DependencyInformation - { depErrorNodes = MS.fromList errorNodes + { depErrorNodes = IntMap.fromList errorNodes , depModuleDeps = moduleDeps - , depPkgDeps = pkgDependencies rawResults + , depPkgDeps = pkgDependencies rawDepInfo + , depPathIdMap = rawPathIdMap } - where resultGraph = buildResultGraph rawResults - (errorNodes, successNodes) = partitionNodeResults $ MS.toList resultGraph - successEdges :: [(NormalizedFilePath, NormalizedFilePath, [NormalizedFilePath])] + where resultGraph = buildResultGraph rawImports + (errorNodes, successNodes) = partitionNodeResults $ IntMap.toList resultGraph + successEdges :: [(FilePathId, FilePathId, [FilePathId])] successEdges = - map (\(file, imports) -> (file, file, map snd imports)) successNodes + map (\(file, imports) -> (FilePathId file, FilePathId file, map snd imports)) successNodes moduleDeps = - MS.fromList $ map (\(_, v, vs) -> (v, Set.fromList vs)) successEdges + IntMap.fromList $ map (\(_, FilePathId v, vs) -> (v, IntSet.fromList $ coerce vs)) successEdges -- | Given a dependency graph, buildResultGraph detects and propagates errors in that graph as follows: -- 1. Mark each node that is part of an import cycle as an error node. -- 2. Mark each node that has a parse error as an error node. -- 3. Mark each node whose immediate children could not be located as an error. -- 4. Recursively propagate errors to parents if they are not already error nodes. -buildResultGraph :: RawDependencyInformation -> Map NormalizedFilePath NodeResult +buildResultGraph :: IntMap (Either ModuleParseError ModuleImports) -> IntMap NodeResult buildResultGraph g = propagatedErrors where sccs = stronglyConnComp (graphEdges g) (_, cycles) = partitionSCC sccs - cycleErrors :: Map NormalizedFilePath NodeResult - cycleErrors = MS.unionsWith (<>) $ map errorsForCycle cycles - errorsForCycle :: [NormalizedFilePath] -> Map NormalizedFilePath NodeResult + cycleErrors :: IntMap NodeResult + cycleErrors = IntMap.unionsWith (<>) $ map errorsForCycle cycles + errorsForCycle :: [FilePathId] -> IntMap NodeResult errorsForCycle files = - MS.fromListWith (<>) (concatMap (cycleErrorsForFile files) files) - cycleErrorsForFile :: [NormalizedFilePath] -> NormalizedFilePath -> [(NormalizedFilePath,NodeResult)] + IntMap.fromListWith (<>) $ coerce $ concatMap (cycleErrorsForFile files) files + cycleErrorsForFile :: [FilePathId] -> FilePathId -> [(FilePathId,NodeResult)] cycleErrorsForFile cycle f = let entryPoints = mapMaybe (findImport f) cycle in map (\imp -> (f, ErrorNode (PartOfCycle imp cycle :| []))) entryPoints - otherErrors = MS.map otherErrorsForFile (getRawDeps g) + otherErrors = IntMap.map otherErrorsForFile g otherErrorsForFile :: Either ModuleParseError ModuleImports -> NodeResult otherErrorsForFile (Left err) = ErrorNode (ParseError err :| []) otherErrorsForFile (Right ModuleImports{moduleImports}) = @@ -165,32 +227,32 @@ buildResultGraph g = propagatedErrors Nothing -> SuccessNode imports' Just errs' -> ErrorNode (NonEmpty.map FailedToLocateImport errs') - unpropagatedErrors = MS.unionWith (<>) cycleErrors otherErrors + unpropagatedErrors = IntMap.unionWith (<>) cycleErrors otherErrors -- The recursion here is fine since we use a lazy map and -- we only recurse on SuccessNodes. In particular, we do not recurse -- on nodes that are part of a cycle as they are already marked as -- error nodes. propagatedErrors = - ML.map propagate unpropagatedErrors + IntMapLazy.map propagate unpropagatedErrors propagate :: NodeResult -> NodeResult propagate n@(ErrorNode _) = n propagate n@(SuccessNode imps) = - let results = map (\(imp, dep) -> (imp, propagatedErrors MS.! dep)) imps + let results = map (\(imp, FilePathId dep) -> (imp, propagatedErrors IntMap.! dep)) imps (errs, _) = partitionNodeResults results in case nonEmpty errs of Nothing -> n Just errs' -> ErrorNode (NonEmpty.map (ParentOfErrorNode . fst) errs') - findImport :: NormalizedFilePath -> NormalizedFilePath -> Maybe (Located ModuleName) - findImport file importedFile = - case getRawDeps g MS.! file of + findImport :: FilePathId -> FilePathId -> Maybe (Located ModuleName) + findImport (FilePathId file) importedFile = + case g IntMap.! file of Left _ -> error "Tried to call findImport on a module with a parse error" Right ModuleImports{moduleImports} -> fmap fst $ find (\(_, resolvedImp) -> resolvedImp == Just importedFile) moduleImports -graphEdges :: RawDependencyInformation -> [(NormalizedFilePath, NormalizedFilePath, [NormalizedFilePath])] +graphEdges :: IntMap (Either ModuleParseError ModuleImports) -> [(FilePathId, FilePathId, [FilePathId])] graphEdges g = - map (\(k, v) -> (k, k, deps v)) $ MS.toList $ getRawDeps g - where deps :: Either e ModuleImports -> [NormalizedFilePath] + map (\(k, v) -> (FilePathId k, FilePathId k, deps v)) $ IntMap.toList g + where deps :: Either e ModuleImports -> [FilePathId] deps (Left _) = [] deps (Right ModuleImports{moduleImports}) = mapMaybe snd moduleImports @@ -200,12 +262,20 @@ partitionSCC (AcyclicSCC x:rest) = first (x:) $ partitionSCC rest partitionSCC [] = ([], []) transitiveDeps :: DependencyInformation -> NormalizedFilePath -> Maybe TransitiveDependencies -transitiveDeps DependencyInformation{..} f = do - reachableVs <- Set.delete f . Set.fromList . map (fst3 . fromVertex) . reachable g <$> toVertex f - let transitiveModuleDeps = filter (\v -> v `Set.member` reachableVs) $ map (fst3 . fromVertex) vs - let transitivePkgDeps = Set.toList $ Set.unions $ map (\f -> MS.findWithDefault Set.empty f depPkgDeps) (f : transitiveModuleDeps) +transitiveDeps DependencyInformation{..} file = do + let !fileId = pathToId depPathIdMap file + reachableVs <- + IntSet.delete (getFilePathId fileId) . + IntSet.fromList . map (fst3 . fromVertex) . + reachable g <$> toVertex (getFilePathId fileId) + let transitiveModuleDepIds = filter (\v -> v `IntSet.member` reachableVs) $ map (fst3 . fromVertex) vs + let transitivePkgDeps = + Set.toList $ Set.unions $ + map (\f -> IntMap.findWithDefault Set.empty f depPkgDeps) $ + getFilePathId fileId : transitiveModuleDepIds + let transitiveModuleDeps = map (idToPath depPathIdMap . FilePathId) transitiveModuleDepIds pure TransitiveDependencies {..} - where (g, fromVertex, toVertex) = graphFromEdges (map (\(f, fs) -> (f, f, Set.toList fs)) $ MS.toList depModuleDeps) + where (g, fromVertex, toVertex) = graphFromEdges (map (\(f, fs) -> (f, f, IntSet.toList fs)) $ IntMap.toList depModuleDeps) vs = topSort g data TransitiveDependencies = TransitiveDependencies