mirror of
https://github.com/haskell/ghcide.git
synced 2024-12-16 22:52:41 +03:00
Speed up dependency information chasing (#2444)
Comparing FilePaths is really slow so by mapping them to Ints, we can speed up dependency chasing significantly. We might want to switch to doing some kind of global hash consing of file paths at the Shake level but for now, this seems like a nice improvement while not being too invasive. This is roughly an ~8s speedup on my testcase.
This commit is contained in:
parent
6e0a519178
commit
cc97e1e1be
@ -36,10 +36,13 @@ import Development.IDE.Import.FindImports
|
||||
import Development.IDE.Core.FileStore
|
||||
import Development.IDE.Types.Diagnostics
|
||||
import Development.IDE.Types.Location
|
||||
import Data.Coerce
|
||||
import Data.Either.Extra
|
||||
import Data.Maybe
|
||||
import Data.Foldable
|
||||
import qualified Data.Map.Strict as Map
|
||||
import qualified Data.IntMap.Strict as IntMap
|
||||
import qualified Data.IntSet as IntSet
|
||||
import Data.List
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Text as T
|
||||
import Development.IDE.GHC.Error
|
||||
@ -203,40 +206,62 @@ getLocatedImportsRule =
|
||||
|
||||
-- | Given a target file path, construct the raw dependency results by following
|
||||
-- imports recursively.
|
||||
rawDependencyInformation :: NormalizedFilePath -> ExceptT [FileDiagnostic] Action RawDependencyInformation
|
||||
rawDependencyInformation f = go (Set.singleton f) Map.empty
|
||||
where go fs !modGraph =
|
||||
case Set.minView fs of
|
||||
Nothing -> pure $ RawDependencyInformation modGraph
|
||||
rawDependencyInformation :: NormalizedFilePath -> Action RawDependencyInformation
|
||||
rawDependencyInformation f = do
|
||||
let (initialId, initialMap) = getPathId f emptyPathIdMap
|
||||
go (IntSet.singleton $ getFilePathId initialId)
|
||||
(RawDependencyInformation IntMap.empty initialMap)
|
||||
where
|
||||
go fs rawDepInfo =
|
||||
case IntSet.minView fs of
|
||||
-- Queue is empty
|
||||
Nothing -> pure rawDepInfo
|
||||
-- Pop f from the queue and process it
|
||||
Just (f, fs) -> do
|
||||
importsOrErr <- lift $ use GetLocatedImports f
|
||||
let fId = FilePathId f
|
||||
importsOrErr <- use GetLocatedImports $ idToPath (rawPathIdMap rawDepInfo) fId
|
||||
case importsOrErr of
|
||||
Nothing ->
|
||||
let modGraph' = Map.insert f (Left ModuleParseError) modGraph
|
||||
in go fs modGraph'
|
||||
-- File doesn’t parse
|
||||
let rawDepInfo' = insertImport fId (Left ModuleParseError) rawDepInfo
|
||||
in go fs rawDepInfo'
|
||||
Just (modImports, pkgImports) -> do
|
||||
let newFiles = Set.fromList (mapMaybe snd modImports) Set.\\ Map.keysSet modGraph
|
||||
modGraph' = Map.insert f (Right $ ModuleImports modImports pkgImports) modGraph
|
||||
go (newFiles `Set.union` fs) modGraph'
|
||||
let f :: PathIdMap -> (a, Maybe NormalizedFilePath) -> (PathIdMap, (a, Maybe FilePathId))
|
||||
f pathMap (imp, mbPath) = case mbPath of
|
||||
Nothing -> (pathMap, (imp, Nothing))
|
||||
Just path ->
|
||||
let (pathId, pathMap') = getPathId path pathMap
|
||||
in (pathMap', (imp, Just pathId))
|
||||
-- Convert paths in imports to ids and update the path map
|
||||
let (pathIdMap, modImports') = mapAccumL f (rawPathIdMap rawDepInfo) modImports
|
||||
-- Files that we haven’t seen before are added to the queue.
|
||||
let newFiles =
|
||||
IntSet.fromList (coerce $ mapMaybe snd modImports')
|
||||
IntSet.\\ IntMap.keysSet (rawImports rawDepInfo)
|
||||
let rawDepInfo' = insertImport fId (Right $ ModuleImports modImports' pkgImports) rawDepInfo
|
||||
go (newFiles `IntSet.union` fs) (rawDepInfo' { rawPathIdMap = pathIdMap })
|
||||
|
||||
getDependencyInformationRule :: Rules ()
|
||||
getDependencyInformationRule =
|
||||
define $ \GetDependencyInformation file -> fmap toIdeResult $ runExceptT $ do
|
||||
define $ \GetDependencyInformation file -> do
|
||||
rawDepInfo <- rawDependencyInformation file
|
||||
pure $ processDependencyInformation rawDepInfo
|
||||
pure ([], Just $ processDependencyInformation rawDepInfo)
|
||||
|
||||
reportImportCyclesRule :: Rules ()
|
||||
reportImportCyclesRule =
|
||||
define $ \ReportImportCycles file -> fmap (\errs -> if null errs then ([], Just ()) else (errs, Nothing)) $ do
|
||||
DependencyInformation{..} <- use_ GetDependencyInformation file
|
||||
case Map.lookup file depErrorNodes of
|
||||
let fileId = pathToId depPathIdMap file
|
||||
case IntMap.lookup (getFilePathId fileId) depErrorNodes of
|
||||
Nothing -> pure []
|
||||
Just errs -> do
|
||||
let cycles = mapMaybe (cycleErrorInFile file) (toList errs)
|
||||
let cycles = mapMaybe (cycleErrorInFile fileId) (toList errs)
|
||||
-- Convert cycles of files into cycles of module names
|
||||
forM cycles $ \(imp, files) -> do
|
||||
modNames <- mapM getModuleName files
|
||||
pure $ toDiag imp modNames
|
||||
modNames <- forM files $ \fileId -> do
|
||||
let file = idToPath depPathIdMap fileId
|
||||
getModuleName file
|
||||
pure $ toDiag imp $ sort modNames
|
||||
where cycleErrorInFile f (PartOfCycle imp fs)
|
||||
| f `elem` fs = Just (imp, fs)
|
||||
cycleErrorInFile _ _ = Nothing
|
||||
@ -261,7 +286,7 @@ getDependenciesRule :: Rules ()
|
||||
getDependenciesRule =
|
||||
define $ \GetDependencies file -> do
|
||||
depInfo@DependencyInformation{..} <- use_ GetDependencyInformation file
|
||||
let allFiles = Map.keys depModuleDeps <> Map.keys depErrorNodes
|
||||
let allFiles = reachableModules depInfo
|
||||
_ <- uses_ ReportImportCycles allFiles
|
||||
return ([], transitiveDeps depInfo file)
|
||||
|
||||
|
@ -8,21 +8,36 @@ module Development.IDE.Import.DependencyInformation
|
||||
, NodeError(..)
|
||||
, ModuleParseError(..)
|
||||
, TransitiveDependencies(..)
|
||||
, FilePathId(..)
|
||||
|
||||
, PathIdMap
|
||||
, emptyPathIdMap
|
||||
, getPathId
|
||||
, insertImport
|
||||
, pathToId
|
||||
, idToPath
|
||||
, reachableModules
|
||||
|
||||
, processDependencyInformation
|
||||
, transitiveDeps
|
||||
) where
|
||||
|
||||
import Control.DeepSeq
|
||||
import Data.Bifunctor
|
||||
import Data.Coerce
|
||||
import Data.List
|
||||
import Development.IDE.GHC.Orphans()
|
||||
import Data.Either
|
||||
import Data.Graph
|
||||
import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
|
||||
import qualified Data.List.NonEmpty as NonEmpty
|
||||
import Data.IntMap (IntMap)
|
||||
import qualified Data.IntMap.Strict as IntMap
|
||||
import qualified Data.IntMap.Lazy as IntMapLazy
|
||||
import Data.IntSet (IntSet)
|
||||
import qualified Data.IntSet as IntSet
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map.Strict as MS
|
||||
import qualified Data.Map.Lazy as ML
|
||||
import Data.Maybe
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
@ -37,32 +52,78 @@ import Module
|
||||
|
||||
-- | The imports for a given module.
|
||||
data ModuleImports = ModuleImports
|
||||
{ moduleImports :: ![(Located ModuleName, Maybe NormalizedFilePath)]
|
||||
{ moduleImports :: ![(Located ModuleName, Maybe FilePathId)]
|
||||
-- ^ Imports of a module in the current package and the file path of
|
||||
-- that module on disk (if we found it)
|
||||
, packageImports :: !(Set InstalledUnitId)
|
||||
-- ^ Transitive package dependencies unioned for all imports.
|
||||
}
|
||||
|
||||
-- | For processing dependency information, we need lots of maps and sets
|
||||
-- of filepaths. Comparing Strings is really slow, so we work with IntMap/IntSet
|
||||
-- instead and only convert at the edges
|
||||
-- and
|
||||
newtype FilePathId = FilePathId { getFilePathId :: Int }
|
||||
deriving (Show, NFData, Eq, Ord)
|
||||
|
||||
data PathIdMap = PathIdMap
|
||||
{ idToPathMap :: !(IntMap NormalizedFilePath)
|
||||
, pathToIdMap :: !(Map NormalizedFilePath FilePathId)
|
||||
}
|
||||
deriving (Show, Generic)
|
||||
|
||||
instance NFData PathIdMap
|
||||
|
||||
emptyPathIdMap :: PathIdMap
|
||||
emptyPathIdMap = PathIdMap IntMap.empty MS.empty
|
||||
|
||||
getPathId :: NormalizedFilePath -> PathIdMap -> (FilePathId, PathIdMap)
|
||||
getPathId path m@PathIdMap{..} =
|
||||
case MS.lookup path pathToIdMap of
|
||||
Nothing ->
|
||||
let !newId = FilePathId $ MS.size pathToIdMap
|
||||
in (newId, insertPathId path newId m)
|
||||
Just id -> (id, m)
|
||||
|
||||
insertPathId :: NormalizedFilePath -> FilePathId -> PathIdMap -> PathIdMap
|
||||
insertPathId path id PathIdMap{..} =
|
||||
PathIdMap (IntMap.insert (getFilePathId id) path idToPathMap) (MS.insert path id pathToIdMap)
|
||||
|
||||
insertImport :: FilePathId -> Either ModuleParseError ModuleImports -> RawDependencyInformation -> RawDependencyInformation
|
||||
insertImport (FilePathId k) v rawDepInfo = rawDepInfo { rawImports = IntMap.insert k v (rawImports rawDepInfo) }
|
||||
|
||||
pathToId :: PathIdMap -> NormalizedFilePath -> FilePathId
|
||||
pathToId PathIdMap{pathToIdMap} path = pathToIdMap MS.! path
|
||||
|
||||
idToPath :: PathIdMap -> FilePathId -> NormalizedFilePath
|
||||
idToPath PathIdMap{idToPathMap} (FilePathId id) = idToPathMap IntMap.! id
|
||||
|
||||
-- | Unprocessed results that we find by following imports recursively.
|
||||
newtype RawDependencyInformation = RawDependencyInformation
|
||||
{ getRawDeps :: Map NormalizedFilePath (Either ModuleParseError ModuleImports)
|
||||
data RawDependencyInformation = RawDependencyInformation
|
||||
{ rawImports :: !(IntMap (Either ModuleParseError ModuleImports))
|
||||
, rawPathIdMap :: !PathIdMap
|
||||
}
|
||||
|
||||
pkgDependencies :: RawDependencyInformation -> Map NormalizedFilePath (Set InstalledUnitId)
|
||||
pkgDependencies (RawDependencyInformation m) = MS.map (either (const Set.empty) packageImports) m
|
||||
pkgDependencies :: RawDependencyInformation -> IntMap (Set InstalledUnitId)
|
||||
pkgDependencies RawDependencyInformation{..} =
|
||||
IntMap.map (either (const Set.empty) packageImports) rawImports
|
||||
|
||||
data DependencyInformation =
|
||||
DependencyInformation
|
||||
{ depErrorNodes :: Map NormalizedFilePath (NonEmpty NodeError)
|
||||
{ depErrorNodes :: !(IntMap (NonEmpty NodeError))
|
||||
-- ^ Nodes that cannot be processed correctly.
|
||||
, depModuleDeps :: Map NormalizedFilePath (Set NormalizedFilePath)
|
||||
, depModuleDeps :: !(IntMap IntSet)
|
||||
-- ^ For a non-error node, this contains the set of module immediate dependencies
|
||||
-- in the same package.
|
||||
, depPkgDeps :: Map NormalizedFilePath (Set InstalledUnitId)
|
||||
, depPkgDeps :: !(IntMap (Set InstalledUnitId))
|
||||
-- ^ For a non-error node, this contains the set of immediate pkg deps.
|
||||
, depPathIdMap :: !PathIdMap
|
||||
} deriving (Show, Generic)
|
||||
|
||||
reachableModules :: DependencyInformation -> [NormalizedFilePath]
|
||||
reachableModules DependencyInformation{..} =
|
||||
map (idToPath depPathIdMap . FilePathId) $ IntMap.keys depErrorNodes <> IntMap.keys depModuleDeps
|
||||
|
||||
instance NFData DependencyInformation
|
||||
|
||||
-- | This does not contain the actual parse error as that is already reported by GetParsedModule.
|
||||
@ -79,7 +140,7 @@ instance NFData LocateError
|
||||
|
||||
-- | An error attached to a node in the dependency graph.
|
||||
data NodeError
|
||||
= PartOfCycle (Located ModuleName) [NormalizedFilePath]
|
||||
= PartOfCycle (Located ModuleName) [FilePathId]
|
||||
-- ^ This module is part of an import cycle. The module name corresponds
|
||||
-- to the import that enters the cycle starting from this module.
|
||||
-- The list of filepaths represents the elements
|
||||
@ -104,12 +165,12 @@ instance NFData NodeError where
|
||||
-- `ErrorNode`. Otherwise it is a `SuccessNode`.
|
||||
data NodeResult
|
||||
= ErrorNode (NonEmpty NodeError)
|
||||
| SuccessNode [(Located ModuleName, NormalizedFilePath)]
|
||||
| SuccessNode [(Located ModuleName, FilePathId)]
|
||||
deriving Show
|
||||
|
||||
partitionNodeResults
|
||||
:: [(a, NodeResult)]
|
||||
-> ([(a, NonEmpty NodeError)], [(a, [(Located ModuleName, NormalizedFilePath)])])
|
||||
-> ([(a, NonEmpty NodeError)], [(a, [(Located ModuleName, FilePathId)])])
|
||||
partitionNodeResults = partitionEithers . map f
|
||||
where f (a, ErrorNode errs) = Left (a, errs)
|
||||
f (a, SuccessNode imps) = Right (a, imps)
|
||||
@ -121,40 +182,41 @@ instance Semigroup NodeResult where
|
||||
SuccessNode a <> SuccessNode _ = SuccessNode a
|
||||
|
||||
processDependencyInformation :: RawDependencyInformation -> DependencyInformation
|
||||
processDependencyInformation rawResults =
|
||||
processDependencyInformation rawDepInfo@RawDependencyInformation{..} =
|
||||
DependencyInformation
|
||||
{ depErrorNodes = MS.fromList errorNodes
|
||||
{ depErrorNodes = IntMap.fromList errorNodes
|
||||
, depModuleDeps = moduleDeps
|
||||
, depPkgDeps = pkgDependencies rawResults
|
||||
, depPkgDeps = pkgDependencies rawDepInfo
|
||||
, depPathIdMap = rawPathIdMap
|
||||
}
|
||||
where resultGraph = buildResultGraph rawResults
|
||||
(errorNodes, successNodes) = partitionNodeResults $ MS.toList resultGraph
|
||||
successEdges :: [(NormalizedFilePath, NormalizedFilePath, [NormalizedFilePath])]
|
||||
where resultGraph = buildResultGraph rawImports
|
||||
(errorNodes, successNodes) = partitionNodeResults $ IntMap.toList resultGraph
|
||||
successEdges :: [(FilePathId, FilePathId, [FilePathId])]
|
||||
successEdges =
|
||||
map (\(file, imports) -> (file, file, map snd imports)) successNodes
|
||||
map (\(file, imports) -> (FilePathId file, FilePathId file, map snd imports)) successNodes
|
||||
moduleDeps =
|
||||
MS.fromList $ map (\(_, v, vs) -> (v, Set.fromList vs)) successEdges
|
||||
IntMap.fromList $ map (\(_, FilePathId v, vs) -> (v, IntSet.fromList $ coerce vs)) successEdges
|
||||
|
||||
-- | Given a dependency graph, buildResultGraph detects and propagates errors in that graph as follows:
|
||||
-- 1. Mark each node that is part of an import cycle as an error node.
|
||||
-- 2. Mark each node that has a parse error as an error node.
|
||||
-- 3. Mark each node whose immediate children could not be located as an error.
|
||||
-- 4. Recursively propagate errors to parents if they are not already error nodes.
|
||||
buildResultGraph :: RawDependencyInformation -> Map NormalizedFilePath NodeResult
|
||||
buildResultGraph :: IntMap (Either ModuleParseError ModuleImports) -> IntMap NodeResult
|
||||
buildResultGraph g = propagatedErrors
|
||||
where
|
||||
sccs = stronglyConnComp (graphEdges g)
|
||||
(_, cycles) = partitionSCC sccs
|
||||
cycleErrors :: Map NormalizedFilePath NodeResult
|
||||
cycleErrors = MS.unionsWith (<>) $ map errorsForCycle cycles
|
||||
errorsForCycle :: [NormalizedFilePath] -> Map NormalizedFilePath NodeResult
|
||||
cycleErrors :: IntMap NodeResult
|
||||
cycleErrors = IntMap.unionsWith (<>) $ map errorsForCycle cycles
|
||||
errorsForCycle :: [FilePathId] -> IntMap NodeResult
|
||||
errorsForCycle files =
|
||||
MS.fromListWith (<>) (concatMap (cycleErrorsForFile files) files)
|
||||
cycleErrorsForFile :: [NormalizedFilePath] -> NormalizedFilePath -> [(NormalizedFilePath,NodeResult)]
|
||||
IntMap.fromListWith (<>) $ coerce $ concatMap (cycleErrorsForFile files) files
|
||||
cycleErrorsForFile :: [FilePathId] -> FilePathId -> [(FilePathId,NodeResult)]
|
||||
cycleErrorsForFile cycle f =
|
||||
let entryPoints = mapMaybe (findImport f) cycle
|
||||
in map (\imp -> (f, ErrorNode (PartOfCycle imp cycle :| []))) entryPoints
|
||||
otherErrors = MS.map otherErrorsForFile (getRawDeps g)
|
||||
otherErrors = IntMap.map otherErrorsForFile g
|
||||
otherErrorsForFile :: Either ModuleParseError ModuleImports -> NodeResult
|
||||
otherErrorsForFile (Left err) = ErrorNode (ParseError err :| [])
|
||||
otherErrorsForFile (Right ModuleImports{moduleImports}) =
|
||||
@ -165,32 +227,32 @@ buildResultGraph g = propagatedErrors
|
||||
Nothing -> SuccessNode imports'
|
||||
Just errs' -> ErrorNode (NonEmpty.map FailedToLocateImport errs')
|
||||
|
||||
unpropagatedErrors = MS.unionWith (<>) cycleErrors otherErrors
|
||||
unpropagatedErrors = IntMap.unionWith (<>) cycleErrors otherErrors
|
||||
-- The recursion here is fine since we use a lazy map and
|
||||
-- we only recurse on SuccessNodes. In particular, we do not recurse
|
||||
-- on nodes that are part of a cycle as they are already marked as
|
||||
-- error nodes.
|
||||
propagatedErrors =
|
||||
ML.map propagate unpropagatedErrors
|
||||
IntMapLazy.map propagate unpropagatedErrors
|
||||
propagate :: NodeResult -> NodeResult
|
||||
propagate n@(ErrorNode _) = n
|
||||
propagate n@(SuccessNode imps) =
|
||||
let results = map (\(imp, dep) -> (imp, propagatedErrors MS.! dep)) imps
|
||||
let results = map (\(imp, FilePathId dep) -> (imp, propagatedErrors IntMap.! dep)) imps
|
||||
(errs, _) = partitionNodeResults results
|
||||
in case nonEmpty errs of
|
||||
Nothing -> n
|
||||
Just errs' -> ErrorNode (NonEmpty.map (ParentOfErrorNode . fst) errs')
|
||||
findImport :: NormalizedFilePath -> NormalizedFilePath -> Maybe (Located ModuleName)
|
||||
findImport file importedFile =
|
||||
case getRawDeps g MS.! file of
|
||||
findImport :: FilePathId -> FilePathId -> Maybe (Located ModuleName)
|
||||
findImport (FilePathId file) importedFile =
|
||||
case g IntMap.! file of
|
||||
Left _ -> error "Tried to call findImport on a module with a parse error"
|
||||
Right ModuleImports{moduleImports} ->
|
||||
fmap fst $ find (\(_, resolvedImp) -> resolvedImp == Just importedFile) moduleImports
|
||||
|
||||
graphEdges :: RawDependencyInformation -> [(NormalizedFilePath, NormalizedFilePath, [NormalizedFilePath])]
|
||||
graphEdges :: IntMap (Either ModuleParseError ModuleImports) -> [(FilePathId, FilePathId, [FilePathId])]
|
||||
graphEdges g =
|
||||
map (\(k, v) -> (k, k, deps v)) $ MS.toList $ getRawDeps g
|
||||
where deps :: Either e ModuleImports -> [NormalizedFilePath]
|
||||
map (\(k, v) -> (FilePathId k, FilePathId k, deps v)) $ IntMap.toList g
|
||||
where deps :: Either e ModuleImports -> [FilePathId]
|
||||
deps (Left _) = []
|
||||
deps (Right ModuleImports{moduleImports}) = mapMaybe snd moduleImports
|
||||
|
||||
@ -200,12 +262,20 @@ partitionSCC (AcyclicSCC x:rest) = first (x:) $ partitionSCC rest
|
||||
partitionSCC [] = ([], [])
|
||||
|
||||
transitiveDeps :: DependencyInformation -> NormalizedFilePath -> Maybe TransitiveDependencies
|
||||
transitiveDeps DependencyInformation{..} f = do
|
||||
reachableVs <- Set.delete f . Set.fromList . map (fst3 . fromVertex) . reachable g <$> toVertex f
|
||||
let transitiveModuleDeps = filter (\v -> v `Set.member` reachableVs) $ map (fst3 . fromVertex) vs
|
||||
let transitivePkgDeps = Set.toList $ Set.unions $ map (\f -> MS.findWithDefault Set.empty f depPkgDeps) (f : transitiveModuleDeps)
|
||||
transitiveDeps DependencyInformation{..} file = do
|
||||
let !fileId = pathToId depPathIdMap file
|
||||
reachableVs <-
|
||||
IntSet.delete (getFilePathId fileId) .
|
||||
IntSet.fromList . map (fst3 . fromVertex) .
|
||||
reachable g <$> toVertex (getFilePathId fileId)
|
||||
let transitiveModuleDepIds = filter (\v -> v `IntSet.member` reachableVs) $ map (fst3 . fromVertex) vs
|
||||
let transitivePkgDeps =
|
||||
Set.toList $ Set.unions $
|
||||
map (\f -> IntMap.findWithDefault Set.empty f depPkgDeps) $
|
||||
getFilePathId fileId : transitiveModuleDepIds
|
||||
let transitiveModuleDeps = map (idToPath depPathIdMap . FilePathId) transitiveModuleDepIds
|
||||
pure TransitiveDependencies {..}
|
||||
where (g, fromVertex, toVertex) = graphFromEdges (map (\(f, fs) -> (f, f, Set.toList fs)) $ MS.toList depModuleDeps)
|
||||
where (g, fromVertex, toVertex) = graphFromEdges (map (\(f, fs) -> (f, f, IntSet.toList fs)) $ IntMap.toList depModuleDeps)
|
||||
vs = topSort g
|
||||
|
||||
data TransitiveDependencies = TransitiveDependencies
|
||||
|
Loading…
Reference in New Issue
Block a user