FileExists: set one watcher instead of thousands (#831)

* FileExists: set one watcher instead of thousands

This prevents us from sending thousands of notifications to the client
on startup, which can lock up some clients like emacs. Instead we send
precisely one.

This has some consequences for the behaviour of the fast file existence
lookup, which I've noted in the code, alongside a description of how it
works (I spent a while figuring it out, I thought I might as well write
it down).

Fixes #776.

* Use fast rules only if it matches our watcher spec
This commit is contained in:
Michael Peyton Jones 2020-09-27 21:13:40 +01:00 committed by GitHub
parent 1bb4c49fda
commit b279afbce7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 175 additions and 87 deletions

View File

@ -49,6 +49,7 @@ library
fuzzy, fuzzy,
filepath, filepath,
fingertree, fingertree,
Glob,
haddock-library >= 1.8, haddock-library >= 1.8,
hashable, hashable,
haskell-lsp-types == 0.22.*, haskell-lsp-types == 0.22.*,

View File

@ -46,6 +46,8 @@ let defaultCompiler = "ghc" + lib.replaceStrings ["."] [""] haskellPackages.ghc.
diagrams-svg diagrams-svg
extra extra
fuzzy fuzzy
fingertree
Glob
ghc-check ghc-check
gitrev gitrev
happy happy

View File

@ -5,36 +5,75 @@ module Development.IDE.Core.FileExists
( fileExistsRules ( fileExistsRules
, modifyFileExists , modifyFileExists
, getFileExists , getFileExists
, watchedGlobs
) )
where where
import Control.Concurrent.Extra import Control.Concurrent.Extra
import Control.Exception import Control.Exception
import Control.Monad.Extra import Control.Monad.Extra
import qualified Data.Aeson as A
import Data.Binary import Data.Binary
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import Data.Maybe import Data.Maybe
import qualified Data.Text as T
import Development.IDE.Core.FileStore import Development.IDE.Core.FileStore
import Development.IDE.Core.IdeConfiguration import Development.IDE.Core.IdeConfiguration
import Development.IDE.Core.Shake import Development.IDE.Core.Shake
import Development.IDE.Types.Location import Development.IDE.Types.Location
import Development.IDE.Types.Logger import Development.IDE.Types.Options
import Development.Shake import Development.Shake
import Development.Shake.Classes import Development.Shake.Classes
import GHC.Generics import GHC.Generics
import Language.Haskell.LSP.Messages
import Language.Haskell.LSP.Types
import Language.Haskell.LSP.Types.Capabilities import Language.Haskell.LSP.Types.Capabilities
import qualified System.Directory as Dir import qualified System.Directory as Dir
import qualified System.FilePath.Glob as Glob
-- | A map for tracking the file existence {- Note [File existence cache and LSP file watchers]
Some LSP servers provide the ability to register file watches with the client, which will then notify
us of file changes. Some clients can do this more efficiently than us, or generally it's a tricky
problem
Here we use this to maintain a quick lookup cache of file existence. How this works is:
- On startup, if the client supports it we ask it to watch some files (see below).
- When those files are created or deleted (we can also see change events, but we don't
care since we're only caching existence here) we get a notification from the client.
- The notification handler calls 'modifyFileExists' to update our cache.
This means that the cache will only ever work for the files we have set up a watcher for.
So we pick the set that we mostly care about and which are likely to change existence
most often: the source files of the project (as determined by the source extensions
we're configured to care about).
For all other files we fall back to the slow path.
There are a few failure modes to think about:
1. The client doesn't send us the notifications we asked for.
There's not much we can do in this case: the whole point is to rely on the client so
we don't do the checking ourselves. If the client lets us down, we will just be wrong.
2. Races between registering watchers, getting notifications, and file changes.
If a file changes status between us asking for notifications and the client actually
setting up the notifications, we might not get told about it. But this is a relatively
small race window around startup, so we just don't worry about it.
3. Using the fast path for files that we aren't watching.
In this case we will fall back to the slow path, but cache that result forever (since
it won't get invalidated by a client notification). To prevent this we guard the
fast path by a check that the path also matches our watching patterns.
-}
-- See Note [File existence cache and LSP file watchers]
-- | A map for tracking the file existence.
-- If a path maps to 'True' then it exists; if it maps to 'False' then it doesn't exist'; and
-- if it's not in the map then we don't know.
type FileExistsMap = (HashMap NormalizedFilePath Bool) type FileExistsMap = (HashMap NormalizedFilePath Bool)
-- | A wrapper around a mutable 'FileExistsMap' -- | A wrapper around a mutable 'FileExistsState'
newtype FileExistsMapVar = FileExistsMapVar (Var FileExistsMap) newtype FileExistsMapVar = FileExistsMapVar (Var FileExistsMap)
instance IsIdeGlobal FileExistsMapVar instance IsIdeGlobal FileExistsMapVar
@ -45,22 +84,16 @@ getFileExistsMapUntracked = do
FileExistsMapVar v <- getIdeGlobalAction FileExistsMapVar v <- getIdeGlobalAction
liftIO $ readVar v liftIO $ readVar v
-- | Modify the global store of file exists -- | Modify the global store of file exists.
modifyFileExistsAction :: (FileExistsMap -> IO FileExistsMap) -> Action ()
modifyFileExistsAction f = do
FileExistsMapVar var <- getIdeGlobalAction
liftIO $ modifyVar_ var f
-- | Modify the global store of file exists
modifyFileExists :: IdeState -> [(NormalizedFilePath, Bool)] -> IO () modifyFileExists :: IdeState -> [(NormalizedFilePath, Bool)] -> IO ()
modifyFileExists state changes = do modifyFileExists state changes = do
FileExistsMapVar var <- getIdeGlobalState state FileExistsMapVar var <- getIdeGlobalState state
changesMap <- evaluate $ HashMap.fromList changes changesMap <- evaluate $ HashMap.fromList changes
-- Masked to ensure that the previous values are flushed together with the map update -- Masked to ensure that the previous values are flushed together with the map update
mask $ \_ -> do mask $ \_ -> do
-- update the map -- update the map
modifyVar_ var $ evaluate . HashMap.union changesMap modifyVar_ var $ evaluate . HashMap.union changesMap
-- See Note [Invalidating file existence results]
-- flush previous values -- flush previous values
mapM_ (deleteValue state GetFileExists . fst) changes mapM_ (deleteValue state GetFileExists . fst) changes
@ -87,86 +120,101 @@ instance Binary GetFileExists
getFileExists :: NormalizedFilePath -> Action Bool getFileExists :: NormalizedFilePath -> Action Bool
getFileExists fp = use_ GetFileExists fp getFileExists fp = use_ GetFileExists fp
{- Note [Which files should we watch?]
The watcher system gives us a lot of flexibility: we can set multiple watchers, and they can all watch on glob
patterns.
We used to have a quite precise system, where we would register a watcher for a single file path only (and always)
when we actually looked to see if it existed. The downside of this is that it sends a *lot* of notifications
to the client (thousands on a large project), and this could lock up some clients like emacs
(https://github.com/emacs-lsp/lsp-mode/issues/2165).
Now we take the opposite approach: we register a single, quite general watcher that looks for all files
with a predefined set of extensions. The consequences are:
- The client will have to watch more files. This is usually not too bad, since the pattern is a single glob,
and the clients typically call out to an optimized implementation of file watching that understands globs.
- The client will send us a lot more notifications. This isn't too bad in practice, since although
we're watching a lot of files in principle, they don't get created or destroyed that often.
- We won't ever hit the fast lookup path for files which aren't in our watch pattern, since the only way
files get into our map is when the client sends us a notification about them because we're watching them.
This is fine so long as we're watching the files we check most often, i.e. source files.
-}
-- | The list of file globs that we ask the client to watch.
watchedGlobs :: IdeOptions -> [String]
watchedGlobs opts = [ "**/*." ++ extIncBoot | ext <- optExtensions opts, extIncBoot <- [ext, ext ++ "-boot"]]
-- | Installs the 'getFileExists' rules. -- | Installs the 'getFileExists' rules.
-- Provides a fast implementation if client supports dynamic watched files. -- Provides a fast implementation if client supports dynamic watched files.
-- Creates a global state as a side effect in that case. -- Creates a global state as a side effect in that case.
fileExistsRules :: IO LspId -> ClientCapabilities -> VFSHandle -> Rules () fileExistsRules :: ClientCapabilities -> VFSHandle -> Rules ()
fileExistsRules getLspId ClientCapabilities{_workspace} vfs = do fileExistsRules ClientCapabilities{_workspace} vfs = do
-- Create the global always, although it should only be used if we have fast rules. -- Create the global always, although it should only be used if we have fast rules.
-- But there's a chance someone will send unexpected notifications anyway, -- But there's a chance someone will send unexpected notifications anyway,
-- e.g. https://github.com/digital-asset/ghcide/issues/599 -- e.g. https://github.com/digital-asset/ghcide/issues/599
addIdeGlobal . FileExistsMapVar =<< liftIO (newVar []) addIdeGlobal . FileExistsMapVar =<< liftIO (newVar [])
extras <- getShakeExtrasRules
opts <- liftIO $ getIdeOptionsIO extras
let globs = watchedGlobs opts
case () of case () of
_ | Just WorkspaceClientCapabilities{_didChangeWatchedFiles} <- _workspace _ | Just WorkspaceClientCapabilities{_didChangeWatchedFiles} <- _workspace
, Just DidChangeWatchedFilesClientCapabilities{_dynamicRegistration} <- _didChangeWatchedFiles , Just DidChangeWatchedFilesClientCapabilities{_dynamicRegistration} <- _didChangeWatchedFiles
, Just True <- _dynamicRegistration , Just True <- _dynamicRegistration
-> fileExistsRulesFast getLspId vfs -> fileExistsRulesFast globs vfs
| otherwise -> do | otherwise -> fileExistsRulesSlow vfs
logger <- logger <$> getShakeExtrasRules
liftIO $ logDebug logger "Warning: Client does not support watched files. Falling back to OS polling"
fileExistsRulesSlow vfs
-- Requires an lsp client that provides WatchedFiles notifications. -- Requires an lsp client that provides WatchedFiles notifications, but assumes that this has already been checked.
fileExistsRulesFast :: IO LspId -> VFSHandle -> Rules () fileExistsRulesFast :: [String] -> VFSHandle -> Rules ()
fileExistsRulesFast getLspId vfs = fileExistsRulesFast globs vfs =
defineEarlyCutoff $ \GetFileExists file -> do let patterns = fmap Glob.compile globs
isWf <- isWorkspaceFile file fpMatches fp = any (\p -> Glob.match p fp) patterns
if isWf in defineEarlyCutoff $ \GetFileExists file -> do
then fileExistsFast getLspId vfs file isWf <- isWorkspaceFile file
else fileExistsSlow vfs file if isWf && fpMatches (fromNormalizedFilePath file)
then fileExistsFast vfs file
else fileExistsSlow vfs file
fileExistsFast :: IO LspId -> VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool)) {- Note [Invalidating file existence results]
fileExistsFast getLspId vfs file = do We have two mechanisms for getting file existence information:
fileExistsMap <- getFileExistsMapUntracked - The file existence cache
let mbFilesWatched = HashMap.lookup file fileExistsMap - The VFS lookup
case mbFilesWatched of
Just fv -> pure (summarizeExists fv, ([], Just fv))
Nothing -> do
exist <- liftIO $ getFileExistsVFS vfs file
ShakeExtras { eventer } <- getShakeExtras
-- add a listener for VFS Create/Delete file events, Both of these affect the results of the 'GetFileExists' rule, so we need to make sure it
-- taking the FileExistsMap lock to prevent race conditions is invalidated properly when things change.
-- that would lead to multiple listeners for the same path
modifyFileExistsAction $ \x -> do
case HashMap.alterF (,Just exist) file x of
(Nothing, x') -> do
-- if the listener addition fails, we never recover. This is a bug.
addListener eventer file
return x'
(Just _, _) ->
-- if the key was already there, do nothing
return x
pure (summarizeExists exist, ([], Just exist)) For the file existence cache, we manually flush the results of 'GetFileExists' when we
where modify it (i.e. when a notification comes from the client). This is faster than using
addListener eventer fp = do 'alwaysRerun' in the 'fileExistsFast', and we need it to be as fast as possible.
reqId <- getLspId
let
req = RequestMessage "2.0" reqId ClientRegisterCapability regParams
fpAsId = T.pack $ fromNormalizedFilePath fp
regParams = RegistrationParams (List [registration])
registration = Registration fpAsId
WorkspaceDidChangeWatchedFiles
(Just (A.toJSON regOptions))
regOptions =
DidChangeWatchedFilesRegistrationOptions { _watchers = List [watcher] }
watchKind = WatchKind { _watchCreate = True, _watchChange = False, _watchDelete = True}
watcher = FileSystemWatcher { _globPattern = fromNormalizedFilePath fp
, _kind = Just watchKind
}
eventer $ ReqRegisterCapability req For the VFS lookup, however, we won't get prompted to flush the result, so instead
we use 'alwaysRerun'.
-}
fileExistsFast :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool))
fileExistsFast vfs file = do
-- Could in principle use 'alwaysRerun' here, but it's too slwo, See Note [Invalidating file existence results]
mp <- getFileExistsMapUntracked
let mbFilesWatched = HashMap.lookup file mp
exist <- case mbFilesWatched of
Just exist -> pure exist
-- We don't know about it: use the slow route.
-- Note that we do *not* call 'fileExistsSlow', as that would trigger 'alwaysRerun'.
Nothing -> liftIO $ getFileExistsVFS vfs file
pure (summarizeExists exist, ([], Just exist))
summarizeExists :: Bool -> Maybe BS.ByteString summarizeExists :: Bool -> Maybe BS.ByteString
summarizeExists x = Just $ if x then BS.singleton 1 else BS.empty summarizeExists x = Just $ if x then BS.singleton 1 else BS.empty
fileExistsRulesSlow:: VFSHandle -> Rules () fileExistsRulesSlow :: VFSHandle -> Rules ()
fileExistsRulesSlow vfs = fileExistsRulesSlow vfs =
defineEarlyCutoff $ \GetFileExists file -> fileExistsSlow vfs file defineEarlyCutoff $ \GetFileExists file -> fileExistsSlow vfs file
fileExistsSlow :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool)) fileExistsSlow :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool))
fileExistsSlow vfs file = do fileExistsSlow vfs file = do
-- See Note [Invalidating file existence results]
alwaysRerun alwaysRerun
exist <- liftIO $ getFileExistsVFS vfs file exist <- liftIO $ getFileExistsVFS vfs file
pure (summarizeExists exist, ([], Just exist)) pure (summarizeExists exist, ([], Just exist))

View File

@ -68,7 +68,7 @@ initialise caps mainRule getLspId toDiags wProg wIndefProg logger debouncer opti
addIdeGlobal $ GlobalIdeOptions options addIdeGlobal $ GlobalIdeOptions options
fileStoreRules vfs fileStoreRules vfs
ofInterestRules ofInterestRules
fileExistsRules getLspId caps vfs fileExistsRules caps vfs
mainRule mainRule
writeProfile :: IdeState -> FilePath -> IO () writeProfile :: IdeState -> FilePath -> IO ()

View File

@ -214,8 +214,7 @@ initHandler _ ide params = do
-- Set them to avoid a warning in VS Code output. -- Set them to avoid a warning in VS Code output.
setHandlersIgnore :: PartialHandlers config setHandlersIgnore :: PartialHandlers config
setHandlersIgnore = PartialHandlers $ \_ x -> return x setHandlersIgnore = PartialHandlers $ \_ x -> return x
{LSP.initializedHandler = none {LSP.responseHandler = none
,LSP.responseHandler = none
} }
where none = Just $ const $ return () where none = Just $ const $ return ()

View File

@ -12,6 +12,8 @@ import Development.IDE.LSP.Server
import qualified Language.Haskell.LSP.Core as LSP import qualified Language.Haskell.LSP.Core as LSP
import Language.Haskell.LSP.Types import Language.Haskell.LSP.Types
import qualified Language.Haskell.LSP.Types as LSP import qualified Language.Haskell.LSP.Types as LSP
import qualified Language.Haskell.LSP.Messages as LSP
import qualified Language.Haskell.LSP.Types.Capabilities as LSP
import Development.IDE.Core.IdeConfiguration import Development.IDE.Core.IdeConfiguration
import Development.IDE.Core.Service import Development.IDE.Core.Service
@ -21,6 +23,7 @@ import Development.IDE.Types.Logger
import Development.IDE.Types.Options import Development.IDE.Types.Options
import Control.Monad.Extra import Control.Monad.Extra
import qualified Data.Aeson as A
import Data.Foldable as F import Data.Foldable as F
import Data.Maybe import Data.Maybe
import qualified Data.HashMap.Strict as M import qualified Data.HashMap.Strict as M
@ -28,7 +31,7 @@ import qualified Data.HashSet as S
import qualified Data.Text as Text import qualified Data.Text as Text
import Development.IDE.Core.FileStore (setSomethingModified, setFileModified, typecheckParents) import Development.IDE.Core.FileStore (setSomethingModified, setFileModified, typecheckParents)
import Development.IDE.Core.FileExists (modifyFileExists) import Development.IDE.Core.FileExists (modifyFileExists, watchedGlobs)
import Development.IDE.Core.OfInterest import Development.IDE.Core.OfInterest
@ -72,6 +75,8 @@ setHandlersNotifications = PartialHandlers $ \WithMessage{..} x -> return x
logInfo (ideLogger ide) $ "Closed text document: " <> getUri _uri logInfo (ideLogger ide) $ "Closed text document: " <> getUri _uri
,LSP.didChangeWatchedFilesNotificationHandler = withNotification (LSP.didChangeWatchedFilesNotificationHandler x) $ ,LSP.didChangeWatchedFilesNotificationHandler = withNotification (LSP.didChangeWatchedFilesNotificationHandler x) $
\_ ide (DidChangeWatchedFilesParams fileEvents) -> do \_ ide (DidChangeWatchedFilesParams fileEvents) -> do
-- See Note [File existence cache and LSP file watchers] which explains why we get these notifications and
-- what we do with them
let events = let events =
mapMaybe mapMaybe
(\(FileEvent uri ev) -> (\(FileEvent uri ev) ->
@ -98,4 +103,45 @@ setHandlersNotifications = PartialHandlers $ \WithMessage{..} x -> return x
logInfo (ideLogger ide) $ "Configuration changed: " <> msg logInfo (ideLogger ide) $ "Configuration changed: " <> msg
modifyClientSettings ide (const $ Just cfg) modifyClientSettings ide (const $ Just cfg)
setSomethingModified ide setSomethingModified ide
-- Initialized handler, good time to dynamically register capabilities
,LSP.initializedHandler = withNotification (LSP.initializedHandler x) $ \lsp@LSP.LspFuncs{..} ide _ -> do
let watchSupported = case () of
_ | LSP.ClientCapabilities{_workspace} <- clientCapabilities
, Just LSP.WorkspaceClientCapabilities{_didChangeWatchedFiles} <- _workspace
, Just LSP.DidChangeWatchedFilesClientCapabilities{_dynamicRegistration} <- _didChangeWatchedFiles
, Just True <- _dynamicRegistration
-> True
| otherwise -> False
if watchSupported
then registerWatcher lsp ide
else logDebug (ideLogger ide) "Warning: Client does not support watched files. Falling back to OS polling"
} }
where
registerWatcher LSP.LspFuncs{..} ide = do
lspId <- getNextReqId
opts <- getIdeOptionsIO $ shakeExtras ide
let
req = RequestMessage "2.0" lspId ClientRegisterCapability regParams
regParams = RegistrationParams (List [registration])
-- The registration ID is arbitrary and is only used in case we want to deregister (which we won't).
-- We could also use something like a random UUID, as some other servers do, but this works for
-- our purposes.
registration = Registration "globalFileWatches"
WorkspaceDidChangeWatchedFiles
(Just (A.toJSON regOptions))
regOptions =
DidChangeWatchedFilesRegistrationOptions { _watchers = List watchers }
-- See Note [File existence cache and LSP file watchers] for why this exists, and the choice of watch kind
watchKind = WatchKind { _watchCreate = True, _watchChange = False, _watchDelete = True}
-- See Note [Which files should we watch?] for an explanation of why the pattern is the way that it is
-- The patterns will be something like "**/.hs", i.e. "any number of directory segments,
-- followed by a file with an extension 'hs'.
watcher glob = FileSystemWatcher { _globPattern = glob, _kind = Just watchKind }
-- We use multiple watchers instead of one using '{}' because lsp-test doesn't
-- support that: https://github.com/bubba/lsp-test/issues/77
watchers = [ watcher glob | glob <- watchedGlobs opts ]
sendFunc $ LSP.ReqRegisterCapability req

View File

@ -582,24 +582,16 @@ watchedFilesTests = testGroup "watched files"
_doc <- createDoc "A.hs" "haskell" "{-#LANGUAGE NoImplicitPrelude #-}\nmodule A where\nimport WatchedFilesMissingModule" _doc <- createDoc "A.hs" "haskell" "{-#LANGUAGE NoImplicitPrelude #-}\nmodule A where\nimport WatchedFilesMissingModule"
watchedFileRegs <- getWatchedFilesSubscriptionsUntil @PublishDiagnosticsNotification watchedFileRegs <- getWatchedFilesSubscriptionsUntil @PublishDiagnosticsNotification
-- Expect 4 subscriptions (A does not get any because it's VFS): -- Expect 1 subscription: we only ever send one
-- - /path-to-workspace/hie.yaml liftIO $ length watchedFileRegs @?= 1
-- - /path-to-workspace/WatchedFilesMissingModule.hs
-- - /path-to-workspace/WatchedFilesMissingModule.lhs
-- - /path-to-workspace/src/WatchedFilesMissingModule.hs
-- - /path-to-workspace/src/WatchedFilesMissingModule.lhs
liftIO $ length watchedFileRegs @?= 5
, testSession' "non workspace file" $ \sessionDir -> do , testSession' "non workspace file" $ \sessionDir -> do
liftIO $ writeFile (sessionDir </> "hie.yaml") "cradle: {direct: {arguments: [\"-i/tmp\", \"A\", \"WatchedFilesMissingModule\"]}}" liftIO $ writeFile (sessionDir </> "hie.yaml") "cradle: {direct: {arguments: [\"-i/tmp\", \"A\", \"WatchedFilesMissingModule\"]}}"
_doc <- createDoc "A.hs" "haskell" "{-# LANGUAGE NoImplicitPrelude#-}\nmodule A where\nimport WatchedFilesMissingModule" _doc <- createDoc "A.hs" "haskell" "{-# LANGUAGE NoImplicitPrelude#-}\nmodule A where\nimport WatchedFilesMissingModule"
watchedFileRegs <- getWatchedFilesSubscriptionsUntil @PublishDiagnosticsNotification watchedFileRegs <- getWatchedFilesSubscriptionsUntil @PublishDiagnosticsNotification
-- Expect 2 subscriptions (/tmp does not get any as it is out of the workspace): -- Expect 1 subscription: we only ever send one
-- - /path-to-workspace/hie.yaml liftIO $ length watchedFileRegs @?= 1
-- - /path-to-workspace/WatchedFilesMissingModule.hs
-- - /path-to-workspace/WatchedFilesMissingModule.lhs
liftIO $ length watchedFileRegs @?= 3
-- TODO add a test for didChangeWorkspaceFolder -- TODO add a test for didChangeWorkspaceFolder
] ]