Ensure slow shutdown of LS always kicks off hooks (#6665)

This change fixes the rather elusive bug where shutdown hooks could not be fired when shutdown was taking too long and termination was forced.

Under the circumstances described in detail in ticket #6515 there was a small chance that we could have a shutdown race condition. Essentially the messages received when client was disconnected and language server forced the termination could lead to language server not sending the public `ProjectClosed` message which triggers shutdown hook. Now we always do.

Also made sure that multiple `ProjectClosed` messages don't lead to firing multiple shutdown hooks, which was another possibility.

No tests as one would have to be able to introduce different delays in various message handlers to simulate the problem.
Having ability to do such chaos testing would be nice but it is beyond the scope of this ticket.
I was able to reproduce the problem 100% with my specially crafted setup so I'm fairly confident about the change.

Closes #6515.
This commit is contained in:
Hubert Plociniczak 2023-05-15 14:15:37 +02:00 committed by GitHub
parent 8569ab98c2
commit 06624f34a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 12 deletions

View File

@ -297,6 +297,9 @@ class LanguageServerController(
s"Received client ($clientId) disconnect request during shutdown. Ignoring."
)
case ShutDownServer =>
logger.debug(s"Received shutdown request during shutdown. Ignoring.")
case m: StartServer =>
// This instance has not yet been shut down. Retry
context.parent.forward(Retry(m))

View File

@ -2,6 +2,7 @@ package org.enso.projectmanager.infrastructure.languageserver
import akka.actor.{Actor, ActorRef, Cancellable, PoisonPill, Props, Terminated}
import com.typesafe.scalalogging.LazyLogging
import org.enso.projectmanager.event.ProjectEvent.ProjectClosed
import org.enso.projectmanager.infrastructure.languageserver.LanguageServerController.ShutDownServer
import org.enso.projectmanager.infrastructure.languageserver.LanguageServerKiller.KillTimeout
import org.enso.projectmanager.infrastructure.languageserver.LanguageServerProtocol.{
@ -10,6 +11,7 @@ import org.enso.projectmanager.infrastructure.languageserver.LanguageServerProto
}
import org.enso.projectmanager.util.UnhandledLogging
import java.util.UUID
import scala.concurrent.duration.FiniteDuration
/** An actor that shuts all running language servers. It orchestrates all
@ -20,7 +22,7 @@ import scala.concurrent.duration.FiniteDuration
* @param shutdownTimeout a shutdown timeout
*/
class LanguageServerKiller(
controllers: List[ActorRef],
controllers: Map[UUID, ActorRef],
shutdownTimeout: FiniteDuration
) extends Actor
with LazyLogging
@ -34,20 +36,22 @@ class LanguageServerKiller(
context.stop(self)
} else {
logger.info("Killing all servers [{}].", controllers)
controllers.foreach(context.watch)
controllers.foreach(_ ! ShutDownServer)
controllers.foreach { case (_, ref) =>
context.watch(ref)
ref ! ShutDownServer
}
val cancellable =
context.system.scheduler.scheduleOnce(
shutdownTimeout,
self,
KillTimeout
)
context.become(killing(controllers.toSet, cancellable, sender()))
context.become(killing(controllers.map(_.swap), cancellable, sender()))
}
}
private def killing(
liveControllers: Set[ActorRef],
liveControllers: Map[ActorRef, UUID],
cancellable: Cancellable,
replyTo: ActorRef
): Receive = {
@ -63,7 +67,13 @@ class LanguageServerKiller(
}
case KillTimeout =>
liveControllers.foreach(_ ! PoisonPill)
logger.warn(
s"Not all language servers' controllers finished on time. Forcing termination."
)
liveControllers.foreach { case (actorRef, projectId) =>
actorRef ! PoisonPill
context.system.eventStream.publish(ProjectClosed(projectId))
}
context.stop(self)
}
@ -80,7 +90,7 @@ object LanguageServerKiller {
* @return a configuration object
*/
def props(
controllers: List[ActorRef],
controllers: Map[UUID, ActorRef],
shutdownTimeout: FiniteDuration
): Props = Props(new LanguageServerKiller(controllers, shutdownTimeout))

View File

@ -88,7 +88,7 @@ class LanguageServerRegistry(
case msg @ KillThemAll =>
val killer = context.actorOf(
LanguageServerKiller.props(
serverControllers.values.toList,
serverControllers,
timeoutConfig.shutdownTimeout
),
"language-server-killer"

View File

@ -32,12 +32,13 @@ class ShutdownHookActivator[F[+_, +_]: Exec: CovariantFlatMap]
private def running(
hooks: Map[UUID, List[ShutdownHook[F]]] =
Map.empty.withDefaultValue(List.empty)
Map.empty.withDefaultValue(List.empty),
scheduled: List[UUID] = Nil
): Receive = {
case RegisterShutdownHook(projectId, hook) =>
val realHook = hook.asInstanceOf[ShutdownHook[F]]
val updated = hooks.updated(projectId, realHook :: hooks(projectId))
context.become(running(updated))
context.become(running(updated, scheduled))
case ProjectClosed(projectId) =>
val projectHooks = hooks(projectId)
@ -45,13 +46,22 @@ class ShutdownHookActivator[F[+_, +_]: Exec: CovariantFlatMap]
context.actorOf(
ShutdownHookRunner.props[F](projectId, projectHooks.reverse)
)
context.become(running(hooks - projectId, projectId :: scheduled))
} else if (scheduled.contains(projectId)) {
logger.debug(
s"Request for starting shutdown hooks has already been filed for project ${projectId}. Ignoring."
)
} else {
logger.warn(
s"Shutdown hook activator has no recollection of project ${projectId}. Either it was closed already or it never existed. Ignoring."
)
}
case ShutdownHooksFired(projectId) =>
context.become(running(hooks - projectId))
context.become(running(hooks, scheduled.filter(_ != projectId)))
case ArePendingShutdownHooks =>
val arePending = hooks.values.map(_.size).sum != 0
val arePending = hooks.values.map(_.size).sum != 0 || scheduled.nonEmpty
sender() ! arePending
}