mirror of
https://github.com/NixOS/mobile-nixos.git
synced 2024-09-17 14:57:22 +03:00
boot/init: Handle hung tasks
Hung tasks are handled with a global timer that is reset every time a task is run. This gives any task the maximum chance to have its dependencies resolve. A minimum of 60s is given, but in practice the conditions for trying to resolve a dependency were likely already present before the timeout started counting towards that particular dependency. Note that a long-running task, when it runs successfully, does not cause the timeout to be reached. E.g. a task is started 10s into the timeout; the loop is not executed again until the task exits. When it exits, the branch followed is the one for a task that ran, which means that even if the task took 70s (for a total of 80 seconds), the 60s timeout does not apply. Though, please, don't make your tasks take that much time to run!
This commit is contained in:
parent
d41c454514
commit
00f81fa95c
@ -1,5 +1,8 @@
|
||||
# Namespace where tasks can be defined, and hosting methods harmonizing a run.
|
||||
module Tasks
|
||||
HUNG_BOOT_NOTIFICATION = 3 # seconds
|
||||
HUNG_BOOT_TIMEOUT = 60 # seconds
|
||||
|
||||
# Register a singleton task to be instantiated and ran.
|
||||
# @internal
|
||||
def self.register_singleton(klass)
|
||||
@ -29,6 +32,8 @@ module Tasks
|
||||
# unpredictable!
|
||||
@tasks.sort!
|
||||
|
||||
hung_tasks_timer = Time.now
|
||||
|
||||
until @tasks.all?(&:ran) do
|
||||
$logger.debug("=== Tasks resolution loop start ===")
|
||||
ran_one = false
|
||||
@ -53,8 +58,44 @@ module Tasks
|
||||
end
|
||||
end
|
||||
|
||||
# Don't burn the CPU if we're waiting on something...
|
||||
unless ran_one
|
||||
if ran_one
|
||||
# Reset the timer
|
||||
hung_tasks_timer = Time.now
|
||||
# And reset the hung state in the progress UI
|
||||
Progress.update({label: nil, hung: nil})
|
||||
else
|
||||
elapsed = Time.now - hung_tasks_timer
|
||||
$logger.debug("Time elapsed since something ran: #{elapsed}")
|
||||
|
||||
# Any tasks, not currently depending on another task, that have yet
|
||||
# to be ran.
|
||||
# Serves nothing to point to tasks depending on other tasks.
|
||||
failed_tasks = todo.reject(&:depends_on_any_unfulfilled_task?)
|
||||
failed_dependencies = failed_tasks.map(&:dependencies).inject(:+).uniq
|
||||
|
||||
if elapsed > HUNG_BOOT_NOTIFICATION
|
||||
label = "#{failed_tasks.length} tasks are waiting on #{failed_dependencies.length} unique dependencies.\n\n" +
|
||||
"(#{(HUNG_BOOT_TIMEOUT - elapsed).ceil} seconds left until boot is aborted.)"
|
||||
|
||||
Progress.update({label: label, hung: elapsed})
|
||||
end
|
||||
|
||||
if elapsed > HUNG_BOOT_TIMEOUT
|
||||
# Building this message is not pretty!
|
||||
msg =
|
||||
"#{failed_tasks.length} #{if failed_tasks.length == 1 then "task" else "tasks" end} " +
|
||||
"did not run within #{HUNG_BOOT_TIMEOUT} seconds.\n" +
|
||||
"\n" +
|
||||
"#{failed_dependencies.length} #{if failed_dependencies.length == 1 then "dependency" else "dependencies" end} " +
|
||||
"could not resolve:\n" +
|
||||
failed_dependencies.map(&:pretty_name).join("\n") +
|
||||
"\n"
|
||||
|
||||
# Fail with a black backdrop, and force the message to stay up 60s
|
||||
System.failure("hung_tasks", msg, color: "000000", delay: 60)
|
||||
end
|
||||
|
||||
# Don't burn the CPU if we're waiting on something...
|
||||
$logger.debug("Sleeping")
|
||||
sleep(0.1)
|
||||
end
|
||||
@ -112,6 +153,13 @@ class Task
|
||||
dependencies.all?(&:fulfilled?)
|
||||
end
|
||||
|
||||
# Whether this task is still waiting on another task or on a target.
#
# Only dependencies that are not yet fulfilled are looked at; among those,
# only `Dependencies::Task` and `Dependencies::Target` instances count.
#
# @return [Boolean] true when at least one unfulfilled dependency is a
#   `Dependencies::Task` or a `Dependencies::Target`, false otherwise
#   (including when there are no dependencies at all).
def depends_on_any_unfulfilled_task?
  # Filter first so `fulfilled?` is queried on every dependency, then check
  # the kind of whatever is still pending.
  dependencies.reject(&:fulfilled?).any? do |dep|
    dep.is_a?(Dependencies::Task) || dep.is_a?(Dependencies::Target)
  end
end
|
||||
|
||||
# Internal actual way to run the task
|
||||
# This runs the `#run` method.
|
||||
# Returns true when the task was ran.
|
||||
|
@ -18,6 +18,7 @@ error has been codified as a background color.
|
||||
|
||||
* Yellow (`0xFFFF00`) means that mounting the root filesystem was not possible.
|
||||
* Fuchsia (`0xFF00FF`) means that mounting succeeded, but no compatible generation was found to boot.
|
||||
* Black (`0x000000`) means that boot hung because a dependency could not be resolved.
|
||||
* Red (`0xFF0000`) means that executing (`exec`) and switching to the found generation's init failed.
|
||||
* Brown (`0x95681C`) means an uncontrolled abort happened.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user