plotting|util|tests: Fix and test re-trying of plots which failed to load (#8279)

* Revert "plotting: Fix failed_to_open_filenames re-try interval"

This reverts 8a8abc41a8, which is part of #7848. Seems like I confused myself there.

* plotting|util: Introduce `retry_invalid_seconds` in plot refresh params

* plotting: Clean up `failed_to_open_filenames`

* tests: Test re-try of plots which failed to load
This commit is contained in:
dustinface 2021-09-01 18:21:29 +02:00 committed by GitHub
parent 745ad49f7d
commit 17dea1b708
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 2 deletions

View File

@ -162,9 +162,10 @@ class PlotManager:
if file_path.exists():
if (
file_path in self.failed_to_open_filenames
and (time.time() - self.failed_to_open_filenames[file_path]) > 1200
and (time.time() - self.failed_to_open_filenames[file_path])
< self.refresh_parameter.retry_invalid_seconds
):
# Try once every 20 minutes to open the file
# Try once every `refresh_parameter.retry_invalid_seconds` seconds to open the file
return new_provers
if file_path in self.plots:
try:
@ -268,6 +269,9 @@ class PlotManager:
result.loaded_plots += 1
result.loaded_size += stat_info.st_size
if file_path in self.failed_to_open_filenames:
del self.failed_to_open_filenames[file_path]
except Exception as e:
tb = traceback.format_exc()
log.error(f"Failed to open file {file_path}. {e} {tb}")

View File

@ -16,6 +16,7 @@ log = logging.getLogger(__name__)
@dataclass
class PlotsRefreshParameter:
interval_seconds: int = 120
retry_invalid_seconds: int = 1200
batch_size: int = 30
batch_sleep_milliseconds: int = 10

View File

@ -127,6 +127,7 @@ harvester:
num_threads: 30
plots_refresh_parameter:
interval_seconds: 120 # The interval in seconds to refresh the plot file manager
retry_invalid_seconds: 1200 # How long to wait, in seconds, before re-trying plots which failed to load
batch_size: 30 # How many plot files the harvester processes before it waits batch_sleep_milliseconds
batch_sleep_milliseconds: 10 # Milliseconds the harvester sleeps between batch processing

View File

@ -1,6 +1,8 @@
# flake8: noqa: E501
import logging
from pathlib import Path
from secrets import token_bytes
from shutil import copy, move
import pytest
from blspy import AugSchemeMPL
@ -198,6 +200,7 @@ class TestRpc:
await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
result = await client_2.get_plots()
assert len(result["plots"]) == expect_total_plots
assert len(harvester.plot_manager.failed_to_open_filenames) == 0
# Add plot_dir with two new plots
await test_case(
@ -291,6 +294,39 @@ class TestRpc:
expect_total_plots=0,
)
# Test re-trying if processing a plot failed
# First save the plot
retry_test_plot = Path(plot_dir_sub / filename_2).resolve()
retry_test_plot_save = Path(plot_dir_sub / "save").resolve()
copy(retry_test_plot, retry_test_plot_save)
# Invalidate the plot
with open(plot_dir_sub / filename_2, "r+b") as file:
file.write(bytes(100))
# Add it and validate it fails to load
await harvester.add_plot_directory(str(plot_dir_sub))
expected_result.loaded_plots = 0
expected_result.removed_plots = 0
expected_result.processed_files = 1
expected_result.remaining_files = 0
harvester.plot_manager.start_refreshing()
await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
assert retry_test_plot in harvester.plot_manager.failed_to_open_filenames
# Make sure the file stays in `failed_to_open_filenames` and doesn't get loaded or processed in the next
# update round
expected_result.loaded_plots = 0
expected_result.processed_files = 0
harvester.plot_manager.trigger_refresh()
await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
assert retry_test_plot in harvester.plot_manager.failed_to_open_filenames
# Now decrease the re-try timeout, restore the valid plot file and make sure it properly loads now
harvester.plot_manager.refresh_parameter.retry_invalid_seconds = 0
move(retry_test_plot_save, retry_test_plot)
expected_result.loaded_plots = 1
expected_result.processed_files = 1
harvester.plot_manager.trigger_refresh()
await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
assert retry_test_plot not in harvester.plot_manager.failed_to_open_filenames
targets_1 = await client.get_reward_targets(False)
assert "have_pool_sk" not in targets_1
assert "have_farmer_sk" not in targets_1