mirror of
https://github.com/Chia-Network/chia-blockchain.git
synced 2024-11-13 03:12:24 +03:00
plotting: Reduce Path.exists
calls (#8999)
* plotting: Move plot removals out of `PlotManager.refresh_batch` Moving this checks up in the trace so that it only runs once for each refresh cycle reduces `plot_removed` calls per cycle from `batch_count * plot_count` to `plot_count` only. This checks can be quite expensive depending on how/where the plots are stored so this should noticeably improve refresh times in slow setups and large farms even for the initial refresh cycle. I don't see any downside here considered that even if a plot gets removed during a refresh cycle (after the checks run) it will catch it in the next cycle. **Note**: This has become an issue now (e.g. in #8972) because prior to way, that it only really processed in batches for the very first run after a restart. For each repeated refresh cycle it just walked through all files in one `refresh_batch` call. Now after #8385 fixed this behavior the checks are called `batch_count * plot_count` for all refresh cycles, even for each repeated cycle where it was `plot_count` before. * plotting: Move `file_path.exists` call down There is no point in testing it before here, it only leads to redundant calls for repeated refresh cycles. The important point here is just that it runs after ``` if file_path in self.plots: return self.plots[file_path] ``` so it could still be moved around differently if there are other suggestions, tell me. I even think it might be possible to just drop it with no bad impact but lets just move it for now.
This commit is contained in:
parent
3d1aa2b01d
commit
d9143063ff
@ -231,6 +231,33 @@ class PlotManager:
|
||||
|
||||
self._refresh_callback(PlotRefreshEvents.started, PlotRefreshResult(remaining=total_size))
|
||||
|
||||
# First drop all plots we have in plot_filename_paths but not longer in the filesystem or set in config
|
||||
def plot_removed(test_path: Path):
|
||||
return not test_path.exists() or test_path.parent not in plot_directories
|
||||
|
||||
filenames_to_remove: List[str] = []
|
||||
for plot_filename, paths_entry in self.plot_filename_paths.items():
|
||||
loaded_path, duplicated_paths = paths_entry
|
||||
loaded_plot = Path(loaded_path) / Path(plot_filename)
|
||||
if plot_removed(loaded_plot):
|
||||
filenames_to_remove.append(plot_filename)
|
||||
if loaded_plot in self.plots:
|
||||
del self.plots[loaded_plot]
|
||||
total_result.removed += 1
|
||||
# No need to check the duplicates here since we drop the whole entry
|
||||
continue
|
||||
|
||||
paths_to_remove: List[str] = []
|
||||
for path in duplicated_paths:
|
||||
if plot_removed(Path(path) / Path(plot_filename)):
|
||||
paths_to_remove.append(path)
|
||||
total_result.removed += 1
|
||||
for path in paths_to_remove:
|
||||
duplicated_paths.remove(path)
|
||||
|
||||
for filename in filenames_to_remove:
|
||||
del self.plot_filename_paths[filename]
|
||||
|
||||
def batches() -> Iterator[Tuple[int, List[Path]]]:
|
||||
if total_size > 0:
|
||||
for batch_start in range(0, total_size, self.refresh_parameter.batch_size):
|
||||
@ -247,7 +274,6 @@ class PlotManager:
|
||||
# Set the remaining files since `refresh_batch()` doesn't know them but we want to report it
|
||||
batch_result.remaining = remaining
|
||||
total_result.loaded += batch_result.loaded
|
||||
total_result.removed += batch_result.removed
|
||||
total_result.processed += batch_result.processed
|
||||
total_result.duration += batch_result.duration
|
||||
|
||||
@ -294,8 +320,6 @@ class PlotManager:
|
||||
filename_str = str(file_path)
|
||||
if self.match_str is not None and self.match_str not in filename_str:
|
||||
return None
|
||||
if not file_path.exists():
|
||||
return None
|
||||
if (
|
||||
file_path in self.failed_to_open_filenames
|
||||
and (time.time() - self.failed_to_open_filenames[file_path])
|
||||
@ -314,6 +338,9 @@ class PlotManager:
|
||||
log.debug(f"Skip duplicated plot {str(file_path)}")
|
||||
return None
|
||||
try:
|
||||
if not file_path.exists():
|
||||
return None
|
||||
|
||||
prover = DiskProver(str(file_path))
|
||||
|
||||
log.debug(f"process_file {str(file_path)}")
|
||||
@ -413,35 +440,6 @@ class PlotManager:
|
||||
return new_plot_info
|
||||
|
||||
with self, ThreadPoolExecutor() as executor:
|
||||
|
||||
# First drop all plots we have in plot_filename_paths but not longer in the filesystem or set in config
|
||||
def plot_removed(test_path: Path):
|
||||
return not test_path.exists() or test_path.parent not in plot_directories
|
||||
|
||||
with self.plot_filename_paths_lock:
|
||||
filenames_to_remove: List[str] = []
|
||||
for plot_filename, paths_entry in self.plot_filename_paths.items():
|
||||
loaded_path, duplicated_paths = paths_entry
|
||||
loaded_plot = Path(loaded_path) / Path(plot_filename)
|
||||
if plot_removed(loaded_plot):
|
||||
filenames_to_remove.append(plot_filename)
|
||||
if loaded_plot in self.plots:
|
||||
del self.plots[loaded_plot]
|
||||
result.removed += 1
|
||||
# No need to check the duplicates here since we drop the whole entry
|
||||
continue
|
||||
|
||||
paths_to_remove: List[str] = []
|
||||
for path in duplicated_paths:
|
||||
if plot_removed(Path(path) / Path(plot_filename)):
|
||||
paths_to_remove.append(path)
|
||||
result.removed += 1
|
||||
for path in paths_to_remove:
|
||||
duplicated_paths.remove(path)
|
||||
|
||||
for filename in filenames_to_remove:
|
||||
del self.plot_filename_paths[filename]
|
||||
|
||||
plots_refreshed: Dict[Path, PlotInfo] = {}
|
||||
for new_plot in executor.map(process_file, plot_paths):
|
||||
if new_plot is not None:
|
||||
|
@ -172,7 +172,6 @@ class BlockTools:
|
||||
|
||||
if event == PlotRefreshEvents.batch_processed:
|
||||
self.total_result.loaded += update_result.loaded
|
||||
self.total_result.removed += update_result.removed
|
||||
self.total_result.processed += update_result.processed
|
||||
self.total_result.duration += update_result.duration
|
||||
assert update_result.remaining == len(self.expected_plots) - self.total_result.processed
|
||||
@ -180,7 +179,6 @@ class BlockTools:
|
||||
|
||||
if event == PlotRefreshEvents.done:
|
||||
assert self.total_result.loaded == update_result.loaded
|
||||
assert self.total_result.removed == update_result.removed
|
||||
assert self.total_result.processed == update_result.processed
|
||||
assert self.total_result.duration == update_result.duration
|
||||
assert update_result.remaining == 0
|
||||
|
Loading…
Reference in New Issue
Block a user