Don't load initial data older than 5 days (#10481)

* When loading initial data, don't include hosts whose best timestamp is more than 5 days old in the best timestamp dict

* Don't load version data for hosts whose last handshake is more than 5 days old
This commit is contained in:
Chris Marslender 2022-02-28 17:21:31 -06:00 committed by GitHub
parent 7fb26a7a14
commit be2d8267aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -359,6 +359,8 @@ class CrawlStore:
record = self.host_to_records[host]
if record.version == "undefined":
continue
if record.handshake_time < time.time() - 5 * 24 * 3600:
continue
versions[host] = record.version
handshake[host] = record.handshake_time
@ -367,7 +369,8 @@ class CrawlStore:
def load_best_peer_reliability(self):
    """Return the best timestamp of every recently seen host.

    Only records whose ``best_timestamp`` is newer than five days before
    the current time are included; older hosts are left out entirely.
    ``best_timestamp`` is compared against ``time.time()``, so it is
    presumably a Unix timestamp in seconds.

    Returns:
        A new dict mapping host to that host's ``record.best_timestamp``.
    """
    # Compute the cutoff once so every record is judged against the same
    # instant rather than a clock that advances during the loop.
    cutoff = time.time() - 5 * 24 * 3600
    return {
        host: record.best_timestamp
        for host, record in self.host_to_records.items()
        if record.best_timestamp > cutoff
    }
async def update_version(self, host, version, now):