mononoke: log route to step on unexpected errors from walker

Summary:
On unexpected errors like missing blobstore keys the walker will now log the preceding node (source) and an interesting step to this node (not necessarily  the immediately preceding, e.g. the affected changeset).

Validate mode produces route information with interesting tracking enabled,  scrub currently does not to save time+memory. Blobstore errors in scrub mode can be reproduced in validate mode when the extra context from the graph route is needed.

Reviewed By: farnz

Differential Revision: D22600962

fbshipit-source-id: 27d46303a2f2c07219950c20cc7f1f78773163e5
This commit is contained in:
Alex Hornby 2020-07-28 05:19:37 -07:00 committed by Facebook GitHub Bot
parent e5b249cefc
commit c01ba6abfa
7 changed files with 100 additions and 28 deletions

View File

@ -27,6 +27,29 @@ validate, expecting all valid
Walked* (glob)
Nodes,Pass,Fail:40,3,0; EdgesChecked:9; CheckType:Pass,Fail Total:3,0 HgLinkNodePopulated:3,0
validate, check route is logged on unexpected error (forced with chaos blob)
$ mononoke_walker --storage-id=blobstore --blobstore-read-chaos-rate=1 --readonly-storage --cachelib-only-blobstore validate -I deep -q --bookmark master_bookmark --scuba-log-file scuba-error.json 2>&1 | strip_glog
Walking roots * (glob)
Walking edge types * (glob)
Walking node types * (glob)
Performing check types [HgLinkNodePopulated]
Execution error: * (glob)
* (glob)
Caused by:
0: Blobstore error
1: Injected failure in get to ChaosBlobstore for key repo0000.changeset.blake2.c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd
Error: Execution failed
Check scuba data is logged for error on step and that it contains message and route info
$ wc -l < scuba-error.json
1
$ jq -r '.int * .normal | [ .check_fail, .check_type, .node_key, .node_type, .repo, .src_node_type, .via_node_type, .walk_type, .error_msg ] | @csv' < scuba-error.json
1,"step","changeset.blake2.c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd","BonsaiChangeset","repo","Bookmark",,"validate","Could not step to OutgoingEdge { label: BookmarkToBonsaiChangeset, target: BonsaiChangeset(ChangesetId(Blake2(c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd))), path: None }, due to Blobstore error
* (glob)
Caused by:
Injected failure in get to ChaosBlobstore for key repo0000.changeset.blake2.c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd, via Some(ValidateRoute { src_node: Bookmark(BookmarkName { bookmark: ""master_bookmark"" }), via: [] })"
Remove all filenodes
$ sqlite3 "$TESTTMP/monsql/sqlite_dbs" "DELETE FROM filenodes where linknode=x'112478962961147124EDD43549AEDD1A335E44BF'";

View File

@ -7,7 +7,7 @@
use crate::graph::{EdgeType, Node, NodeData, NodeType, WrappedPath};
use crate::state::{StepStats, WalkState};
use crate::walk::{OutgoingEdge, WalkVisitor};
use crate::walk::{EmptyRoute, OutgoingEdge, StepRoute, WalkVisitor};
use context::{CoreContext, SamplingKey};
use dashmap::DashMap;
@ -59,6 +59,16 @@ pub struct PathTrackingRoute {
pub mtime: Option<DateTime>,
}
// No useful node info held. TODO(ahornby) be nice to expand StepRoute logging to show path if present
impl StepRoute for PathTrackingRoute {
fn source_node(&self) -> Option<&Node> {
None
}
fn via_node(&self) -> Option<&Node> {
None
}
}
// Only certain node types can have repo paths associated
fn filter_repo_path(node_type: NodeType, path: Option<&'_ WrappedPath>) -> Option<&'_ WrappedPath> {
match node_type {
@ -193,7 +203,8 @@ where
}
}
}
self.inner.start_step(ctx, route.map(|_| &()), step)
self.inner
.start_step(ctx, route.map(|_| &EmptyRoute {}), step)
}
fn visit(
@ -208,7 +219,7 @@ where
PathTrackingRoute,
Vec<OutgoingEdge>,
) {
let inner_route = route.as_ref().map(|_| ());
let inner_route = route.as_ref().map(|_| EmptyRoute {});
let mtime = match &node_data {
Some(NodeData::BonsaiChangeset(bcs)) => {
@ -241,14 +252,15 @@ where
}
// Super simple sampling visitor impl for scrubbing
impl<T> WalkVisitor<(Node, Option<NodeData>, Option<StepStats>), ()> for SamplingWalkVisitor<T>
impl<T> WalkVisitor<(Node, Option<NodeData>, Option<StepStats>), EmptyRoute>
for SamplingWalkVisitor<T>
where
T: SampleTrigger<Node>,
{
fn start_step(
&self,
mut ctx: CoreContext,
route: Option<&()>,
route: Option<&EmptyRoute>,
step: &OutgoingEdge,
) -> CoreContext {
if self.sample_node_types.contains(&step.target.get_type()) {
@ -269,7 +281,7 @@ where
self.sampler.map_keys(sampling_key, step.target.clone());
}
}
self.inner.start_step(ctx, route.map(|_| &()), step)
self.inner.start_step(ctx, route, step)
}
fn visit(
@ -277,11 +289,11 @@ where
ctx: &CoreContext,
resolved: OutgoingEdge,
node_data: Option<NodeData>,
route: Option<()>,
route: Option<EmptyRoute>,
outgoing: Vec<OutgoingEdge>,
) -> (
(Node, Option<NodeData>, Option<StepStats>),
(),
EmptyRoute,
Vec<OutgoingEdge>,
) {
self.inner.visit(ctx, resolved, node_data, route, outgoing)

View File

@ -21,6 +21,7 @@ use crate::setup::{
use crate::sizing::SizingSample;
use crate::tail::{walk_exact_tail, RepoWalkRun};
use crate::validate::TOTAL;
use crate::walk::EmptyRoute;
use anyhow::Error;
use clap::ArgMatches;
@ -390,7 +391,7 @@ pub async fn scrub_objects<'a>(
sample_rate,
sample_offset,
));
walk_exact_tail::<_, _, _, _, _, ()>(
walk_exact_tail::<_, _, _, _, _, EmptyRoute>(
fb,
logger,
datasources,

View File

@ -6,7 +6,7 @@
*/
use crate::graph::{EdgeType, Node, NodeData, NodeType, WrappedPath};
use crate::walk::{expand_checked_nodes, OutgoingEdge, WalkVisitor};
use crate::walk::{expand_checked_nodes, EmptyRoute, OutgoingEdge, WalkVisitor};
use array_init::array_init;
use context::CoreContext;
use dashmap::DashMap;
@ -157,11 +157,11 @@ impl WalkState {
}
}
impl WalkVisitor<(Node, Option<NodeData>, Option<StepStats>), ()> for WalkState {
impl WalkVisitor<(Node, Option<NodeData>, Option<StepStats>), EmptyRoute> for WalkState {
fn start_step(
&self,
ctx: CoreContext,
_route: Option<&()>,
_route: Option<&EmptyRoute>,
_step: &OutgoingEdge,
) -> CoreContext {
ctx
@ -172,11 +172,11 @@ impl WalkVisitor<(Node, Option<NodeData>, Option<StepStats>), ()> for WalkState
_ctx: &CoreContext,
resolved: OutgoingEdge,
node_data: Option<NodeData>,
_route: Option<()>,
_route: Option<EmptyRoute>,
mut outgoing: Vec<OutgoingEdge>,
) -> (
(Node, Option<NodeData>, Option<StepStats>),
(),
EmptyRoute,
Vec<OutgoingEdge>,
) {
// Filter things we don't want to enter the WalkVisitor at all.
@ -209,6 +209,6 @@ impl WalkVisitor<(Node, Option<NodeData>, Option<StepStats>), ()> for WalkState
visited_of_type: self.get_visit_count(&node.get_type()),
};
((node, node_data, Some(stats)), (), outgoing)
((node, node_data, Some(stats)), EmptyRoute {}, outgoing)
}
}

View File

@ -6,7 +6,7 @@
*/
use crate::setup::{RepoWalkDatasources, RepoWalkParams};
use crate::walk::{walk_exact, WalkVisitor};
use crate::walk::{walk_exact, StepRoute, WalkVisitor};
use anyhow::Error;
use cloned::cloned;
@ -15,7 +15,6 @@ use fbinit::FacebookInit;
use futures::{future::Future, stream::BoxStream};
use scuba_ext::ScubaSampleBuilder;
use slog::Logger;
use std::fmt::Debug;
use tokio::time::{Duration, Instant};
#[derive(Clone)]
@ -39,7 +38,7 @@ where
SinkOut: Future<Output = Result<(), Error>> + 'static + Send,
V: 'static + Clone + WalkVisitor<VOut, Route> + Send,
VOut: 'static + Send,
Route: 'static + Send + Clone + Debug,
Route: 'static + Send + Clone + StepRoute,
{
let scuba_builder = datasources.scuba_builder;
let repo = datasources.blobrepo;

View File

@ -23,7 +23,7 @@ use crate::setup::{
};
use crate::state::{StepStats, WalkState};
use crate::tail::{walk_exact_tail, RepoWalkRun};
use crate::walk::{OutgoingEdge, WalkVisitor};
use crate::walk::{EmptyRoute, OutgoingEdge, StepRoute, WalkVisitor};
use anyhow::{format_err, Error};
use clap::ArgMatches;
@ -277,6 +277,15 @@ impl ValidateRoute {
}
}
impl StepRoute for ValidateRoute {
fn source_node(&self) -> Option<&Node> {
Some(&self.src_node)
}
fn via_node(&self) -> Option<&Node> {
self.via.last()
}
}
impl WalkVisitor<(Node, Option<CheckData>, Option<StepStats>), ValidateRoute>
for ValidatingVisitor
{
@ -286,7 +295,8 @@ impl WalkVisitor<(Node, Option<CheckData>, Option<StepStats>), ValidateRoute>
route: Option<&ValidateRoute>,
step: &OutgoingEdge,
) -> CoreContext {
self.inner.start_step(ctx, route.map(|_| &()), step)
self.inner
.start_step(ctx, route.map(|_| &EmptyRoute {}), step)
}
fn visit(
@ -344,7 +354,7 @@ impl WalkVisitor<(Node, Option<CheckData>, Option<StepStats>), ValidateRoute>
&ctx,
resolved,
node_data,
route.as_ref().map(|_| ()),
route.as_ref().map(|_| EmptyRoute {}),
outgoing,
);

View File

@ -43,6 +43,26 @@ use std::{
};
use thiserror::Error;
pub trait StepRoute: Debug {
/// Where we stepped from, useful for immediate reproductions with --walk-root
fn source_node(&self) -> Option<&Node>;
/// What the check thinks is an interesting node on the route to here (e.g. the affected changeset)
fn via_node(&self) -> Option<&Node>;
}
#[derive(Clone, Debug)]
pub struct EmptyRoute();
// No useful node info held.
impl StepRoute for EmptyRoute {
fn source_node(&self) -> Option<&Node> {
None
}
fn via_node(&self) -> Option<&Node> {
None
}
}
// Holds type of edge and target Node that we want to load in next step(s)
// Combined with current node, this forms an complegte edge.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
@ -675,7 +695,7 @@ pub fn walk_exact<V, VOut, Route>(
where
V: 'static + Clone + WalkVisitor<VOut, Route> + Send,
VOut: 'static + Send,
Route: 'static + Send + Clone + Debug,
Route: 'static + Send + Clone + StepRoute,
{
// Build lookups
let published_bookmarks = repo
@ -763,7 +783,7 @@ async fn walk_one<V, VOut, Route>(
where
V: 'static + Clone + WalkVisitor<VOut, Route> + Send,
VOut: 'static + Send,
Route: 'static + Send + Clone + Debug,
Route: 'static + Send + Clone + StepRoute,
{
let logger = ctx.logger().clone();
@ -842,22 +862,29 @@ where
let step_output = match step_result {
Ok(s) => Ok(s),
Err(e) => {
let msg = format!(
"Could not step to {:?}, due to {:?}, via {:?}",
&walk_item, e, via
);
// Log to scuba regardless
add_node_to_scuba(None, None, &walk_item.target, &mut scuba);
add_node_to_scuba(
via.as_ref().and_then(|v| v.source_node()),
via.as_ref().and_then(|v| v.via_node()),
&walk_item.target,
&mut scuba,
);
scuba
.add(EDGE_TYPE, edge_label.to_str())
.add(CHECK_TYPE, "step")
.add(CHECK_FAIL, 1)
.add("error_msg", msg.clone())
.log();
// Optionally attempt to continue
if error_as_data_node_types.contains(&walk_item.target.get_type()) {
if error_as_data_edge_types.is_empty()
|| error_as_data_edge_types.contains(&walk_item.label)
{
warn!(
logger,
"Could not step to {:?}, due to: {:?} via {:?}", &walk_item, e, via
);
warn!(logger, "{}", msg);
Ok(StepOutput(
NodeData::ErrorAsData(walk_item.target.clone()),
vec![],