From d8e775f423099f322df44654a4ac1cce69a3e4ba Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Thu, 27 Aug 2020 18:11:28 -0700 Subject: [PATCH] tracing-collector: limit maximum count of spans Summary: Some functions might be called very frequently. For example, `phases.phasecache.loadphaserevs` might be called 100k+ times. That makes the tracing data harder to process. Limit the count of spans to 1k by default so the data is cheaper to process, and some highly repetitive cases can now be reasoned about. Note the limit is only put on static Span Ids. If a span uses dynamic metadata or ask for different Span Ids each time, they will not be limited. In debugshell, td = %trace repo.revs('smartlog()') len(td.serialize()) dropped from 6MB to 0.87MB. It's also possible to reason about: td = %trace len(repo.revs('ancestors(.)')) in debugshell (taking 30s, 98KB serialized, vs 21s without tracing), while previously the result would be too large to show (`%trace` just hangs). Reviewed By: DurhamG Differential Revision: D23307793 fbshipit-source-id: 3c1e9885ce7a275c2abd8935a4e4539a4f14ce83 --- eden/scm/lib/tracing-collector/src/model.rs | 91 ++++++++++++++++++--- 1 file changed, 81 insertions(+), 10 deletions(-) diff --git a/eden/scm/lib/tracing-collector/src/model.rs b/eden/scm/lib/tracing-collector/src/model.rs index d9ce666889..5380413587 100644 --- a/eden/scm/lib/tracing-collector/src/model.rs +++ b/eden/scm/lib/tracing-collector/src/model.rs @@ -60,6 +60,14 @@ pub struct TracingData { /// - Non-zero: Use a faked clock. #[serde(skip, default = "Default::default")] test_clock_step: u64, + + /// Maximum reference count of a span. + #[serde(skip, default = "default_max_span_ref_count")] + max_span_ref_count: usize, +} + +fn default_max_span_ref_count() -> usize { + 1000 } #[derive(Serialize, Deserialize, Default)] @@ -90,6 +98,10 @@ struct Espan { /// Metadata from the tracing eco-system. #[serde(skip)] metadata: Option<&'static tracing::Metadata<'static>>, + + /// Reference count of the span. + #[serde(skip, default = "Default::default")] + ref_count: usize, } #[derive(Serialize, Deserialize, Clone)] @@ -163,17 +175,34 @@ impl TracingData { Ok(clock) => clock.parse::().unwrap_or(0), Err(_) => 0, }, + max_span_ref_count: default_max_span_ref_count(), }) } + pub fn set_max_span_ref_count(&mut self, value: usize) { + self.max_span_ref_count = value; + } + /// Push an `Eventus` at the current timestamp. /// Return `true` if the [`Eventus`] was pushed. /// Return `false` if `espan_id` is invalid. fn push_eventus(&mut self, action: Action, espan_id: EspanId) -> bool { - if self.get_espan_index(espan_id).is_none() { + let espan_index = match self.get_espan_index(espan_id) { // Ignore invalid EspanId. + None => return false, + Some(index) => index, + }; + if self.espans[espan_index].ref_count >= self.max_span_ref_count { + // Exceed ref-count limit. return false; } + if matches!(action, Action::ExitSpan | Action::Event) { + // Increase ref count of the span. + // + // Do not increase on EnterSpan, making it harder to have unmatched + // events (i.e. Enter without Exit). + self.espans[espan_index].ref_count += 1; + } let timestamp = self.now_micros(); let mut thread_id = THREAD_ID.with(|thread_id| *thread_id); if thread_id == self.default_thread_id { @@ -325,7 +354,11 @@ impl TracingData { espan.record_values(&mut self.strings, &mut meta); let metadata = Some(espan.metadata()); - let espan = Espan { meta, metadata }; + let espan = Espan { + meta, + metadata, + ref_count: 0, + }; let result = EspanId(self.espans.len() as u64 + self.espan_id_offset.0); self.espans.push(espan); @@ -565,7 +598,11 @@ impl TracingData { } let metadata = None; - let espan = Espan { meta, metadata }; + let espan = Espan { + meta, + metadata, + ref_count: 0, + }; let result = EspanId(self.espans.len() as u64 + self.espan_id_offset.0); self.espans.push(espan); @@ -640,7 +677,11 @@ impl TracingData { }) .collect(); let metadata = None; - espans.push(Espan { meta, metadata }); + espans.push(Espan { + meta, + metadata, + ref_count: espan.ref_count, + }); } // Add Eventus @@ -686,6 +727,7 @@ impl TracingData { default_thread_id, relative_start, test_clock_step, + max_span_ref_count: default_max_span_ref_count(), } } } @@ -1353,12 +1395,15 @@ impl TracingData { // Use milliseconds. This is consistent with traceprof. format!("+{}", tree_span.duration / 1000) }; - let call_count = if tree_span.call_count > 1 { + let mut call_count = if tree_span.call_count > 1 { format!(" ({} times)", tree_span.call_count) } else { assert!(tree_span.call_count > 0); String::new() }; + if espan.ref_count >= ctx.this.max_span_ref_count { + call_count += " (truncated)" + } let first_row = Row { columns: vec![ @@ -1904,11 +1949,37 @@ Start Dur.ms | Name Source assert_eq!( data.ascii(&opts), r#"Process _ Thread _: -Start Dur.ms | Name Source - 2 +40014 | foo a.py line 10 - 4 +20000 \ bar (10000 times) a.py line 20 -40004 +10 \ bar a.py line 20 -40006 +6 | foo a.py line 10 +Start Dur.ms | Name Source + 2 +4010 | foo a.py line 10 + 4 +2000 \ bar (1000 times) (truncated) a.py line 20 + 4004 +6 \ foo a.py line 10 + +"# + ); + } + + #[test] + fn test_max_span_ref_count() { + let mut data = TracingData::new_for_test(); + data.set_max_span_ref_count(2000); + + let span_id1 = data.add_espan(&meta("foo", "a.py", "10"), None); + let span_id2 = data.add_espan(&meta("bar", "a.py", "20"), None); + data.add_action(span_id1, Action::EnterSpan); + for _ in 0..5000 { + data.add_action(span_id2, Action::EnterSpan); + data.add_action(span_id2, Action::ExitSpan); + } + data.add_action(span_id1, Action::ExitSpan); + + let mut opts = AsciiOptions::default(); + opts.min_duration_micros_to_hide = 3000; + assert_eq!( + data.ascii(&opts), + r#"Process _ Thread _: +Start Dur.ms | Name Source + 2 +8002 | foo a.py line 10 + 4 +4000 | bar (2000 times) (truncated) a.py line 20 "# );