From 44105e1f8073880ba77aef1eee6db403ce87d568 Mon Sep 17 00:00:00 2001 From: Conrad Irwin Date: Thu, 16 May 2024 14:10:49 -0600 Subject: [PATCH] Upload panics via collab instead of zed.dev (#11932) Release Notes: - N/A --- crates/collab/src/api/events.rs | 90 +++++++++++++++++++ .../telemetry_events/src/telemetry_events.rs | 29 ++++++ crates/zed/src/reliability.rs | 58 +++++------- 3 files changed, 139 insertions(+), 38 deletions(-) diff --git a/crates/collab/src/api/events.rs b/crates/collab/src/api/events.rs index 89ee60f634..4e8deaca92 100644 --- a/crates/collab/src/api/events.rs +++ b/crates/collab/src/api/events.rs @@ -26,6 +26,7 @@ pub fn router() -> Router { Router::new() .route("/telemetry/events", post(post_events)) .route("/telemetry/crashes", post(post_crash)) + .route("/telemetry/panics", post(post_panic)) .route("/telemetry/hangs", post(post_hang)) } @@ -325,6 +326,95 @@ pub async fn post_hang( Ok(()) } +pub async fn post_panic( + Extension(app): Extension>, + TypedHeader(ZedChecksumHeader(checksum)): TypedHeader, + body: Bytes, +) -> Result<()> { + let Some(expected) = calculate_json_checksum(app.clone(), &body) else { + return Err(Error::Http( + StatusCode::INTERNAL_SERVER_ERROR, + "events not enabled".into(), + ))?; + }; + + if checksum != expected { + return Err(Error::Http( + StatusCode::BAD_REQUEST, + "invalid checksum".into(), + ))?; + } + + let report: telemetry_events::PanicRequest = serde_json::from_slice(&body) + .map_err(|_| Error::Http(StatusCode::BAD_REQUEST, "invalid json".into()))?; + let panic = report.panic; + + tracing::error!( + service = "client", + version = %panic.app_version, + os_name = %panic.os_name, + os_version = %panic.os_version.clone().unwrap_or_default(), + installation_id = %panic.installation_id.unwrap_or_default(), + description = %panic.payload, + backtrace = %panic.backtrace.join("\n"), + "panic report"); + + let backtrace = if panic.backtrace.len() > 25 { + let total = panic.backtrace.len(); + format!( + "{}\n and {} more", + panic + .backtrace + .iter() + .take(20) + .cloned() + .collect::>() + .join("\n"), + total - 20 + ) + } else { + panic.backtrace.join("\n") + }; + let backtrace_with_summary = panic.payload + "\n" + &backtrace; + + if let Some(slack_panics_webhook) = app.config.slack_panics_webhook.clone() { + let payload = slack::WebhookBody::new(|w| { + w.add_section(|s| s.text(slack::Text::markdown("Panic request".to_string()))) + .add_section(|s| { + s.add_field(slack::Text::markdown(format!( + "*Version:*\n {} ", + panic.app_version + ))) + .add_field({ + slack::Text::markdown(format!( + "*OS:*\n{} {}", + panic.os_name, + panic.os_version.unwrap_or_default() + )) + }) + }) + .add_rich_text(|r| r.add_preformatted(|p| p.add_text(backtrace_with_summary))) + }); + let payload_json = serde_json::to_string(&payload).map_err(|err| { + log::error!("Failed to serialize payload to JSON: {err}"); + Error::Internal(anyhow!(err)) + })?; + + reqwest::Client::new() + .post(slack_panics_webhook) + .header("Content-Type", "application/json") + .body(payload_json) + .send() + .await + .map_err(|err| { + log::error!("Failed to send payload to Slack: {err}"); + Error::Internal(anyhow!(err)) + })?; + } + + Ok(()) +} + pub async fn post_events( Extension(app): Extension>, TypedHeader(ZedChecksumHeader(checksum)): TypedHeader, diff --git a/crates/telemetry_events/src/telemetry_events.rs b/crates/telemetry_events/src/telemetry_events.rs index 9e110f0e02..1f2a0f0245 100644 --- a/crates/telemetry_events/src/telemetry_events.rs +++ b/crates/telemetry_events/src/telemetry_events.rs @@ -155,3 +155,32 @@ pub struct HangReport { pub architecture: String, pub installation_id: Option, } + +#[derive(Serialize, Deserialize)] +pub struct LocationData { + pub file: String, + pub line: u32, +} + +#[derive(Serialize, Deserialize)] +pub struct Panic { + pub thread: String, + pub payload: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub location_data: Option, + pub backtrace: Vec, + pub app_version: String, + pub release_channel: String, + pub os_name: String, + pub os_version: Option, + pub architecture: String, + pub panicked_on: i64, + #[serde(skip_serializing_if = "Option::is_none")] + pub installation_id: Option, + pub session_id: String, +} + +#[derive(Serialize, Deserialize)] +pub struct PanicRequest { + pub panic: Panic, +} diff --git a/crates/zed/src/reliability.rs b/crates/zed/src/reliability.rs index 001c919a39..81a9e53324 100644 --- a/crates/zed/src/reliability.rs +++ b/crates/zed/src/reliability.rs @@ -3,13 +3,13 @@ use backtrace::{self, Backtrace}; use chrono::Utc; use db::kvp::KEY_VALUE_STORE; use gpui::{App, AppContext, SemanticVersion}; +use http::Method; use isahc::config::Configurable; use http::{self, HttpClient, HttpClientWithUrl}; use paths::{CRASHES_DIR, CRASHES_RETIRED_DIR}; use release_channel::ReleaseChannel; use release_channel::RELEASE_CHANNEL; -use serde::{Deserialize, Serialize}; use settings::Settings; use smol::stream::StreamExt; use std::{ @@ -18,39 +18,12 @@ use std::{ sync::{atomic::Ordering, Arc}, }; use std::{io::Write, panic, sync::atomic::AtomicU32, thread}; +use telemetry_events::LocationData; +use telemetry_events::Panic; +use telemetry_events::PanicRequest; use util::{paths, ResultExt}; use crate::stdout_is_a_pty; - -#[derive(Serialize, Deserialize)] -struct LocationData { - file: String, - line: u32, -} - -#[derive(Serialize, Deserialize)] -struct Panic { - thread: String, - payload: String, - #[serde(skip_serializing_if = "Option::is_none")] - location_data: Option, - backtrace: Vec, - app_version: String, - release_channel: String, - os_name: String, - os_version: Option, - architecture: String, - panicked_on: i64, - #[serde(skip_serializing_if = "Option::is_none")] - installation_id: Option, - session_id: String, -} - -#[derive(Serialize)] -struct PanicRequest { - panic: Panic, -} - static PANIC_COUNT: AtomicU32 = AtomicU32::new(0); pub fn init_panic_hook(app: &App, installation_id: Option, session_id: String) { @@ -119,7 +92,7 @@ pub fn init_panic_hook(app: &App, installation_id: Option, session_id: S backtrace.drain(0..=ix); } - let panic_data = Panic { + let panic_data = telemetry_events::Panic { thread: thread_name.into(), payload, location_data: info.location().map(|location| LocationData { @@ -397,7 +370,7 @@ async fn upload_previous_panics( http: Arc, telemetry_settings: client::TelemetrySettings, ) -> Result> { - let panic_report_url = http.build_url("/api/panic"); + let panic_report_url = http.build_zed_api_url("/telemetry/panics", &[])?; let mut children = smol::fs::read_dir(&*paths::LOGS_DIR).await?; let mut most_recent_panic = None; @@ -440,12 +413,21 @@ async fn upload_previous_panics( if let Some(panic) = panic { most_recent_panic = Some((panic.panicked_on, panic.payload.clone())); - let body = serde_json::to_string(&PanicRequest { panic }).unwrap(); + let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap(); + + let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else { + continue; + }; + + let Ok(request) = http::Request::builder() + .method(Method::POST) + .uri(panic_report_url.as_ref()) + .header("x-zed-checksum", checksum) + .body(json_bytes.into()) + else { + continue; + }; - let request = http::Request::post(&panic_report_url) - .redirect_policy(isahc::config::RedirectPolicy::Follow) - .header("Content-Type", "application/json") - .body(body.into())?; let response = http.send(request).await.context("error sending panic")?; if !response.status().is_success() { log::error!("Error uploading panic to server: {}", response.status());