handlers: add "blame" handler

Summary:
This handler simply exposes the existing blame derived data over EdenAPI.

The main work is re-indexing the blame ranges' commit indices so the commit index can be sent as a simple list instead of a VecMap.

Any error is propagated in-band in the results. I was a little surprised to learn that errors within the HandlerResult stream are ignored (i.e. not propagated to client). It seems like the stream items should have an envelope type to allow automatic Result propagation from server to client.

Reviewed By: quark-zju

Differential Revision: D44596606

fbshipit-source-id: 0ae9d501a35e3e930fd4cbe710fd3e76f493312c
This commit is contained in:
Muir Manders 2023-04-05 12:05:57 -07:00 committed by Facebook GitHub Bot
parent 42fdfd8b8e
commit 911de56fb9
7 changed files with 241 additions and 1 deletions

View File

@ -20,7 +20,7 @@ use mononoke_types::blame_v2::BlameV2;
use mononoke_types::ChangesetId;
use mononoke_types::MPath;
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum CompatBlame {
V1(BlameMaybeRejected),
V2(BlameV2),

View File

@ -0,0 +1,159 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::collections::hash_map::Entry::Occupied;
use std::collections::hash_map::Entry::Vacant;
use std::collections::HashMap;
use std::num::NonZeroU64;
use anyhow::anyhow;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use async_trait::async_trait;
use edenapi_types::BlameData;
use edenapi_types::BlameRequest;
use edenapi_types::BlameResult;
use edenapi_types::Key;
use edenapi_types::ServerError;
use futures::stream;
use futures::StreamExt;
use mononoke_api::ChangesetId;
use mononoke_api::CompatBlame;
use mononoke_api_hg::HgRepoContext;
use mononoke_types::blame_v2::BlameV2;
use super::EdenApiHandler;
use super::EdenApiMethod;
use super::HandlerResult;
use crate::errors::ErrorKind;
use crate::utils::to_hg_path;
use crate::utils::to_mpath;
/// Maximum number of per-file blame futures that a single request drives
/// concurrently; it is the limit passed to `buffer_unordered` in
/// `BlameHandler::handler`.
// I don't expect big blame requests, so let's keep this low.
const MAX_CONCURRENT_BLAMES_PER_REQUEST: usize = 10;
/// Unit type that carries the `EdenApiHandler` implementation for the
/// `/blame` endpoint.
pub struct BlameHandler;
/// EdenAPI `POST /blame` endpoint: blames every file key in the request.
///
/// Each key is handled independently by `blame_file`, with at most
/// `MAX_CONCURRENT_BLAMES_PER_REQUEST` blames in flight at a time. Results are
/// emitted as they complete, so response order need not match request order.
#[async_trait]
impl EdenApiHandler for BlameHandler {
    type Request = BlameRequest;
    type Response = BlameResult;

    const HTTP_METHOD: hyper::Method = hyper::Method::POST;
    const API_METHOD: EdenApiMethod = EdenApiMethod::Blame;
    const ENDPOINT: &'static str = "/blame";

    /// Fixed sampling rate of 100, independent of the request contents.
    fn sampling_rate(_request: &Self::Request) -> NonZeroU64 {
        nonzero_ext::nonzero!(100u64)
    }

    /// Turn the requested file keys into a bounded-concurrency stream of
    /// per-file blame results.
    async fn handler(
        repo: HgRepoContext,
        _path: Self::PathExtractor,
        _query: Self::QueryStringExtractor,
        request: Self::Request,
    ) -> HandlerResult<'async_trait, Self::Response> {
        // Every key gets its own clone of the repo context so the per-file
        // futures are independent of each other.
        let per_file_results = stream::iter(request.files)
            .map(move |key| blame_file(repo.clone(), key))
            .buffer_unordered(MAX_CONCURRENT_BLAMES_PER_REQUEST);

        Ok(per_file_results.boxed())
    }
}
/// Blame a single file and package the outcome for the response stream.
///
/// This function itself never returns `Err`: any failure from
/// `blame_file_data` is converted into a generic `ServerError` (using the
/// error's `Debug` rendering as the message) and stored in the result's `data`
/// field, next to the `file` key it belongs to. Errors are reported in-band
/// because, per the commit description, `Err` items in the handler's result
/// stream are not propagated to the client.
async fn blame_file(repo: HgRepoContext, key: Key) -> Result<BlameResult> {
    // `blame_file_data` takes the key by value, so one clone is required; the
    // original key is then moved into the result instead of being cloned again.
    let data = blame_file_data(repo, key.clone())
        .await
        .map_err(|e| ServerError::generic(format!("{:?}", e)));

    Ok(BlameResult { file: key, data })
}
async fn blame_file_data(repo: HgRepoContext, key: Key) -> Result<BlameData> {
let repo = repo.repo();
let cs = repo
.changeset(key.hgid)
.await
.context("failed to resolve blame hgid")?
.ok_or(ErrorKind::HgIdNotFound(key.hgid))?;
let blame = cs
.path_with_history(to_mpath(&key.path)?.context(ErrorKind::UnexpectedEmptyPath)?)
.await?
.blame(true)
.await?;
let blame = match blame {
CompatBlame::V1(_) => bail!("unexpected BlameV1 data"),
CompatBlame::V2(BlameV2::Blame(blame)) => blame,
CompatBlame::V2(BlameV2::Rejected(rejected)) => return Err(rejected.into()),
};
let old_csid_index = blame.csid_index();
let mut csid_remap = HashMap::new();
let mut csids: Vec<ChangesetId> = Vec::new();
let ranges = blame
.ranges()
.iter()
.map(|range| {
let new_csid_idx = match csid_remap.entry(range.csid_index) {
Occupied(entry) => *entry.get(),
Vacant(vac) => {
let csid = match old_csid_index.get(range.csid_index as usize) {
Some(csid) => csid,
None => bail!("invalid blame range csid_index {}", range.csid_index),
};
csids.push(*csid);
*vac.insert(csids.len() - 1)
}
};
Ok(edenapi_types::BlameLineRange {
line_offset: range.offset,
line_count: range.length,
commit_index: new_csid_idx
.try_into()
.context("blame commit count overflows u32")?,
path_index: range.path_index,
origin_line_offset: range.origin_offset,
})
})
.collect::<Result<Vec<_>>>()?;
let paths = blame
.paths()
.iter()
.map(to_hg_path)
.collect::<Result<Vec<_>>>()?;
// Convert to hg csid, maintaining order in csids.
let mut to_hg: HashMap<_, _> = repo
.many_changeset_hg_ids(csids.clone())
.await?
.into_iter()
.collect();
let hg_csids = csids
.iter()
.map(|csid| {
to_hg
.remove(csid)
.map(Into::into)
.ok_or_else(|| anyhow!("no hg mapping for blame csid {:?}", csid))
})
.collect::<Result<Vec<_>>>()?;
Ok(BlameData {
line_ranges: ranges,
commits: hg_csids,
paths,
})
}

View File

@ -62,6 +62,7 @@ use crate::utils::monitor::Monitor;
use crate::utils::parse_wire_request;
use crate::utils::to_cbor_bytes;
mod blame;
mod bookmarks;
mod capabilities;
mod clone;
@ -84,6 +85,7 @@ pub(crate) use handler::PathExtractorWithRepo;
/// Used to identify the handler for logging and stats collection.
#[derive(Copy, Clone)]
pub enum EdenApiMethod {
Blame,
Capabilities,
Files,
Files2,
@ -118,6 +120,7 @@ pub enum EdenApiMethod {
impl fmt::Display for EdenApiMethod {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let name = match self {
Self::Blame => "blame",
Self::Capabilities => "capabilities",
Self::Files => "files",
Self::Files2 => "files2",
@ -439,6 +442,7 @@ pub fn build_router(ctx: ServerContext) -> Router {
Handlers::setup::<files::DownloadFileHandler>(route);
Handlers::setup::<commit::CommitMutationsHandler>(route);
Handlers::setup::<commit::CommitTranslateId>(route);
Handlers::setup::<blame::BlameHandler>(route);
route.get("/:repo/health_check").to(health_handler);
route
.get("/:repo/capabilities")

View File

@ -25,6 +25,7 @@ define_stats! {
failure_4xx: dynamic_timeseries("{}.failure_4xx", (method: String); Rate, Sum),
failure_5xx: dynamic_timeseries("{}.failure_5xx", (method: String); Rate, Sum),
response_bytes_sent: dynamic_histogram("{}.response_bytes_sent", (method: String); 1_500_000, 0, 150_000_000, Average, Sum, Count; P 50; P 75; P 95; P 99),
blame_duration_ms: histogram(100, 0, 5000, Average, Sum, Count; P 50; P 75; P 95; P 99),
capabilities_duration_ms: histogram(100, 0, 5000, Average, Sum, Count; P 50; P 75; P 95; P 99),
files_duration_ms: histogram(100, 0, 5000, Average, Sum, Count; P 50; P 75; P 95; P 99),
files2_duration_ms: histogram(100, 0, 5000, Average, Sum, Count; P 50; P 75; P 95; P 99),
@ -79,6 +80,7 @@ fn log_stats(state: &mut State, status: StatusCode) -> Option<()> {
use EdenApiMethod::*;
match method {
Blame => STATS::blame_duration_ms.add_value(dur_ms),
Capabilities => STATS::capabilities_duration_ms.add_value(dur_ms),
Files => STATS::files_duration_ms.add_value(dur_ms),
Files2 => STATS::files2_duration_ms.add_value(dur_ms),

View File

@ -9,6 +9,7 @@ use std::fmt;
use anyhow::Context;
use anyhow::Result;
use edenapi_types::HgId;
use ephemeral_blobstore::BubbleId;
use ephemeral_blobstore::RepoEphemeralStore;
@ -53,6 +54,12 @@ impl From<HgChangesetId> for ChangesetSpecifier {
}
}
impl From<HgId> for ChangesetSpecifier {
fn from(id: HgId) -> Self {
Self::Hg(HgChangesetId::from(id))
}
}
impl From<Globalrev> for ChangesetSpecifier {
fn from(id: Globalrev) -> Self {
Self::Globalrev(id)

View File

@ -703,6 +703,18 @@ impl BlameData {
}
}
/// Borrow the blame ranges held by this blame data as a slice.
pub fn ranges(&self) -> &[BlameRangeIndexes] {
&self.ranges
}
/// Borrow the `VecMap` of changeset ids held by this blame data.
///
/// NOTE(review): presumably the `csid_index` of each entry in `ranges()` is a
/// key into this map — confirm against `BlameRangeIndexes`.
pub fn csid_index(&self) -> &VecMap<ChangesetId> {
&self.csids
}
/// Borrow the paths held by this blame data as a slice.
///
/// NOTE(review): presumably the `path_index` of each entry in `ranges()` is a
/// position in this slice — confirm against `BlameRangeIndexes`.
pub fn paths(&self) -> &[MPath] {
&self.paths
}
/// Generate a string containing content annotated with this blame data.
fn annotate(&self, content: &str) -> Result<String> {
if content.is_empty() {

View File

@ -0,0 +1,56 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License found in the LICENSE file in the root
# directory of this source tree.
$ . "${TEST_FIXTURES}/library.sh"
$ BLAME_VERSION=2 setup_common_config
$ setconfig experimental.edenapi-blame=true
$ start_and_wait_for_mononoke_server
$ hgmn_init repo
$ cd repo
$ drawdag << EOS
> D # D/bar = zero\nuno\ntwo\n
> |
> C # C/bar = zero\none\ntwo\n (renamed from foo)
> |
> B # B/foo = one\ntwo\n
> |
> A # A/foo = one\n
> EOS
Errors are propagated:
$ hgedenapi debugapi -e blame -i "[{'path': 'bar', 'node': '$D'}]"
[{"data": {"Err": {"code": 0,
"message": "HgId not found: e9ace545f925b6f62ae34087895fdc950d168e5f"}},
"file": {"node": bin("e9ace545f925b6f62ae34087895fdc950d168e5f"),
"path": "bar"}}]
$ hgedenapi push -q -r $D --to master --create
API works:
$ hgedenapi debugapi -e blame -i "[{'path': 'bar', 'node': '$D'}]"
[{"data": {"Ok": {"paths": ["foo",
"bar"],
"commits": [bin("1ac4b616a32d09428a015bf6a11ccbd1c1410aad"),
bin("e9ace545f925b6f62ae34087895fdc950d168e5f"),
bin("4b86660b06977d770e191e5d454b6b2f2ca14818")],
"line_ranges": [{"line_count": 1,
"path_index": 1,
"line_offset": 0,
"commit_index": 0,
"origin_line_offset": 0},
{"line_count": 1,
"path_index": 1,
"line_offset": 1,
"commit_index": 1,
"origin_line_offset": 1},
{"line_count": 1,
"path_index": 0,
"line_offset": 2,
"commit_index": 2,
"origin_line_offset": 1}]}},
"file": {"node": bin("e9ace545f925b6f62ae34087895fdc950d168e5f"),
"path": "bar"}}]