Summary: Print calculated statistics in CSV format, so a CSV file can be generated by redirecting the output to a file. As a TODO we may want to add new fields to the RepoStatistics struct, refactor the code, and create the CSV file using e.g. serde serialization.

Reviewed By: krallin

Differential Revision: D17907650

fbshipit-source-id: 0e7f0af522cc72c067d59431039e44998d5dd354
/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This software may be used and distributed according to the terms of the
 * GNU General Public License found in the LICENSE file in the root
 * directory of this source tree.
 */

use blobrepo::BlobRepo;
use blobstore::Blobstore;
use bookmarks::BookmarkName;
use bytes::Bytes;
use changesets::{deserialize_cs_entries, ChangesetEntry};
use clap::{App, Arg, SubCommand};
use cloned::cloned;
use cmdlib::{args, monitoring};
use context::CoreContext;
use failure::err_msg;
use failure_ext::Error;
use fbinit::FacebookInit;
use futures::future::{self, loop_fn, Future, IntoFuture, Loop};
use futures::stream::{self, Stream};
use futures_ext::{BoxFuture, BoxStream, FutureExt};
use manifest::{Diff, Entry, ManifestOps};
use mercurial_types::{Changeset, FileBytes, HgChangesetId, HgFileNodeId, HgManifestId};
use mononoke_types::{FileType, RepositoryId};
use scuba_ext::ScubaSampleBuilder;
use slog::info;
use stats::{define_stats, Timeseries};
use std::collections::HashMap;
use std::fs;
use std::ops::{Add, Sub};
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;

define_stats! {
    prefix = "mononoke.statistics_collector";
    calculated_changesets: timeseries(RATE, SUM),
}

const ARG_IN_FILENAME: &str = "in-filename";

const SUBCOMMAND_STATISTICS_FROM_FILE: &str = "statistics-from-commits-in-file";

const SCUBA_DATASET_NAME: &str = "mononoke_repository_statistics";
// Files larger than this threshold (10 MB) are skipped when counting lines
const BIG_FILE_THRESHOLD: u64 = 10_000_000;

fn setup_app<'a, 'b>() -> App<'a, 'b> {
    let app = args::MononokeApp {
        hide_advanced_args: false,
    };
    let app = app
        .build("Tool to calculate repo statistics")
        .version("0.0.0")
        .subcommand(
            SubCommand::with_name(SUBCOMMAND_STATISTICS_FROM_FILE)
                .about(
                    "calculate statistics for commits in the provided file and print them in CSV format",
                )
                .arg(
                    Arg::with_name(ARG_IN_FILENAME)
                        .long(ARG_IN_FILENAME)
                        .takes_value(true)
                        .required(true)
                        .help("a file with a list of bonsai changesets to calculate stats for"),
                ),
        )
        .arg(
            Arg::with_name("bookmark")
                .long("bookmark")
                .takes_value(true)
                .required(false)
                .help("bookmark from which we get statistics"),
        )
        .arg(
            Arg::with_name("log-to-scuba")
                .long("log-to-scuba")
                .takes_value(false)
                .required(false)
                .help("if set then statistics are logged to scuba"),
        );
    args::add_fb303_args(app)
}
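
// Hypothetical example invocations (the binary name and repo-selection flags
// come from cmdlib and your build setup, so treat these as sketches only):
//
//   statistics_collector --repo-id 0 --bookmark master --log-to-scuba
//   statistics_collector --repo-id 0 statistics-from-commits-in-file \
//       --in-filename serialized_commits > statistics.csv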

#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct RepoStatistics {
    num_files: i64,
    total_file_size: i64,
    num_lines: i64,
}

impl RepoStatistics {
    pub fn new(num_files: i64, total_file_size: i64, num_lines: i64) -> Self {
        Self {
            num_files,
            total_file_size,
            num_lines,
        }
    }
}

impl Add for RepoStatistics {
    type Output = RepoStatistics;

    fn add(self, other: Self) -> Self {
        Self {
            num_files: self.num_files + other.num_files,
            total_file_size: self.total_file_size + other.total_file_size,
            num_lines: self.num_lines + other.num_lines,
        }
    }
}

impl Sub for RepoStatistics {
    type Output = RepoStatistics;

    fn sub(self, other: Self) -> Self {
        Self {
            num_files: self.num_files - other.num_files,
            total_file_size: self.total_file_size - other.total_file_size,
            num_lines: self.num_lines - other.num_lines,
        }
    }
}
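
// Component-wise arithmetic sketch (illustrative values):
//
//   RepoStatistics::new(2, 10, 3) + RepoStatistics::new(1, 5, 1)
//       == RepoStatistics::new(3, 15, 4)
//
// Subtraction mirrors this; `update_statistics` below relies on both to apply
// per-file deltas when files change or disappear between changesets.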

pub fn number_of_lines(
    bytes_stream: BoxStream<FileBytes, Error>,
) -> impl Future<Item = i64, Error = Error> {
    bytes_stream
        .map(|bytes| {
            bytes.into_iter().fold(0, |num_lines, byte| {
                if byte == b'\n' {
                    num_lines + 1
                } else {
                    num_lines
                }
            })
        })
        .fold(0, |result, num_lines| {
            future::ok::<_, Error>(result + num_lines)
        })
}
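
// Behavior sketch for `number_of_lines` (illustrative): counting is purely
// byte-based, so chunk boundaries don't matter. A stream of chunks
// ["First\n", "", "Second\nThird\n"] yields 3, while content without a
// trailing newline, such as "abc", yields 0.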

pub fn get_manifest_from_changeset(
    ctx: CoreContext,
    repo: BlobRepo,
    changeset: HgChangesetId,
) -> impl Future<Item = HgManifestId, Error = Error> {
    repo.get_changeset_by_changesetid(ctx.clone(), changeset.clone())
        .map(move |changeset| changeset.manifestid())
}

pub fn get_changeset_timestamp_from_changeset(
    ctx: CoreContext,
    repo: BlobRepo,
    hg_cs_id: HgChangesetId,
) -> impl Future<Item = i64, Error = Error> {
    repo.get_changeset_by_changesetid(ctx.clone(), hg_cs_id.clone())
        .map(move |changeset| changeset.time().timestamp_secs())
}

// Counts lines only for regular files smaller than BIG_FILE_THRESHOLD;
// other leaf entries still contribute to the file count and total size
pub fn get_statistics_from_entry(
    ctx: CoreContext,
    repo: BlobRepo,
    entry: Entry<HgManifestId, (FileType, HgFileNodeId)>,
) -> impl Future<Item = RepoStatistics, Error = Error> {
    match entry {
        Entry::Leaf((file_type, filenode_id)) => repo
            .get_file_size(ctx.clone(), filenode_id)
            .and_then(move |size| {
                if FileType::Regular == file_type && size < BIG_FILE_THRESHOLD {
                    number_of_lines(repo.get_file_content(ctx.clone(), filenode_id))
                        .join(future::ok(size))
                        .left_future()
                } else {
                    future::ok((0, size)).right_future()
                }
            })
            .map(move |(lines, size)| RepoStatistics::new(1, size as i64, lines))
            .left_future(),
        Entry::Tree(_) => future::ok(RepoStatistics::default()).right_future(),
    }
}
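
// `left_future`/`right_future` (from futures_ext::FutureExt) wrap the two
// branches in an Either-style future, giving both match arms (and both sides
// of the size check) a single concrete type without heap-boxing every entry.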

pub fn get_statistics_from_changeset(
    ctx: CoreContext,
    repo: BlobRepo,
    blobstore: impl Blobstore + Clone,
    hg_cs_id: HgChangesetId,
) -> impl Future<Item = RepoStatistics, Error = Error> {
    info!(
        ctx.logger(),
        "Started calculating statistics for changeset {}", hg_cs_id
    );
    get_manifest_from_changeset(ctx.clone(), repo.clone(), hg_cs_id.clone()).and_then({
        cloned!(ctx, repo);
        move |manifest_id| {
            manifest_id
                .list_leaf_entries(ctx.clone(), blobstore.clone())
                .map({
                    cloned!(ctx);
                    move |(_, leaf)| {
                        get_statistics_from_entry(ctx.clone(), repo.clone(), Entry::Leaf(leaf))
                    }
                })
                .buffered(100)
                .fold(RepoStatistics::default(), |statistics, new_stat| {
                    future::ok::<_, Error>(statistics + new_stat)
                })
                .map(move |statistics| {
                    info!(
                        ctx.logger(),
                        "Finished calculating statistics for changeset {}", hg_cs_id
                    );
                    statistics
                })
        }
    })
}
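
// `buffered(100)` caps how many per-file statistics futures run concurrently,
// so walking a large manifest stays parallel without flooding the blobstore.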

pub fn update_statistics(
    ctx: CoreContext,
    repo: BlobRepo,
    statistics: RepoStatistics,
    diff: BoxStream<Diff<Entry<HgManifestId, (FileType, HgFileNodeId)>>, Error>,
) -> impl Future<Item = RepoStatistics, Error = Error> {
    diff.map({
        move |diff| match diff {
            Diff::Added(_, entry) => {
                get_statistics_from_entry(ctx.clone(), repo.clone(), entry.clone())
                    .map(|stat| (stat, Operation::Add))
                    .boxify()
            }
            Diff::Removed(_, entry) => {
                get_statistics_from_entry(ctx.clone(), repo.clone(), entry.clone())
                    .map(|stat| (stat, Operation::Sub))
                    .boxify()
            }
            Diff::Changed(_, old_entry, new_entry) => {
                get_statistics_from_entry(ctx.clone(), repo.clone(), old_entry.clone())
                    .join(get_statistics_from_entry(
                        ctx.clone(),
                        repo.clone(),
                        new_entry.clone(),
                    ))
                    .map(|(old_stats, new_stats)| new_stats - old_stats)
                    .join(future::ok(Operation::Add))
                    .boxify()
            }
        }
    })
    .buffered(100)
    .fold(
        statistics,
        |statistics, (file_stats, operation)| match operation {
            Operation::Add => future::ok::<_, Error>(statistics + file_stats),
            Operation::Sub => future::ok::<_, Error>(statistics - file_stats),
        },
    )
}
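
// Delta semantics: Added entries are added, Removed entries subtracted, and
// Changed entries contribute (new - old), which may be negative per field.
// Folding these into the previous snapshot produces statistics for the new
// changeset without rescanning the entire manifest.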

pub fn log_statistics(
    ctx: CoreContext,
    mut scuba_logger: ScubaSampleBuilder,
    cs_timestamp: i64,
    repo_name: String,
    hg_cs_id: HgChangesetId,
    statistics: RepoStatistics,
) {
    info!(
        ctx.logger(),
        "Statistics for changeset {}\nCs timestamp: {}\nNumber of files {}\nTotal file size {}\nNumber of lines {}",
        hg_cs_id,
        cs_timestamp,
        statistics.num_files,
        statistics.total_file_size,
        statistics.num_lines
    );
    scuba_logger
        .add("repo_name", repo_name)
        .add("num_files", statistics.num_files)
        .add("total_file_size", statistics.total_file_size)
        .add("num_lines", statistics.num_lines)
        .add("changeset", hg_cs_id.to_hex().to_string())
        .log_with_time(cs_timestamp as u64);
}
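
// Note that the sample time passed to `log_with_time` is the commit timestamp,
// not wall-clock time, so Scuba rows line up with repository history even when
// statistics are backfilled long after the commits landed.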

fn parse_serialized_commits<P: AsRef<Path>>(file: P) -> Result<Vec<ChangesetEntry>, Error> {
    let data = fs::read(file).map_err(Error::from)?;
    deserialize_cs_entries(&Bytes::from(data))
}

pub fn generate_statistics_from_file<P: AsRef<Path>>(
    ctx: CoreContext,
    repo: BlobRepo,
    in_path: P,
) -> BoxFuture<(), Error> {
    // 1 day in seconds
    const REQUIRED_COMMITS_DISTANCE: i64 = 60 * 60 * 24;
    let blobstore = Arc::new(repo.get_blobstore());
    // TODO(dgrzegorzewski): T55705023 consider creating the CSV file here and saving statistics
    // using e.g. serde serialization. To avoid printing fields separately it may be necessary to
    // add new fields to the RepoStatistics struct, like cs_timestamp, hg_cs_id and repo_id, and
    // refactor the code.
    println!("repo_id,hg_cs_id,cs_timestamp,num_files,total_file_size,num_lines");
    parse_serialized_commits(in_path)
        .into_future()
        .and_then(move |changesets| {
            info!(ctx.logger(), "Started calculating changesets timestamps");
            stream::iter_ok(changesets)
                .map({
                    cloned!(ctx, repo);
                    move |cs_id| {
                        let repo_id = cs_id.repo_id;
                        repo.get_hg_from_bonsai_changeset(ctx.clone(), cs_id.cs_id)
                            .and_then({
                                cloned!(ctx, repo);
                                move |hg_cs_id| {
                                    get_changeset_timestamp_from_changeset(
                                        ctx.clone(),
                                        repo.clone(),
                                        hg_cs_id,
                                    )
                                    .map(move |cs_timestamp| (hg_cs_id, cs_timestamp, repo_id))
                                }
                            })
                    }
                })
                .buffered(100)
                .collect()
                .map({
                    cloned!(ctx);
                    move |mut changesets| {
                        info!(
                            ctx.logger(),
                            "Timestamps calculated, sorting them and starting calculating statistics"
                        );
                        changesets.sort_by_key(|(_, cs_timestamp, _)| cs_timestamp.clone());
                        stream::iter_ok(changesets)
                    }
                })
                .flatten_stream()
                .fold(
                    // Mapping repo_id => (cs_creation_timestamp, hg_cs_id, statistics)
                    HashMap::<RepositoryId, (i64, HgChangesetId, RepoStatistics)>::new(),
                    move |repo_stats_map, (hg_cs_id, cs_timestamp, repo_id)| {
                        cloned!(ctx, repo, blobstore);
                        match repo_stats_map.get(&repo_id).cloned() {
                            Some((old_cs_timestamp, old_hg_cs_id, old_stats)) => {
                                // Calculate statistics for the changeset only if it was created
                                // at least REQUIRED_COMMITS_DISTANCE seconds after the changeset
                                // we used previously to calculate statistics.
                                if cs_timestamp - old_cs_timestamp > REQUIRED_COMMITS_DISTANCE {
                                    info!(
                                        ctx.logger(),
                                        "Changeset {} with timestamp {} was created more than {} seconds after previous, calculating statistics for it",
                                        hg_cs_id, cs_timestamp, REQUIRED_COMMITS_DISTANCE
                                    );
                                    get_manifest_from_changeset(
                                        ctx.clone(),
                                        repo.clone(),
                                        old_hg_cs_id.clone(),
                                    )
                                    .join(get_manifest_from_changeset(
                                        ctx.clone(),
                                        repo.clone(),
                                        hg_cs_id.clone(),
                                    ))
                                    .and_then({
                                        cloned!(mut repo_stats_map);
                                        move |(old_manifest, manifest)| {
                                            update_statistics(
                                                ctx.clone(),
                                                repo.clone(),
                                                old_stats.clone(),
                                                old_manifest.diff(
                                                    ctx.clone(),
                                                    blobstore.clone(),
                                                    manifest.clone(),
                                                ),
                                            )
                                            .map(move |statistics| {
                                                info!(
                                                    ctx.logger(),
                                                    "Statistics for changeset {} calculated",
                                                    hg_cs_id
                                                );
                                                println!(
                                                    "{},{},{},{},{},{}",
                                                    repo_id.id(),
                                                    hg_cs_id.to_hex(),
                                                    cs_timestamp,
                                                    statistics.num_files,
                                                    statistics.total_file_size,
                                                    statistics.num_lines
                                                );
                                                repo_stats_map.insert(
                                                    repo_id,
                                                    (cs_timestamp, hg_cs_id, statistics),
                                                );
                                                repo_stats_map
                                            })
                                        }
                                    })
                                    .boxify()
                                } else {
                                    // Skip this changeset
                                    future::ok(repo_stats_map.clone()).boxify()
                                }
                            }
                            None => {
                                info!(
                                    ctx.logger(),
                                    "Found first changeset for repo_id {}", repo_id.id()
                                );
                                get_statistics_from_changeset(
                                    ctx.clone(),
                                    repo.clone(),
                                    blobstore.clone(),
                                    hg_cs_id,
                                )
                                .map({
                                    cloned!(mut repo_stats_map);
                                    move |statistics| {
                                        info!(
                                            ctx.logger(),
                                            "First changeset for repo_id {} calculated",
                                            repo_id.id()
                                        );
                                        println!(
                                            "{},{},{},{},{},{}",
                                            repo_id.id(),
                                            hg_cs_id.to_hex(),
                                            cs_timestamp,
                                            statistics.num_files,
                                            statistics.total_file_size,
                                            statistics.num_lines
                                        );
                                        repo_stats_map.insert(
                                            repo_id,
                                            (cs_timestamp, hg_cs_id, statistics),
                                        );
                                        repo_stats_map
                                    }
                                })
                                .boxify()
                            }
                        }
                    },
                )
                .map(move |_| ())
        })
        .boxify()
}
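
// A hedged sketch of the TODO above (T55705023), assuming the `csv` and
// `serde` crates were added as dependencies and RepoStatistics grew
// repo_id/hg_cs_id/cs_timestamp fields deriving serde::Serialize; the
// hand-rolled println! rows could then become something like:
//
//     let mut wtr = csv::Writer::from_writer(std::io::stdout());
//     wtr.serialize(&record)?; // the header row is derived from field names
//     wtr.flush()?;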

enum Pass {
    FirstPass(HgChangesetId),
    NextPass(HgChangesetId, HgChangesetId),
}

enum Operation {
    Add,
    Sub,
}
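
// `Pass` tracks the state of the tailing loop in `main`: `FirstPass` computes
// full statistics for the bookmark head, while `NextPass` carries the previous
// and current heads so only their manifest diff needs processing. `Operation`
// tells `update_statistics` whether a per-file delta is added or subtracted.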

#[fbinit::main]
fn main(fb: FacebookInit) -> Result<(), Error> {
    let matches = setup_app().get_matches();

    args::init_cachelib(fb, &matches);

    let logger = args::init_logging(fb, &matches);
    let ctx = CoreContext::new_with_logger(fb, logger.clone());
    let bookmark = match matches.value_of("bookmark") {
        Some(name) => name.to_string(),
        None => String::from("master"),
    };
    let bookmark = BookmarkName::new(bookmark)?;
    let repo_name = args::get_repo_name(&matches)?;
    let scuba_logger = if matches.is_present("log-to-scuba") {
        ScubaSampleBuilder::new(fb, SCUBA_DATASET_NAME)
    } else {
        ScubaSampleBuilder::with_discard()
    };

    let run = args::open_repo(fb, &logger, &matches).and_then({
        cloned!(matches);
        move |repo| {
            if let (SUBCOMMAND_STATISTICS_FROM_FILE, Some(sub_m)) = matches.subcommand() {
                cloned!(ctx);
                // The argument is marked as required, so clap guarantees it is present
                let in_filename = sub_m
                    .value_of(ARG_IN_FILENAME)
                    .expect("missing required argument");
                generate_statistics_from_file(ctx.clone(), repo.clone(), in_filename)
            } else {
                let blobstore = Arc::new(repo.get_blobstore());
                repo.get_bookmark(ctx.clone(), &bookmark)
                    .and_then(move |changeset| changeset.ok_or(err_msg("cannot load bookmark")))
                    .and_then(move |changeset| {
                        loop_fn::<_, (), _, _>(
                            (Pass::FirstPass(changeset), RepoStatistics::default()),
                            move |(pass, statistics)| {
                                cloned!(ctx, repo, blobstore, bookmark);
                                match pass {
                                    Pass::FirstPass(changeset) => {
                                        get_statistics_from_changeset(
                                            ctx.clone(),
                                            repo.clone(),
                                            blobstore.clone(),
                                            changeset.clone(),
                                        )
                                        .and_then({
                                            cloned!(repo, repo_name, scuba_logger, ctx);
                                            move |statistics| {
                                                get_changeset_timestamp_from_changeset(
                                                    ctx.clone(),
                                                    repo,
                                                    changeset,
                                                )
                                                .map(move |cs_timestamp| {
                                                    log_statistics(
                                                        ctx,
                                                        scuba_logger,
                                                        cs_timestamp,
                                                        repo_name,
                                                        changeset,
                                                        statistics,
                                                    );
                                                    STATS::calculated_changesets.add_value(1);
                                                    (changeset, statistics)
                                                })
                                            }
                                        })
                                        .boxify()
                                    }
                                    Pass::NextPass(prev_changeset, cur_changeset) => {
                                        if prev_changeset == cur_changeset {
                                            let duration = Duration::from_millis(1000);
                                            info!(
                                                ctx.logger(),
                                                "Changeset hasn't changed, sleeping {:?}", duration
                                            );
                                            tokio_timer::sleep(duration)
                                                .from_err()
                                                .map(move |()| (cur_changeset, statistics))
                                                .boxify()
                                        } else {
                                            info!(
                                                ctx.logger(),
                                                "Found new changeset: {}, updating statistics",
                                                cur_changeset
                                            );
                                            get_manifest_from_changeset(
                                                ctx.clone(),
                                                repo.clone(),
                                                prev_changeset.clone(),
                                            )
                                            .join(get_manifest_from_changeset(
                                                ctx.clone(),
                                                repo.clone(),
                                                cur_changeset.clone(),
                                            ))
                                            .and_then({
                                                cloned!(ctx, repo, repo_name, scuba_logger);
                                                move |(prev_manifest_id, cur_manifest_id)| {
                                                    update_statistics(
                                                        ctx.clone(),
                                                        repo.clone(),
                                                        statistics.clone(),
                                                        prev_manifest_id.diff(
                                                            ctx.clone(),
                                                            blobstore.clone(),
                                                            cur_manifest_id.clone(),
                                                        ),
                                                    )
                                                    .and_then({
                                                        cloned!(ctx);
                                                        move |statistics| {
                                                            info!(
                                                                ctx.logger(),
                                                                "Statistics for new changeset updated."
                                                            );
                                                            get_changeset_timestamp_from_changeset(
                                                                ctx.clone(),
                                                                repo,
                                                                cur_changeset,
                                                            )
                                                            .map(move |cs_timestamp| {
                                                                log_statistics(
                                                                    ctx,
                                                                    scuba_logger,
                                                                    cs_timestamp,
                                                                    repo_name,
                                                                    cur_changeset,
                                                                    statistics,
                                                                );
                                                                STATS::calculated_changesets
                                                                    .add_value(1);
                                                                (cur_changeset, statistics)
                                                            })
                                                        }
                                                    })
                                                }
                                            })
                                            .boxify()
                                        }
                                    }
                                }
                                .and_then(move |(cur_changeset, statistics)| {
                                    repo.get_bookmark(ctx.clone(), &bookmark)
                                        .and_then(move |new_changeset| {
                                            new_changeset.ok_or(err_msg("cannot load bookmark"))
                                        })
                                        .and_then(move |new_changeset| {
                                            future::ok(Loop::Continue((
                                                Pass::NextPass(cur_changeset, new_changeset),
                                                statistics,
                                            )))
                                        })
                                })
                            },
                        )
                    })
                    .boxify()
            }
        }
    });

    let mut runtime = tokio::runtime::Runtime::new()?;
    monitoring::start_fb303_and_stats_agg(
        fb,
        &mut runtime,
        "statistics_collector",
        &logger,
        &matches,
    )?;

    runtime.block_on(run)?;
    runtime.shutdown_on_idle();
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use fixtures::linear;
    use futures::stream;
    use maplit::btreemap;
    use std::str::FromStr;
    use tests_utils::{create_commit, store_files};
    use tokio::runtime::Runtime;

    #[test]
    fn test_number_of_lines_empty_stream() -> Result<(), Error> {
        let mut rt = Runtime::new().unwrap();

        let stream: BoxStream<FileBytes, Error> =
            Box::new(stream::once(Ok(FileBytes(Bytes::from(&b""[..])))));
        let result = rt.block_on(number_of_lines(stream))?;
        assert_eq!(result, 0);
        Ok(())
    }

    #[test]
    fn test_number_of_lines_one_line() -> Result<(), Error> {
        let mut rt = Runtime::new().unwrap();

        let stream: BoxStream<FileBytes, Error> = Box::new(stream::once(Ok(FileBytes(
            Bytes::from(&b"First line\n"[..]),
        ))));
        let result = rt.block_on(number_of_lines(stream))?;
        assert_eq!(result, 1);
        Ok(())
    }

    #[test]
    fn test_number_of_lines_many_lines() -> Result<(), Error> {
        let mut rt = Runtime::new().unwrap();

        let stream: BoxStream<FileBytes, Error> = Box::new(stream::once(Ok(FileBytes(
            Bytes::from(&b"First line\nSecond line\nThird line\n"[..]),
        ))));
        let result = rt.block_on(number_of_lines(stream))?;
        assert_eq!(result, 3);
        Ok(())
    }

    #[test]
    fn test_number_of_lines_many_items() -> Result<(), Error> {
        let mut rt = Runtime::new().unwrap();

        let vec = vec![
            FileBytes(Bytes::from(&b"First line\n"[..])),
            FileBytes(Bytes::from(&b""[..])),
            FileBytes(Bytes::from(&b"First line\nSecond line\nThird line\n"[..])),
        ];
        let stream: BoxStream<FileBytes, Error> = Box::new(stream::iter_ok(vec));
        let result = rt.block_on(number_of_lines(stream))?;
        assert_eq!(result, 4);
        Ok(())
    }

    #[fbinit::test]
    fn linear_test_get_statistics_from_changeset(fb: FacebookInit) {
        let repo = linear::getrepo(fb);
        let mut runtime = Runtime::new().unwrap();
        let ctx = CoreContext::test_mock(fb);
        let blobstore = repo.get_blobstore();

        /*
        Commit consists of two files (name => content):
            "1" => "1\n"
            "files" => "1\n"
        */
        let root = HgChangesetId::from_str("2d7d4ba9ce0a6ffd222de7785b249ead9c51c536").unwrap();
        let p = repo.get_bonsai_from_hg(ctx.clone(), root);
        let p = runtime.block_on(p).unwrap().unwrap();
        let parents = vec![p];

        let bcs_id = create_commit(
            ctx.clone(),
            repo.clone(),
            parents,
            store_files(
                ctx.clone(),
                btreemap! {
                    "dir1/dir2/file1" => Some("first line\nsecond line\n"),
                    "dir1/dir3/file2" => Some("first line\n"),
                },
                repo.clone(),
            ),
        );

        let hg_cs_id = repo.get_hg_from_bonsai_changeset(ctx.clone(), bcs_id);
        let hg_cs_id = runtime.block_on(hg_cs_id).unwrap();

        let stats = get_statistics_from_changeset(
            ctx.clone(),
            repo.clone(),
            blobstore.clone(),
            hg_cs_id.clone(),
        );
        let stats = runtime.block_on(stats).unwrap();

        // (num_files, total_file_size, num_lines)
        assert_eq!(stats, RepoStatistics::new(4, 38, 5));
    }

    #[fbinit::test]
    fn linear_test_get_statistics_from_entry_tree(fb: FacebookInit) {
        let repo = linear::getrepo(fb);
        let mut runtime = Runtime::new().unwrap();
        let ctx = CoreContext::test_mock(fb);
        let blobstore = repo.get_blobstore();

        /*
        Commit consists of two files (name => content):
            "1" => "1\n"
            "files" => "1\n"
        */
        let root = HgChangesetId::from_str("2d7d4ba9ce0a6ffd222de7785b249ead9c51c536").unwrap();
        let p = repo.get_bonsai_from_hg(ctx.clone(), root);
        let p = runtime.block_on(p).unwrap().unwrap();
        let parents = vec![p];

        let bcs_id = create_commit(
            ctx.clone(),
            repo.clone(),
            parents,
            store_files(
                ctx.clone(),
                btreemap! {
                    "dir1/dir2/file1" => Some("first line\nsecond line\n"),
                    "dir1/dir3/file2" => Some("first line\n"),
                },
                repo.clone(),
            ),
        );

        let hg_cs_id = repo.get_hg_from_bonsai_changeset(ctx.clone(), bcs_id);
        let hg_cs_id = runtime.block_on(hg_cs_id).unwrap();

        let tree_entries = get_manifest_from_changeset(ctx.clone(), repo.clone(), hg_cs_id.clone())
            .and_then({
                cloned!(ctx);
                move |manifest| {
                    manifest
                        .list_all_entries(ctx.clone(), blobstore.clone())
                        .filter_map(|(_, entry)| match entry {
                            Entry::Tree(_) => Some(entry),
                            _ => None,
                        })
                        .collect()
                }
            });
        let mut tree_entries = runtime.block_on(tree_entries).unwrap();

        let stats =
            get_statistics_from_entry(ctx.clone(), repo.clone(), tree_entries.pop().unwrap());
        let stats = runtime.block_on(stats).unwrap();

        // For Entry::Tree we expect all statistics to be 0
        // (num_files, total_file_size, num_lines)
        assert_eq!(stats, RepoStatistics::default());
    }

    #[fbinit::test]
    fn linear_test_update_statistics(fb: FacebookInit) {
        let repo = linear::getrepo(fb);
        let mut runtime = Runtime::new().unwrap();
        let ctx = CoreContext::test_mock(fb);
        let blobstore = repo.get_blobstore();

        /*
        Commit consists of two files (name => content):
            "1" => "1\n"
            "files" => "1\n"
        */
        let prev_hg_cs_id =
            HgChangesetId::from_str("2d7d4ba9ce0a6ffd222de7785b249ead9c51c536").unwrap();
        /*
        Commit consists of two files (name => content):
            "2" => "2\n"
            "files" => "1\n2\n"
        */
        let cur_hg_cs_id =
            HgChangesetId::from_str("3e0e761030db6e479a7fb58b12881883f9f8c63f").unwrap();

        let stats = get_statistics_from_changeset(
            ctx.clone(),
            repo.clone(),
            blobstore.clone(),
            prev_hg_cs_id.clone(),
        );
        let stats = runtime.block_on(stats).unwrap();

        let manifests =
            get_manifest_from_changeset(ctx.clone(), repo.clone(), prev_hg_cs_id.clone()).join(
                get_manifest_from_changeset(ctx.clone(), repo.clone(), cur_hg_cs_id.clone()),
            );
        let (prev_manifest, cur_manifest) = runtime.block_on(manifests).unwrap();

        let new_stats = update_statistics(
            ctx.clone(),
            repo.clone(),
            stats.clone(),
            prev_manifest.diff(ctx.clone(), blobstore.clone(), cur_manifest.clone()),
        );
        let new_stats = runtime.block_on(new_stats).unwrap();

        // (num_files, total_file_size, num_lines)
        assert_eq!(new_stats, RepoStatistics::new(3, 8, 4));
    }
}