v2: find minimum path parts

This commit is contained in:
camchenry 2019-10-04 12:08:15 -04:00
parent d97e5e6fef
commit 7054a4da9c
3 changed files with 168 additions and 324 deletions

View File

@ -6,130 +6,74 @@ var current_profile = undefined;
function get_min_points() {
return d3.select('#minpoints_field').node().value;
// Returns the longest common path prefix between all strings in an array
// Ex: get_common_path_prefix(["/a/b/c/", "/a/b/d", "/a/e/d/"]) => "/a/"
// Ex: get_common_path_prefix(["/a/b/test.c:44", "/a/b/util.c:123"]) => "/a/b/"
function get_common_path_prefix(paths) {
if (paths.length === 1) {
return "";
// Returns the minimum number of path parts to include starting from the
// end of the path, in order for the resulting string to be unique.
function get_minimum_parts_for_unique_path(paths) {
if (paths.length <= 1) {
return 1;
var A = paths.concat().sort();
var shortest = paths.reduce(function (min, str) { return min < str ? min : str; }, paths[0]);
var longest = paths.reduce(function (min, str) { return min > str ? min : str; }, paths[0]);
var last_slash_index = shortest.lastIndexOf("/");
var i = 0;
var end = shortest.length;
// If there is a last slash, then we will stop there, so that we do not
// cut off the name of the file.
if (last_slash_index !== -1) {
end = last_slash_index + 1;
var minimum = 1;
var shortest_parts = Infinity;
var is_unique = false;
// Remove line numbers from paths
paths = paths.map(function (path) { return path.replace(/:[0-9]*/, ''); });
// Remove duplicate fully qualified path names
paths = remove_duplicates(paths);
// Special case: all of the paths are the same file. In that case, then we
// only need to return the file name.
var all_identical_paths = paths.every(function (path) { return path == paths[0]; });
if (all_identical_paths) {
return 1;
while (i < end && shortest.charAt(i) === longest.charAt(i)) {
var prefix = shortest.substring(0, i);
// Check if the string slicing produced a path with a cutoff word.
// Ex: "/a/b/c/some_path" => "/a/b/c/some"
// We want to turn that into "/a/b/c/" in that case.
var last_slash = prefix.lastIndexOf("/");
if (last_slash !== -1 && last_slash < prefix.length - 1) {
prefix = prefix.substring(0, last_slash + 1);
return prefix;
// Returns a set of the common path prefixes among a set of strings. Paths are
// grouped by their first path part and the largest common prefix is taken for
// each group, so the number of prefixes is determined by how many distinct
// first path parts there are.
// Ex: get_common_path_prefixes([
// "file.c:123",
// "file.c:123",
// "a/file.c:123",
// "a/file.c:123",
// "/test/a/b/c/d/file.c:123",
// "/test/a/b/c/f/file.c:123",
// "/other/a/b/c/d/file.c:123",
// "/other/a/c/file.c:321",
// "/a/b/c/file.c:100",
// "/a/b/d/file.c:123",
// ]) => ["/test/a/b/c/", "/other/a/", "/a/b/"]
function get_common_path_prefixes(paths) {
// Paths grouped by the first path part
var grouped_paths = {};
for (var _i = 0, paths_1 = paths; _i < paths_1.length; _i++) {
var path = paths_1[_i];
var first_slash = path.indexOf("/");
var second_slash = path.indexOf("/", first_slash + 1);
var has_leading_slash = first_slash === 0;
var has_second_slash = second_slash !== -1;
// Includes paths that have two slashes like:
// * /a/b/c/file.c:123
// * /a/file.c:123
// * a/b/file.c:123
// Does not include paths like:
// * a/file.c:123
// * file.c:123
// * /file.c:123
if (has_second_slash) {
while (true) {
var trimmed_paths = paths
.map(function (path) {
var parts = path.split('/');
// Includes paths that have a leading slash like:
// * /a/b/c/file.c:123
// * /a/file.c:123
// Does not include paths like:
// * a/b/file.c:123
var initial_prefix = parts[0];
if (has_leading_slash) {
// If path is "/a/b/c/file.c:123", then prefix is "/a"
initial_prefix = "/" + parts[1];
if (grouped_paths[initial_prefix] === undefined) {
grouped_paths[initial_prefix] = [path];
else {
shortest_parts = Math.min(shortest_parts, parts.length);
return parts.slice(parts.length - minimum, parts.length).join('/');
is_unique = !has_duplicates(trimmed_paths);
if (is_unique) {
return minimum;
else if (minimum >= shortest_parts) {
// We can't possibly return a minimum parts needed that is greater than
// the smallest parts possible
return shortest_parts;
else {
minimum += 1;
// Get the largest common path prefix for each different group of paths
var common_prefixes = [];
for (var prefix in grouped_paths) {
var paths_2 = grouped_paths[prefix];
// Returns the last `num_parts` parts of a slash-separated path, joined back
// together with "/".
//   num_parts: how many trailing parts to keep. Values below 1 are treated as
//              1 so that the result is never an empty string.
//   path:      the path to shorten.
// Ex: get_last_path_parts(2, "/a/b/c/file.c") => "c/file.c"
// Ex: get_last_path_parts(2, "file.c") => "file.c"
function get_last_path_parts(num_parts, path) {
    // Just return the path if it does not have any slash-separated parts
    if (path.indexOf('/') === -1) {
        return path;
    }
    var parts = path.split('/');
    var keep = Math.max(1, num_parts);
    // When more parts are requested than the path has, the start index is
    // negative and slice() clamps it to 0, which returns the whole path.
    return parts.slice(parts.length - keep, parts.length).join('/');
}
var common_path_prefixes = get_common_path_prefixes([
console.log("expected:", ["/test/a/b/c/", "/other/a/", "/a/b/"]);
console.log("got:", common_path_prefixes);
// Given the common path prefixes (from get_common_path_prefixes), this will
// return the unique part of the path which should be displayable. This also
// checks for leading slashes after the slicing, which will be removed.
// Ex: get_unique_path_part(["/a/b/c/"], "/a/b/c/test.cpp") => "test.cpp"
// Ex: get_unique_path_part(["/a/b/c/"], "util.c") => "util.c"
function get_unique_path_part(common_paths, path) {
// For the same reasons as the common path collection logic, we will
// not shorten any paths which do not contain a slash, which is hopefully
// just paths that are only a file name.
for (var _i = 0, common_paths_1 = common_paths; _i < common_paths_1.length; _i++) {
var common_path = common_paths_1[_i];
// Check if the prefix applies to this path
if (path.indexOf(common_path) !== -1) {
return path.substr(common_path.length, path.length);
// Returns true if any value occurs more than once in the array, and false
// otherwise (including for an empty array).
// This could be made simpler by using ES2015 Set instead.
function has_duplicates(array) {
    // The lookup object has no prototype, so values such as "constructor" or
    // "toString" are not reported as already seen by the `in` check.
    var seen = Object.create(null);
    for (var i = 0; i < array.length; i++) {
        var value = array[i];
        if (value in seen) {
            return true;
        }
        seen[value] = true;
    }
    return false;
}
// Returns the distinct values of the array. Each value keeps the position of
// its first occurrence.
function remove_duplicates(array) {
    var uniq = {};
    for (var i = 0; i < array.length; i++) {
        var value = array[i];
        // The key carries the value's type, so values of different types that
        // stringify the same way stay distinct. The "::" suffix also means a
        // key is never a plain integer index, so Object.keys() returns the
        // keys in insertion order.
        uniq[value + '::' + typeof value] = value;
    }
    return Object.keys(uniq).map(function (key) { return uniq[key]; });
}
var test_data = [
@ -138,11 +82,8 @@ function get_unique_path_part(common_paths, path) {
var common_paths = get_common_path_prefixes(test_data);
console.log(common_paths[0] === "/home/fitzgen/");
console.log(get_unique_path_part(common_paths, "/home/fitzgen/walrus/src/lib.rs") === "walrus/src/lib.rs");
console.log(get_unique_path_part(common_paths, "/home/fitzgen/rayon/src/lib.rs") === "rayon/src/lib.rs");
var test_data = [
@ -150,12 +91,8 @@ function get_unique_path_part(common_paths, path) {
var common_paths = get_common_path_prefixes(test_data);
console.log(common_paths[0] === "/a/b/c/");
console.log(get_unique_path_part(common_paths, "/a/b/c/test.cpp:135") === "test.cpp:135");
console.log(get_unique_path_part(common_paths, "/a/b/c/d/util.c:12") === "d/util.c:12");
console.log(get_unique_path_part(common_paths, "/a/b/c/test.cpp:23094345") === "test.cpp:23094345");
var test_data = [
@ -170,13 +107,40 @@ function get_unique_path_part(common_paths, path) {
var common_paths = get_common_path_prefixes(test_data);
console.log(common_paths.indexOf("/rustc/eae3437dfe991621e8afdc82734f4a172d7ddf9b/src/") !== -1);
console.log(common_paths.indexOf("/home/cmchenry/.cargo/registry/src/github.com-1ecc6299db9ec823/") !== -1);
console.log(common_paths.indexOf("src/") === -1);
console.log(get_unique_path_part(common_paths, "/rustc/eae3437dfe991621e8afdc82734f4a172d7ddf9b/src/libcore/slice/mod.rs:3261") === "libcore/slice/mod.rs:3261");
console.log(get_unique_path_part(common_paths, "/home/cmchenry/.cargo/registry/src/github.com-1ecc6299db9ec823/memchr-2.2.1/src/x86/sse2.rs:0") === "memchr-2.2.1/src/x86/sse2.rs:0");
var test_data = [
function display_warning(title, text) {
var warning = $("<div class=\"alert alert-warning alert-dismissible\" role=\"alert\">\n <button type=\"button\" class=\"close\" data-dismiss=\"alert\" aria-label=\"Close\"><span aria-hidden=\"true\">&times;</span></button>\n <strong>" + title + ":</strong> " + text + "\n </div>");
@ -211,20 +175,18 @@ function update(resize) {
var all_paths = [];
// Collect all of the paths that we have, in order to calculate what the
// common path prefix is among all of the paths.
.text(function (path) {
// Do not consider any paths which do not contain a slash as a way of
// filtering out paths which are already just the file name.
if (path.indexOf("/") !== -1) {
d3.selectAll('.path').text(function (path) {
// Filter out any paths which do not contain slash-separated parts
if (path.indexOf('/') !== -1) {
return path;
var common_path_prefixes = get_common_path_prefixes(all_paths);
var minimum_parts = get_minimum_parts_for_unique_path(all_paths);
// Shorten path strings
var paths = d3.selectAll('.path')
.classed('path', false).classed('shortpath', true)
.text(function (path) { return get_unique_path_part(common_path_prefixes, path); })
.text(function (path) { return get_last_path_parts(minimum_parts, path); })
.attr('title', function (datum, index, outerIndex) {
return datum;

File diff suppressed because one or more lines are too long

View File

@ -9,198 +9,82 @@ function get_min_points(): number {
return (<any> d3.select('#minpoints_field').node()).value;
// Returns the longest common path prefix between all strings in an array
// Ex: get_common_path_prefix(["/a/b/c/", "/a/b/d", "/a/e/d/"]) => "/a/"
// Ex: get_common_path_prefix(["/a/b/test.c:44", "/a/b/util.c:123"]) => "/a/b/"
function get_common_path_prefix(paths: string[]): string {
if (paths.length === 1) {
return "";
// Returns the minimum number of path parts to include starting from the
// end of the path, in order for the resulting string to be unique.
function get_minimum_parts_for_unique_path(paths: string[]): number {
if (paths.length <= 1) {
return 1;
const A = paths.concat().sort();
let minimum = 1;
let shortest_parts = Infinity;
let is_unique = false;
const shortest = paths.reduce((min, str) => min < str ? min : str, paths[0]);
const longest = paths.reduce((min, str) => min > str ? min : str, paths[0]);
const last_slash_index = shortest.lastIndexOf("/");
// Remove line numbers from paths
paths = paths.map(path => path.replace(/:[0-9]*/, ''));
let i = 0;
let end = shortest.length;
// Remove duplicate fully qualified path names
paths = remove_duplicates(paths);
// If there is a last slash, then we will stop there, so that we do not
// cut off the name of the file.
if (last_slash_index !== -1) {
end = last_slash_index + 1;
// Special case: all of the paths are the same file. In that case, then we
// only need to return the file name.
const all_identical_paths = paths.every(path => path == paths[0]);
if (all_identical_paths) {
return 1;
while (i < end && shortest.charAt(i) === longest.charAt(i)) {
while (true) {
const trimmed_paths = paths
.map(path => {
const parts: string[] = path.split('/');
shortest_parts = Math.min(shortest_parts, parts.length);
return parts.slice(parts.length - minimum, parts.length).join('/');
is_unique = !has_duplicates(trimmed_paths);
let prefix = shortest.substring(0, i);
// Check if the string slicing produced a path with a cutoff word.
// Ex: "/a/b/c/some_path" => "/a/b/c/some"
// We want to turn that into "/a/b/c/" in that case.
let last_slash = prefix.lastIndexOf("/");
if (last_slash !== -1 && last_slash < prefix.length - 1) {
prefix = prefix.substring(0, last_slash + 1);
return prefix
// Returns a set of the common path prefixes among a set of strings. Paths are
// grouped by their first path part and the largest common prefix is taken for
// each group, so the number of prefixes is determined by how many distinct
// first path parts there are.
// Ex: get_common_path_prefixes([
// "file.c:123",
// "file.c:123",
// "a/file.c:123",
// "a/file.c:123",
// "/test/a/b/c/d/file.c:123",
// "/test/a/b/c/f/file.c:123",
// "/other/a/b/c/d/file.c:123",
// "/other/a/c/file.c:321",
// "/a/b/c/file.c:100",
// "/a/b/d/file.c:123",
// ]) => ["/test/a/b/c/", "/other/a/", "/a/b/"]
function get_common_path_prefixes(paths: string[]): string[] {
// Paths grouped by the first path part
const grouped_paths: {[s: string]: string[]} = {};
for (let path of paths) {
const first_slash = path.indexOf("/");
const second_slash = path.indexOf("/", first_slash+1);
const has_leading_slash = first_slash === 0;
const has_second_slash = second_slash !== -1;
// Includes paths that have two slashes like:
// * /a/b/c/file.c:123
// * /a/file.c:123
// * a/b/file.c:123
// Does not include paths like:
// * a/file.c:123
// * file.c:123
// * /file.c:123
if (has_second_slash) {
const parts = path.split('/');
// Includes paths that have a leading slash like:
// * /a/b/c/file.c:123
// * /a/file.c:123
// Does not include paths like:
// * a/b/file.c:123
let initial_prefix = parts[0];
if (has_leading_slash) {
// If path is "/a/b/c/file.c:123", then prefix is "/a"
initial_prefix = "/" + parts[1];
if (grouped_paths[initial_prefix] === undefined) {
grouped_paths[initial_prefix] = [path];
} else {
if (is_unique) {
return minimum;
} else if (minimum >= shortest_parts) {
// We can't possibly return a minimum parts needed that is greater than
// the smallest parts possible
return shortest_parts;
} else {
minimum += 1;
// Get the largest common path prefix for each different group of paths
const common_prefixes = [];
for (let prefix in grouped_paths) {
let paths = grouped_paths[prefix];
// Returns the last `num_parts` parts of a slash-separated path, joined back
// together with "/".
//   num_parts: how many trailing parts to keep. Values below 1 are treated as
//              1 so that the result is never an empty string.
//   path:      the path to shorten.
// Ex: get_last_path_parts(2, "/a/b/c/file.c") => "c/file.c"
// Ex: get_last_path_parts(2, "file.c") => "file.c"
function get_last_path_parts(num_parts: number, path: string): string {
    // Just return the path if it does not have any slash-separated parts
    if (path.indexOf('/') === -1) {
        return path;
    }

    const parts = path.split('/');
    const keep = Math.max(1, num_parts);
    // When more parts are requested than the path has, the start index is
    // negative and slice() clamps it to 0, which returns the whole path.
    return parts.slice(parts.length - keep, parts.length).join('/');
}
const common_path_prefixes = get_common_path_prefixes([
console.log("expected:", ["/test/a/b/c/", "/other/a/", "/a/b/"]);
console.log("got:", common_path_prefixes);
// Given the common path prefixes (from get_common_path_prefixes), this will
// return the unique part of the path which should be displayable. This also
// checks for leading slashes after the slicing, which will be removed.
// Ex: get_unique_path_part(["/a/b/c/"], "/a/b/c/test.cpp") => "test.cpp"
// Ex: get_unique_path_part(["/a/b/c/"], "util.c") => "util.c"
function get_unique_path_part(common_paths: string[], path: string): string {
// For the same reasons as the common path collection logic, we will
// not shorten any paths which do not contain a slash, which is hopefully
// just paths that are only a file name.
for (let common_path of common_paths) {
// Check if the prefix applies to this path
if (path.indexOf(common_path) !== -1) {
return path.substr(common_path.length, path.length)
// Returns true if any value occurs more than once in the array, and false
// otherwise (including for an empty array).
// This could be made simpler by using ES2015 Set instead.
function has_duplicates(array: string[]): boolean {
    // The lookup object has no prototype, so values such as "constructor" or
    // "toString" are not reported as already seen by the `in` check.
    const seen: {[key: string]: boolean} = Object.create(null);

    for (const value of array) {
        if (value in seen) {
            return true;
        }
        seen[value] = true;
    }

    return false;
}
const test_data = [
function remove_duplicates(array: string[]) {
var uniq: {[key: string]: string} = {};
let common_paths = get_common_path_prefixes(test_data);
console.log(common_paths[0] === "/home/fitzgen/");
console.log(get_unique_path_part(common_paths, "/home/fitzgen/walrus/src/lib.rs") === "walrus/src/lib.rs");
console.log(get_unique_path_part(common_paths, "/home/fitzgen/rayon/src/lib.rs") === "rayon/src/lib.rs");
for (let value of array) {
uniq[value + '::' + typeof value] = value;
const test_data = [
let common_paths = get_common_path_prefixes(test_data);
console.log(common_paths[0] === "/a/b/c/");
console.log(get_unique_path_part(common_paths, "/a/b/c/test.cpp:135") === "test.cpp:135");
console.log(get_unique_path_part(common_paths, "/a/b/c/d/util.c:12") === "d/util.c:12");
console.log(get_unique_path_part(common_paths, "/a/b/c/test.cpp:23094345") === "test.cpp:23094345");
const test_data = [
let common_paths = get_common_path_prefixes(test_data);
console.log(common_paths.indexOf("/rustc/eae3437dfe991621e8afdc82734f4a172d7ddf9b/src/") !== -1);
console.log(common_paths.indexOf("/home/cmchenry/.cargo/registry/src/github.com-1ecc6299db9ec823/") !== -1);
console.log(common_paths.indexOf("src/") === -1);
console.log(get_unique_path_part(common_paths, "/rustc/eae3437dfe991621e8afdc82734f4a172d7ddf9b/src/libcore/slice/mod.rs:3261") === "libcore/slice/mod.rs:3261");
console.log(get_unique_path_part(common_paths, "/home/cmchenry/.cargo/registry/src/github.com-1ecc6299db9ec823/memchr-2.2.1/src/x86/sse2.rs:0") === "memchr-2.2.1/src/x86/sse2.rs:0");
return Object.keys(uniq).map(key => uniq[key]);
function display_warning(title: string, text: string): void {
@ -249,22 +133,20 @@ function update(resize?: boolean) {
// Collect all of the paths that we have, in order to calculate what the
// common path prefix is among all of the paths.
.text((path: string) => {
// Do not consider any paths which do not contain a slash as a way of
// filtering out paths which are already just the file name.
if (path.indexOf("/") !== -1) {
return path;
d3.selectAll('.path').text(path => {
// Filter out any paths which do not contain slash-separated parts
if (path.indexOf('/') !== -1) {
return path;
let common_path_prefixes = get_common_path_prefixes(all_paths);
let minimum_parts = get_minimum_parts_for_unique_path(all_paths);
// Shorten path strings
let paths = d3.selectAll('.path')
.classed('path', false).classed('shortpath', true)
.text((path: string) => get_unique_path_part(common_path_prefixes, path))
.text((path: string) => get_last_path_parts(minimum_parts, path))
.attr('title', (datum: string, index: number, outerIndex: number) => {
return datum;