implement non-quadratic version of uniqWith (#1834)

When given a comparator, uniqWith is quadratic.
Add uniqWithHash that uses an optional hash function
to return to approximately linear behavior,
assuming that the hash function is good.
For targets and selections,
Using the contentRange/selection as the hash value
seems to be pretty good in practice.

I've added "concise" printing versions of positions,
ranges, and selections. These are handy as hash functions,
but I've also made heavy use of them while investigating
hat allocation. They are generally useful.

"take every line" on a file containing 3000 lines
used to take about 15s on my computer.
Now it takes about 1s.
That's still not great but it's fast enough
that it doesn't appear that Cursorless has hung.

I've half a mind to try upstreaming this,
or at least sending a documentation fix,
because a quadratic API is a big footgun.

Fixes #1830 enough for now



## Checklist

- [x] I have added
[tests](https://www.cursorless.org/docs/contributing/test-case-recorder/)
- [/] I have updated the
[docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and
[cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet)
- [/] I have not broken the cheatsheet

---------

Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Co-authored-by: Pokey Rule <755842+pokey@users.noreply.github.com>
This commit is contained in:
Josh Bleecher Snyder 2023-09-01 07:17:58 -07:00 committed by GitHub
parent 52d3eca62b
commit ae5304ddf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 206 additions and 9 deletions

View File

@ -138,4 +138,16 @@ export class Position {
public toEmptyRange(): Range {
return new Range(this, this);
}
/**
* Return a concise string representation of the position.
* @returns concise representation
**/
public concise(): string {
return `${this.line}:${this.character}`;
}
public toString(): string {
return this.concise();
}
}

View File

@ -146,4 +146,16 @@ export class Range {
? new Selection(this.end, this.start)
: new Selection(this.start, this.end);
}
/**
* Return a concise string representation of the range
* @returns concise representation
**/
public concise(): string {
return `${this.start.concise()}-${this.end.concise()}`;
}
public toString(): string {
return this.concise();
}
}

View File

@ -72,4 +72,16 @@ export class Selection extends Range {
this.anchor.isEqual(other.anchor) && this.active.isEqual(other.active)
);
}
/**
* Return a concise string representation of the selection
* @returns concise representation
**/
public concise(): string {
return `${this.anchor.concise()}->${this.active.concise()}`;
}
public toString(): string {
return this.concise();
}
}

View File

@ -27,6 +27,7 @@
"@types/mocha": "^8.0.4",
"@types/sbd": "^1.0.3",
"@types/sinon": "^10.0.2",
"fast-check": "3.12.0",
"js-yaml": "^4.1.0",
"mocha": "^10.2.0",
"sinon": "^11.1.1"

View File

@ -5,7 +5,7 @@ import {
Range,
ScopeType,
} from "@cursorless/common";
import { uniqWith, zip } from "lodash";
import { zip } from "lodash";
import {
PrimitiveTargetDescriptor,
RangeTargetDescriptor,
@ -18,6 +18,7 @@ import { MarkStage, ModifierStage } from "./PipelineStages.types";
import ImplicitStage from "./marks/ImplicitStage";
import { ContainingTokenIfUntypedEmptyStage } from "./modifiers/ConditionalModifierStages";
import { PlainTarget } from "./targets";
import { uniqWithHash } from "../util/uniqWithHash";
export class TargetPipelineRunner {
constructor(
@ -287,7 +288,11 @@ function calcIsReversed(anchor: Target, active: Target) {
}
function uniqTargets(array: Target[]): Target[] {
return uniqWith(array, (a, b) => a.isEqual(b));
return uniqWithHash(
array,
(a, b) => a.isEqual(b),
(a) => a.contentRange.concise(),
);
}
function ensureSingleEditor(anchorTarget: Target, activeTarget: Target) {

View File

@ -1,6 +1,6 @@
import { EditableTextEditor, Selection } from "@cursorless/common";
import uniqDeep from "./uniqDeep";
import { uniqWithHash } from "./uniqWithHash";
export async function setSelectionsAndFocusEditor(
editor: EditableTextEditor,
@ -22,5 +22,9 @@ export function setSelectionsWithoutFocusingEditor(
editor: EditableTextEditor,
selections: Selection[],
) {
editor.selections = uniqDeep(selections);
editor.selections = uniqWithHash(
selections,
(a, b) => a.isEqual(b),
(s) => s.concise(),
);
}

View File

@ -1,5 +0,0 @@
import { uniqWith, isEqual } from "lodash";
export default <T>(array: T[]): T[] => {
return uniqWith(array, isEqual);
};

View File

@ -0,0 +1,65 @@
import * as assert from "assert";
import * as fc from "fast-check";
import { uniqWith } from "lodash";
import { uniqWithHash } from "./uniqWithHash";
// known good but slow (quadratic!)
function knownGoodUniqWithHash<T>(
array: T[],
fn: (a: T, b: T) => boolean,
_: (t: T) => string,
): T[] {
return uniqWith(array, fn);
}
suite("uniqWithHash", () => {
test("uniqWithHash", () => {
// believe it or not, all these test cases are important
const testCases: number[][] = [
[],
[0],
[1],
[0, 1],
[1, 0],
[0, 0],
[1, 1],
[0, 1, 0, 1],
[1, 0, 1, 0],
[0, 1, 1, 0],
[1, 0, 0, 1],
[0, 1, 2, 3],
[0, 1, 2, 3, 0, 1, 2, 3],
[0, 0, 1, 1, 2, 2, 3, 3],
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
[0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3],
[0, 1, 2, 3, 4, 5, 0, 1, 2, 3],
[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 2, 3],
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
];
const hashFunctions = [
(x: number) => x.toString(),
(x: number) => (x % 2).toString(),
(_: number) => "0",
];
hashFunctions.forEach((hash) => {
const check = (testCase: number[]) => {
const actual = uniqWithHash(testCase, (a, b) => a === b, hash);
const expected = knownGoodUniqWithHash(
testCase,
(a, b) => a === b,
hash,
);
assert.deepStrictEqual(actual, expected);
};
testCases.forEach(check);
// max length 50 because the known good implementation is quadratic
const randomNumbers = fc.array(fc.integer(), { maxLength: 50 });
fc.assert(fc.property(randomNumbers, check));
});
});
});

View File

@ -0,0 +1,81 @@
import { uniqWith } from "lodash";
/**
* Like lodash.uniqWith, but uses a hash function to (mostly) avoid quadratic runtime.
* @param array The array to uniq
* @param isEqual The equality function
* @param hash The hash function. It must be a valid hash function, insofar as it must return the same value for equal items. A hash function that returns a constant value is equivalent to lodash.uniqWith.
* @returns The uniq array
*/
export function uniqWithHash<T>(
array: T[],
isEqual: (a: T, b: T) => boolean,
hash: (t: T) => string,
): T[] {
// Handle the common, tiny cases without allocating anything extra.
if (array.length < 2) {
return [...array];
}
if (array.length === 2) {
if (isEqual(array[0], array[1])) {
return [array[0]];
}
return [...array];
}
// First, split up the array using the hash function.
// This keeps the sets of items passed to uniqWith small,
// so that the quadratic runtime of uniqWith less of a problem.
/* Keep track of which sets have multiple items, so that we can uniq them. */
const needsUniq: string[] = [];
const hashToItems: Map<string, T[]> = array.reduce((acc, item) => {
const key = hash(item);
const items = acc.get(key);
if (items == null) {
acc.set(key, [item]);
return acc;
}
acc.get(key)!.push(item);
if (items.length === 2) {
needsUniq.push(key);
}
return acc;
}, new Map<string, T[]>());
// Another common case: Everything is unique.
if (needsUniq.length === 0) {
return [...array];
}
// For hash collisions, uniq the items,
// letting uniqWith provide correct semantics.
needsUniq.forEach((key) => {
hashToItems.set(key, uniqWith(hashToItems.get(key)!, isEqual));
});
// To preserve order, step through the original items
// one at a time, returning it as appropriate.
// We need to do this because uniqWith preserves order,
// and we are mimicking its semantics.
// Note that items were added in order,
// and uniqWith preserved that order.
return array.flatMap((item) => {
const key = hash(item);
const items = hashToItems.get(key)!;
if (items == null || items.length === 0) {
// Removed by uniqWith.
return [];
}
const first = items[0]!;
if (!isEqual(first, item)) {
// Removed by uniqWith.
// Note that it is sufficient to check the first item,
// because uniqWith preserves order.
return [];
}
// Emit item.
items.shift();
return first;
});
}

View File

@ -257,6 +257,9 @@ importers:
'@types/sinon':
specifier: ^10.0.2
version: 10.0.13
fast-check:
specifier: 3.12.0
version: 3.12.0
js-yaml:
specifier: ^4.1.0
version: 4.1.0
@ -8090,6 +8093,13 @@ packages:
iconv-lite: 0.4.24
tmp: 0.0.33
/fast-check@3.12.0:
resolution: {integrity: sha512-SqahE9mlL3+lhjJ39joMLwcj6F+24hfZdf/tchlNO8sHcTdrUUdA5P/ZbSFZM9Xpzs36XaneGwE0FWepm/zyOA==}
engines: {node: '>=8.0.0'}
dependencies:
pure-rand: 6.0.0
dev: true
/fast-deep-equal@3.1.3:
resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==}