mirror of
https://github.com/cursorless-dev/cursorless.git
synced 2024-10-04 21:07:21 +03:00
implement non-quadratic version of uniqWith (#1834)
When given a comparator, uniqWith is quadratic. Add uniqWithHash that uses an optional hash function to return to approximately linear behavior, assuming that the hash function is good. For targets and selections, Using the contentRange/selection as the hash value seems to be pretty good in practice. I've added "concise" printing versions of positions, ranges, and selections. These are handy as hash functions, but I've also made heavy use of them while investigating hat allocation. They are generally useful. "take every line" on a file containing 3000 lines used to take about 15s on my computer. Now it takes about 1s. That's still not great but it's fast enough that it doesn't appear that Cursorless has hung. I've half a mind to try upstreaming this, or at least sending a documentation fix, because a quadratic API is a big footgun. Fixes #1830 enough for now ## Checklist - [x] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [/] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [/] I have not broken the cheatsheet --------- Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Co-authored-by: Pokey Rule <755842+pokey@users.noreply.github.com>
This commit is contained in:
parent
52d3eca62b
commit
ae5304ddf3
@ -138,4 +138,16 @@ export class Position {
|
||||
public toEmptyRange(): Range {
|
||||
return new Range(this, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a concise string representation of the position.
|
||||
* @returns concise representation
|
||||
**/
|
||||
public concise(): string {
|
||||
return `${this.line}:${this.character}`;
|
||||
}
|
||||
|
||||
public toString(): string {
|
||||
return this.concise();
|
||||
}
|
||||
}
|
||||
|
@ -146,4 +146,16 @@ export class Range {
|
||||
? new Selection(this.end, this.start)
|
||||
: new Selection(this.start, this.end);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a concise string representation of the range
|
||||
* @returns concise representation
|
||||
**/
|
||||
public concise(): string {
|
||||
return `${this.start.concise()}-${this.end.concise()}`;
|
||||
}
|
||||
|
||||
public toString(): string {
|
||||
return this.concise();
|
||||
}
|
||||
}
|
||||
|
@ -72,4 +72,16 @@ export class Selection extends Range {
|
||||
this.anchor.isEqual(other.anchor) && this.active.isEqual(other.active)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a concise string representation of the selection
|
||||
* @returns concise representation
|
||||
**/
|
||||
public concise(): string {
|
||||
return `${this.anchor.concise()}->${this.active.concise()}`;
|
||||
}
|
||||
|
||||
public toString(): string {
|
||||
return this.concise();
|
||||
}
|
||||
}
|
||||
|
@ -27,6 +27,7 @@
|
||||
"@types/mocha": "^8.0.4",
|
||||
"@types/sbd": "^1.0.3",
|
||||
"@types/sinon": "^10.0.2",
|
||||
"fast-check": "3.12.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"mocha": "^10.2.0",
|
||||
"sinon": "^11.1.1"
|
||||
|
@ -5,7 +5,7 @@ import {
|
||||
Range,
|
||||
ScopeType,
|
||||
} from "@cursorless/common";
|
||||
import { uniqWith, zip } from "lodash";
|
||||
import { zip } from "lodash";
|
||||
import {
|
||||
PrimitiveTargetDescriptor,
|
||||
RangeTargetDescriptor,
|
||||
@ -18,6 +18,7 @@ import { MarkStage, ModifierStage } from "./PipelineStages.types";
|
||||
import ImplicitStage from "./marks/ImplicitStage";
|
||||
import { ContainingTokenIfUntypedEmptyStage } from "./modifiers/ConditionalModifierStages";
|
||||
import { PlainTarget } from "./targets";
|
||||
import { uniqWithHash } from "../util/uniqWithHash";
|
||||
|
||||
export class TargetPipelineRunner {
|
||||
constructor(
|
||||
@ -287,7 +288,11 @@ function calcIsReversed(anchor: Target, active: Target) {
|
||||
}
|
||||
|
||||
function uniqTargets(array: Target[]): Target[] {
|
||||
return uniqWith(array, (a, b) => a.isEqual(b));
|
||||
return uniqWithHash(
|
||||
array,
|
||||
(a, b) => a.isEqual(b),
|
||||
(a) => a.contentRange.concise(),
|
||||
);
|
||||
}
|
||||
|
||||
function ensureSingleEditor(anchorTarget: Target, activeTarget: Target) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
import { EditableTextEditor, Selection } from "@cursorless/common";
|
||||
|
||||
import uniqDeep from "./uniqDeep";
|
||||
import { uniqWithHash } from "./uniqWithHash";
|
||||
|
||||
export async function setSelectionsAndFocusEditor(
|
||||
editor: EditableTextEditor,
|
||||
@ -22,5 +22,9 @@ export function setSelectionsWithoutFocusingEditor(
|
||||
editor: EditableTextEditor,
|
||||
selections: Selection[],
|
||||
) {
|
||||
editor.selections = uniqDeep(selections);
|
||||
editor.selections = uniqWithHash(
|
||||
selections,
|
||||
(a, b) => a.isEqual(b),
|
||||
(s) => s.concise(),
|
||||
);
|
||||
}
|
||||
|
@ -1,5 +0,0 @@
|
||||
import { uniqWith, isEqual } from "lodash";
|
||||
|
||||
export default <T>(array: T[]): T[] => {
|
||||
return uniqWith(array, isEqual);
|
||||
};
|
65
packages/cursorless-engine/src/util/uniqWithHash.test.ts
Normal file
65
packages/cursorless-engine/src/util/uniqWithHash.test.ts
Normal file
@ -0,0 +1,65 @@
|
||||
import * as assert from "assert";
|
||||
import * as fc from "fast-check";
|
||||
import { uniqWith } from "lodash";
|
||||
import { uniqWithHash } from "./uniqWithHash";
|
||||
|
||||
// known good but slow (quadratic!)
|
||||
function knownGoodUniqWithHash<T>(
|
||||
array: T[],
|
||||
fn: (a: T, b: T) => boolean,
|
||||
_: (t: T) => string,
|
||||
): T[] {
|
||||
return uniqWith(array, fn);
|
||||
}
|
||||
|
||||
suite("uniqWithHash", () => {
|
||||
test("uniqWithHash", () => {
|
||||
// believe it or not, all these test cases are important
|
||||
const testCases: number[][] = [
|
||||
[],
|
||||
[0],
|
||||
[1],
|
||||
[0, 1],
|
||||
[1, 0],
|
||||
[0, 0],
|
||||
[1, 1],
|
||||
[0, 1, 0, 1],
|
||||
[1, 0, 1, 0],
|
||||
[0, 1, 1, 0],
|
||||
[1, 0, 0, 1],
|
||||
[0, 1, 2, 3],
|
||||
[0, 1, 2, 3, 0, 1, 2, 3],
|
||||
[0, 0, 1, 1, 2, 2, 3, 3],
|
||||
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
|
||||
[0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3],
|
||||
[0, 1, 2, 3, 4, 5, 0, 1, 2, 3],
|
||||
[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 2, 3],
|
||||
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
|
||||
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
|
||||
];
|
||||
|
||||
const hashFunctions = [
|
||||
(x: number) => x.toString(),
|
||||
(x: number) => (x % 2).toString(),
|
||||
(_: number) => "0",
|
||||
];
|
||||
|
||||
hashFunctions.forEach((hash) => {
|
||||
const check = (testCase: number[]) => {
|
||||
const actual = uniqWithHash(testCase, (a, b) => a === b, hash);
|
||||
const expected = knownGoodUniqWithHash(
|
||||
testCase,
|
||||
(a, b) => a === b,
|
||||
hash,
|
||||
);
|
||||
assert.deepStrictEqual(actual, expected);
|
||||
};
|
||||
|
||||
testCases.forEach(check);
|
||||
|
||||
// max length 50 because the known good implementation is quadratic
|
||||
const randomNumbers = fc.array(fc.integer(), { maxLength: 50 });
|
||||
fc.assert(fc.property(randomNumbers, check));
|
||||
});
|
||||
});
|
||||
});
|
81
packages/cursorless-engine/src/util/uniqWithHash.ts
Normal file
81
packages/cursorless-engine/src/util/uniqWithHash.ts
Normal file
@ -0,0 +1,81 @@
|
||||
import { uniqWith } from "lodash";
|
||||
|
||||
/**
|
||||
* Like lodash.uniqWith, but uses a hash function to (mostly) avoid quadratic runtime.
|
||||
* @param array The array to uniq
|
||||
* @param isEqual The equality function
|
||||
* @param hash The hash function. It must be a valid hash function, insofar as it must return the same value for equal items. A hash function that returns a constant value is equivalent to lodash.uniqWith.
|
||||
* @returns The uniq array
|
||||
*/
|
||||
export function uniqWithHash<T>(
|
||||
array: T[],
|
||||
isEqual: (a: T, b: T) => boolean,
|
||||
hash: (t: T) => string,
|
||||
): T[] {
|
||||
// Handle the common, tiny cases without allocating anything extra.
|
||||
if (array.length < 2) {
|
||||
return [...array];
|
||||
}
|
||||
if (array.length === 2) {
|
||||
if (isEqual(array[0], array[1])) {
|
||||
return [array[0]];
|
||||
}
|
||||
return [...array];
|
||||
}
|
||||
// First, split up the array using the hash function.
|
||||
// This keeps the sets of items passed to uniqWith small,
|
||||
// so that the quadratic runtime of uniqWith less of a problem.
|
||||
|
||||
/* Keep track of which sets have multiple items, so that we can uniq them. */
|
||||
const needsUniq: string[] = [];
|
||||
const hashToItems: Map<string, T[]> = array.reduce((acc, item) => {
|
||||
const key = hash(item);
|
||||
const items = acc.get(key);
|
||||
if (items == null) {
|
||||
acc.set(key, [item]);
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc.get(key)!.push(item);
|
||||
if (items.length === 2) {
|
||||
needsUniq.push(key);
|
||||
}
|
||||
return acc;
|
||||
}, new Map<string, T[]>());
|
||||
|
||||
// Another common case: Everything is unique.
|
||||
if (needsUniq.length === 0) {
|
||||
return [...array];
|
||||
}
|
||||
|
||||
// For hash collisions, uniq the items,
|
||||
// letting uniqWith provide correct semantics.
|
||||
needsUniq.forEach((key) => {
|
||||
hashToItems.set(key, uniqWith(hashToItems.get(key)!, isEqual));
|
||||
});
|
||||
|
||||
// To preserve order, step through the original items
|
||||
// one at a time, returning it as appropriate.
|
||||
// We need to do this because uniqWith preserves order,
|
||||
// and we are mimicking its semantics.
|
||||
// Note that items were added in order,
|
||||
// and uniqWith preserved that order.
|
||||
return array.flatMap((item) => {
|
||||
const key = hash(item);
|
||||
const items = hashToItems.get(key)!;
|
||||
if (items == null || items.length === 0) {
|
||||
// Removed by uniqWith.
|
||||
return [];
|
||||
}
|
||||
const first = items[0]!;
|
||||
if (!isEqual(first, item)) {
|
||||
// Removed by uniqWith.
|
||||
// Note that it is sufficient to check the first item,
|
||||
// because uniqWith preserves order.
|
||||
return [];
|
||||
}
|
||||
// Emit item.
|
||||
items.shift();
|
||||
return first;
|
||||
});
|
||||
}
|
@ -257,6 +257,9 @@ importers:
|
||||
'@types/sinon':
|
||||
specifier: ^10.0.2
|
||||
version: 10.0.13
|
||||
fast-check:
|
||||
specifier: 3.12.0
|
||||
version: 3.12.0
|
||||
js-yaml:
|
||||
specifier: ^4.1.0
|
||||
version: 4.1.0
|
||||
@ -8090,6 +8093,13 @@ packages:
|
||||
iconv-lite: 0.4.24
|
||||
tmp: 0.0.33
|
||||
|
||||
/fast-check@3.12.0:
|
||||
resolution: {integrity: sha512-SqahE9mlL3+lhjJ39joMLwcj6F+24hfZdf/tchlNO8sHcTdrUUdA5P/ZbSFZM9Xpzs36XaneGwE0FWepm/zyOA==}
|
||||
engines: {node: '>=8.0.0'}
|
||||
dependencies:
|
||||
pure-rand: 6.0.0
|
||||
dev: true
|
||||
|
||||
/fast-deep-equal@3.1.3:
|
||||
resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user