1
1
mirror of https://github.com/github/semantic.git synced 2024-11-28 10:15:55 +03:00

Merge pull request #218 from github/generalize-ses-to-arbitrary-collection-type

Generalize SES to arbitrary CollectionTypes
This commit is contained in:
Josh Vera 2015-11-05 16:52:57 -05:00
commit fb698f0136
2 changed files with 27 additions and 25 deletions

View File

@ -3,44 +3,46 @@
/// These values are populated by a function from the coordinates of a given cell to the matrix's element type. /// These values are populated by a function from the coordinates of a given cell to the matrix's element type.
/// ///
/// Values are retrieved by subscripting with row/column indices. Out-of-bound indices produce `nil` values, rather than asserting. /// Values are retrieved by subscripting with row/column indices. Out-of-bound indices produce `nil` values, rather than asserting.
public struct Matrix<A> { public struct Matrix<A, I: ForwardIndexType> {
public init(width: Int, height: Int, compute: (Int, Int) -> A) { public init(across: Range<I>, down: Range<I>, compute: (I, I) -> A) {
self.init(width: width, height: height, values: constructRowMajor(width, height: height, forEach: { i, j in Memo { compute(i, j) } })) self.init(across: across, down: down, values: constructRowMajor(across, down: down, forEach: { i, j in Memo { compute(i, j) } }))
} }
public let width: Int public let across: Range<I>
public let height: Int public let down: Range<I>
private let values: [Memo<A>] private let values: [Memo<A>]
public subscript (i: Int, j: Int) -> Memo<A>? { public subscript (i: I, j: I) -> Memo<A>? {
guard i < width && j < height else { return nil } guard across.contains(i) && down.contains(j) else { return nil }
return values[i + j * width] let i = across.startIndex.distanceTo(i)
let j = down.startIndex.distanceTo(j)
return values[Int((i + j * across.count).toIntMax())]
} }
// MARK: Functor // MARK: Functor
public func map<Other>(transform: A -> Other) -> Matrix<Other> { public func map<Other>(transform: A -> Other) -> Matrix<Other, I> {
return Matrix<Other>(width: width, height: height, values: values.map { $0.map(transform) }) return Matrix<Other, I>(across: across, down: down, values: values.map { $0.map(transform) })
} }
// MARK: Implementation details // MARK: Implementation details
private init(width: Int, height: Int, values: [Memo<A>]) { private init(across: Range<I>, down: Range<I>, values: [Memo<A>]) {
self.width = width self.across = across
self.height = height self.down = down
self.values = values self.values = values
} }
} }
/// Constructs a row-major ordering of values produced with `forEach`. /// Constructs a row-major ordering of values produced with `forEach`.
private func constructRowMajor<A>(width: Int, height: Int, @noescape forEach: (Int, Int) -> A) -> [A] { private func constructRowMajor<A, I: ForwardIndexType>(across: Range<I>, down: Range<I>, @noescape forEach: (I, I) -> A) -> [A] {
var values: [A] = [] var values: [A] = []
values.reserveCapacity(width * height) values.reserveCapacity(Int(across.count.toIntMax()) * Int(down.count.toIntMax()))
for j in 0..<height { for j in down {
for i in 0..<width { for i in across {
values.append(forEach(i, j)) values.append(forEach(i, j))
} }
} }

View File

@ -1,8 +1,8 @@
/// Computes the SES (shortest edit script), i.e. the shortest sequence of diffs (`Free<Leaf, Annotation, Patch<Term>>`) for two arrays of `Term`s which would suffice to transform `a` into `b`. /// Computes the SES (shortest edit script), i.e. the shortest sequence of diffs (`Free<Leaf, Annotation, Patch<Term>>`) for two arrays of `Term`s which would suffice to transform `a` into `b`.
/// ///
/// This is computed w.r.t. an `equals` function, which computes the equality of leaf nodes within terms, and a `recur` function, which produces diffs representing matched-up terms. /// This is computed w.r.t. an `equals` function, which computes the equality of leaf nodes within terms, and a `recur` function, which produces diffs representing matched-up terms.
public func SES<Term, Leaf, Annotation>(a: [Term], _ b: [Term], cost: Free<Leaf, Annotation, Patch<Term>> -> Int, recur: (Term, Term) -> Free<Leaf, Annotation, Patch<Term>>?) -> [Free<Leaf, Annotation, Patch<Term>>] { public func SES<Leaf, Annotation, C: CollectionType>(a: C, _ b: C, cost: Free<Leaf, Annotation, Patch<C.Generator.Element>> -> Int, recur: (C.Generator.Element, C.Generator.Element) -> Free<Leaf, Annotation, Patch<C.Generator.Element>>?) -> [Free<Leaf, Annotation, Patch<C.Generator.Element>>] {
typealias Diff = Free<Leaf, Annotation, Patch<Term>> typealias Diff = Free<Leaf, Annotation, Patch<C.Generator.Element>>
if a.isEmpty { return b.map { .Insert($0) } } if a.isEmpty { return b.map { .Insert($0) } }
if b.isEmpty { return a.map { .Delete($0) } } if b.isEmpty { return a.map { .Delete($0) } }
@ -20,17 +20,17 @@ public func SES<Term, Leaf, Annotation>(a: [Term], _ b: [Term], cost: Free<Leaf,
} }
// A matrix whose values are streams representing paths through the edit graph, carrying both the diff & the cost of the remainder of the path. // A matrix whose values are streams representing paths through the edit graph, carrying both the diff & the cost of the remainder of the path.
var matrix: Matrix<Stream<(Diff, Int)>>! var matrix: Matrix<Stream<(Diff, Int)>, C.Index>!
matrix = Matrix(width: a.count + 1, height: b.count + 1) { i, j in matrix = Matrix(across: a.startIndex..<a.endIndex.successor(), down: b.startIndex..<b.endIndex.successor()) { i, j in
// Some explanation is warranted: // Some explanation is warranted:
// //
// 1. `matrix` captures itself during construction, because each vertex in the edit graph depends on other vertices. This is safe, because a) `Matrix` populates its fields lazily, and b) vertices only depend on those vertices downwards and rightwards of them. // 1. `matrix` captures itself during construction, because each vertex in the edit graph depends on other vertices. This is safe, because a) `Matrix` populates its fields lazily, and b) vertices only depend on those vertices downwards and rightwards of them.
// //
// 2. `matrix` is sized bigger than `a.count` x `b.count`. This is safe, because a) we only get a[i]/b[j] when right/down are non-nil (respectively), and b) right/down are found by looking up elements (i + 1, j) & (i, j + 1) in the matrix, which returns `nil` when out of bounds. So we only access a[i] and b[j] when i and j are in bounds. // 2. `matrix` is sized bigger than `a.count` x `b.count`. This is safe, because a) we only get a[i]/b[j] when right/down are non-nil (respectively), and b) right/down are found by looking up elements (i + 1, j) & (i, j + 1) in the matrix, which returns `nil` when out of bounds. So we only access a[i] and b[j] when i and j are in bounds.
let right = matrix[i + 1, j] let right = matrix[i.successor(), j]
let down = matrix[i, j + 1] let down = matrix[i, j.successor()]
let diagonal = matrix[i + 1, j + 1] let diagonal = matrix[i.successor(), j.successor()]
if let right = right, down = down, diagonal = diagonal { if let right = right, down = down, diagonal = diagonal {
let here = recur(a[i], b[j]) let here = recur(a[i], b[j])
@ -62,7 +62,7 @@ public func SES<Term, Leaf, Annotation>(a: [Term], _ b: [Term], cost: Free<Leaf,
return Stream.Nil return Stream.Nil
} }
return Array(matrix[0, 0]!.value.map { diff, _ in diff }) return Array(matrix[a.startIndex, b.startIndex]!.value.map { diff, _ in diff })
} }