Optimize incompatible character scan

This commit is contained in:
1024jp 2023-11-04 03:40:11 +09:00
parent 7cb8cdfe07
commit ec4ddd95a8
5 changed files with 21 additions and 53 deletions

View File

@ -5,6 +5,10 @@ Change Log
4.6.5 (unreleased)
--------------------------
### Improvements
- Optimize the performance of the incompatible character scan.
4.6.4 (599)

View File

@ -2000,6 +2000,7 @@
2AC6BFCF21D00A8500FF325C /* Regex Parser */,
2AA375461D40BDCB0080C27C /* LineEnding.swift */,
2A8E25BA24DC59C400FCC33A /* FileEncoding.swift */,
2A8C338E1D3E1C040005B0B7 /* IncompatibleCharacter.swift */,
2AAD61EF1D2B0856008FE772 /* FuzzyRange.swift */,
2A4257BB1D239F850086DAAD /* Invisible.swift */,
2AF073E21D33C3AB00770BA6 /* Theme.swift */,
@ -2093,7 +2094,6 @@
2AD7B9AE1D3E832E00E5D6D7 /* DocumentAnalyzer.swift */,
2AF45E1D1E6C0D920030CD60 /* EditorCounter.swift */,
2A8C338B1D3E16B00005B0B7 /* IncompatibleCharacterScanner.swift */,
2A8C338E1D3E1C040005B0B7 /* IncompatibleCharacter.swift */,
2A80BE8C27FFA61700D2F7FF /* LineEndingScanner.swift */,
2A1125C523F6EFB2006A1DB2 /* URLDetector.swift */,
);

View File

@ -9,7 +9,7 @@
// ---------------------------------------------------------------------------
//
// © 2004-2007 nakamuxu
// © 2014-2022 1024jp
// © 2014-2023 1024jp
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -24,7 +24,7 @@
// limitations under the License.
//
import Foundation
import Foundation.NSRange
struct IncompatibleCharacter: Equatable {
@ -36,62 +36,27 @@ struct IncompatibleCharacter: Equatable {
}
// MARK: -
extension String {
/// list characters cannot be converted to the passed-in encoding
/// Lists the characters that cannot be converted to the passed-in encoding.
///
/// - Parameter encoding: The string encoding to test compatibility.
/// - Returns: An array of IncompatibleCharacter.
/// - Throws: `CancellationError`
func scanIncompatibleCharacters(with encoding: String.Encoding) throws -> [IncompatibleCharacter] {
func charactersIncompatible(with encoding: String.Encoding) throws -> [IncompatibleCharacter] {
guard !self.canBeConverted(to: encoding) else { return [] }
guard
let data = self.data(using: encoding, allowLossyConversion: true), // lossy conversion must always success
let convertedString = String(data: data, encoding: encoding)
else { assertionFailure(); return [] }
try Task.checkCancellation()
if self.length == convertedString.length, self.length > 10_000 {
return try self.quickIncompatibleFind(with: convertedString)
}
return try convertedString.difference(from: self).removals.lazy
.map { (change) in
guard case let .remove(offset, character, _) = change else { preconditionFailure() }
return try zip(self.indices, self).lazy
.compactMap { (index, character) in
try Task.checkCancellation()
let converted: String? = String(character)
.data(using: encoding, allowLossyConversion: true)
.flatMap { String(data: $0, encoding: encoding) }
let location = self.index(self.startIndex, offsetBy: offset).utf16Offset(in: self)
let string = String(character)
let converted = String(data: string.data(using: encoding, allowLossyConversion: true)!, encoding: encoding)
return IncompatibleCharacter(character: character,
convertedCharacter: converted,
location: location)
}
}
// MARK: Private Methods
private func quickIncompatibleFind(with convertedString: String) throws -> [IncompatibleCharacter] {
try zip(self, convertedString).enumerated().lazy
.filter { $1.0 != $1.1 }
.map { (offset, characters) in
let location = self.index(self.startIndex, offsetBy: offset).utf16Offset(in: self)
guard converted != string else { return nil }
try Task.checkCancellation()
return IncompatibleCharacter(character: characters.0,
convertedCharacter: String(characters.1),
location: location)
return IncompatibleCharacter(character: character, convertedCharacter: converted, location: index.utf16Offset(in: self))
}
}
}

View File

@ -82,8 +82,7 @@ final class IncompatibleCharacterScanner {
try await Task.sleep(for: .seconds(0.4), tolerance: .seconds(0.1)) // debounce
let string = await MainActor.run { document.textStorage.string.immutable }
let incompatibleCharacters = try string.scanIncompatibleCharacters(with: encoding)
self.incompatibleCharacters = incompatibleCharacters
self.incompatibleCharacters = try string.charactersIncompatible(with: encoding)
}
}
}

View File

@ -32,7 +32,7 @@ final class IncompatibleCharacterTests: XCTestCase {
func testIncompatibleCharacterScan() throws {
let string = "abc\\ \n ¥ \n ~"
let incompatibles = try string.scanIncompatibleCharacters(with: .plainShiftJIS)
let incompatibles = try string.charactersIncompatible(with: .plainShiftJIS)
XCTAssertEqual(incompatibles.count, 2)
@ -53,7 +53,7 @@ final class IncompatibleCharacterTests: XCTestCase {
func testSequentialIncompatibleCharactersScan() throws {
let string = "~~"
let incompatibles = try string.scanIncompatibleCharacters(with: .plainShiftJIS)
let incompatibles = try string.charactersIncompatible(with: .plainShiftJIS)
XCTAssertEqual(incompatibles.count, 2)
@ -68,7 +68,7 @@ final class IncompatibleCharacterTests: XCTestCase {
func testIncompatibleCharacterScanWithLengthShift() throws {
let string = "family 👨‍👨‍👦 with 🐕"
let incompatibles = try string.scanIncompatibleCharacters(with: .japaneseEUC)
let incompatibles = try string.charactersIncompatible(with: .japaneseEUC)
XCTAssertEqual(incompatibles.count, 2)