mirror of
https://github.com/coteditor/CotEditor.git
synced 2024-10-26 10:58:05 +03:00
Optimize incompatible character scan
This commit is contained in:
parent
7cb8cdfe07
commit
ec4ddd95a8
@ -5,6 +5,10 @@ Change Log
|
||||
4.6.5 (unreleased)
|
||||
--------------------------
|
||||
|
||||
### Improvements
|
||||
|
||||
- Optimize the performance of the incompatible character scan.
|
||||
|
||||
|
||||
|
||||
4.6.4 (599)
|
||||
|
@ -2000,6 +2000,7 @@
|
||||
2AC6BFCF21D00A8500FF325C /* Regex Parser */,
|
||||
2AA375461D40BDCB0080C27C /* LineEnding.swift */,
|
||||
2A8E25BA24DC59C400FCC33A /* FileEncoding.swift */,
|
||||
2A8C338E1D3E1C040005B0B7 /* IncompatibleCharacter.swift */,
|
||||
2AAD61EF1D2B0856008FE772 /* FuzzyRange.swift */,
|
||||
2A4257BB1D239F850086DAAD /* Invisible.swift */,
|
||||
2AF073E21D33C3AB00770BA6 /* Theme.swift */,
|
||||
@ -2093,7 +2094,6 @@
|
||||
2AD7B9AE1D3E832E00E5D6D7 /* DocumentAnalyzer.swift */,
|
||||
2AF45E1D1E6C0D920030CD60 /* EditorCounter.swift */,
|
||||
2A8C338B1D3E16B00005B0B7 /* IncompatibleCharacterScanner.swift */,
|
||||
2A8C338E1D3E1C040005B0B7 /* IncompatibleCharacter.swift */,
|
||||
2A80BE8C27FFA61700D2F7FF /* LineEndingScanner.swift */,
|
||||
2A1125C523F6EFB2006A1DB2 /* URLDetector.swift */,
|
||||
);
|
||||
|
@ -9,7 +9,7 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// © 2004-2007 nakamuxu
|
||||
// © 2014-2022 1024jp
|
||||
// © 2014-2023 1024jp
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -24,7 +24,7 @@
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import Foundation.NSRange
|
||||
|
||||
struct IncompatibleCharacter: Equatable {
|
||||
|
||||
@ -36,62 +36,27 @@ struct IncompatibleCharacter: Equatable {
|
||||
}
|
||||
|
||||
|
||||
|
||||
// MARK: -
|
||||
|
||||
extension String {
|
||||
|
||||
/// list characters cannot be converted to the passed-in encoding
|
||||
/// Lists the characters that cannot be converted to the passed-in encoding.
|
||||
///
|
||||
/// - Parameter encoding: The string encoding to test compatibility.
|
||||
/// - Returns: An array of IncompatibleCharacter.
|
||||
/// - Throws: `CancellationError`
|
||||
func scanIncompatibleCharacters(with encoding: String.Encoding) throws -> [IncompatibleCharacter] {
|
||||
func charactersIncompatible(with encoding: String.Encoding) throws -> [IncompatibleCharacter] {
|
||||
|
||||
guard !self.canBeConverted(to: encoding) else { return [] }
|
||||
|
||||
guard
|
||||
let data = self.data(using: encoding, allowLossyConversion: true), // lossy conversion must always succeed
|
||||
let convertedString = String(data: data, encoding: encoding)
|
||||
else { assertionFailure(); return [] }
|
||||
|
||||
try Task.checkCancellation()
|
||||
|
||||
if self.length == convertedString.length, self.length > 10_000 {
|
||||
return try self.quickIncompatibleFind(with: convertedString)
|
||||
}
|
||||
|
||||
return try convertedString.difference(from: self).removals.lazy
|
||||
.map { (change) in
|
||||
guard case let .remove(offset, character, _) = change else { preconditionFailure() }
|
||||
|
||||
return try zip(self.indices, self).lazy
|
||||
.compactMap { (index, character) in
|
||||
try Task.checkCancellation()
|
||||
|
||||
let converted: String? = String(character)
|
||||
.data(using: encoding, allowLossyConversion: true)
|
||||
.flatMap { String(data: $0, encoding: encoding) }
|
||||
let location = self.index(self.startIndex, offsetBy: offset).utf16Offset(in: self)
|
||||
let string = String(character)
|
||||
let converted = String(data: string.data(using: encoding, allowLossyConversion: true)!, encoding: encoding)
|
||||
|
||||
return IncompatibleCharacter(character: character,
|
||||
convertedCharacter: converted,
|
||||
location: location)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// MARK: Private Methods
|
||||
|
||||
private func quickIncompatibleFind(with convertedString: String) throws -> [IncompatibleCharacter] {
|
||||
|
||||
try zip(self, convertedString).enumerated().lazy
|
||||
.filter { $1.0 != $1.1 }
|
||||
.map { (offset, characters) in
|
||||
let location = self.index(self.startIndex, offsetBy: offset).utf16Offset(in: self)
|
||||
guard converted != string else { return nil }
|
||||
|
||||
try Task.checkCancellation()
|
||||
|
||||
return IncompatibleCharacter(character: characters.0,
|
||||
convertedCharacter: String(characters.1),
|
||||
location: location)
|
||||
return IncompatibleCharacter(character: character, convertedCharacter: converted, location: index.utf16Offset(in: self))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -82,8 +82,7 @@ final class IncompatibleCharacterScanner {
|
||||
try await Task.sleep(for: .seconds(0.4), tolerance: .seconds(0.1)) // debounce
|
||||
|
||||
let string = await MainActor.run { document.textStorage.string.immutable }
|
||||
let incompatibleCharacters = try string.scanIncompatibleCharacters(with: encoding)
|
||||
self.incompatibleCharacters = incompatibleCharacters
|
||||
self.incompatibleCharacters = try string.charactersIncompatible(with: encoding)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ final class IncompatibleCharacterTests: XCTestCase {
|
||||
func testIncompatibleCharacterScan() throws {
|
||||
|
||||
let string = "abc\\ \n ¥ \n ~"
|
||||
let incompatibles = try string.scanIncompatibleCharacters(with: .plainShiftJIS)
|
||||
let incompatibles = try string.charactersIncompatible(with: .plainShiftJIS)
|
||||
|
||||
XCTAssertEqual(incompatibles.count, 2)
|
||||
|
||||
@ -53,7 +53,7 @@ final class IncompatibleCharacterTests: XCTestCase {
|
||||
func testSequentialIncompatibleCharactersScan() throws {
|
||||
|
||||
let string = "~~"
|
||||
let incompatibles = try string.scanIncompatibleCharacters(with: .plainShiftJIS)
|
||||
let incompatibles = try string.charactersIncompatible(with: .plainShiftJIS)
|
||||
|
||||
XCTAssertEqual(incompatibles.count, 2)
|
||||
|
||||
@ -68,7 +68,7 @@ final class IncompatibleCharacterTests: XCTestCase {
|
||||
func testIncompatibleCharacterScanWithLengthShift() throws {
|
||||
|
||||
let string = "family 👨👨👦 with 🐕"
|
||||
let incompatibles = try string.scanIncompatibleCharacters(with: .japaneseEUC)
|
||||
let incompatibles = try string.charactersIncompatible(with: .japaneseEUC)
|
||||
|
||||
XCTAssertEqual(incompatibles.count, 2)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user