Fix char count with single regional indicator

This commit is contained in:
1024jp 2015-11-27 18:14:28 +09:00
parent df3e5d2ee2
commit 2581edc907
4 changed files with 22 additions and 25 deletions

View File

@ -40,6 +40,7 @@ develop
- Fix an issue where text view drawing was distorted while resizing window.
- Fix an issue where the line ending type of a document that has a line ending character at the beginning of the file could not be interpreted correctly.
- Fix an issue where the character inspector always returned `U+000A` (LF) for a line ending even when the actual line ending of the document was not LF.
- Fix character count with a single regional indicator symbol.
- Fix wrong undo action name on encoding conversion via script.

View File

@ -42,26 +42,27 @@
// count composed chars
__block NSUInteger count = 0;
__block BOOL isRegionalIndicator = NO;
__block BOOL isLastCharRegionalIndicator = NO;
NSRange regionalIndicatorRange = NSMakeRange(0xDDE6, 0xDDFF - 0xDDE6 + 1);
[string enumerateSubstringsInRange:NSMakeRange(0, [string length])
options:NSStringEnumerationByComposedCharacterSequences | NSStringEnumerationSubstringNotRequired
usingBlock:^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop)
{
// skip if the last composed character was a regional indicator surrogate-pair
// 'Cause the so-called national flag emojis consist of two such surrogate pairs
// and the first one is already counted in the last loop.
// (To simplify the process, we don't check whether this character is also a regional indicator.)
if (isRegionalIndicator) {
isRegionalIndicator = NO;
return;
}
// detect regional indicator surrogate pair.
BOOL isRegionalIndicator = ((substringRange.length == 2) &&
NSLocationInRange([string characterAtIndex:substringRange.location], regionalIndicatorRange) &&
NSLocationInRange([string characterAtIndex:substringRange.location + 1], regionalIndicatorRange));
// detect regional surrogate pair.
if ((substringRange.length == 2) &&
(NSLocationInRange([string characterAtIndex:substringRange.location + 1], regionalIndicatorRange)))
{
isRegionalIndicator = YES;
// skip if the last composed character was a regional indicator surrogate-pair
// -> 'Cause the so-called national flag emojis consist of two such surrogate pairs
// and the first one is already counted in the last loop.
if (isLastCharRegionalIndicator) {
isLastCharRegionalIndicator = NO;
if (isRegionalIndicator) {
return;
}
} else if (isRegionalIndicator) {
isLastCharRegionalIndicator = YES;
}
count++;

View File

@ -30,19 +30,12 @@ import XCTest
class StringExtensionsTests: XCTestCase {
override func setUp() {
super.setUp()
}
override func tearDown() {
super.tearDown()
}
/// Checks `numberOfComposedCharacters()` against plain ASCII text, emoji,
/// flag emoji built from two regional indicator symbols, and a lone
/// regional indicator symbol.
func testComposedCharactersCount() {
// plain ASCII: one count per letter
XCTAssertEqual("foo".numberOfComposedCharacters(), 3)
// an emoji followed by a flag: the flag's two regional indicators count as one
XCTAssertEqual("😀🇯🇵".numberOfComposedCharacters(), 2)
// a character following the flag must still be counted on its own
XCTAssertEqual("😀🇯🇵a".numberOfComposedCharacters(), 3)
// single regional indicator
// the unpaired indicator counts as one and must not swallow the following space
XCTAssertEqual("🇦 ".numberOfComposedCharacters(), 2)
}

View File

@ -26,6 +26,8 @@ combining character:
national indicators:
🇯🇵 U+1F1EF (U+D83C U+DDEF) U+1F1F5 (U+D83C U+DDF5)
🇯 U+1F1EF (U+D83C U+DDEF)
🇦🇦 U+1F1E6 (U+D83C U+DDE6) U+1F1E6 (U+D83C U+DDE6)
sound mark:
が U+304C