Remove ISO-2022-JP detection

This commit is contained in:
1024jp 2022-08-09 00:17:15 +09:00
parent 1bf7980f3e
commit 750ae371cf
10 changed files with 23 additions and 39 deletions

View File

@ -25,6 +25,7 @@ Change Log
- Display the error message in the pattern sort dialog if the regular expression pattern is invalid.
- Improve the algorithm to parse numbers in the Sort by Pattern command.
- Improve the algorithm of uncommenting.
- Improve the algorithm of encoding detection.
- Deprecate the “Ignore line endings when counting characters” option.
- [trivial] Organize the editor's contextual menu.
- [trivial] Improve the basic regular expression syntax reference.

View File

@ -66,18 +66,6 @@ extension Unicode {
}
// Escape sequences searched for in the data before attempting to decode it as ISO-2022-JP.
// Each byte-array literal is converted into `Data` by the trailing `map`.
private let ISO2022JPEscapeSequences: [Data] = [
[0x1B, 0x28, 0x42], // ASCII
[0x1B, 0x28, 0x49], // kana
[0x1B, 0x24, 0x40], // 1978
[0x1B, 0x24, 0x42], // 1983
[0x1B, 0x24, 0x28, 0x44], // JISX0212
].map { Data($0) }
// Number of leading bytes scanned for the ESC byte (0x1B) by the ISO-2022-JP check.
// NOTE(review): may also be used by other detection code not visible here — confirm.
private let maxDetectionLength = 1024 * 8
// MARK: -
@ -167,38 +155,27 @@ extension String {
init(data: Data, suggestedCFEncodings: [CFStringEncoding], usedEncoding: inout String.Encoding?) throws {
// detect encoding from so-called "magic numbers"
// check Unicode's BOM
for bom in Unicode.BOM.allCases {
guard
data.starts(with: bom.sequence),
let string = String(bomCapableData: data, encoding: bom.encoding)
else { continue }
else { continue }
usedEncoding = bom.encoding
self = string
return
}
// try ISO-2022-JP by checking the existence of typical escape sequences
// -> It's not perfect yet works in most cases. (2016-01)
if data.prefix(maxDetectionLength).contains(0x1B),
ISO2022JPEscapeSequences.contains(where: { data.range(of: $0) != nil }),
let string = String(data: data, encoding: .iso2022JP)
{
usedEncoding = .iso2022JP
self = string
return
}
// try encodings in order from the top of the encoding list
for cfEncoding in suggestedCFEncodings {
let encoding = String.Encoding(cfEncoding: cfEncoding)
guard
let string = String(data: data, encoding: encoding)
else { continue }
if let string = String(data: data, encoding: encoding) {
usedEncoding = encoding
self = string
return
}
usedEncoding = encoding
self = string
return
}
throw CocoaError(.fileReadUnknownStringEncoding)

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRES
<section id="InfoSource">
<h2>Other Information Sources</h2>
<ul>
<li>Thanks to FJDDetectEncoding by FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; on which CotEditor's encoding detection (ISO 2022-JP, UTF-8, and UTF-16) is based. FJDDetectEncoding had been released under the <em>BSD licence</em>.</li>
<li>Thanks to FJDDetectEncoding by FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; on which CotEditor's encoding detection for UTF-8 and UTF-16 is based. FJDDetectEncoding had been released under the <em>BSD licence</em>.</li>
<li>Thanks to mi by Daisuke Kamiyama &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt; on which the variables of CotEditor's File Drop feature are based.</li>
</ul>
</section>

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRES
<section id="InfoSource">
<h2>Other Information Sources</h2>
<ul>
<li>Thanks to FJDDetectEncoding by FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; on which CotEditor's encoding detection (ISO 2022-JP, UTF-8, and UTF-16) is based. FJDDetectEncoding had been released under the <em>BSD license</em>.</li>
<li>Thanks to FJDDetectEncoding by FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; on which CotEditor's encoding detection for UTF-8 and UTF-16 is based. FJDDetectEncoding had been released under the <em>BSD license</em>.</li>
<li>Thanks to mi by Daisuke Kamiyama &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt; on which the variables of CotEditor's File Drop feature are based.</li>
</ul>
</section>

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRES
<section id="InfoSource">
<h2>その他の情報ソース</h2>
<ul>
<li>テキストエンコーディングの自動認識でのISO 2022-JP、UTF-8およびUTF-16の判定コードは、藤棚工房別棟 -徒然-&lt;<a>http://blogs.dion.ne.jp/fujidana/</a>&gt;の記事「Cocoaで文字エンコーディングの自動判別プログラムを書いてみました<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>」で公開されているFJDDetectEncodingを参考にさせていただきました。FJDDetectEncodingは<em>BSDライセンス</em>で公開されていました。</li>
<li>テキストエンコーディングの自動認識でのUTF-8およびUTF-16の判定コードは、藤棚工房別棟 -徒然-&lt;<a>http://blogs.dion.ne.jp/fujidana/</a>&gt;の記事「Cocoaで文字エンコーディングの自動判別プログラムを書いてみました<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>」で公開されているFJDDetectEncodingを参考にさせていただきました。FJDDetectEncodingは<em>BSDライセンス</em>で公開されていました。</li>
<li>ファイルドロップでの文字列挿入機能の、生成定義文字列は上山大輔によるmi &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt;の挿入文字列定義フォーマットを参考にさせていただいてます。</li>
</ul>
</section>

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
<section id="InfoSource">
<h2>Outras Fontes de Informações</h2>
<ul>
<li>Obrigado ao FJDDetectEncoding de FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; no qual a detecção automática de codificação do CotEditor (ISO 2022-JP, UTF-8, UTF-16) se baseia. O FJDDetectEncoding foi lançado sob a <em>licença BSD</em>.</li>
<li>Obrigado ao FJDDetectEncoding de FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; no qual a detecção automática de codificação do CotEditor (UTF-8, UTF-16) se baseia. O FJDDetectEncoding foi lançado sob a <em>licença BSD</em>.</li>
<li>Obrigado ao mi de Daisuke Kamiyama &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt; no qual as variáveis da função de Arquivos Soltos do CotEditor se baseiam.</li>
</ul>
</section>

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRES
<section id="InfoSource">
<h2>Diğer Bilgi Kaynakları</h2>
<ul>
<li>FUJIDANA tarafından sağlanan FJDDetectEncoding'e teşekkürlerimizi sunarız. &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; CotEditor'un kodlama otomatik algılamasının (ISO 2022-JP, UTF-8, UTF-16) temel aldığı. FJDDetectEncoding, <em>BSD lisansı</em> altında yayınlandı.</li>
<li>FUJIDANA tarafından sağlanan FJDDetectEncoding'e teşekkürlerimizi sunarız. &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt; CotEditor'un kodlama otomatik algılamasının (UTF-8, UTF-16) temel aldığı. FJDDetectEncoding, <em>BSD lisansı</em> altında yayınlandı.</li>
<li>CotEditor'un Dosya Bırakma işlevinin değişkenlerinin dayandığı Daisuke Kamiyama tarafından yazılan Mi &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt;.</li>
</ul>
</section>

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRES
<section id="InfoSource">
<h2>其他信息</h2>
<ul>
<li>感谢FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt;的FJDDetectEncoding，CotEditor的编码自动检测 (ISO 2022-JP, UTF-8, UTF-16)是基于其开发的。FJDDetectEncoding遵守<em>BSD许可证</em>。</li>
<li>感谢FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt;的FJDDetectEncoding，CotEditor的编码自动检测 (UTF-8, UTF-16)是基于其开发的。FJDDetectEncoding遵守<em>BSD许可证</em>。</li>
<li>感谢Daisuke Kamiyama &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt;的mi提供了CotEditor的文件拖拽功能用到的预格式化字符串。</li>
</ul>
</section>

View File

@ -125,7 +125,7 @@ THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRES
<section id="InfoSource">
<h2>其他資訊</h2>
<ul>
<li>感謝FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt;的FJDDetectEncoding，CotEditor的編碼自動檢測 (ISO 2022-JP, UTF-8, UTF-16)是基於其開發的。FJDDetectEncoding遵守<em>BSD許可證</em>。</li>
<li>感謝FUJIDANA &lt;<a>http://blogs.dion.ne.jp/fujidana/archives/4169016.html</a>&gt;的FJDDetectEncoding，CotEditor的編碼自動檢測 (UTF-8, UTF-16)是基於其開發的。FJDDetectEncoding遵守<em>BSD許可證</em>。</li>
<li>感謝Daisuke Kamiyama &lt;<a href="https://www.mimikaki.net/">https://www.mimikaki.net</a>&gt;的mi提供了CotEditor的檔案拖拽功能用到的預格式化字串。</li>
</ul>
</section>

View File

@ -9,7 +9,7 @@
//
// ---------------------------------------------------------------------------
//
// © 2016-2021 1024jp
// © 2016-2022 1024jp
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -70,8 +70,14 @@ final class EncodingDetectionTests: XCTestCase {
func testISO2022() throws {
let data = try self.dataForFileName("ISO 2022-JP")
let encodings: [String.Encoding] = [.utf8, .iso2022JP, .utf16]
let cfEncodings = encodings
.map(\.rawValue)
.map(CFStringConvertNSStringEncodingToEncoding)
var encoding: String.Encoding?
let string = try self.encodedStringForFileName("ISO 2022-JP", usedEncoding: &encoding)
let string = try String(data: data, suggestedCFEncodings: cfEncodings, usedEncoding: &encoding)
XCTAssertEqual(string, "dog犬")
XCTAssertEqual(encoding, .iso2022JP)