Review Text encoding/decoding problem handling (#5701)

Closes #5112
This commit is contained in:
Radosław Waśko 2023-02-23 13:53:44 +01:00 committed by GitHub
parent 625172a6d2
commit ed6d3d0f97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 16 deletions

View File

@ -608,17 +608,14 @@ Text.bytes self encoding on_problems=Problem_Behavior.Report_Warning =
- bytes: The vector of bytes to decode, expressed in the given encoding.
- encoding: The text encoding to decode the bytes with. Defaults to UTF-8.
- on_problems: Specifies the behavior when a problem occurs during the
function.
By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
function. By default, a dataflow error is raised.
> Example
Get the text "Hello" from its ASCII bytes.
Text.from_bytes [72, 101, 108, 108, 111] (Encoding.ascii)
Text.from_bytes : Vector Integer -> Encoding -> Problem_Behavior -> Text
Text.from_bytes bytes encoding on_problems=Problem_Behavior.Report_Warning =
Text.from_bytes bytes encoding on_problems=Problem_Behavior.Report_Error =
result = Encoding_Utils.from_bytes bytes.to_array (encoding . to_java_charset)
if result.warnings.is_nothing then result.result else
on_problems.attach_problems_after result.result [Encoding_Error.Error result.warnings]
@ -649,10 +646,7 @@ Text.utf_8 self on_problems=Problem_Behavior.Report_Warning =
Arguments:
- bytes: The vector of UTF-8 bytes.
- on_problems: Specifies the behavior when a problem occurs during the
function.
By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
function. By default, a dataflow error is raised.
This is useful for low-level operations, such as binary data encoding and
decoding.
@ -662,7 +656,7 @@ Text.utf_8 self on_problems=Problem_Behavior.Report_Warning =
Text.from_utf_8 [-32, -92, -107, -32, -91, -115, -32, -92, -73, -32, -92, -65]
Text.from_utf_8 : Vector Integer -> Problem_Behavior -> Text
Text.from_utf_8 bytes on_problems=Problem_Behavior.Report_Warning =
Text.from_utf_8 bytes on_problems=Problem_Behavior.Report_Error =
Text.from_bytes bytes Encoding.utf_8 on_problems
## Returns a vector containing the UTF-16 characters that encode the input text.

View File

@ -5,6 +5,8 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
from Standard.Test import Test, Test_Suite, Problems
import Standard.Test.Extensions
polyglot java import java.lang.String as Java_String
spec =
Test.group "Encoding object" <|
Test.specify "Can get standard UTF encodings" <|
@ -46,18 +48,26 @@ spec =
Test.assert_no_problems result
result . should_equal test
Test.specify "Invalid ASCII should raise a warning when decoding" <|
Test.specify "Invalid ASCII should raise a problem when decoding (error by default)" <|
action = Text.from_bytes invalid_ascii Encoding.ascii on_problems=_
tester result = result . should_equal invalid
problems = [Encoding_Error.Error "Encoding issues at 12."]
Problems.test_problem_handling action problems tester
Test.specify "Invalid ASCII should raise a warning when encoding" <|
default_error = Text.from_bytes invalid_ascii Encoding.ascii
default_error.should_fail_with Encoding_Error
default_error.catch.message . should_equal "Encoding issues at 12."
Test.specify "Invalid ASCII should raise a problem when encoding (warning by default)" <|
action = invalid.bytes Encoding.ascii on_problems=_
tester result = result . should_equal invalid_ascii_out
problems = [Encoding_Error.Error "Encoding issues at 12."]
Problems.test_problem_handling action problems tester
default_warning = invalid.bytes Encoding.ascii
default_warning.should_equal invalid_ascii_out
Problems.get_attached_warnings default_warning . should_contain_the_same_elements_as problems
Test.group "UTF_8" <|
kshi = '\u0915\u094D\u0937\u093F'
kshi_utf_8 = [-32, -92, -107, -32, -91, -115, -32, -92, -73, -32, -92, -65]
@ -75,6 +85,18 @@ spec =
Test.assert_no_problems dump
dump.should_equal kshi_utf_8
Test.specify "should raise a problem when encoding (warning by default)" <|
unpaired_surrogate = Integer.parse "DEDC" 16
text = Text.from_codepoints [unpaired_surrogate]
action = text.utf_8 on_problems=_
tester _ = Nothing
problems = [Encoding_Error.Error "Encoding issues at 0."]
Problems.test_problem_handling action problems tester
default_warning = text.utf_8
Problems.get_attached_warnings default_warning . should_contain_the_same_elements_as problems
Test.specify "should convert an array of bytes to text via encoding" <|
result = Text.from_bytes kshi_utf_8 Encoding.utf_8
Test.assert_no_problems result
@ -85,18 +107,22 @@ spec =
Test.assert_no_problems result
result . should_equal kshi
Test.specify "Invalid UTF-8 should raise a warning when decoding via encoding" <|
Test.specify "Invalid UTF-8 should raise a problem when decoding via encoding" <|
action = Text.from_bytes invalid_utf_8 Encoding.utf_8 on_problems=_
tester result = result . should_equal invalid
problems = [Encoding_Error.Error "Encoding issues at 19."]
Problems.test_problem_handling action problems tester
Test.specify "Invalid UTF-8 should raise a warning when decoding" <|
Test.specify "Invalid UTF-8 should raise a problem when decoding (error by default)" <|
action = Text.from_utf_8 invalid_utf_8 on_problems=_
tester result = result . should_equal invalid
problems = [Encoding_Error.Error "Encoding issues at 19."]
Problems.test_problem_handling action problems tester
default_error = Text.from_utf_8 invalid_utf_8
default_error.should_fail_with Encoding_Error
default_error.catch.message . should_equal "Encoding issues at 19."
Test.group "UTF_16 BigEndian" <|
kshi = '\u0915\u094D\u0937\u093F'
kshi_utf_16 = [9, 21, 9, 77, 9, 55, 9, 63]
@ -153,13 +179,13 @@ spec =
Test.assert_no_problems result
result . should_equal test
Test.specify "Invalid Windows-1252 should raise a warning when decoding" <|
Test.specify "Invalid Windows-1252 should raise a problem when decoding" <|
action = Text.from_bytes invalid_windows Encoding.windows_1252 on_problems=_
tester result = result . should_equal invalid
problems = [Encoding_Error.Error "Encoding issues at 16."]
Problems.test_problem_handling action problems tester
Test.specify "Invalid Windows-1252 should raise a warning when encoding" <|
Test.specify "Invalid Windows-1252 should raise a problem when encoding" <|
action = invalid.bytes Encoding.windows_1252 on_problems=_
tester result = result . should_equal invalid_windows_out
problems = [Encoding_Error.Error "Encoding issues at 16."]