mirror of
https://github.com/ilyakooo0/streamly.git
synced 2024-09-11 08:25:40 +03:00
Add unicode normalization test-suite
This commit is contained in:
parent
b7543dfad8
commit
08ee39b078
@ -109,6 +109,7 @@ targets () {
|
||||
INDIVIDUAL_TARGETS="\
|
||||
Data.Unfold \
|
||||
Unicode.Stream \
|
||||
Unicode.Char \
|
||||
Unicode.Utf8 \
|
||||
FileSystem.Handle \
|
||||
`test_only FileSystem.Event` \
|
||||
|
@ -97,11 +97,13 @@ extra-source-files:
|
||||
test/Streamly/Test/Network/Inet/TCP.hs
|
||||
test/Streamly/Test/Prelude.hs
|
||||
test/Streamly/Test/Prelude/*.hs
|
||||
test/Streamly/Test/Unicode/Stream.hs
|
||||
test/Streamly/Test/Unicode/*.hs
|
||||
test/lib/Streamly/Test/Common.hs
|
||||
test/lib/Streamly/Test/Prelude/Common.hs
|
||||
test/streamly-tests.cabal
|
||||
test/version-bounds.hs
|
||||
test/Streamly/Test/Unicode/ucd/NormalizationTest.txt
|
||||
test/Streamly/Test/Unicode/extra/NormalizationTest.txt
|
||||
|
||||
extra-doc-files:
|
||||
CONTRIBUTING.md
|
||||
|
136
test/Streamly/Test/Unicode/Char.hs
Normal file
136
test/Streamly/Test/Unicode/Char.hs
Normal file
@ -0,0 +1,136 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
|
||||
-- |
|
||||
-- Copyright : (c) 2020 Composewell Technologies
|
||||
--
|
||||
-- License : BSD-3-Clause
|
||||
-- Maintainer : streamly@composewell.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
|
||||
module Streamly.Test.Unicode.Char (main) where
|
||||
|
||||
import Control.Monad (when)
|
||||
import Data.Char (chr, isSpace, ord, toUpper)
|
||||
import Data.List (intercalate, isPrefixOf)
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Data.Function ((&))
|
||||
#endif
|
||||
import Streamly.Internal.Unicode.Char
|
||||
( NormalizationMode(NFC, NFD, NFKC, NFKD)
|
||||
, normalize
|
||||
)
|
||||
import Streamly.Internal.Data.Stream.IsStream (SerialT)
|
||||
import Text.Printf (printf)
|
||||
import System.FilePath.Posix ((</>))
|
||||
|
||||
import qualified Streamly.Internal.Data.Stream.IsStream as S
|
||||
import qualified Streamly.Internal.Data.Fold as FL
|
||||
import qualified System.Directory as Dir
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
(&) :: a -> (a -> b) -> b
|
||||
x & f = f x
|
||||
#endif
|
||||
|
||||
chrToHex :: Char -> [Char]
|
||||
chrToHex = map toUpper . printf "%.4x" . ord
|
||||
|
||||
strToHex :: [Char] -> String
|
||||
strToHex = unwords . map chrToHex
|
||||
|
||||
type Text = SerialT IO Char
|
||||
|
||||
checkEqual :: String -> (Text -> Text) -> (Text, Text) -> IO Bool
|
||||
checkEqual opName op (mc1, mc2) = do
|
||||
c1 <- S.toList mc1
|
||||
c2 <- S.toList mc2
|
||||
opc2 <- S.toList $ op mc2
|
||||
if c1 /= opc2 then do
|
||||
putStrLn $ opName ++ " " ++ strToHex c2
|
||||
++ " = " ++ strToHex opc2
|
||||
++ "; Expected: " ++ strToHex c1
|
||||
return False
|
||||
else return True
|
||||
|
||||
checkOp :: String -> NormalizationMode -> [(Text, Text)] -> IO Bool
|
||||
checkOp name op pairs = do
|
||||
res <- mapM (checkEqual name (normalize op)) pairs
|
||||
return $ all (== True) res
|
||||
|
||||
checkNFC :: (Text, Text, Text, Text, Text) -> IO Bool
|
||||
checkNFC (c1, c2, c3, c4, c5) =
|
||||
checkOp "toNFC" NFC $ map (c2, ) [c1, c2, c3] ++ map (c4, ) [c4, c5]
|
||||
|
||||
checkNFD :: (Text, Text, Text, Text, Text) -> IO Bool
|
||||
checkNFD (c1, c2, c3, c4, c5) =
|
||||
checkOp "toNFD" NFD $ map (c3, ) [c1, c2, c3] ++ map (c5, ) [c4, c5]
|
||||
|
||||
checkNFKC :: (Text, Text, Text, Text, Text) -> IO Bool
|
||||
checkNFKC (c1, c2, c3, c4, c5) =
|
||||
checkOp "toNFKC" NFKC $ map (c4,) [c1, c2, c3, c4, c5]
|
||||
|
||||
checkNFKD :: (Text, Text, Text, Text, Text) -> IO Bool
|
||||
checkNFKD (c1, c2, c3, c4, c5) =
|
||||
checkOp "toNFKD" NFKD $ map (c5,) [c1, c2, c3, c4, c5]
|
||||
|
||||
checkAllTestCases :: Int -> String -> IO ()
|
||||
checkAllTestCases lineno line = do
|
||||
cs <- S.toList $ S.splitOn (== ';') FL.toList $ S.fromList line
|
||||
case cs of
|
||||
c1 : c2 : c3 : c4 : c5 : _ -> do
|
||||
let cps = map cpToText [c1, c2, c3, c4, c5]
|
||||
mapM_ (checkOneTestCase cps)
|
||||
[checkNFD, checkNFKD, checkNFC, checkNFKC]
|
||||
_ -> error $ "Unrecognized line: " ++ line
|
||||
where
|
||||
cpToText xs = S.fromList $ map (chr . read . ("0x" ++)) (words xs)
|
||||
|
||||
checkOneTestCase cps f = do
|
||||
res <- f (tuplify cps)
|
||||
when (not res) $ do
|
||||
strs <- mapM S.toList cps
|
||||
let codes = intercalate ";" $ map strToHex strs
|
||||
txt = intercalate "; " strs
|
||||
putStrLn ("Failed at line: " ++ show lineno)
|
||||
putStrLn line
|
||||
putStrLn $ codes ++ "; # (" ++ txt
|
||||
error "Bailing out"
|
||||
|
||||
tuplify [c1, c2, c3, c4, c5] = (c1, c2, c3, c4, c5)
|
||||
tuplify _ = error "tuplify bad arguments"
|
||||
|
||||
checkLine :: (Int, String) -> IO ()
|
||||
checkLine (lineno, line) = do
|
||||
-- marker lines indicating a test block start with @
|
||||
if "@" `isPrefixOf` line
|
||||
then
|
||||
putStrLn line
|
||||
else
|
||||
checkAllTestCases lineno line
|
||||
|
||||
testNormalize :: FilePath -> IO ()
|
||||
testNormalize file = do
|
||||
contents <- readFile file
|
||||
let ls = lines contents -- split into lines
|
||||
& map (dropWhile isSpace) -- trim leading spaces
|
||||
& zip [1..] -- add numbering
|
||||
& filter (not . null . snd) -- remove blank lines
|
||||
& filter (not . ("#" `isPrefixOf`) . snd) -- remove comments
|
||||
checkAll ls
|
||||
where
|
||||
checkAll (x:xs) = checkLine x >> checkAll xs
|
||||
checkAll [] = return ()
|
||||
|
||||
filesDir :: String
|
||||
filesDir = "Streamly/Test/Unicode"
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
cdir <- Dir.getCurrentDirectory
|
||||
testNormalize $ cdir </> filesDir </> "ucd/NormalizationTest.txt"
|
||||
-- Additional test cases not in the unicode standard suite
|
||||
testNormalize $ cdir </> filesDir </> "extra/NormalizationTest.txt"
|
9
test/Streamly/Test/Unicode/extra/NormalizationTest.txt
Normal file
9
test/Streamly/Test/Unicode/extra/NormalizationTest.txt
Normal file
@ -0,0 +1,9 @@
|
||||
# Columns (c1, c2,...) are separated by semicolons
|
||||
# They have the following meaning:
|
||||
# source; NFC; NFD; NFKC; NFKD
|
||||
|
||||
@Extra test cases not in the unicode standard test suite
|
||||
AC00 11A7;AC00 11A7;1100 1161 11A7;AC00 11A7;1100 1161 11A7; # 11A7 is not a valid T and should not be combined with LV AC00 to give AC00 as NFC.
|
||||
AC00 11A8;AC01;1100 1161 11A8;AC01;1100 1161 11A8; # Combine precomposed LV and a T i.e <LV,T> sequence.
|
||||
1100 11A8;1100 11A8;1100 11A8;1100 11A8;1100 11A8; # Invalid <L,T> sequence
|
||||
2FA1E;2FA1E;2FA1E;2FA1E;2FA1E; # Beyond decomposable max
|
18908
test/Streamly/Test/Unicode/ucd/NormalizationTest.txt
Normal file
18908
test/Streamly/Test/Unicode/ucd/NormalizationTest.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -158,7 +158,6 @@ library
|
||||
if flag(limit-build-mem)
|
||||
ghc-options: +RTS -M1500M -RTS
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- Test suite options
|
||||
-------------------------------------------------------------------------------
|
||||
@ -367,6 +366,12 @@ test-suite Unicode.Stream
|
||||
main-is: Streamly/Test/Unicode/Stream.hs
|
||||
ghc-options: -main-is Streamly.Test.Unicode.Stream.main
|
||||
|
||||
test-suite Unicode.Char
|
||||
import: test-options
|
||||
type: exitcode-stdio-1.0
|
||||
main-is: Streamly/Test/Unicode/Char.hs
|
||||
ghc-options: -main-is Streamly.Test.Unicode.Char.main
|
||||
|
||||
test-suite version-bounds
|
||||
import: test-options
|
||||
type: exitcode-stdio-1.0
|
||||
|
Loading…
Reference in New Issue
Block a user