parsing: read dos line endings, deprecate encoding compatibility layer

We should now read all text in universal newline mode, so e.g. journal
files with DOS/Windows line endings are fine.

This also deprecates and disables our IO encoding compatibility layer,
which prevented many encoding-related problems on certain platforms
and GHC versions. With modern GHC (7.x) it is hopefully no longer
necessary, but the module remains in place just in case.
This commit is contained in:
Simon Michael 2013-04-12 16:18:20 -07:00
parent 78837c66a6
commit 42385fc8bf
2 changed files with 56 additions and 48 deletions

View File

@ -36,7 +36,7 @@ import System.Directory (doesFileExist, getHomeDirectory)
import System.Environment (getEnv) import System.Environment (getEnv)
import System.Exit (exitFailure) import System.Exit (exitFailure)
import System.FilePath ((</>)) import System.FilePath ((</>))
import System.IO (IOMode(..), withFile, stderr) import System.IO (IOMode(..), withFile, stdin, stderr, hSetNewlineMode, universalNewlineMode)
import Test.HUnit import Test.HUnit
import Text.Printf import Text.Printf
@ -158,10 +158,14 @@ readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
-- formats. A CSV conversion rules file may be specified for better -- formats. A CSV conversion rules file may be specified for better
-- conversion of that format. -- conversion of that format.
readJournalFile :: Maybe Format -> Maybe FilePath -> FilePath -> IO (Either String Journal) readJournalFile :: Maybe Format -> Maybe FilePath -> FilePath -> IO (Either String Journal)
readJournalFile format rulesfile "-" = getContents >>= readJournal format rulesfile (Just "(stdin)") readJournalFile format rulesfile "-" = do
hSetNewlineMode stdin universalNewlineMode
getContents >>= readJournal format rulesfile (Just "(stdin)")
readJournalFile format rulesfile f = do readJournalFile format rulesfile f = do
requireJournalFileExists f requireJournalFileExists f
withFile f ReadMode $ \h -> hGetContents h >>= readJournal format rulesfile (Just f) withFile f ReadMode $ \h -> do
hSetNewlineMode h universalNewlineMode
hGetContents h >>= readJournal format rulesfile (Just f)
-- | If the specified journal file does not exist, give a helpful error and quit. -- | If the specified journal file does not exist, give a helpful error and quit.
requireJournalFileExists :: FilePath -> IO () requireJournalFileExists :: FilePath -> IO ()

View File

@ -11,6 +11,10 @@ Example usage:
import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn) import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError') import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError')
2013/4/10 update: we now trust that current GHC versions & platforms
do the right thing, so this file is a no-op and on its way to being removed.
Not carefully tested.
-} -}
module Hledger.Utils.UTF8IOCompat ( module Hledger.Utils.UTF8IOCompat (
@ -37,51 +41,51 @@ import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Data.ByteString.Lazy.UTF8 as U8 (toString, fromString) import qualified Data.ByteString.Lazy.UTF8 as U8 (toString, fromString)
import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn) import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn)
import System.IO (Handle) import System.IO -- (Handle)
#if __GLASGOW_HASKELL__ < 702 -- #if __GLASGOW_HASKELL__ < 702
import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) -- import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded)
import System.Info (os) -- import System.Info (os)
#endif -- #endif
bom :: B.ByteString -- bom :: B.ByteString
bom = B.pack [0xEF, 0xBB, 0xBF] -- bom = B.pack [0xEF, 0xBB, 0xBF]
stripBOM :: B.ByteString -> B.ByteString -- stripBOM :: B.ByteString -> B.ByteString
stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s -- stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
stripBOM s = s -- stripBOM s = s
readFile :: FilePath -> IO String -- readFile :: FilePath -> IO String
readFile = liftM (U8.toString . stripBOM) . B.readFile -- readFile = liftM (U8.toString . stripBOM) . B.readFile
writeFile :: FilePath -> String -> IO () -- writeFile :: FilePath -> String -> IO ()
writeFile f = B.writeFile f . U8.fromString -- writeFile f = B.writeFile f . U8.fromString
appendFile :: FilePath -> String -> IO () -- appendFile :: FilePath -> String -> IO ()
appendFile f = B.appendFile f . U8.fromString -- appendFile f = B.appendFile f . U8.fromString
getContents :: IO String -- getContents :: IO String
getContents = liftM (U8.toString . stripBOM) B.getContents -- getContents = liftM (U8.toString . stripBOM) B.getContents
hGetContents :: Handle -> IO String -- hGetContents :: Handle -> IO String
hGetContents h = liftM (U8.toString . stripBOM) (B.hGetContents h) -- hGetContents h = liftM (U8.toString . stripBOM) (B.hGetContents h)
putStr :: String -> IO () -- putStr :: String -> IO ()
putStr = bs_putStr . U8.fromString -- putStr = bs_putStr . U8.fromString
putStrLn :: String -> IO () -- putStrLn :: String -> IO ()
putStrLn = bs_putStrLn . U8.fromString -- putStrLn = bs_putStrLn . U8.fromString
hPutStr :: Handle -> String -> IO () -- hPutStr :: Handle -> String -> IO ()
hPutStr h = bs_hPutStr h . U8.fromString -- hPutStr h = bs_hPutStr h . U8.fromString
hPutStrLn :: Handle -> String -> IO () -- hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h = bs_hPutStrLn h . U8.fromString -- hPutStrLn h = bs_hPutStrLn h . U8.fromString
-- span GHC versions including 6.12.3 - 7.4.1: -- -- span GHC versions including 6.12.3 - 7.4.1:
bs_putStr = B8.putStr -- bs_putStr = B8.putStr
bs_putStrLn = B8.putStrLn -- bs_putStrLn = B8.putStrLn
bs_hPutStr = B8.hPut -- bs_hPutStr = B8.hPut
bs_hPutStrLn h bs = B8.hPut h bs >> B8.hPut h (B.singleton 0x0a) -- bs_hPutStrLn h bs = B8.hPut h bs >> B8.hPut h (B.singleton 0x0a)
-- | A string received from or being passed to the operating system, such -- | A string received from or being passed to the operating system, such
@ -94,24 +98,24 @@ type SystemString = String
-- | Convert a system string to an ordinary string, decoding from UTF-8 if -- | Convert a system string to an ordinary string, decoding from UTF-8 if
-- it appears to be UTF8-encoded and GHC version is less than 7.2. -- it appears to be UTF8-encoded and GHC version is less than 7.2.
fromSystemString :: SystemString -> String fromSystemString :: SystemString -> String
#if __GLASGOW_HASKELL__ < 702 -- #if __GLASGOW_HASKELL__ < 702
fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s -- fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s
#else -- #else
fromSystemString = id fromSystemString = id
#endif -- #endif
-- | Convert a unicode string to a system string, encoding with UTF-8 if -- | Convert a unicode string to a system string, encoding with UTF-8 if
-- we are on a posix platform with GHC < 7.2. -- we are on a posix platform with GHC < 7.2.
toSystemString :: String -> SystemString toSystemString :: String -> SystemString
#if __GLASGOW_HASKELL__ < 702 -- #if __GLASGOW_HASKELL__ < 702
toSystemString = case os of -- toSystemString = case os of
"unix" -> UTF8.encodeString -- "unix" -> UTF8.encodeString
"linux" -> UTF8.encodeString -- "linux" -> UTF8.encodeString
"darwin" -> UTF8.encodeString -- "darwin" -> UTF8.encodeString
_ -> id -- _ -> id
#else -- #else
toSystemString = id toSystemString = id
#endif -- #endif
-- | A SystemString-aware version of error. -- | A SystemString-aware version of error.
error' :: String -> a error' :: String -> a