mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-31 22:31:54 +03:00
parsing: read dos line endings, deprecate encoding compatibility layer
We should now read all text in universal newline mode, so that, e.g., journal files with DOS/Windows line endings are read correctly. This also deprecates and disables our IO encoding compatibility layer, which prevented many encoding-related problems with certain platforms and GHC versions. With modern GHC (7.x) this layer is now hopefully totally unnecessary, but the module remains in place just in case.
This commit is contained in:
parent
78837c66a6
commit
42385fc8bf
@ -36,7 +36,7 @@ import System.Directory (doesFileExist, getHomeDirectory)
|
|||||||
import System.Environment (getEnv)
|
import System.Environment (getEnv)
|
||||||
import System.Exit (exitFailure)
|
import System.Exit (exitFailure)
|
||||||
import System.FilePath ((</>))
|
import System.FilePath ((</>))
|
||||||
import System.IO (IOMode(..), withFile, stderr)
|
import System.IO (IOMode(..), withFile, stdin, stderr, hSetNewlineMode, universalNewlineMode)
|
||||||
import Test.HUnit
|
import Test.HUnit
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
|
||||||
@ -158,10 +158,14 @@ readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
|
|||||||
-- formats. A CSV conversion rules file may be specified for better
|
-- formats. A CSV conversion rules file may be specified for better
|
||||||
-- conversion of that format.
|
-- conversion of that format.
|
||||||
readJournalFile :: Maybe Format -> Maybe FilePath -> FilePath -> IO (Either String Journal)
|
readJournalFile :: Maybe Format -> Maybe FilePath -> FilePath -> IO (Either String Journal)
|
||||||
readJournalFile format rulesfile "-" = getContents >>= readJournal format rulesfile (Just "(stdin)")
|
readJournalFile format rulesfile "-" = do
|
||||||
|
hSetNewlineMode stdin universalNewlineMode
|
||||||
|
getContents >>= readJournal format rulesfile (Just "(stdin)")
|
||||||
readJournalFile format rulesfile f = do
|
readJournalFile format rulesfile f = do
|
||||||
requireJournalFileExists f
|
requireJournalFileExists f
|
||||||
withFile f ReadMode $ \h -> hGetContents h >>= readJournal format rulesfile (Just f)
|
withFile f ReadMode $ \h -> do
|
||||||
|
hSetNewlineMode h universalNewlineMode
|
||||||
|
hGetContents h >>= readJournal format rulesfile (Just f)
|
||||||
|
|
||||||
-- | If the specified journal file does not exist, give a helpful error and quit.
|
-- | If the specified journal file does not exist, give a helpful error and quit.
|
||||||
requireJournalFileExists :: FilePath -> IO ()
|
requireJournalFileExists :: FilePath -> IO ()
|
||||||
|
@ -11,6 +11,10 @@ Example usage:
|
|||||||
import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
|
import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
|
||||||
import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError')
|
import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError')
|
||||||
|
|
||||||
|
2013/4/10 update: we now trust that current GHC versions & platforms
|
||||||
|
do the right thing, so this file is a no-op and on its way to being removed.
|
||||||
|
Not carefully tested.
|
||||||
|
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Hledger.Utils.UTF8IOCompat (
|
module Hledger.Utils.UTF8IOCompat (
|
||||||
@ -37,51 +41,51 @@ import qualified Data.ByteString.Lazy as B
|
|||||||
import qualified Data.ByteString.Lazy.Char8 as B8
|
import qualified Data.ByteString.Lazy.Char8 as B8
|
||||||
import qualified Data.ByteString.Lazy.UTF8 as U8 (toString, fromString)
|
import qualified Data.ByteString.Lazy.UTF8 as U8 (toString, fromString)
|
||||||
import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn)
|
import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn)
|
||||||
import System.IO (Handle)
|
import System.IO -- (Handle)
|
||||||
#if __GLASGOW_HASKELL__ < 702
|
-- #if __GLASGOW_HASKELL__ < 702
|
||||||
import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded)
|
-- import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded)
|
||||||
import System.Info (os)
|
-- import System.Info (os)
|
||||||
#endif
|
-- #endif
|
||||||
|
|
||||||
bom :: B.ByteString
|
-- bom :: B.ByteString
|
||||||
bom = B.pack [0xEF, 0xBB, 0xBF]
|
-- bom = B.pack [0xEF, 0xBB, 0xBF]
|
||||||
|
|
||||||
stripBOM :: B.ByteString -> B.ByteString
|
-- stripBOM :: B.ByteString -> B.ByteString
|
||||||
stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
|
-- stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
|
||||||
stripBOM s = s
|
-- stripBOM s = s
|
||||||
|
|
||||||
readFile :: FilePath -> IO String
|
-- readFile :: FilePath -> IO String
|
||||||
readFile = liftM (U8.toString . stripBOM) . B.readFile
|
-- readFile = liftM (U8.toString . stripBOM) . B.readFile
|
||||||
|
|
||||||
writeFile :: FilePath -> String -> IO ()
|
-- writeFile :: FilePath -> String -> IO ()
|
||||||
writeFile f = B.writeFile f . U8.fromString
|
-- writeFile f = B.writeFile f . U8.fromString
|
||||||
|
|
||||||
appendFile :: FilePath -> String -> IO ()
|
-- appendFile :: FilePath -> String -> IO ()
|
||||||
appendFile f = B.appendFile f . U8.fromString
|
-- appendFile f = B.appendFile f . U8.fromString
|
||||||
|
|
||||||
getContents :: IO String
|
-- getContents :: IO String
|
||||||
getContents = liftM (U8.toString . stripBOM) B.getContents
|
-- getContents = liftM (U8.toString . stripBOM) B.getContents
|
||||||
|
|
||||||
hGetContents :: Handle -> IO String
|
-- hGetContents :: Handle -> IO String
|
||||||
hGetContents h = liftM (U8.toString . stripBOM) (B.hGetContents h)
|
-- hGetContents h = liftM (U8.toString . stripBOM) (B.hGetContents h)
|
||||||
|
|
||||||
putStr :: String -> IO ()
|
-- putStr :: String -> IO ()
|
||||||
putStr = bs_putStr . U8.fromString
|
-- putStr = bs_putStr . U8.fromString
|
||||||
|
|
||||||
putStrLn :: String -> IO ()
|
-- putStrLn :: String -> IO ()
|
||||||
putStrLn = bs_putStrLn . U8.fromString
|
-- putStrLn = bs_putStrLn . U8.fromString
|
||||||
|
|
||||||
hPutStr :: Handle -> String -> IO ()
|
-- hPutStr :: Handle -> String -> IO ()
|
||||||
hPutStr h = bs_hPutStr h . U8.fromString
|
-- hPutStr h = bs_hPutStr h . U8.fromString
|
||||||
|
|
||||||
hPutStrLn :: Handle -> String -> IO ()
|
-- hPutStrLn :: Handle -> String -> IO ()
|
||||||
hPutStrLn h = bs_hPutStrLn h . U8.fromString
|
-- hPutStrLn h = bs_hPutStrLn h . U8.fromString
|
||||||
|
|
||||||
-- span GHC versions including 6.12.3 - 7.4.1:
|
-- -- span GHC versions including 6.12.3 - 7.4.1:
|
||||||
bs_putStr = B8.putStr
|
-- bs_putStr = B8.putStr
|
||||||
bs_putStrLn = B8.putStrLn
|
-- bs_putStrLn = B8.putStrLn
|
||||||
bs_hPutStr = B8.hPut
|
-- bs_hPutStr = B8.hPut
|
||||||
bs_hPutStrLn h bs = B8.hPut h bs >> B8.hPut h (B.singleton 0x0a)
|
-- bs_hPutStrLn h bs = B8.hPut h bs >> B8.hPut h (B.singleton 0x0a)
|
||||||
|
|
||||||
|
|
||||||
-- | A string received from or being passed to the operating system, such
|
-- | A string received from or being passed to the operating system, such
|
||||||
@ -94,24 +98,24 @@ type SystemString = String
|
|||||||
-- | Convert a system string to an ordinary string, decoding from UTF-8 if
|
-- | Convert a system string to an ordinary string, decoding from UTF-8 if
|
||||||
-- it appears to be UTF8-encoded and GHC version is less than 7.2.
|
-- it appears to be UTF8-encoded and GHC version is less than 7.2.
|
||||||
fromSystemString :: SystemString -> String
|
fromSystemString :: SystemString -> String
|
||||||
#if __GLASGOW_HASKELL__ < 702
|
-- #if __GLASGOW_HASKELL__ < 702
|
||||||
fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s
|
-- fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s
|
||||||
#else
|
-- #else
|
||||||
fromSystemString = id
|
fromSystemString = id
|
||||||
#endif
|
-- #endif
|
||||||
|
|
||||||
-- | Convert a unicode string to a system string, encoding with UTF-8 if
|
-- | Convert a unicode string to a system string, encoding with UTF-8 if
|
||||||
-- we are on a posix platform with GHC < 7.2.
|
-- we are on a posix platform with GHC < 7.2.
|
||||||
toSystemString :: String -> SystemString
|
toSystemString :: String -> SystemString
|
||||||
#if __GLASGOW_HASKELL__ < 702
|
-- #if __GLASGOW_HASKELL__ < 702
|
||||||
toSystemString = case os of
|
-- toSystemString = case os of
|
||||||
"unix" -> UTF8.encodeString
|
-- "unix" -> UTF8.encodeString
|
||||||
"linux" -> UTF8.encodeString
|
-- "linux" -> UTF8.encodeString
|
||||||
"darwin" -> UTF8.encodeString
|
-- "darwin" -> UTF8.encodeString
|
||||||
_ -> id
|
-- _ -> id
|
||||||
#else
|
-- #else
|
||||||
toSystemString = id
|
toSystemString = id
|
||||||
#endif
|
-- #endif
|
||||||
|
|
||||||
-- | A SystemString-aware version of error.
|
-- | A SystemString-aware version of error.
|
||||||
error' :: String -> a
|
error' :: String -> a
|
||||||
|
Loading…
Reference in New Issue
Block a user