fix unicode handling on GHC >= 7.2, unify utf8 IO compatibility layer

tests pass again from GHC 6.12.3 to 7.4.1
This commit is contained in:
Simon Michael 2012-03-29 19:06:31 +00:00
parent d4451ce5e3
commit 8492f6cae4
15 changed files with 134 additions and 141 deletions

View File

@ -41,7 +41,7 @@ import Hledger.Read.TimelogReader as TimelogReader
import Hledger.Read.CsvReader as CsvReader
import Hledger.Utils
import Prelude hiding (getContents, writeFile)
import Hledger.Utils.UTF8 (getContents, hGetContents, writeFile)
import Hledger.Utils.UTF8IOCompat (getContents, hGetContents, writeFile)
journalEnvVar = "LEDGER_FILE"

View File

@ -51,7 +51,7 @@ import Text.Printf (hPrintf)
import Hledger.Data
import Prelude hiding (getContents)
import Hledger.Utils.UTF8 (getContents)
import Hledger.Utils.UTF8IOCompat (getContents)
import Hledger.Utils
import Hledger.Data.FormatStrings as FormatStrings
import Hledger.Read.JournalReader (ledgeraccountname, someamount)

View File

@ -54,7 +54,7 @@ import System.Time (getClockTime)
import Hledger.Data
import Hledger.Utils
import Prelude hiding (readFile)
import Hledger.Utils.UTF8 (readFile)
import Hledger.Utils.UTF8IOCompat (readFile)
-- let's get to it

View File

@ -1,3 +1,4 @@
{-# LANGUAGE CPP #-}
{-|
Standard imports and utilities which are useful everywhere, or needed low
@ -19,12 +20,12 @@ module Hledger.Utils (---- provide these frequently used modules - or not, for c
-- module Text.Printf,
---- all of this one:
module Hledger.Utils,
Debug.Trace.trace
---- and this for i18n - needs to be done in each module I think:
-- module Hledger.Utils.UTF8
Debug.Trace.trace,
-- module Hledger.Utils.UTF8IOCompat
SystemString,fromSystemString,toSystemString,error',userError'
-- the rest need to be done in each module I think
)
where
import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded)
import Control.Monad.Error
import Data.Char
import Data.List
@ -35,15 +36,15 @@ import Data.Tree
import Debug.Trace
import System.Directory (getHomeDirectory)
import System.FilePath(takeDirectory,combine)
import System.Info (os)
import Test.HUnit
import Text.ParserCombinators.Parsec
import Text.Printf
import Text.RegexPR
-- import qualified Data.Map as Map
--
-- import Prelude hiding (readFile,writeFile,getContents,putStr,putStrLn)
-- import Hledger.Utils.UTF8
-- import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
-- import Hledger.Utils.UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
import Hledger.Utils.UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError')
-- strings
@ -183,41 +184,6 @@ fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline
fit w = take w . (++ repeat ' ')
blankline = replicate w ' '
-- encoded platform strings
-- | A platform string is a string value from or for the operating system,
-- such as a file path or command-line argument (or environment variable's
-- name or value ?). On some platforms (such as unix) these are not real
-- unicode strings but have some encoding such as UTF-8. This alias does
-- no type enforcement but aids code clarity.
type PlatformString = String
-- | Convert a possibly encoded platform string to a real unicode string.
-- We decode the UTF-8 encoding recommended for unix systems
-- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html)
-- and leave anything else unchanged.
fromPlatformString :: PlatformString -> String
fromPlatformString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s
-- | Convert a unicode string to a possibly encoded platform string.
-- On unix we encode with the recommended UTF-8
-- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html)
-- and elsewhere we leave it unchanged.
toPlatformString :: String -> PlatformString
toPlatformString = case os of
"unix" -> UTF8.encodeString
"linux" -> UTF8.encodeString
"darwin" -> UTF8.encodeString
_ -> id
-- | A version of error that's better at displaying unicode.
error' :: String -> a
error' = error . toPlatformString
-- | A version of userError that's better at displaying unicode.
userError' :: String -> IOError
userError' = userError . toPlatformString
-- math
difforzero :: (Num a, Ord a) => a -> a -> a

View File

@ -1,87 +0,0 @@
{-
From pandoc, slightly extended. Example usage:
import Prelude hiding (readFile,writeFile,getContents,putStr,putStrLn)
import Hledger.Utils.UTF8 (readFile,writeFile,getContents,putStr,putStrLn)
----------------------------------------------------------------------
Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.UTF8
Copyright : Copyright (C) 2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12.
-}
module Hledger.Utils.UTF8 ( readFile
, writeFile
, appendFile
, getContents
, hGetContents
, putStr
, putStrLn
, hPutStr
, hPutStrLn
)
where
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString, fromString)
import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn)
import System.IO (Handle)
import Control.Monad (liftM)
bom :: B.ByteString
bom = B.pack [0xEF, 0xBB, 0xBF]
stripBOM :: B.ByteString -> B.ByteString
stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
stripBOM s = s
readFile :: FilePath -> IO String
readFile = liftM (toString . stripBOM) . B.readFile
writeFile :: FilePath -> String -> IO ()
writeFile f = B.writeFile f . fromString
appendFile :: FilePath -> String -> IO ()
appendFile f = B.appendFile f . fromString
getContents :: IO String
getContents = liftM (toString . stripBOM) B.getContents
hGetContents :: Handle -> IO String
hGetContents h = liftM (toString . stripBOM) (B.hGetContents h)
putStr :: String -> IO ()
putStr = B.putStr . fromString
putStrLn :: String -> IO ()
putStrLn = B.putStrLn . fromString
hPutStr :: Handle -> String -> IO ()
hPutStr h = B.hPutStr h . fromString
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hPutStr h (s ++ "\n")

View File

@ -0,0 +1,114 @@
{-# LANGUAGE CPP #-}
{-
UTF-8 aware string IO functions that will work across multiple platforms
and GHC versions. Includes code from Text.Pandoc.UTF8 ((C) 2010 John
MacFarlane <jgm@berkeley.edu>).
Example usage:
import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError')
-}
module Hledger.Utils.UTF8IOCompat (
readFile,
writeFile,
appendFile,
getContents,
hGetContents,
putStr,
putStrLn,
hPutStr,
hPutStrLn,
--
SystemString,
fromSystemString,
toSystemString,
error',
userError',
)
where
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString, fromString)
import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn)
import System.IO (Handle)
import Control.Monad (liftM)
#if __GLASGOW_HASKELL__ < 702
import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded)
import System.Info (os)
#endif
bom :: B.ByteString
bom = B.pack [0xEF, 0xBB, 0xBF]
stripBOM :: B.ByteString -> B.ByteString
stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
stripBOM s = s
readFile :: FilePath -> IO String
readFile = liftM (toString . stripBOM) . B.readFile
writeFile :: FilePath -> String -> IO ()
writeFile f = B.writeFile f . fromString
appendFile :: FilePath -> String -> IO ()
appendFile f = B.appendFile f . fromString
getContents :: IO String
getContents = liftM (toString . stripBOM) B.getContents
hGetContents :: Handle -> IO String
hGetContents h = liftM (toString . stripBOM) (B.hGetContents h)
putStr :: String -> IO ()
putStr = B.putStr . fromString
putStrLn :: String -> IO ()
putStrLn = B.putStrLn . fromString
hPutStr :: Handle -> String -> IO ()
hPutStr h = B.hPutStr h . fromString
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hPutStr h (s ++ "\n")
-- | A string received from or being passed to the operating system, such
-- as a file path, command-line argument, or environment variable name or
-- value. With GHC versions before 7.2 on some platforms (posix) these are
-- typically encoded. When converting, we assume the encoding is UTF-8 (cf
-- http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html#UTF8).
type SystemString = String
-- | Convert a system string to an ordinary string, decoding from UTF-8 if
-- it appears to be UTF8-encoded and GHC version is less than 7.2.
fromSystemString :: SystemString -> String
#if __GLASGOW_HASKELL__ < 702
fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s
#else
fromSystemString = id
#endif
-- | Convert a unicode string to a system string, encoding with UTF-8 if
-- we are on a posix platform with GHC < 7.2.
toSystemString :: String -> SystemString
#if __GLASGOW_HASKELL__ < 702
toSystemString = case os of
"unix" -> UTF8.encodeString
"linux" -> UTF8.encodeString
"darwin" -> UTF8.encodeString
_ -> id
#else
toSystemString = id
#endif
-- | A SystemString-aware version of error.
error' :: String -> a
error' = error . toSystemString
-- | A SystemString-aware version of userError.
userError' :: String -> IOError
userError' = userError . toSystemString

View File

@ -52,7 +52,7 @@ library
Hledger.Read.Utils
Hledger.Reports
Hledger.Utils
Hledger.Utils.UTF8
Hledger.Utils.UTF8IOCompat
Build-Depends:
base >= 3 && < 5
,bytestring

View File

@ -26,7 +26,7 @@ import Yesod.Logger (makeLogger)
import Hledger
import Hledger.Cli hiding (progname,prognameandversion)
import Prelude hiding (putStrLn)
import Hledger.Utils.UTF8 (putStrLn)
import Hledger.Utils.UTF8IOCompat (putStrLn)
import Hledger.Web

View File

@ -29,7 +29,7 @@ import qualified Data.Set as Set
import Hledger
import Prelude hiding (putStr, putStrLn, appendFile)
import Hledger.Utils.UTF8 (putStr, putStrLn, appendFile)
import Hledger.Utils.UTF8IOCompat (putStr, putStrLn, appendFile)
import Hledger.Cli.Options
import Hledger.Cli.Register (postingsReportAsText)
import Hledger.Cli.Utils

View File

@ -105,7 +105,7 @@ import Test.HUnit
import Hledger
import Prelude hiding (putStr)
import Hledger.Utils.UTF8 (putStr)
import Hledger.Utils.UTF8IOCompat (putStr)
import Hledger.Data.FormatStrings
import qualified Hledger.Data.FormatStrings as Format
import Hledger.Cli.Options

View File

@ -15,7 +15,7 @@ import Hledger.Cli.Options
import Hledger.Data
import Hledger.Reports
import Prelude hiding (putStr)
import Hledger.Utils.UTF8 (putStr)
import Hledger.Utils.UTF8IOCompat (putStr)
barchar = '*'

View File

@ -363,7 +363,7 @@ getEnvSafe v = getEnv v `catch` (\_ -> return "")
getDirectoryContentsSafe d = getDirectoryContents d `catch` (\_ -> return [])
-- | Convert possibly encoded option values to regular unicode strings.
decodeRawOpts = map (\(name,val) -> (name, fromPlatformString val))
decodeRawOpts = map (\(name,val) -> (name, fromSystemString val))
-- A hacky workaround for http://code.google.com/p/ndmitchell/issues/detail?id=470 :
-- we'd like to permit options before COMMAND as well as after it.

View File

@ -12,7 +12,7 @@ import Data.List
import Hledger
import Prelude hiding (putStr)
import Hledger.Utils.UTF8 (putStr)
import Hledger.Utils.UTF8IOCompat (putStr)
import Hledger.Cli.Options
-- | Print journal transactions in standard format.

View File

@ -18,7 +18,7 @@ import Text.Printf
import Hledger
import Prelude hiding (putStr)
import Hledger.Utils.UTF8 (putStr)
import Hledger.Utils.UTF8IOCompat (putStr)
import Hledger.Cli.Options

View File

@ -16,7 +16,7 @@ import qualified Data.Map as Map
import Hledger
import Hledger.Cli.Options
import Prelude hiding (putStr)
import Hledger.Utils.UTF8 (putStr)
import Hledger.Utils.UTF8IOCompat (putStr)
-- like Register.summarisePostings