smarter file reading: detect (or specify) intended data format and show appropriate error messages

New data reader modules need to provide just reader :: Reader, which is
the format name, a detector predicate, and a parser.
This commit is contained in:
Simon Michael 2010-06-25 14:56:48 +00:00
parent 1ec1f7c4ea
commit 8a64792ba7
7 changed files with 106 additions and 64 deletions

View File

@ -116,7 +116,7 @@ journalFileModifiedTime Journal{filepath=f}
reload :: Journal -> IO Journal reload :: Journal -> IO Journal
reload Journal{filepath=f} = do reload Journal{filepath=f} = do
j' <- readJournalFile f j' <- readJournalFile Nothing f
putValue "hledger" "journal" j' putValue "hledger" "journal" j'
return j' return j'

View File

@ -64,8 +64,8 @@ tests = TestList [
tests_Hledger_Commands, tests_Hledger_Commands,
"account directive" ~: "account directive" ~:
let sameParse str1 str2 = do j1 <- readJournal str1 let sameParse str1 str2 = do j1 <- readJournal Nothing str1
j2 <- readJournal str2 j2 <- readJournal Nothing str2
j1 `is` j2{filereadtime=filereadtime j1, jtext=jtext j1} j1 `is` j2{filereadtime=filereadtime j1, jtext=jtext j1}
in TestList in TestList
[ [
@ -232,7 +232,7 @@ tests = TestList [
] ]
,"balance report with cost basis" ~: do ,"balance report with cost basis" ~: do
j <- readJournal $ unlines j <- readJournal Nothing $ unlines
["" [""
,"2008/1/1 test " ,"2008/1/1 test "
," a:b 10h @ $50" ," a:b 10h @ $50"
@ -375,7 +375,7 @@ tests = TestList [
"assets:bank" `isSubAccountNameOf` "my assets" `is` False "assets:bank" `isSubAccountNameOf` "my assets" `is` False
,"default year" ~: do ,"default year" ~: do
rl <- readJournal defaultyear_ledger_str rl <- readJournal Nothing defaultyear_ledger_str
tdate (head $ jtxns rl) `is` fromGregorian 2009 1 1 tdate (head $ jtxns rl) `is` fromGregorian 2009 1 1
return () return ()

View File

@ -36,12 +36,12 @@ withJournalDo opts args cmdname cmd = do
runcmd = cmd opts args . costify runcmd = cmd opts args . costify
if creating if creating
then runcmd nulljournal then runcmd nulljournal
else readJournalFile f >>= runcmd else readJournalFile Nothing f >>= runcmd
-- | Get a journal from the given string and options, or throw an error. -- | Get a journal from the given string and options, or throw an error.
readJournalWithOpts :: [Opt] -> String -> IO Journal readJournalWithOpts :: [Opt] -> String -> IO Journal
readJournalWithOpts opts s = do readJournalWithOpts opts s = do
j <- readJournal s j <- readJournal Nothing s
let cost = CostBasis `elem` opts let cost = CostBasis `elem` opts
return $ (if cost then journalConvertAmountsToCost else id) j return $ (if cost then journalConvertAmountsToCost else id) j

View File

@ -7,22 +7,23 @@ Read hledger data from various data formats, and related utilities.
module Hledger.Read ( module Hledger.Read (
tests_Hledger_Read, tests_Hledger_Read,
readJournalFile,
readJournal,
myLedgerPath, myLedgerPath,
myTimelogPath, myTimelogPath,
myJournal, myJournal,
myTimelog, myTimelog,
readJournalFile,
readJournal,
) )
where where
import Hledger.Data.Types (Journal(..)) import Hledger.Data.Types (Journal(..))
import Hledger.Data.Utils import Hledger.Data.Utils
import Hledger.Read.Common import Hledger.Read.Common
import qualified Hledger.Read.Journal (parseJournal,ledgerFile,tests_Journal) import Hledger.Read.Journal as Journal
import qualified Hledger.Read.Timelog (parseJournal,tests_Timelog) import Hledger.Read.Timelog as Timelog
import Control.Monad.Error import Control.Monad.Error
import Data.Either (partitionEithers) import Data.Either (partitionEithers)
import Safe (headDef)
import System.Directory (getHomeDirectory) import System.Directory (getHomeDirectory)
import System.Environment (getEnv) import System.Environment (getEnv)
import System.FilePath ((</>)) import System.FilePath ((</>))
@ -36,26 +37,61 @@ import System.IO (hPutStrLn)
#endif #endif
formats = [
"journal"
,"timelog"
-- ,"csv"
]
unknownformatmsg fp = printf "could not recognise %sdata in %s" (fmt formats) fp
where fmt [] = ""
fmt [f] = f ++ " "
fmt fs = intercalate ", " (init fs) ++ " or " ++ last fs ++ " "
parsers = [Hledger.Read.Journal.parseJournal
,Hledger.Read.Timelog.parseJournal
]
ledgerenvvar = "LEDGER" ledgerenvvar = "LEDGER"
timelogenvvar = "TIMELOG" timelogenvvar = "TIMELOG"
ledgerdefaultfilename = ".ledger" ledgerdefaultfilename = ".ledger"
timelogdefaultfilename = ".timelog" timelogdefaultfilename = ".timelog"
-- Here are the available readers. The first is the default, used for unknown data formats.
readers :: [Reader]
readers = [
Journal.reader
,Timelog.reader
]
formats = map rFormat readers
readerForFormat :: String -> Maybe Reader
readerForFormat s | null rs = Nothing
| otherwise = Just $ head rs
where
rs = filter ((s==).rFormat) readers :: [Reader]
-- | Read a Journal from this string (and file path), auto-detecting the
-- data format, or give an error. Tries to parse each known data format in
-- turn. If none succeed, gives the error message specific to the intended
-- data format, which if not specified is guessed from the file suffix and
-- possibly the data.
journalFromPathAndString :: Maybe String -> FilePath -> String -> IO Journal
journalFromPathAndString format fp s = do
let readers' = case format of Just f -> case readerForFormat f of Just r -> [r]
Nothing -> []
Nothing -> readers
(errors, journals) <- partitionEithers `fmap` mapM try readers'
case journals of j:_ -> return j
_ -> hPutStrLn stderr (errMsg errors) >> exitWith (ExitFailure 1)
where
try r = (runErrorT . (rParser r) fp) s
errMsg [] = unknownFormatMsg
errMsg es = printf "could not parse %s data in %s\n%s" (rFormat r) fp e
where (r,e) = headDef (head readers, head es) $ filter detects $ zip readers es
detects (r,_) = (rDetector r) fp s
unknownFormatMsg = printf "could not parse %sdata in %s" (fmt formats) fp
where fmt [] = ""
fmt [f] = f ++ " "
fmt fs = intercalate ", " (init fs) ++ " or " ++ last fs ++ " "
-- | Read a journal from this file, using the specified data format or
-- trying all known formats, or give an error.
readJournalFile :: Maybe String -> FilePath -> IO Journal
readJournalFile format "-" = getContents >>= journalFromPathAndString format "(stdin)"
readJournalFile format f = readFile f >>= journalFromPathAndString format f
-- | Read a Journal from this string, using the specified data format or
-- trying all known formats, or give an error.
readJournal :: Maybe String -> String -> IO Journal
readJournal format s = journalFromPathAndString format "(string)" s
-- | Get the user's default ledger file path. -- | Get the user's default ledger file path.
myLedgerPath :: IO String myLedgerPath :: IO String
myLedgerPath = myLedgerPath =
@ -74,43 +110,20 @@ myTimelogPath =
-- | Read the user's default journal file, or give an error. -- | Read the user's default journal file, or give an error.
myJournal :: IO Journal myJournal :: IO Journal
myJournal = myLedgerPath >>= readJournalFile myJournal = myLedgerPath >>= readJournalFile Nothing
-- | Read the user's default timelog file, or give an error. -- | Read the user's default timelog file, or give an error.
myTimelog :: IO Journal myTimelog :: IO Journal
myTimelog = myTimelogPath >>= readJournalFile myTimelog = myTimelogPath >>= readJournalFile Nothing
-- | Read a journal from this file, trying all known data formats,
-- or give an error.
readJournalFile :: FilePath -> IO Journal
readJournalFile "-" = getContents >>= journalFromPathAndString "(stdin)"
readJournalFile f = readFile f >>= journalFromPathAndString f
-- | Read a Journal from this string, trying all known data formats, or
-- give an error.
readJournal :: String -> IO Journal
readJournal = journalFromPathAndString "(string)"
-- | Read a Journal from this string, trying each known data format in
-- turn, or give an error. The file path is also required.
journalFromPathAndString :: FilePath -> String -> IO Journal
journalFromPathAndString f s = do
(errors, journals) <- partitionEithers `fmap` mapM try parsers
case journals of j:_ -> return j
_ -> hPutStrLn stderr (errmsg errors) >> exitWith (ExitFailure 1)
where
try p = (runErrorT . p f) s
errmsg [] = unknownformatmsg f
errmsg (e:_) = unlines [unknownformatmsg f, e]
tests_Hledger_Read = TestList tests_Hledger_Read = TestList
[ [
"ledgerFile" ~: do "ledgerFile" ~: do
assertBool "ledgerFile should parse an empty file" (isRight $ parseWithCtx emptyCtx Hledger.Read.Journal.ledgerFile "") assertBool "ledgerFile should parse an empty file" (isRight $ parseWithCtx emptyCtx Journal.ledgerFile "")
r <- readJournal "" -- don't know how to get it from ledgerFile r <- readJournal Nothing "" -- don't know how to get it from ledgerFile
assertBool "ledgerFile parsing an empty file should give an empty ledger" $ null $ jtxns r assertBool "ledgerFile parsing an empty file should give an empty ledger" $ null $ jtxns r
,Hledger.Read.Journal.tests_Journal ,Journal.tests_Journal
,Hledger.Read.Timelog.tests_Timelog ,Timelog.tests_Timelog
] ]

View File

@ -18,6 +18,12 @@ import System.Time (getClockTime)
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec
-- | A hledger data reader is a triple of format name, format-detecting predicate, and a parser to Journal.
data Reader = Reader {rFormat :: String
,rDetector :: FilePath -> String -> Bool
,rParser :: FilePath -> String -> ErrorT String IO Journal
}
-- | A JournalUpdate is some transformation of a "Journal". It can do I/O -- | A JournalUpdate is some transformation of a "Journal". It can do I/O
-- or raise an error. -- or raise an error.
type JournalUpdate = ErrorT String IO (Journal -> Journal) type JournalUpdate = ErrorT String IO (Journal -> Journal)
@ -70,3 +76,5 @@ expandPath pos fp = liftM mkRelative (expandHome fp)
return $ homedir ++ drop 1 inname return $ homedir ++ drop 1 inname
| otherwise = return inname | otherwise = return inname
fileSuffix :: FilePath -> String
fileSuffix = reverse . takeWhile (/='.') . reverse . dropWhile (/='.')

View File

@ -105,7 +105,7 @@ i, o, b, h
module Hledger.Read.Journal ( module Hledger.Read.Journal (
tests_Journal, tests_Journal,
parseJournal, reader,
ledgerFile, ledgerFile,
someamount, someamount,
ledgeraccountname, ledgeraccountname,
@ -117,7 +117,7 @@ module Hledger.Read.Journal (
) )
where where
import Control.Monad.Error (ErrorT(..), throwError, catchError) import Control.Monad.Error (ErrorT(..), throwError, catchError)
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec hiding (parse)
#if __GLASGOW_HASKELL__ <= 610 #if __GLASGOW_HASKELL__ <= 610
import Prelude hiding (readFile, putStr, putStrLn, print, getContents) import Prelude hiding (readFile, putStr, putStrLn, print, getContents)
import System.IO.UTF8 import System.IO.UTF8
@ -136,10 +136,20 @@ import Hledger.Read.Common
-- let's get to it -- let's get to it
reader :: Reader
reader = Reader format detect parse
format :: String
format = "journal"
-- | Does the given file path and data provide hledger's journal file format ?
detect :: FilePath -> String -> Bool
detect f _ = fileSuffix f == format
-- | Parse and post-process a "Journal" from hledger's journal file -- | Parse and post-process a "Journal" from hledger's journal file
-- format, or give an error. -- format, or give an error.
parseJournal :: FilePath -> String -> ErrorT String IO Journal parse :: FilePath -> String -> ErrorT String IO Journal
parseJournal = parseJournalWith ledgerFile parse = parseJournalWith ledgerFile
-- | Top-level journal parser. Returns a single composite, I/O performing, -- | Top-level journal parser. Returns a single composite, I/O performing,
-- error-raising "JournalUpdate" which can be applied to an empty journal -- error-raising "JournalUpdate" which can be applied to an empty journal

View File

@ -44,21 +44,32 @@ o 2007/03/10 17:26:02
module Hledger.Read.Timelog ( module Hledger.Read.Timelog (
tests_Timelog, tests_Timelog,
parseJournal, reader,
) )
where where
import Control.Monad.Error (ErrorT(..)) import Control.Monad.Error (ErrorT(..))
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec hiding (parse)
import Hledger.Data import Hledger.Data
import Hledger.Read.Common import Hledger.Read.Common
import Hledger.Read.Journal hiding (parseJournal) import Hledger.Read.Journal (ledgerExclamationDirective, ledgerHistoricalPrice,
ledgerDefaultYear, emptyLine, ledgerdatetime)
reader :: Reader
reader = Reader format detect parse
format :: String
format = "timelog"
-- | Does the given file path and data provide timeclock.el's timelog format ?
detect :: FilePath -> String -> Bool
detect f _ = fileSuffix f == format
-- | Parse and post-process a "Journal" from timeclock.el's timelog -- | Parse and post-process a "Journal" from timeclock.el's timelog
-- format, saving the provided file path and the current time, or give an -- format, saving the provided file path and the current time, or give an
-- error. -- error.
parseJournal :: FilePath -> String -> ErrorT String IO Journal parse :: FilePath -> String -> ErrorT String IO Journal
parseJournal = parseJournalWith timelogFile parse = parseJournalWith timelogFile
timelogFile :: GenParser Char LedgerFileCtx JournalUpdate timelogFile :: GenParser Char LedgerFileCtx JournalUpdate
timelogFile = do items <- many timelogItem timelogFile = do items <- many timelogItem