mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-27 12:24:43 +03:00
csv and general reader fixes, cleanups
- The CSV reader no longer writes a "(stdin).rules" file when reading from stdin. - Selection of reader(s) is now smarter when input is coming from stdin. Previously, all readers were considered applicable for stdin. This meant that when reading a CSV file from stdin, the journal and timelog readers were always tried first, and if the CSV file was unparseable, you'd see the first (journal) reader's error instead of the CSV reader's. Now, the readers do some basic content sniffing when reading stdin, so it generally tries only the one right reader and we'll see the right errors. - The read system now has more debug output.
This commit is contained in:
parent
6871c4cce5
commit
cf3d21afef
@ -182,7 +182,7 @@ data Reader = Reader {
|
||||
,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
|
||||
}
|
||||
|
||||
instance Show Reader where show r = "Reader for "++rFormat r
|
||||
instance Show Reader where show r = rFormat r ++ " reader"
|
||||
|
||||
-- format strings
|
||||
|
||||
|
@ -116,7 +116,6 @@ tests_readJournal' = [
|
||||
-- A CSV conversion rules file may also be specified for use by the CSV reader.
|
||||
readJournal :: Maybe StorageFormat -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal)
|
||||
readJournal format rulesfile path s =
|
||||
-- trace (show (format, rulesfile, path)) $
|
||||
tryReaders $ readersFor (format, path, s)
|
||||
where
|
||||
-- try each reader in turn, returning the error of the first if all fail
|
||||
@ -126,8 +125,9 @@ readJournal format rulesfile path s =
|
||||
firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal)
|
||||
firstSuccessOrBestError [] [] = return $ Left "no readers found"
|
||||
firstSuccessOrBestError errs (r:rs) = do
|
||||
-- printf "trying %s reader\n" (rFormat r)
|
||||
dbgAtM 1 "trying reader" (rFormat r)
|
||||
result <- (runErrorT . (rParser r) rulesfile path') s
|
||||
dbgAtM 1 "reader result" $ either id show result
|
||||
case result of Right j -> return $ Right j -- success!
|
||||
Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying
|
||||
firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error
|
||||
@ -136,11 +136,11 @@ readJournal format rulesfile path s =
|
||||
-- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ?
|
||||
readersFor :: (Maybe StorageFormat, Maybe FilePath, String) -> [Reader]
|
||||
readersFor (format,path,s) =
|
||||
dbg ("possible readers for "++show (format,path,elideRight 30 s)) $
|
||||
case format of
|
||||
Just f -> case readerForStorageFormat f of Just r -> [r]
|
||||
Nothing -> []
|
||||
Nothing -> case path of Nothing -> readers
|
||||
Just "-" -> readers
|
||||
Just p -> case readersForPathAndData (p,s) of [] -> readers
|
||||
rs -> rs
|
||||
|
||||
@ -162,7 +162,7 @@ readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
|
||||
readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> FilePath -> IO (Either String Journal)
|
||||
readJournalFile format rulesfile "-" = do
|
||||
hSetNewlineMode stdin universalNewlineMode
|
||||
getContents >>= readJournal format rulesfile (Just "(stdin)")
|
||||
getContents >>= readJournal format rulesfile (Just "-")
|
||||
readJournalFile format rulesfile f = do
|
||||
requireJournalFileExists f
|
||||
withFile f ReadMode $ \h -> do
|
||||
|
@ -53,15 +53,16 @@ reader = Reader format detect parse
|
||||
format :: String
|
||||
format = "csv"
|
||||
|
||||
-- | Does the given file path and data look like CSV ?
|
||||
-- | Does the given file path and data look like it might be CSV ?
|
||||
detect :: FilePath -> String -> Bool
|
||||
detect f _ = takeExtension f == '.':format
|
||||
detect f s
|
||||
| f /= "-" = takeExtension f == '.':format -- from a file: yes if the extension is .csv
|
||||
| otherwise = length (filter (==',') s) >= 2 -- from stdin: yes if there are two or more commas
|
||||
|
||||
-- | Parse and post-process a "Journal" from CSV data, or give an error.
|
||||
-- XXX currently ignores the string and reads from the file path
|
||||
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
|
||||
parse rulesfile f s = -- trace ("running "++format++" reader") $
|
||||
do
|
||||
parse rulesfile f s = do
|
||||
r <- liftIO $ readJournalFromCsv rulesfile f s
|
||||
case r of Left e -> throwError e
|
||||
Right j -> return j
|
||||
@ -78,7 +79,7 @@ parse rulesfile f s = -- trace ("running "++format++" reader") $
|
||||
-- 5. convert the CSV records to a journal using the rules
|
||||
-- @
|
||||
readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
|
||||
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
|
||||
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
|
||||
readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
handle (\e -> return $ Left $ show (e :: IOException)) $ do
|
||||
let throwerr = throw.userError
|
||||
@ -90,7 +91,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
|
||||
else hPrintf stderr "using conversion rules file %s\n" rulesfile
|
||||
rules <- either (throwerr.show) id `fmap` parseRulesFile rulesfile
|
||||
return $ dbg "" rules
|
||||
dbgAtM 2 "rules" rules
|
||||
|
||||
-- apply skip directive
|
||||
let skip = maybe 0 oneorerror $ getDirective "skip" rules
|
||||
@ -99,8 +100,13 @@ readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
oneorerror s = readDef (throwerr $ "could not parse skip value: " ++ show s) s
|
||||
|
||||
-- parse csv
|
||||
records <- (either throwerr id . validateCsv skip) `fmap` parseCsv csvfile csvdata
|
||||
dbgAtM 1 "" $ take 3 records
|
||||
-- parsec seems to fail if you pass it "-" here
|
||||
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
|
||||
records <- (either throwerr id .
|
||||
dbgAt 2 "validateCsv" . validateCsv skip .
|
||||
dbgAt 2 "parseCsv")
|
||||
`fmap` parseCsv parsecfilename csvdata
|
||||
dbgAtM 1 "first 3 csv records" $ take 3 records
|
||||
|
||||
-- identify header lines
|
||||
-- let (headerlines, datalines) = identifyHeaderLines records
|
||||
@ -378,13 +384,13 @@ rulesp = do
|
||||
,rconditionalblocks=reverse $ rconditionalblocks r
|
||||
}
|
||||
|
||||
blankorcommentline = pdbg 1 "trying blankorcommentline" >> choice' [blankline, commentline]
|
||||
blankorcommentline = pdbg 3 "trying blankorcommentline" >> choice' [blankline, commentline]
|
||||
blankline = many spacenonewline >> newline >> return () <?> "blank line"
|
||||
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
|
||||
commentchar = oneOf ";#"
|
||||
|
||||
directive = do
|
||||
pdbg 1 "trying directive"
|
||||
pdbg 3 "trying directive"
|
||||
d <- choice' $ map string directives
|
||||
v <- (((char ':' >> many spacenonewline) <|> many1 spacenonewline) >> directiveval)
|
||||
<|> (optional (char ':') >> many spacenonewline >> eolof >> return "")
|
||||
@ -404,7 +410,7 @@ directives =
|
||||
directiveval = anyChar `manyTill` eolof
|
||||
|
||||
fieldnamelist = (do
|
||||
pdbg 1 "trying fieldnamelist"
|
||||
pdbg 3 "trying fieldnamelist"
|
||||
string "fields"
|
||||
optional $ char ':'
|
||||
many1 spacenonewline
|
||||
@ -426,7 +432,7 @@ quotedfieldname = do
|
||||
barefieldname = many1 $ noneOf " \t\n,;#~"
|
||||
|
||||
fieldassignment = do
|
||||
pdbg 1 "trying fieldassignment"
|
||||
pdbg 3 "trying fieldassignment"
|
||||
f <- journalfieldname
|
||||
assignmentseparator
|
||||
v <- fieldval
|
||||
@ -466,7 +472,7 @@ fieldval = do
|
||||
anyChar `manyTill` eolof
|
||||
|
||||
conditionalblock = do
|
||||
pdbg 1 "trying conditionalblock"
|
||||
pdbg 3 "trying conditionalblock"
|
||||
string "if" >> many spacenonewline >> optional newline
|
||||
ms <- many1 recordmatcher
|
||||
as <- many (many1 spacenonewline >> fieldassignment)
|
||||
|
@ -76,15 +76,17 @@ reader = Reader format detect parse
|
||||
format :: String
|
||||
format = "journal"
|
||||
|
||||
-- | Does the given file path and data provide hledger's journal file format ?
|
||||
-- | Does the given file path and data look like it might be hledger's journal format ?
|
||||
detect :: FilePath -> String -> Bool
|
||||
detect f _ = takeExtension f `elem` ['.':format, ".j"]
|
||||
detect f s
|
||||
| f /= "-" = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j
|
||||
-- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented)
|
||||
| otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s
|
||||
|
||||
-- | Parse and post-process a "Journal" from hledger's journal file
|
||||
-- format, or give an error.
|
||||
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
|
||||
parse _ = -- trace ("running "++format++" reader") .
|
||||
parseJournalWith journal
|
||||
parse _ = parseJournalWith journal
|
||||
|
||||
-- parsing utils
|
||||
|
||||
|
@ -49,6 +49,7 @@ module Hledger.Read.TimelogReader (
|
||||
where
|
||||
import Control.Monad
|
||||
import Control.Monad.Error
|
||||
import Data.List (isPrefixOf)
|
||||
import Test.HUnit
|
||||
import Text.ParserCombinators.Parsec hiding (parse)
|
||||
import System.FilePath
|
||||
@ -68,16 +69,17 @@ reader = Reader format detect parse
|
||||
format :: String
|
||||
format = "timelog"
|
||||
|
||||
-- | Does the given file path and data provide timeclock.el's timelog format ?
|
||||
-- | Does the given file path and data look like it might be timeclock.el's timelog format ?
|
||||
detect :: FilePath -> String -> Bool
|
||||
detect f _ = takeExtension f == '.':format
|
||||
detect f s
|
||||
| f /= "-" = takeExtension f == '.':format -- from a file: yes if the extension is .timelog
|
||||
| otherwise = "i " `isPrefixOf` s || "o " `isPrefixOf` s -- from stdin: yes if it starts with "i " or "o "
|
||||
|
||||
-- | Parse and post-process a "Journal" from timeclock.el's timelog
|
||||
-- format, saving the provided file path and the current time, or give an
|
||||
-- error.
|
||||
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
|
||||
parse _ = -- trace ("running "++format++" reader") .
|
||||
parseJournalWith timelogFile
|
||||
parse _ = parseJournalWith timelogFile
|
||||
|
||||
timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext)
|
||||
timelogFile = do items <- many timelogItem
|
||||
|
Loading…
Reference in New Issue
Block a user