csv and general reader fixes, cleanups

- The CSV reader no longer writes a "(stdin).rules" file when reading
  from stdin.

- Reader selection is now smarter when input comes from stdin.
  Previously, all readers were considered applicable for stdin, so when
  reading a CSV file from stdin the journal and timelog readers were
  always tried first, and if the CSV data was unparseable you would see
  the first (journal) reader's error instead of the CSV reader's.  Now
  the readers do some basic content sniffing on stdin, so generally only
  the one right reader is tried and you see that reader's errors (a
  condensed sketch of the detection heuristics follows this list).

- The read system now has more debug output.
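
The sniffing is done by each reader's detect function (see the diffs
below).  Condensed into standalone helpers with made-up names, the
heuristics amount to roughly the following; this is an illustrative
sketch, not code from this commit:

    import Data.Char (isDigit, isSpace)
    import Data.List (isPrefixOf)

    -- Illustrative helpers only; the real logic is in each reader's detect.
    looksLikeCsv, looksLikeJournal, looksLikeTimelog :: String -> Bool

    -- csv: the data contains two or more commas
    looksLikeCsv s = length (filter (== ',') s) >= 2

    -- journal: a line starting with a digit (a date) followed by an
    -- indented line; simplified here to checking just the first two lines
    looksLikeJournal s = case lines s of
      (l1:l2:_) -> not (null l1) && isDigit (head l1) &&
                   not (null l2) && isSpace (head l2)
      _         -> False

    -- timelog: starts with a clock-in ("i ") or clock-out ("o ") record
    looksLikeTimelog s = "i " `isPrefixOf` s || "o " `isPrefixOf` s
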
Simon Michael 2014-05-09 17:55:32 -07:00
parent 6871c4cce5
commit cf3d21afef
5 changed files with 36 additions and 26 deletions

View File

@@ -182,7 +182,7 @@ data Reader = Reader {
      ,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
      }
 
-instance Show Reader where show r = "Reader for "++rFormat r
+instance Show Reader where show r = rFormat r ++ " reader"
 
 -- format strings

View File

@@ -116,7 +116,6 @@ tests_readJournal' = [
 -- A CSV conversion rules file may also be specified for use by the CSV reader.
 readJournal :: Maybe StorageFormat -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal)
 readJournal format rulesfile path s =
-  -- trace (show (format, rulesfile, path)) $
   tryReaders $ readersFor (format, path, s)
   where
     -- try each reader in turn, returning the error of the first if all fail
@@ -126,8 +125,9 @@ readJournal format rulesfile path s =
     firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal)
     firstSuccessOrBestError [] [] = return $ Left "no readers found"
     firstSuccessOrBestError errs (r:rs) = do
-      -- printf "trying %s reader\n" (rFormat r)
+      dbgAtM 1 "trying reader" (rFormat r)
       result <- (runErrorT . (rParser r) rulesfile path') s
+      dbgAtM 1 "reader result" $ either id show result
       case result of Right j -> return $ Right j -- success!
                      Left e  -> firstSuccessOrBestError (errs++[e]) rs -- keep trying
     firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error
@@ -136,11 +136,11 @@ readJournal format rulesfile path s =
 -- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ?
 readersFor :: (Maybe StorageFormat, Maybe FilePath, String) -> [Reader]
 readersFor (format,path,s) =
+  dbg ("possible readers for "++show (format,path,elideRight 30 s)) $
   case format of
    Just f  -> case readerForStorageFormat f of Just r  -> [r]
                                                Nothing -> []
   Nothing -> case path of Nothing -> readers
-                          Just "-" -> readers
                           Just p  -> case readersForPathAndData (p,s) of [] -> readers
                                                                          rs -> rs
@@ -162,7 +162,7 @@ readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
 readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> FilePath -> IO (Either String Journal)
 readJournalFile format rulesfile "-" = do
   hSetNewlineMode stdin universalNewlineMode
-  getContents >>= readJournal format rulesfile (Just "(stdin)")
+  getContents >>= readJournal format rulesfile (Just "-")
 readJournalFile format rulesfile f = do
   requireJournalFileExists f
   withFile f ReadMode $ \h -> do
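
For illustration, here is roughly how readJournal above can now be called
with CSV data that arrived on stdin.  This assumes the file shown here is
Hledger.Read (as the function names suggest); the CSV text and rules file
name are made up, and a real run needs that rules file to exist:

    import Hledger.Read (readJournal)

    main :: IO ()
    main = do
      let csv = "2014/5/9,coffee,-3.00\n2014/5/10,lunch,-7.50\n"
      -- Just "-" is the path now passed for stdin, so the CSV reader's
      -- comma-sniffing detector claims this data.
      result <- readJournal (Just "csv") (Just "example.rules") (Just "-") csv
      either putStrLn (const $ putStrLn "parsed") result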

View File

@@ -53,15 +53,16 @@ reader = Reader format detect parse
 format :: String
 format = "csv"
 
--- | Does the given file path and data look like CSV ?
+-- | Does the given file path and data look like it might be CSV ?
 detect :: FilePath -> String -> Bool
-detect f _ = takeExtension f == '.':format
+detect f s
+  | f /= "-"  = takeExtension f == '.':format -- from a file: yes if the extension is .csv
+  | otherwise = length (filter (==',') s) >= 2 -- from stdin: yes if there are two or more commas
 
 -- | Parse and post-process a "Journal" from CSV data, or give an error.
 -- XXX currently ignores the string and reads from the file path
 parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
-parse rulesfile f s = -- trace ("running "++format++" reader") $
-  do
+parse rulesfile f s = do
   r <- liftIO $ readJournalFromCsv rulesfile f s
   case r of Left e  -> throwError e
             Right j -> return j
@@ -78,7 +79,7 @@ parse rulesfile f s = do
 -- 5. convert the CSV records to a journal using the rules
 -- @
 readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
-readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
+readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
 readJournalFromCsv mrulesfile csvfile csvdata =
  handle (\e -> return $ Left $ show (e :: IOException)) $ do
   let throwerr = throw.userError
let throwerr = throw.userError let throwerr = throw.userError
@@ -90,7 +91,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
           then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
           else hPrintf stderr "using conversion rules file %s\n" rulesfile
   rules <- either (throwerr.show) id `fmap` parseRulesFile rulesfile
-  return $ dbg "" rules
+  dbgAtM 2 "rules" rules
 
   -- apply skip directive
   let skip = maybe 0 oneorerror $ getDirective "skip" rules
@@ -99,8 +100,13 @@ readJournalFromCsv mrulesfile csvfile csvdata =
         oneorerror s = readDef (throwerr $ "could not parse skip value: " ++ show s) s
 
   -- parse csv
-  records <- (either throwerr id . validateCsv skip) `fmap` parseCsv csvfile csvdata
-  dbgAtM 1 "" $ take 3 records
+  -- parsec seems to fail if you pass it "-" here
+  let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
+  records <- (either throwerr id .
+              dbgAt 2 "validateCsv" . validateCsv skip .
+              dbgAt 2 "parseCsv")
+             `fmap` parseCsv parsecfilename csvdata
+  dbgAtM 1 "first 3 csv records" $ take 3 records
 
   -- identify header lines
   -- let (headerlines, datalines) = identifyHeaderLines records
@@ -378,13 +384,13 @@ rulesp = do
           ,rconditionalblocks=reverse $ rconditionalblocks r
           }
 
-blankorcommentline = pdbg 1 "trying blankorcommentline" >> choice' [blankline, commentline]
+blankorcommentline = pdbg 3 "trying blankorcommentline" >> choice' [blankline, commentline]
 blankline = many spacenonewline >> newline >> return () <?> "blank line"
 commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
 commentchar = oneOf ";#"
 
 directive = do
-  pdbg 1 "trying directive"
+  pdbg 3 "trying directive"
   d <- choice' $ map string directives
   v <- (((char ':' >> many spacenonewline) <|> many1 spacenonewline) >> directiveval)
       <|> (optional (char ':') >> many spacenonewline >> eolof >> return "")
@@ -404,7 +410,7 @@ directives =
 directiveval = anyChar `manyTill` eolof
 
 fieldnamelist = (do
-  pdbg 1 "trying fieldnamelist"
+  pdbg 3 "trying fieldnamelist"
   string "fields"
   optional $ char ':'
   many1 spacenonewline
@@ -426,7 +432,7 @@ quotedfieldname = do
 barefieldname = many1 $ noneOf " \t\n,;#~"
 
 fieldassignment = do
-  pdbg 1 "trying fieldassignment"
+  pdbg 3 "trying fieldassignment"
   f <- journalfieldname
   assignmentseparator
   v <- fieldval
@@ -466,7 +472,7 @@ fieldval = do
   anyChar `manyTill` eolof
 
 conditionalblock = do
-  pdbg 1 "trying conditionalblock"
+  pdbg 3 "trying conditionalblock"
   string "if" >> many spacenonewline >> optional newline
   ms <- many1 recordmatcher
   as <- many (many1 spacenonewline >> fieldassignment)
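
For orientation, the rules grammar these parsers handle (skip and fields
directives, conditional blocks with field assignments) looks roughly like
the following made-up example, shown here as a Haskell string since no
rules file appears in this commit:

    -- A hypothetical CSV rules file, for illustration only.
    exampleRules :: String
    exampleRules = unlines
      [ "skip 1"                           -- skip directive: ignore a header row
      , "fields date, description, amount" -- fieldnamelist: name the CSV columns
      , "if SUPERMARKET"                   -- conditionalblock: a record matcher...
      , " account2 expenses:groceries"     -- ...with an indented field assignment
      ]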

View File

@@ -76,15 +76,17 @@ reader = Reader format detect parse
 format :: String
 format = "journal"
 
--- | Does the given file path and data provide hledger's journal file format ?
+-- | Does the given file path and data look like it might be hledger's journal format ?
 detect :: FilePath -> String -> Bool
-detect f _ = takeExtension f `elem` ['.':format, ".j"]
+detect f s
+  | f /= "-"  = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j
+  -- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented)
+  | otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s
 
 -- | Parse and post-process a "Journal" from hledger's journal file
 -- format, or give an error.
 parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
-parse _ = -- trace ("running "++format++" reader") .
-          parseJournalWith journal
+parse _ = parseJournalWith journal
 
 -- parsing utils

View File

@@ -49,6 +49,7 @@ module Hledger.Read.TimelogReader (
 where
 import Control.Monad
 import Control.Monad.Error
+import Data.List (isPrefixOf)
 import Test.HUnit
 import Text.ParserCombinators.Parsec hiding (parse)
 import System.FilePath
@@ -68,16 +69,17 @@ reader = Reader format detect parse
 format :: String
 format = "timelog"
 
--- | Does the given file path and data provide timeclock.el's timelog format ?
+-- | Does the given file path and data look like it might be timeclock.el's timelog format ?
 detect :: FilePath -> String -> Bool
-detect f _ = takeExtension f == '.':format
+detect f s
+  | f /= "-"  = takeExtension f == '.':format -- from a file: yes if the extension is .timelog
+  | otherwise = "i " `isPrefixOf` s || "o " `isPrefixOf` s -- from stdin: yes if it starts with "i " or "o "
 
 -- | Parse and post-process a "Journal" from timeclock.el's timelog
 -- format, saving the provided file path and the current time, or give an
 -- error.
 parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
-parse _ = -- trace ("running "++format++" reader") .
-          parseJournalWith timelogFile
+parse _ = parseJournalWith timelogFile
 
 timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext)
 timelogFile = do items <- many timelogItem