csv and general reader fixes, cleanups

- The CSV reader no longer writes a "(stdin).rules" file when reading
  from stdin.

- Selection of reader(s) is now smarter when input is coming from stdin.
  Previously, all readers were considered applicable for stdin.  This
  meant that when reading a CSV file from stdin, the journal and timelog
  readers were always tried first, and if the CSV file was unparseable,
  you'd see the first (journal) reader's error instead of the CSV
  reader's.  Now, the readers do some basic content sniffing when
  reading stdin, so it generally tries only the one right reader and
  we'll see the right errors.

- The read system now has more debug output.
This commit is contained in:
Simon Michael 2014-05-09 17:55:32 -07:00
parent 6871c4cce5
commit cf3d21afef
5 changed files with 36 additions and 26 deletions

View File

@ -182,7 +182,7 @@ data Reader = Reader {
,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
}
instance Show Reader where show r = "Reader for "++rFormat r
instance Show Reader where show r = rFormat r ++ " reader"
-- format strings

View File

@ -116,7 +116,6 @@ tests_readJournal' = [
-- A CSV conversion rules file may also be specified for use by the CSV reader.
readJournal :: Maybe StorageFormat -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal)
readJournal format rulesfile path s =
-- trace (show (format, rulesfile, path)) $
tryReaders $ readersFor (format, path, s)
where
-- try each reader in turn, returning the error of the first if all fail
@ -126,8 +125,9 @@ readJournal format rulesfile path s =
firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal)
firstSuccessOrBestError [] [] = return $ Left "no readers found"
firstSuccessOrBestError errs (r:rs) = do
-- printf "trying %s reader\n" (rFormat r)
dbgAtM 1 "trying reader" (rFormat r)
result <- (runErrorT . (rParser r) rulesfile path') s
dbgAtM 1 "reader result" $ either id show result
case result of Right j -> return $ Right j -- success!
Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying
firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error
@ -136,11 +136,11 @@ readJournal format rulesfile path s =
-- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ?
readersFor :: (Maybe StorageFormat, Maybe FilePath, String) -> [Reader]
readersFor (format,path,s) =
dbg ("possible readers for "++show (format,path,elideRight 30 s)) $
case format of
Just f -> case readerForStorageFormat f of Just r -> [r]
Nothing -> []
Nothing -> case path of Nothing -> readers
Just "-" -> readers
Just p -> case readersForPathAndData (p,s) of [] -> readers
rs -> rs
@ -162,7 +162,7 @@ readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> FilePath -> IO (Either String Journal)
readJournalFile format rulesfile "-" = do
hSetNewlineMode stdin universalNewlineMode
getContents >>= readJournal format rulesfile (Just "(stdin)")
getContents >>= readJournal format rulesfile (Just "-")
readJournalFile format rulesfile f = do
requireJournalFileExists f
withFile f ReadMode $ \h -> do

View File

@ -53,15 +53,16 @@ reader = Reader format detect parse
format :: String
format = "csv"
-- | Does the given file path and data look like CSV ?
-- | Does the given file path and data look like it might be CSV ?
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == '.':format
detect f s
| f /= "-" = takeExtension f == '.':format -- from a file: yes if the extension is .csv
| otherwise = length (filter (==',') s) >= 2 -- from stdin: yes if there are two or more commas
-- | Parse and post-process a "Journal" from CSV data, or give an error.
-- XXX currently ignores the string and reads from the file path
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse rulesfile f s = -- trace ("running "++format++" reader") $
do
parse rulesfile f s = do
r <- liftIO $ readJournalFromCsv rulesfile f s
case r of Left e -> throwError e
Right j -> return j
@ -78,7 +79,7 @@ parse rulesfile f s = -- trace ("running "++format++" reader") $
-- 5. convert the CSV records to a journal using the rules
-- @
readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
handle (\e -> return $ Left $ show (e :: IOException)) $ do
let throwerr = throw.userError
@ -90,7 +91,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
else hPrintf stderr "using conversion rules file %s\n" rulesfile
rules <- either (throwerr.show) id `fmap` parseRulesFile rulesfile
return $ dbg "" rules
dbgAtM 2 "rules" rules
-- apply skip directive
let skip = maybe 0 oneorerror $ getDirective "skip" rules
@ -99,8 +100,13 @@ readJournalFromCsv mrulesfile csvfile csvdata =
oneorerror s = readDef (throwerr $ "could not parse skip value: " ++ show s) s
-- parse csv
records <- (either throwerr id . validateCsv skip) `fmap` parseCsv csvfile csvdata
dbgAtM 1 "" $ take 3 records
-- parsec seems to fail if you pass it "-" here
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
records <- (either throwerr id .
dbgAt 2 "validateCsv" . validateCsv skip .
dbgAt 2 "parseCsv")
`fmap` parseCsv parsecfilename csvdata
dbgAtM 1 "first 3 csv records" $ take 3 records
-- identify header lines
-- let (headerlines, datalines) = identifyHeaderLines records
@ -378,13 +384,13 @@ rulesp = do
,rconditionalblocks=reverse $ rconditionalblocks r
}
blankorcommentline = pdbg 1 "trying blankorcommentline" >> choice' [blankline, commentline]
blankorcommentline = pdbg 3 "trying blankorcommentline" >> choice' [blankline, commentline]
blankline = many spacenonewline >> newline >> return () <?> "blank line"
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
commentchar = oneOf ";#"
directive = do
pdbg 1 "trying directive"
pdbg 3 "trying directive"
d <- choice' $ map string directives
v <- (((char ':' >> many spacenonewline) <|> many1 spacenonewline) >> directiveval)
<|> (optional (char ':') >> many spacenonewline >> eolof >> return "")
@ -404,7 +410,7 @@ directives =
directiveval = anyChar `manyTill` eolof
fieldnamelist = (do
pdbg 1 "trying fieldnamelist"
pdbg 3 "trying fieldnamelist"
string "fields"
optional $ char ':'
many1 spacenonewline
@ -426,7 +432,7 @@ quotedfieldname = do
barefieldname = many1 $ noneOf " \t\n,;#~"
fieldassignment = do
pdbg 1 "trying fieldassignment"
pdbg 3 "trying fieldassignment"
f <- journalfieldname
assignmentseparator
v <- fieldval
@ -466,7 +472,7 @@ fieldval = do
anyChar `manyTill` eolof
conditionalblock = do
pdbg 1 "trying conditionalblock"
pdbg 3 "trying conditionalblock"
string "if" >> many spacenonewline >> optional newline
ms <- many1 recordmatcher
as <- many (many1 spacenonewline >> fieldassignment)

View File

@ -76,15 +76,17 @@ reader = Reader format detect parse
format :: String
format = "journal"
-- | Does the given file path and data provide hledger's journal file format ?
-- | Does the given file path and data look like it might be hledger's journal format ?
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f `elem` ['.':format, ".j"]
detect f s
| f /= "-" = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j
-- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented)
| otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s
-- | Parse and post-process a "Journal" from hledger's journal file
-- format, or give an error.
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse _ = -- trace ("running "++format++" reader") .
parseJournalWith journal
parse _ = parseJournalWith journal
-- parsing utils

View File

@ -49,6 +49,7 @@ module Hledger.Read.TimelogReader (
where
import Control.Monad
import Control.Monad.Error
import Data.List (isPrefixOf)
import Test.HUnit
import Text.ParserCombinators.Parsec hiding (parse)
import System.FilePath
@ -68,16 +69,17 @@ reader = Reader format detect parse
format :: String
format = "timelog"
-- | Does the given file path and data provide timeclock.el's timelog format ?
-- | Does the given file path and data look like it might be timeclock.el's timelog format ?
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == '.':format
detect f s
| f /= "-" = takeExtension f == '.':format -- from a file: yes if the extension is .timelog
| otherwise = "i " `isPrefixOf` s || "o " `isPrefixOf` s -- from stdin: yes if it starts with "i " or "o "
-- | Parse and post-process a "Journal" from timeclock.el's timelog
-- format, saving the provided file path and the current time, or give an
-- error.
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse _ = -- trace ("running "++format++" reader") .
parseJournalWith timelogFile
parse _ = parseJournalWith timelogFile
timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext)
timelogFile = do items <- many timelogItem