mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-27 12:24:43 +03:00
fix: csv: skip header lines before attempting to parse records (#1967)
This commit is contained in:
parent
71bd102b4b
commit
a9b63bb694
@ -699,12 +699,6 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
|||||||
rules <- liftEither $ parseAndValidateCsvRules rulesfile rulestext
|
rules <- liftEither $ parseAndValidateCsvRules rulesfile rulestext
|
||||||
dbg6IO "csv rules" rules
|
dbg6IO "csv rules" rules
|
||||||
|
|
||||||
-- parse the skip directive's value, if any
|
|
||||||
skiplines <- case getDirective "skip" rules of
|
|
||||||
Nothing -> return 0
|
|
||||||
Just "" -> return 1
|
|
||||||
Just s -> maybe (throwError $ "could not parse skip value: " ++ show s) return . readMay $ T.unpack s
|
|
||||||
|
|
||||||
mtzin <- case getDirective "timezone" rules of
|
mtzin <- case getDirective "timezone" rules of
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just s ->
|
Just s ->
|
||||||
@ -712,6 +706,13 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
|||||||
parseTimeM False defaultTimeLocale "%Z" $ T.unpack s
|
parseTimeM False defaultTimeLocale "%Z" $ T.unpack s
|
||||||
tzout <- liftIO getCurrentTimeZone
|
tzout <- liftIO getCurrentTimeZone
|
||||||
|
|
||||||
|
-- skip header lines, if there is a top-level skip rule
|
||||||
|
skiplines <- case getDirective "skip" rules of
|
||||||
|
Nothing -> return 0
|
||||||
|
Just "" -> return 1
|
||||||
|
Just s -> maybe (throwError $ "could not parse skip value: " ++ show s) return . readMay $ T.unpack s
|
||||||
|
let csvdata' = T.unlines $ drop skiplines $ T.lines csvdata
|
||||||
|
|
||||||
-- parse csv
|
-- parse csv
|
||||||
let
|
let
|
||||||
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
||||||
@ -725,8 +726,8 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
|||||||
where
|
where
|
||||||
ext = map toLower $ drop 1 $ takeExtension csvfile
|
ext = map toLower $ drop 1 $ takeExtension csvfile
|
||||||
dbg6IO "using separator" separator
|
dbg6IO "using separator" separator
|
||||||
csv <- dbg7 "parseCsv" <$> parseCsv separator parsecfilename csvdata
|
csv <- dbg7 "parseCsv" <$> parseCsv separator parsecfilename csvdata'
|
||||||
records <- liftEither $ dbg7 "validateCsv" <$> validateCsv rules skiplines csv
|
records <- liftEither $ dbg7 "validateCsv" <$> validateCsv rules csv
|
||||||
dbg6IO "first 3 csv records" $ take 3 records
|
dbg6IO "first 3 csv records" $ take 3 records
|
||||||
|
|
||||||
-- identify header lines
|
-- identify header lines
|
||||||
@ -818,8 +819,8 @@ printCSV = TB.toLazyText . unlinesB . map printRecord
|
|||||||
printField = wrap "\"" "\"" . T.replace "\"" "\"\""
|
printField = wrap "\"" "\"" . T.replace "\"" "\"\""
|
||||||
|
|
||||||
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
|
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
|
||||||
validateCsv :: CsvRules -> Int -> CSV -> Either String [CsvRecord]
|
validateCsv :: CsvRules -> CSV -> Either String [CsvRecord]
|
||||||
validateCsv rules numhdrlines = validate . applyConditionalSkips . drop numhdrlines . filternulls
|
validateCsv rules = validate . applyConditionalSkips . filternulls
|
||||||
where
|
where
|
||||||
filternulls = filter (/=[""])
|
filternulls = filter (/=[""])
|
||||||
skipnum r =
|
skipnum r =
|
||||||
|
@ -1033,8 +1033,7 @@ $ ./csvtest.sh
|
|||||||
<
|
<
|
||||||
"2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5"
|
"2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5"
|
||||||
|
|
||||||
RULES file
|
RULES
|
||||||
|
|
||||||
account1 assets:bank:checking
|
account1 assets:bank:checking
|
||||||
fields date, description, account2, amount
|
fields date, description, account2, amount
|
||||||
|
|
||||||
@ -1042,6 +1041,26 @@ $ ./csvtest.sh
|
|||||||
>2 /transaction is unbalanced/
|
>2 /transaction is unbalanced/
|
||||||
>=1
|
>=1
|
||||||
|
|
||||||
|
# 52. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
|
||||||
|
<
|
||||||
|
2022-01-01,B"B",C
|
||||||
|
RULES
|
||||||
|
fields date, b, c
|
||||||
|
$ ./csvtest.sh
|
||||||
|
>2 /unexpected '"'/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# 53. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
|
||||||
|
<
|
||||||
|
2022-01-01,B"B",C
|
||||||
|
RULES
|
||||||
|
skip 1
|
||||||
|
fields date, b, c
|
||||||
|
$ ./csvtest.sh
|
||||||
|
>=
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## .
|
## .
|
||||||
#<
|
#<
|
||||||
#$ ./csvtest.sh
|
#$ ./csvtest.sh
|
||||||
|
Loading…
Reference in New Issue
Block a user