From 834e9ec1045146a7207215d411dc23dcdeb70d20 Mon Sep 17 00:00:00 2001 From: Dmitry Astapov Date: Thu, 18 Jun 2020 00:58:49 +0100 Subject: [PATCH] lib: condition tables in csv rules + tests --- hledger-lib/Hledger/Read/CsvReader.hs | 84 +++++++++---- hledger-lib/hledger_csv.m4.md | 53 +++++++- tests/csv.test | 168 ++++++++++++++++++++++++++ tests/csvtest.sh | 4 +- 4 files changed, 285 insertions(+), 24 deletions(-) diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 0fca1422f..3cc0e5004 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -42,16 +42,16 @@ where --- ** imports import Prelude () import "base-compat-batteries" Prelude.Compat hiding (fail) -import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail) import Control.Exception (IOException, handle, throw) import Control.Monad (liftM, unless, when) import Control.Monad.Except (ExceptT, throwError) import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.State.Strict (StateT, get, modify', evalStateT) import Control.Monad.Trans.Class (lift) -import Data.Char (toLower, isDigit, isSpace, ord) +import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord) import Data.Bifunctor (first) import "base-compat-batteries" Data.List.Compat +import qualified Data.List.Split as LS (splitOn) import Data.Maybe import Data.Ord import qualified Data.Set as S @@ -186,6 +186,9 @@ addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r} +addConditionalBlocks :: [ConditionalBlock] -> CsvRules -> CsvRules +addConditionalBlocks bs r = r{rconditionalblocks=bs++rconditionalblocks r} + getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate getDirective directivename = lookup directivename . rdirectives @@ -367,12 +370,15 @@ DIGIT: 0-9 rulesp :: CsvRulesParser CsvRules rulesp = do - _ <- many $ choiceInState - [blankorcommentlinep "blank or comment line" - ,(directivep >>= modify' . addDirective) "directive" - ,(fieldnamelistp >>= modify' . setIndexesAndAssignmentsFromList) "field name list" - ,(fieldassignmentp >>= modify' . addAssignment) "field assignment" - ,(conditionalblockp >>= modify' . addConditionalBlock) "conditional block" + _ <- many $ choice + [blankorcommentlinep "blank or comment line" + ,(directivep >>= modify' . addDirective) "directive" + ,(fieldnamelistp >>= modify' . setIndexesAndAssignmentsFromList) "field name list" + ,(fieldassignmentp >>= modify' . addAssignment) "field assignment" + -- conditionaltablep backtracks because it shares "if" prefix with conditionalblockp and the + -- reverse is there to ensure that conditions are added in the order they listed in the file + ,try (conditionaltablep >>= modify' . addConditionalBlocks . reverse) "conditional table" + ,(conditionalblockp >>= modify' . addConditionalBlock) "conditional block" ] eof r <- get @@ -504,26 +510,60 @@ fieldvalp = do conditionalblockp :: CsvRulesParser ConditionalBlock conditionalblockp = do lift $ dbgparse 8 "trying conditionalblockp" - string "if" >> lift (skipMany spacenonewline) >> optional newline + -- "if\nMATCHER" or "if \nMATCHER" or "if MATCHER" + start <- getOffset + string "if" >> ( (newline >> return Nothing) + <|> (lift (skipSome spacenonewline) >> optional newline)) ms <- some matcherp - as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp) + as <- catMaybes <$> + many (lift (skipSome spacenonewline) >> + choice [ lift eolof >> return Nothing + , fmap Just fieldassignmentp + ]) when (null as) $ - Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n" + customFailure $ parseErrorAt start $ "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n" return $ CB{cbMatchers=ms, cbAssignments=as} "conditional block" +-- A conditional table: "if" followed by separator, followed by some field names, +-- followed by many lines, each of which has: +-- one matchers, followed by field assignments (as many as there were fields) +conditionaltablep :: CsvRulesParser [ConditionalBlock] +conditionaltablep = do + lift $ dbgparse 8 "trying conditionaltablep" + start <- getOffset + string "if" + sep <- lift $ satisfy (not.isAlphaNum) + fields <- journalfieldnamep `sepBy1` (char sep) + newline + body <- flip manyTill (lift eolof) $ do + off <- getOffset + m <- matcherp' (char sep >> return ()) + vs <- LS.splitOn [sep] <$> lift restofline + if (length vs /= length fields) + then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d\n" (length fields) (length vs)) :: String) + else return (m,vs) + when (null body) $ + customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward\n" + return $ flip map body $ \(m,vs) -> + CB{cbMatchers=[m], cbAssignments=zip fields vs} + "conditional table" + -- A single matcher, on one line. +matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher +matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end + matcherp :: CsvRulesParser Matcher -matcherp = try fieldmatcherp <|> recordmatcherp +matcherp = matcherp' (lift eolof) -- A single whole-record matcher. -- A pattern on the whole line, not beginning with a csv field reference. -recordmatcherp :: CsvRulesParser Matcher -recordmatcherp = do - lift $ dbgparse 8 "trying matcherp" +recordmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher +recordmatcherp end = do + lift $ dbgparse 8 "trying recordmatcherp" -- pos <- currentPos -- _ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline) - r <- regexp + r <- regexp end -- when (null ps) $ -- Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n" return $ RecordMatcher r @@ -533,8 +573,8 @@ recordmatcherp = do -- (like %date or %1), and a pattern on the rest of the line, -- optionally space-separated. Eg: -- %description chez jacques -fieldmatcherp :: CsvRulesParser Matcher -fieldmatcherp = do +fieldmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher +fieldmatcherp end = do lift $ dbgparse 8 "trying fieldmatcher" -- An optional fieldname (default: "all") -- f <- fromMaybe "all" `fmap` (optional $ do @@ -545,7 +585,7 @@ fieldmatcherp = do -- optional operator.. just ~ (case insensitive infix regex) for now -- _op <- fromMaybe "~" <$> optional matchoperatorp lift (skipMany spacenonewline) - r <- regexp + r <- regexp end return $ FieldMatcher f r "field matcher" @@ -557,12 +597,12 @@ csvfieldreferencep = do return $ '%' : quoteIfNeeded f -- A single regular expression -regexp :: CsvRulesParser RegexpPattern -regexp = do +regexp :: CsvRulesParser () -> CsvRulesParser RegexpPattern +regexp end = do lift $ dbgparse 8 "trying regexp" -- notFollowedBy matchoperatorp c <- lift nonspace - cs <- anySingle `manyTill` lift eolof + cs <- anySingle `manyTill` end return $ strip $ c:cs -- -- A match operator, indicating the type of match to perform. diff --git a/hledger-lib/hledger_csv.m4.md b/hledger-lib/hledger_csv.m4.md index e37cbcc42..4a728c05a 100644 --- a/hledger-lib/hledger_csv.m4.md +++ b/hledger-lib/hledger_csv.m4.md @@ -520,7 +520,7 @@ separator TAB See also: [File Extension](#file-extension). -## `if` +## `if` block ```rules if MATCHER @@ -590,6 +590,57 @@ banking thru software comment XXX deductible ? check it ``` +## `if` table + +```rules +if,CSVFIELDNAME1,CSVFIELDNAME2,...,CSVFIELDNAMEn +MATCHER1,VALUE11,VALUE12,...,VALUE1n +MATCHER2,VALUE21,VALUE22,...,VALUE2n +MATCHER3,VALUE31,VALUE32,...,VALUE3n + +``` + +Conditional tables ("if tables") are a different syntax to specify +field assignments that will be applied only to CSV records which match certain patterns. + +MATCHER could be either field or record matcher, as described above. When MATCHER matches, +values from that row would be assigned to the CSV fields named on the `if` line, in the same order. + +Therefore `if` table is exactly equivalent to a sequence of of `if` blocks: +```rules +if MATCHER1 + CSVFIELDNAME1 VALUE11 + CSVFIELDNAME2 VALUE12 + ... + CSVFIELDNAMEn VALUE1n + +if MATCHER2 + CSVFIELDNAME1 VALUE21 + CSVFIELDNAME2 VALUE22 + ... + CSVFIELDNAMEn VALUE2n + +if MATCHER3 + CSVFIELDNAME1 VALUE31 + CSVFIELDNAME2 VALUE32 + ... + CSVFIELDNAMEn VALUE3n +``` + +Each line starting with MATCHER should contain enough (possibly empty) values for all the listed fields. + +Rules would be checked and applied in the order they are listed in the table and, like with `if` blocks, later rules (in the same or another table) or `if` blocks could override the effect of any rule. + +Instead of ',' you can use a variety of other non-alphanumeric characters as a separator. First character after `if` is taken to be the separator for the rest of the table. It is the responsibility of the user to ensure that separator does not occur inside MATCHERs and values - there is no way to escape separator. + + +Example: +```rules +if,account2,comment +atm transaction fee,expenses:business:banking,deductible? check it +%description groceries,expenses:groceries, +2020/01/12.*Plumbing LLC,expenses:house:upkeep,emergency plumbing call-out +``` ## `end` diff --git a/tests/csv.test b/tests/csv.test index ecbdc28d7..d937a06ea 100644 --- a/tests/csv.test +++ b/tests/csv.test @@ -665,6 +665,174 @@ $ ./csvtest.sh >=0 +# 35. tabular rules assigning multiple fields +< +10/2009/09,Flubber Co,50 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if,account2,comment +Flubber,acct,cmt +$ ./csvtest.sh +2009-09-10 Flubber Co ; cmt + assets:myacct $50 + acct $-50 + +>=0 + +# 36. tabular rules assigning multiple fields followed by regular rules +< +10/2009/09,Flubber Co,50 +10/2009/09,Blubber Co,150 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if,account2,comment +Flubber,acct,cmt + +if +Blubber + account2 acct2 + comment cmt2 +$ ./csvtest.sh +2009-09-10 Flubber Co ; cmt + assets:myacct $50 + acct $-50 + +2009-09-10 Blubber Co ; cmt2 + assets:myacct $150 + acct2 $-150 + +>=0 + +# 37. tabular rules with empty values +< +10/2009/09,Flubber Co,50 +10/2009/09,Blubber Co,150 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if,account2,comment +Flubber,acct, +Blubber,acct2, +$ ./csvtest.sh +2009-09-10 Flubber Co + assets:myacct $50 + acct $-50 + +2009-09-10 Blubber Co + assets:myacct $150 + acct2 $-150 + +>=0 + +# 38. tabular rules with field matchers and '|' separator +< +10/2009/09,Flubber Co,50 +10/2009/09,Blubber Co,150 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if|account2|comment +%description Flubber|acct| +%amount 150|acct2|cmt2 +$ ./csvtest.sh +2009-09-10 Flubber Co + assets:myacct $50 + acct $-50 + +2009-09-10 Blubber Co ; cmt2 + assets:myacct $150 + acct2 $-150 + +>=0 + +# 39. Insfficient number of values in tabular rules error +< +10/2009/09,Flubber Co,50 +10/2009/09,Blubber Co,150 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if|account2|comment +%amount 150|acct2 +%description Flubber|acct| +$ ./csvtest.sh +>2 +hledger: user error (input.rules:6:1: + | +6 | %amount 150|acct2 + | ^ +line of conditional table should have 2 values, but this one has only 1 + +) +>=1 + +# 40. unindented condition block error +< +10/2009/09,Flubber Co,50 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if Flubber +account2 acct +comment cmt +$ ./csvtest.sh +>2 +hledger: user error (input.rules:5:1: + | +5 | if Flubber + | ^ +start of conditional block found, but no assignment rules afterward +(assignment rules in a conditional block should be indented) + +) +>=1 + +# 41. Assignment to custom field (#1264) + spaces after the if (#1120) +< +10/2009/09,Flubber Co,50 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if Flubber + myaccount2 acct + comment cmt + + +account2 %myaccount2 +$ ./csvtest.sh +>2 +hledger: user error (input.rules:6:3: + | +6 | myaccount2 acct + | ^^^^^^^^^^^^ +unexpected "myaccount2 a" +expecting end of input, field assignment, or newline +) +>=1 + ## . #< #$ ./csvtest.sh diff --git a/tests/csvtest.sh b/tests/csvtest.sh index 0804e8e4a..08169d1df 100755 --- a/tests/csvtest.sh +++ b/tests/csvtest.sh @@ -12,4 +12,6 @@ BEGIN{output=CSV} trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR -hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@" +# Remove variable file name from error messages +:; ( hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@" ) \ + 2> >( sed -re "s/t.*.csv/input/" >&2 )