lib: condition tables in csv rules + tests

2024-11-08 07:09:28 +03:00 · 2020-06-18 00:58:49 +01:00 · 2020-06-18 00:58:49 +01:00 · 834e9ec104
commit 834e9ec104
parent a7bb6b9207
4 changed files with 285 additions and 24 deletions
--- a/hledger-lib/Hledger/Read/CsvReader.hs
+++ b/hledger-lib/Hledger/Read/CsvReader.hs
@ -42,16 +42,16 @@ where
 --- ** imports
 import Prelude ()
 import "base-compat-batteries" Prelude.Compat hiding (fail)
-import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail)
 import Control.Exception          (IOException, handle, throw)
 import Control.Monad              (liftM, unless, when)
 import Control.Monad.Except       (ExceptT, throwError)
 import Control.Monad.IO.Class     (MonadIO, liftIO)
 import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
 import Control.Monad.Trans.Class  (lift)
-import Data.Char                  (toLower, isDigit, isSpace, ord)
+import Data.Char                  (toLower, isDigit, isSpace, isAlphaNum, ord)
 import Data.Bifunctor             (first)
 import "base-compat-batteries" Data.List.Compat
+import qualified Data.List.Split as LS (splitOn)
 import Data.Maybe
 import Data.Ord
 import qualified Data.Set as S
@ -186,6 +186,9 @@ addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
 addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
 addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}

+addConditionalBlocks :: [ConditionalBlock] -> CsvRules -> CsvRules
+addConditionalBlocks bs r = r{rconditionalblocks=bs++rconditionalblocks r}
+
 getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
 getDirective directivename = lookup directivename . rdirectives

@ -367,12 +370,15 @@ DIGIT: 0-9

 rulesp :: CsvRulesParser CsvRules
 rulesp = do
-  _ <- many $ choiceInState
-    [blankorcommentlinep                                                <?> "blank or comment line"
-    ,(directivep        >>= modify' . addDirective)                     <?> "directive"
-    ,(fieldnamelistp    >>= modify' . setIndexesAndAssignmentsFromList) <?> "field name list"
-    ,(fieldassignmentp  >>= modify' . addAssignment)                    <?> "field assignment"
-    ,(conditionalblockp >>= modify' . addConditionalBlock)              <?> "conditional block"
+  _ <- many $ choice
+    [blankorcommentlinep                                                  <?> "blank or comment line"
+    ,(directivep        >>= modify' . addDirective)                       <?> "directive"
+    ,(fieldnamelistp    >>= modify' . setIndexesAndAssignmentsFromList)   <?> "field name list"
+    ,(fieldassignmentp  >>= modify' . addAssignment)                      <?> "field assignment"
+    -- conditionaltablep backtracks because it shares "if" prefix with conditionalblockp and the
+    -- reverse is there to ensure that conditions are added in the order they listed in the file
+    ,try (conditionaltablep >>= modify' . addConditionalBlocks . reverse) <?> "conditional table"
+    ,(conditionalblockp >>= modify' . addConditionalBlock)                <?> "conditional block"
    ]
  eof
  r <- get
@ -504,26 +510,60 @@ fieldvalp = do
 conditionalblockp :: CsvRulesParser ConditionalBlock
 conditionalblockp = do
  lift $ dbgparse 8 "trying conditionalblockp"
-  string "if" >> lift (skipMany spacenonewline) >> optional newline
+  -- "if\nMATCHER" or "if    \nMATCHER" or "if MATCHER"
+  start <- getOffset
+  string "if" >> ( (newline >> return Nothing)
+                  <|> (lift (skipSome spacenonewline) >> optional newline))
  ms <- some matcherp
-  as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp)
+  as <- catMaybes <$>
+    many (lift (skipSome spacenonewline) >>
+          choice [ lift eolof >> return Nothing
+                 , fmap Just fieldassignmentp
+                 ])
  when (null as) $
-    Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
+    customFailure $ parseErrorAt start $  "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
  return $ CB{cbMatchers=ms, cbAssignments=as}
  <?> "conditional block"

+-- A conditional table: "if" followed by separator, followed by some field names,
+-- followed by many lines, each of which has:
+-- one matchers, followed by field assignments (as many as there were fields)
+conditionaltablep :: CsvRulesParser [ConditionalBlock]
+conditionaltablep = do
+  lift $ dbgparse 8 "trying conditionaltablep"
+  start <- getOffset
+  string "if" 
+  sep <- lift $ satisfy (not.isAlphaNum)
+  fields <- journalfieldnamep `sepBy1` (char sep)
+  newline
+  body <- flip manyTill (lift eolof) $ do
+    off <- getOffset
+    m <- matcherp' (char sep >> return ())
+    vs <- LS.splitOn [sep] <$> lift restofline
+    if (length vs /= length fields)
+      then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d\n" (length fields) (length vs)) :: String)
+      else return (m,vs)
+  when (null body) $
+    customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward\n"
+  return $ flip map body $ \(m,vs) ->
+    CB{cbMatchers=[m], cbAssignments=zip fields vs}
+  <?> "conditional table"
+
 -- A single matcher, on one line.
+matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher
+matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end
+
 matcherp :: CsvRulesParser Matcher
-matcherp = try fieldmatcherp <|> recordmatcherp
+matcherp = matcherp' (lift eolof)

 -- A single whole-record matcher.
 -- A pattern on the whole line, not beginning with a csv field reference.
-recordmatcherp :: CsvRulesParser Matcher
-recordmatcherp = do
-  lift $ dbgparse 8 "trying matcherp"
+recordmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher
+recordmatcherp end = do
+  lift $ dbgparse 8 "trying recordmatcherp"
  -- pos <- currentPos
  -- _  <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline)
-  r <- regexp
+  r <- regexp end
  -- when (null ps) $
  --   Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
  return $ RecordMatcher r
@ -533,8 +573,8 @@ recordmatcherp = do
 -- (like %date or %1), and a pattern on the rest of the line,
 -- optionally space-separated. Eg:
 -- %description chez jacques
-fieldmatcherp :: CsvRulesParser Matcher
-fieldmatcherp = do
+fieldmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher
+fieldmatcherp end = do
  lift $ dbgparse 8 "trying fieldmatcher"
  -- An optional fieldname (default: "all")
  -- f <- fromMaybe "all" `fmap` (optional $ do
@ -545,7 +585,7 @@ fieldmatcherp = do
  -- optional operator.. just ~ (case insensitive infix regex) for now
  -- _op <- fromMaybe "~" <$> optional matchoperatorp
  lift (skipMany spacenonewline)
-  r <- regexp
+  r <- regexp end
  return $ FieldMatcher f r
  <?> "field matcher"

@ -557,12 +597,12 @@ csvfieldreferencep = do
  return $ '%' : quoteIfNeeded f

 -- A single regular expression
-regexp :: CsvRulesParser RegexpPattern
-regexp = do
+regexp :: CsvRulesParser () -> CsvRulesParser RegexpPattern
+regexp end = do
  lift $ dbgparse 8 "trying regexp"
  -- notFollowedBy matchoperatorp
  c <- lift nonspace
-  cs <- anySingle `manyTill` lift eolof
+  cs <- anySingle `manyTill` end
  return $ strip $ c:cs

 -- -- A match operator, indicating the type of match to perform.
--- a/hledger-lib/hledger_csv.m4.md
+++ b/hledger-lib/hledger_csv.m4.md
@ -520,7 +520,7 @@ separator TAB

 See also: [File Extension](#file-extension).

-## `if`
+## `if` block

 ```rules
 if MATCHER
@ -590,6 +590,57 @@ banking thru software
 comment  XXX deductible ? check it
 ```

+## `if` table
+
+```rules
+if,CSVFIELDNAME1,CSVFIELDNAME2,...,CSVFIELDNAMEn
+MATCHER1,VALUE11,VALUE12,...,VALUE1n
+MATCHER2,VALUE21,VALUE22,...,VALUE2n
+MATCHER3,VALUE31,VALUE32,...,VALUE3n
+<empty line>
+```
+
+Conditional tables ("if tables") are a different syntax to specify
+field assignments that will be applied only to CSV records which match certain patterns.
+
+MATCHER could be either field or record matcher, as described above. When MATCHER matches,
+values from that row would be assigned to the CSV fields named on the `if` line, in the same order.
+
+Therefore `if` table is exactly equivalent to a sequence of of `if` blocks:
+```rules
+if MATCHER1
+  CSVFIELDNAME1 VALUE11
+  CSVFIELDNAME2 VALUE12
+  ...
+  CSVFIELDNAMEn VALUE1n
+
+if MATCHER2
+  CSVFIELDNAME1 VALUE21
+  CSVFIELDNAME2 VALUE22
+  ...
+  CSVFIELDNAMEn VALUE2n
+
+if MATCHER3
+  CSVFIELDNAME1 VALUE31
+  CSVFIELDNAME2 VALUE32
+  ...
+  CSVFIELDNAMEn VALUE3n
+```
+
+Each line starting with MATCHER should contain enough (possibly empty) values for all the listed fields.
+
+Rules would be checked and applied in the order they are listed in the table and, like with `if` blocks, later rules (in the same or another table) or `if` blocks could override the effect of any rule.
+
+Instead of ',' you can use a variety of other non-alphanumeric characters as a separator. First character after `if` is taken to be the separator for the rest of the table. It is the responsibility of the user to ensure that separator does not occur inside MATCHERs and values - there is no way to escape separator.
+
+
+Example:
+```rules
+if,account2,comment
+atm transaction fee,expenses:business:banking,deductible? check it
+%description groceries,expenses:groceries,
+2020/01/12.*Plumbing LLC,expenses:house:upkeep,emergency plumbing call-out
+```

 ## `end`

--- a/tests/csv.test
+++ b/tests/csv.test
@ -665,6 +665,174 @@ $  ./csvtest.sh

 >=0

+# 35. tabular rules assigning multiple fields
+<
+10/2009/09,Flubber Co,50
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if,account2,comment
+Flubber,acct,cmt
+$  ./csvtest.sh
+2009-09-10 Flubber Co  ; cmt
+    assets:myacct             $50
+    acct                     $-50
+
+>=0
+
+# 36. tabular rules assigning multiple fields followed by regular rules
+<
+10/2009/09,Flubber Co,50
+10/2009/09,Blubber Co,150
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if,account2,comment
+Flubber,acct,cmt
+
+if
+Blubber
+  account2   acct2
+  comment     cmt2
+$  ./csvtest.sh
+2009-09-10 Flubber Co  ; cmt
+    assets:myacct             $50
+    acct                     $-50
+
+2009-09-10 Blubber Co  ; cmt2
+    assets:myacct            $150
+    acct2                   $-150
+
+>=0
+
+# 37. tabular rules with empty values
+<
+10/2009/09,Flubber Co,50
+10/2009/09,Blubber Co,150
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if,account2,comment
+Flubber,acct,
+Blubber,acct2,
+$  ./csvtest.sh
+2009-09-10 Flubber Co
+    assets:myacct             $50
+    acct                     $-50
+
+2009-09-10 Blubber Co
+    assets:myacct            $150
+    acct2                   $-150
+
+>=0
+
+# 38. tabular rules with field matchers and '|' separator
+<
+10/2009/09,Flubber Co,50
+10/2009/09,Blubber Co,150
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if|account2|comment
+%description Flubber|acct|
+%amount 150|acct2|cmt2
+$  ./csvtest.sh
+2009-09-10 Flubber Co
+    assets:myacct             $50
+    acct                     $-50
+
+2009-09-10 Blubber Co  ; cmt2
+    assets:myacct            $150
+    acct2                   $-150
+
+>=0
+
+# 39. Insfficient number of values in tabular rules error
+<
+10/2009/09,Flubber Co,50
+10/2009/09,Blubber Co,150
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if|account2|comment
+%amount 150|acct2
+%description Flubber|acct|
+$  ./csvtest.sh
+>2
+hledger: user error (input.rules:6:1:
+  |
+6 | %amount 150|acct2
+  | ^
+line of conditional table should have 2 values, but this one has only 1
+
+)
+>=1
+
+# 40. unindented condition block error
+<
+10/2009/09,Flubber Co,50
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if Flubber
+account2 acct
+comment cmt
+$  ./csvtest.sh
+>2
+hledger: user error (input.rules:5:1:
+  |
+5 | if Flubber
+  | ^
+start of conditional block found, but no assignment rules afterward
+(assignment rules in a conditional block should be indented)
+
+)
+>=1
+
+# 41. Assignment to custom field (#1264) + spaces after the if (#1120)
+<
+10/2009/09,Flubber Co,50
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if Flubber
+  myaccount2 acct
+  comment cmt
+
+
+account2 %myaccount2
+$  ./csvtest.sh
+>2
+hledger: user error (input.rules:6:3:
+  |
+6 |   myaccount2 acct
+  |   ^^^^^^^^^^^^
+unexpected "myaccount2 a"
+expecting end of input, field assignment, or newline
+)
+>=1
+
 ## . 
 #<
 #$  ./csvtest.sh
--- a/tests/csvtest.sh
+++ b/tests/csvtest.sh
@ -12,4 +12,6 @@ BEGIN{output=CSV}

 trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR

-hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@"
+# Remove variable file name from error messages
+:; ( hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@" ) \
+       2> >( sed -re "s/t.*.csv/input/" >&2 )