lib: condition tables in csv rules + tests

This commit is contained in:
Dmitry Astapov 2020-06-18 00:58:49 +01:00 committed by Simon Michael
parent a7bb6b9207
commit 834e9ec104
4 changed files with 285 additions and 24 deletions

View File

@ -42,16 +42,16 @@ where
--- ** imports
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (fail)
import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail)
import Control.Exception (IOException, handle, throw)
import Control.Monad (liftM, unless, when)
import Control.Monad.Except (ExceptT, throwError)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Control.Monad.Trans.Class (lift)
import Data.Char (toLower, isDigit, isSpace, ord)
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
import Data.Bifunctor (first)
import "base-compat-batteries" Data.List.Compat
import qualified Data.List.Split as LS (splitOn)
import Data.Maybe
import Data.Ord
import qualified Data.Set as S
@ -186,6 +186,9 @@ addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
addConditionalBlocks :: [ConditionalBlock] -> CsvRules -> CsvRules
addConditionalBlocks bs r = r{rconditionalblocks=bs++rconditionalblocks r}
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
getDirective directivename = lookup directivename . rdirectives
@ -367,12 +370,15 @@ DIGIT: 0-9
rulesp :: CsvRulesParser CsvRules
rulesp = do
_ <- many $ choiceInState
[blankorcommentlinep <?> "blank or comment line"
,(directivep >>= modify' . addDirective) <?> "directive"
,(fieldnamelistp >>= modify' . setIndexesAndAssignmentsFromList) <?> "field name list"
,(fieldassignmentp >>= modify' . addAssignment) <?> "field assignment"
,(conditionalblockp >>= modify' . addConditionalBlock) <?> "conditional block"
_ <- many $ choice
[blankorcommentlinep <?> "blank or comment line"
,(directivep >>= modify' . addDirective) <?> "directive"
,(fieldnamelistp >>= modify' . setIndexesAndAssignmentsFromList) <?> "field name list"
,(fieldassignmentp >>= modify' . addAssignment) <?> "field assignment"
-- conditionaltablep backtracks because it shares "if" prefix with conditionalblockp and the
-- reverse is there to ensure that conditions are added in the order they listed in the file
,try (conditionaltablep >>= modify' . addConditionalBlocks . reverse) <?> "conditional table"
,(conditionalblockp >>= modify' . addConditionalBlock) <?> "conditional block"
]
eof
r <- get
@ -504,26 +510,60 @@ fieldvalp = do
conditionalblockp :: CsvRulesParser ConditionalBlock
conditionalblockp = do
lift $ dbgparse 8 "trying conditionalblockp"
string "if" >> lift (skipMany spacenonewline) >> optional newline
-- "if\nMATCHER" or "if \nMATCHER" or "if MATCHER"
start <- getOffset
string "if" >> ( (newline >> return Nothing)
<|> (lift (skipSome spacenonewline) >> optional newline))
ms <- some matcherp
as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp)
as <- catMaybes <$>
many (lift (skipSome spacenonewline) >>
choice [ lift eolof >> return Nothing
, fmap Just fieldassignmentp
])
when (null as) $
Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
customFailure $ parseErrorAt start $ "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
return $ CB{cbMatchers=ms, cbAssignments=as}
<?> "conditional block"
-- A conditional table: "if" followed by separator, followed by some field names,
-- followed by many lines, each of which has:
-- one matchers, followed by field assignments (as many as there were fields)
conditionaltablep :: CsvRulesParser [ConditionalBlock]
conditionaltablep = do
lift $ dbgparse 8 "trying conditionaltablep"
start <- getOffset
string "if"
sep <- lift $ satisfy (not.isAlphaNum)
fields <- journalfieldnamep `sepBy1` (char sep)
newline
body <- flip manyTill (lift eolof) $ do
off <- getOffset
m <- matcherp' (char sep >> return ())
vs <- LS.splitOn [sep] <$> lift restofline
if (length vs /= length fields)
then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d\n" (length fields) (length vs)) :: String)
else return (m,vs)
when (null body) $
customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward\n"
return $ flip map body $ \(m,vs) ->
CB{cbMatchers=[m], cbAssignments=zip fields vs}
<?> "conditional table"
-- A single matcher, on one line.
matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher
matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end
matcherp :: CsvRulesParser Matcher
matcherp = try fieldmatcherp <|> recordmatcherp
matcherp = matcherp' (lift eolof)
-- A single whole-record matcher.
-- A pattern on the whole line, not beginning with a csv field reference.
recordmatcherp :: CsvRulesParser Matcher
recordmatcherp = do
lift $ dbgparse 8 "trying matcherp"
recordmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher
recordmatcherp end = do
lift $ dbgparse 8 "trying recordmatcherp"
-- pos <- currentPos
-- _ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline)
r <- regexp
r <- regexp end
-- when (null ps) $
-- Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
return $ RecordMatcher r
@ -533,8 +573,8 @@ recordmatcherp = do
-- (like %date or %1), and a pattern on the rest of the line,
-- optionally space-separated. Eg:
-- %description chez jacques
fieldmatcherp :: CsvRulesParser Matcher
fieldmatcherp = do
fieldmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher
fieldmatcherp end = do
lift $ dbgparse 8 "trying fieldmatcher"
-- An optional fieldname (default: "all")
-- f <- fromMaybe "all" `fmap` (optional $ do
@ -545,7 +585,7 @@ fieldmatcherp = do
-- optional operator.. just ~ (case insensitive infix regex) for now
-- _op <- fromMaybe "~" <$> optional matchoperatorp
lift (skipMany spacenonewline)
r <- regexp
r <- regexp end
return $ FieldMatcher f r
<?> "field matcher"
@ -557,12 +597,12 @@ csvfieldreferencep = do
return $ '%' : quoteIfNeeded f
-- A single regular expression
regexp :: CsvRulesParser RegexpPattern
regexp = do
regexp :: CsvRulesParser () -> CsvRulesParser RegexpPattern
regexp end = do
lift $ dbgparse 8 "trying regexp"
-- notFollowedBy matchoperatorp
c <- lift nonspace
cs <- anySingle `manyTill` lift eolof
cs <- anySingle `manyTill` end
return $ strip $ c:cs
-- -- A match operator, indicating the type of match to perform.

View File

@ -520,7 +520,7 @@ separator TAB
See also: [File Extension](#file-extension).
## `if`
## `if` block
```rules
if MATCHER
@ -590,6 +590,57 @@ banking thru software
comment XXX deductible ? check it
```
## `if` table
```rules
if,CSVFIELDNAME1,CSVFIELDNAME2,...,CSVFIELDNAMEn
MATCHER1,VALUE11,VALUE12,...,VALUE1n
MATCHER2,VALUE21,VALUE22,...,VALUE2n
MATCHER3,VALUE31,VALUE32,...,VALUE3n
<empty line>
```
Conditional tables ("if tables") are a different syntax to specify
field assignments that will be applied only to CSV records which match certain patterns.
MATCHER could be either field or record matcher, as described above. When MATCHER matches,
values from that row would be assigned to the CSV fields named on the `if` line, in the same order.
Therefore `if` table is exactly equivalent to a sequence of of `if` blocks:
```rules
if MATCHER1
CSVFIELDNAME1 VALUE11
CSVFIELDNAME2 VALUE12
...
CSVFIELDNAMEn VALUE1n
if MATCHER2
CSVFIELDNAME1 VALUE21
CSVFIELDNAME2 VALUE22
...
CSVFIELDNAMEn VALUE2n
if MATCHER3
CSVFIELDNAME1 VALUE31
CSVFIELDNAME2 VALUE32
...
CSVFIELDNAMEn VALUE3n
```
Each line starting with MATCHER should contain enough (possibly empty) values for all the listed fields.
Rules would be checked and applied in the order they are listed in the table and, like with `if` blocks, later rules (in the same or another table) or `if` blocks could override the effect of any rule.
Instead of ',' you can use a variety of other non-alphanumeric characters as a separator. First character after `if` is taken to be the separator for the rest of the table. It is the responsibility of the user to ensure that separator does not occur inside MATCHERs and values - there is no way to escape separator.
Example:
```rules
if,account2,comment
atm transaction fee,expenses:business:banking,deductible? check it
%description groceries,expenses:groceries,
2020/01/12.*Plumbing LLC,expenses:house:upkeep,emergency plumbing call-out
```
## `end`

View File

@ -665,6 +665,174 @@ $ ./csvtest.sh
>=0
# 35. tabular rules assigning multiple fields
<
10/2009/09,Flubber Co,50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if,account2,comment
Flubber,acct,cmt
$ ./csvtest.sh
2009-09-10 Flubber Co ; cmt
assets:myacct $50
acct $-50
>=0
# 36. tabular rules assigning multiple fields followed by regular rules
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,150
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if,account2,comment
Flubber,acct,cmt
if
Blubber
account2 acct2
comment cmt2
$ ./csvtest.sh
2009-09-10 Flubber Co ; cmt
assets:myacct $50
acct $-50
2009-09-10 Blubber Co ; cmt2
assets:myacct $150
acct2 $-150
>=0
# 37. tabular rules with empty values
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,150
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if,account2,comment
Flubber,acct,
Blubber,acct2,
$ ./csvtest.sh
2009-09-10 Flubber Co
assets:myacct $50
acct $-50
2009-09-10 Blubber Co
assets:myacct $150
acct2 $-150
>=0
# 38. tabular rules with field matchers and '|' separator
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,150
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if|account2|comment
%description Flubber|acct|
%amount 150|acct2|cmt2
$ ./csvtest.sh
2009-09-10 Flubber Co
assets:myacct $50
acct $-50
2009-09-10 Blubber Co ; cmt2
assets:myacct $150
acct2 $-150
>=0
# 39. Insfficient number of values in tabular rules error
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,150
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if|account2|comment
%amount 150|acct2
%description Flubber|acct|
$ ./csvtest.sh
>2
hledger: user error (input.rules:6:1:
|
6 | %amount 150|acct2
| ^
line of conditional table should have 2 values, but this one has only 1
)
>=1
# 40. unindented condition block error
<
10/2009/09,Flubber Co,50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if Flubber
account2 acct
comment cmt
$ ./csvtest.sh
>2
hledger: user error (input.rules:5:1:
|
5 | if Flubber
| ^
start of conditional block found, but no assignment rules afterward
(assignment rules in a conditional block should be indented)
)
>=1
# 41. Assignment to custom field (#1264) + spaces after the if (#1120)
<
10/2009/09,Flubber Co,50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if Flubber
myaccount2 acct
comment cmt
account2 %myaccount2
$ ./csvtest.sh
>2
hledger: user error (input.rules:6:3:
|
6 | myaccount2 acct
| ^^^^^^^^^^^^
unexpected "myaccount2 a"
expecting end of input, field assignment, or newline
)
>=1
## .
#<
#$ ./csvtest.sh

View File

@ -12,4 +12,6 @@ BEGIN{output=CSV}
trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR
hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@"
# Remove variable file name from error messages
:; ( hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@" ) \
2> >( sed -re "s/t.*.csv/input/" >&2 )