2020-03-02 09:00:39 +03:00
|
|
|
--- * -*- eval: (orgstruct-mode 1); orgstruct-heading-prefix-regexp:"--- "; -*-
|
|
|
|
--- ** doc
|
2020-02-28 10:28:33 +03:00
|
|
|
-- In Emacs, use TAB on lines beginning with "-- *" to collapse/expand sections.
|
2012-03-11 01:55:48 +04:00
|
|
|
{-|
|
|
|
|
|
2013-03-30 02:56:55 +04:00
|
|
|
A reader for CSV data, using an extra rules file to help interpret the data.
|
2012-03-11 01:55:48 +04:00
|
|
|
|
|
|
|
-}
|
2020-02-26 04:54:16 +03:00
|
|
|
-- Lots of haddocks in this file are for non-exported types.
|
|
|
|
-- Here's a command that will render them:
|
|
|
|
-- stack haddock hledger-lib --fast --no-haddock-deps --haddock-arguments='--ignore-all-exports' --open
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** language
|
2017-07-05 18:04:48 +03:00
|
|
|
{-# LANGUAGE CPP #-}
|
2014-11-03 08:52:12 +03:00
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
2016-09-25 22:56:28 +03:00
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
2020-02-26 04:54:16 +03:00
|
|
|
{-# LANGUAGE MultiWayIf #-}
|
|
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
2020-02-28 10:28:33 +03:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
{-# LANGUAGE PackageImports #-}
|
2020-02-12 06:04:03 +03:00
|
|
|
{-# LANGUAGE RecordWildCards #-}
|
2020-02-28 10:28:33 +03:00
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
{-# LANGUAGE TypeFamilies #-}
|
|
|
|
{-# LANGUAGE TypeSynonymInstances #-}
|
|
|
|
{-# LANGUAGE ViewPatterns #-}
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** exports
|
2012-03-11 01:55:48 +04:00
|
|
|
module Hledger.Read.CsvReader (
|
2012-03-24 22:08:11 +04:00
|
|
|
-- * Reader
|
2012-03-23 20:21:41 +04:00
|
|
|
reader,
|
2012-05-30 01:00:49 +04:00
|
|
|
-- * Misc.
|
2020-02-28 10:28:33 +03:00
|
|
|
CSV, CsvRecord, CsvValue,
|
|
|
|
csvFileFor,
|
2012-05-30 01:00:49 +04:00
|
|
|
rulesFileFor,
|
2013-03-30 02:56:55 +04:00
|
|
|
parseRulesFile,
|
2018-09-07 20:12:13 +03:00
|
|
|
printCSV,
|
2012-03-24 22:08:11 +04:00
|
|
|
-- * Tests
|
2018-09-06 23:08:26 +03:00
|
|
|
tests_CsvReader,
|
2012-03-11 01:55:48 +04:00
|
|
|
)
|
|
|
|
where
|
2020-02-28 10:28:33 +03:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** imports
|
2019-12-01 20:31:36 +03:00
|
|
|
import Prelude ()
|
|
|
|
import "base-compat-batteries" Prelude.Compat hiding (fail)
|
lib, cli, ui: start using Control.Monad.Fail, allow base-compat 0.11
fail is moving out of Monad and into it's own MonadFail class.
This will be enforced in GHC 8.8 (I think).
base-compat/base-compat-batteries 0.11.0 have adapted to this,
and are approaching stackage nightly
(https://github.com/commercialhaskell/stackage/issues/4802).
hledger is now ready to build with base-compat-batteries 0.11.0, once
all of our deps do (eg aeson). We are still compatible with the older
0.10.x and GHC 7.10.3 as well.
For now we are using both fails:
- new fail (from Control.Monad.Fail), used in our parsers, imported
via base-compat-batteries Control.Monad.Fail.Compat to work with
older GHC versions.
- old fail (from GHC.Base, exported by Prelude, Control.Monad,
Control.Monad.State.Strict, Prelude.Compat, ...), used in easytest's
Test, since I couldn't find their existing fail implementation to update.
To reduce (my) confusion, these are imported carefully, consistently,
and qualified everywhere as Fail.fail and Prelude.fail, with clashing
re-exports suppressed, like so:
import Prelude hiding (fail)
import qualified Prelude (fail)
import Control.Monad.State.Strict hiding (fail)
import "base-compat-batteries" Prelude.Compat hiding (fail)
import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail
2019-09-09 03:13:47 +03:00
|
|
|
import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail)
|
2019-09-14 04:11:40 +03:00
|
|
|
import Control.Exception (IOException, handle, throw)
|
|
|
|
import Control.Monad (liftM, unless, when)
|
2019-09-14 12:04:00 +03:00
|
|
|
import Control.Monad.Except (ExceptT, throwError)
|
2020-03-01 21:16:52 +03:00
|
|
|
import Control.Monad.IO.Class (MonadIO, liftIO)
|
2016-07-29 19:55:02 +03:00
|
|
|
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
|
2019-09-14 04:11:40 +03:00
|
|
|
import Control.Monad.Trans.Class (lift)
|
|
|
|
import Data.Char (toLower, isDigit, isSpace, ord)
|
2019-09-14 12:04:00 +03:00
|
|
|
import Data.Bifunctor (first)
|
2018-06-05 02:28:28 +03:00
|
|
|
import "base-compat-batteries" Data.List.Compat
|
2012-03-11 01:55:48 +04:00
|
|
|
import Data.Maybe
|
|
|
|
import Data.Ord
|
2018-05-22 01:47:56 +03:00
|
|
|
import qualified Data.Set as S
|
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
|
|
|
import Data.Text (Text)
|
lib: textification begins! account names
The first of several conversions from String to (strict) Text, hopefully
reducing space and time usage.
This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1:
hledger -f data/100x100x10.journal stats
string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>>
text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>>
hledger -f data/1000x100x10.journal stats
string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>>
text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>>
hledger -f data/10000x100x10.journal stats
string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>>
text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>>
hledger -f data/100000x100x10.journal stats
string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>>
text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
|
|
|
import qualified Data.Text as T
|
2018-09-07 20:12:13 +03:00
|
|
|
import qualified Data.Text.Encoding as T
|
2017-07-05 18:04:48 +03:00
|
|
|
import qualified Data.Text.IO as T
|
2013-03-30 02:56:55 +04:00
|
|
|
import Data.Time.Calendar (Day)
|
2015-03-28 01:42:32 +03:00
|
|
|
#if MIN_VERSION_time(1,5,0)
|
2015-03-30 02:12:54 +03:00
|
|
|
import Data.Time.Format (parseTimeM, defaultTimeLocale)
|
2015-03-28 01:42:32 +03:00
|
|
|
#else
|
2015-03-30 02:12:54 +03:00
|
|
|
import Data.Time.Format (parseTime)
|
2015-03-28 01:42:32 +03:00
|
|
|
import System.Locale (defaultTimeLocale)
|
|
|
|
#endif
|
2012-03-11 01:55:48 +04:00
|
|
|
import Safe
|
|
|
|
import System.Directory (doesFileExist)
|
2012-03-24 22:08:11 +04:00
|
|
|
import System.FilePath
|
2018-09-07 20:12:13 +03:00
|
|
|
import qualified Data.Csv as Cassava
|
|
|
|
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
|
|
|
|
import qualified Data.ByteString as B
|
2018-09-30 04:32:08 +03:00
|
|
|
import qualified Data.ByteString.Lazy as BL
|
2018-09-07 20:12:13 +03:00
|
|
|
import Data.Foldable
|
2018-05-22 01:47:56 +03:00
|
|
|
import Text.Megaparsec hiding (parse)
|
|
|
|
import Text.Megaparsec.Char
|
2018-09-30 04:32:08 +03:00
|
|
|
import Text.Megaparsec.Custom
|
2017-07-06 16:07:46 +03:00
|
|
|
import Text.Printf (printf)
|
2012-03-11 01:55:48 +04:00
|
|
|
|
|
|
|
import Hledger.Data
|
|
|
|
import Hledger.Utils
|
2020-01-05 20:56:04 +03:00
|
|
|
import Hledger.Read.Common (Reader(..),InputOpts(..),amountp, statusp, genericSourcePos, finaliseJournal)
|
2012-03-24 22:08:11 +04:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** doctest setup
|
|
|
|
-- $setup
|
|
|
|
-- >>> :set -XOverloadedStrings
|
|
|
|
|
|
|
|
--- ** some types
|
2018-09-07 20:12:13 +03:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | A CSV file's parsed contents: a list of records.
type CSV = [CsvRecord]

-- | One record (row) of a CSV file: a list of field values.
type CsvRecord = [CsvValue]

-- | One field value from a CSV record.
type CsvValue = String
|
2018-09-07 20:12:13 +03:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** reader
|
2018-09-07 20:12:13 +03:00
|
|
|
|
2020-03-01 21:16:52 +03:00
|
|
|
-- | The CSV reader. Reading is done by 'parse' (rReadFn);
-- CSV files can't be included from other journals, so rParser errors.
reader :: MonadIO m => Reader m
reader = Reader
  {rFormat = "csv"
  ,rExtensions = ["csv","tsv","ssv"]
  ,rReadFn = parse
  ,rParser = error' "sorry, CSV files can't be included yet"
  }
|
2012-03-11 01:55:48 +04:00
|
|
|
|
|
|
|
-- | Parse and post-process a "Journal" from CSV data, or give an error.
-- Does not check balance assertions (they are disabled via InputOpts below).
-- XXX currently ignores the provided data, reads it from the file path instead.
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse iopts f t = do
  let rulesfile = mrules_file_ iopts
  r <- liftIO $ readJournalFromCsv rulesfile f t
  case r of Left e   -> throwError e
            Right pj -> finaliseJournal iopts{ignore_assertions_=True} f t pj'
              where
                -- finaliseJournal assumes the journal's items are
                -- reversed, as produced by JournalReader's parser.
                -- But here they are already properly ordered. So we'd
                -- better preemptively reverse them once more. XXX inefficient
                pj' = journalReverse pj
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** reading rules files
|
|
|
|
--- *** rules utilities
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- Not used by hledger; just for lib users.
-- | A pure-exception-throwing IO action that parses this file's content
-- as CSV conversion rules, interpolating any included files first,
-- and runs some extra validation checks.
parseRulesFile :: FilePath -> ExceptT String IO CsvRules
parseRulesFile f =
  liftIO (readFilePortably f >>= expandIncludes (takeDirectory f))
    >>= either throwError return . parseAndValidateCsvRules f
|
2012-05-28 22:40:36 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Given a CSV file path, what would normally be the corresponding rules file ?
rulesFileFor :: FilePath -> FilePath
rulesFileFor csvfile = csvfile ++ ".rules"
|
2012-03-24 22:08:11 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Given a CSV rules file path, what would normally be the corresponding CSV file ?
-- (Drops the trailing ".rules", 6 characters.)
csvFileFor :: FilePath -> FilePath
csvFileFor rulesfile = take (length rulesfile - 6) rulesfile
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2016-11-23 00:59:31 +03:00
|
|
|
-- | Default CSV conversion rules file content, referencing the given CSV
-- file's name. Used when generating a rules file for a CSV file.
defaultRulesText :: FilePath -> Text
defaultRulesText csvfile = T.pack $ unlines
  ["# hledger csv conversion rules for " ++ csvFileFor (takeFileName csvfile)
  ,"# cf http://hledger.org/manual#csv-files"
  ,""
  ,"account1 assets:bank:checking"
  ,""
  ,"fields date, description, amount1"
  ,""
  ,"#skip 1"
  ,"#newest-first"
  ,""
  ,"#date-format %-d/%-m/%Y"
  ,"#date-format %-m/%-d/%Y"
  ,"#date-format %Y-%h-%d"
  ,""
  ,"#currency $"
  ,""
  ,"if ITUNES"
  ," account2 expenses:entertainment"
  ,""
  ,"if (TO|FROM) SAVINGS"
  ," account2 assets:bank:savings\n"
  ]
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Prepend a directive to the rules' directive list.
addDirective :: (DirectiveName, String) -> CsvRules -> CsvRules
addDirective d rules = rules{rdirectives = d : rdirectives rules}
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Prepend a field assignment to the rules' assignment list.
addAssignment :: (HledgerFieldName, FieldTemplate) -> CsvRules -> CsvRules
addAssignment a rules = rules{rassignments = a : rassignments rules}
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Record both the CSV field indexes and the hledger field assignments
-- implied by a fields list declaration.
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
setIndexesAndAssignmentsFromList fs =
  addAssignmentsFromList fs . setCsvFieldIndexesFromList fs
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Set the CSV field indexes from a fields list: each name is paired
-- with its 1-based position.
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRules -> CsvRules
setCsvFieldIndexesFromList names rules =
  rules{rcsvfieldindexes = zip names [1..]}
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | For each standard hledger field name that appears in the given CSV
-- fields list, add an assignment of the corresponding 1-based CSV column
-- reference (%N) to that hledger field.
addAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
  where
    -- Add an assignment for this hledger field name only if it occurs in fs.
    maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
      where
        -- elemIndex is 0-based; CSV column references are 1-based.
        addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Prepend a conditional block to the rules' conditional-block list.
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
addConditionalBlock block rules =
  rules{rconditionalblocks = block : rconditionalblocks rules}
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Look up the value of the named top-level directive, if it was declared.
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
getDirective directivename rules = lookup directivename (rdirectives rules)
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Render a String error component as itself, for megaparsec error display.
instance ShowErrorComponent String where
  showErrorComponent = id
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
-- Included file paths may be relative to the directory of the provided file path.
-- This is done as a pre-parse step to simplify the CSV rules parser.
expandIncludes :: FilePath -> Text -> IO Text
expandIncludes dir content = T.unlines <$> mapM (expandLine dir) (T.lines content)
  where
    -- Expand one line: an "include PATH" line is replaced by that file's
    -- (recursively expanded) contents; any other line is kept as is.
    -- (basedir avoids shadowing the outer dir.)
    expandLine basedir line =
      case line of
        (T.stripPrefix "include " -> Just f) -> expandIncludes dir' =<< T.readFile f'
          where
            f' = basedir </> dropWhile isSpace (T.unpack f)
            -- includes within the included file are relative to its directory
            dir' = takeDirectory f'
        _ -> return line
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | An error-throwing IO action that parses this text as CSV conversion rules
-- and runs some extra validation checks. The file path is used in error messages.
parseAndValidateCsvRules :: FilePath -> T.Text -> Either String CsvRules
parseAndValidateCsvRules rulesfile s =
  case parseCsvRules rulesfile s of
    Left err    -> Left $ customErrorBundlePretty err
    Right rules -> first makeFancyParseError $ validateRules rules
  where
    -- Wrap a validation error string as a rendered megaparsec parse error,
    -- so validation failures look like parse failures.
    makeFancyParseError :: String -> String
    makeFancyParseError errorString =
      parseErrorPretty (FancyError 0 (S.singleton $ ErrorFail errorString) :: ParseError Text String)
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Parse this text as CSV conversion rules. The file path is for error messages.
parseCsvRules :: FilePath -> T.Text -> Either (ParseErrorBundle T.Text CustomErr) CsvRules
-- parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
parseCsvRules rulesfile s =
  -- run the stateful rules parser starting from the empty default rules
  runParser (evalStateT rulesp defrules) rulesfile s
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | Return the validated rules, or an error.
-- Currently the only check is that the date field has an assignment
-- (at top level or in some conditional block).
validateRules :: CsvRules -> Either String CsvRules
validateRules rules = do
  unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n"
  Right rules
  where
    isAssigned f = isJust $ getEffectiveAssignment rules [] f
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- *** rules types
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
-- | A set of data definitions and account-matching patterns sufficient to
-- convert a particular CSV data file into meaningful journal transactions.
data CsvRules = CsvRules {
  rdirectives :: [(DirectiveName,String)],
    -- ^ top-level rules, as (keyword, value) pairs
  rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
    -- ^ csv field names and their column number, if declared by a fields list
  rassignments :: [(HledgerFieldName, FieldTemplate)],
    -- ^ top-level assignments to hledger fields, as (field name, value template) pairs
  rconditionalblocks :: [ConditionalBlock]
    -- ^ conditional blocks, which contain additional assignments/rules to apply to matched csv records
} deriving (Show, Eq)
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
-- | The parser type for rules files: a text parser carrying the
-- CsvRules accumulated so far as state.
type CsvRulesParser a = StateT CsvRules SimpleTextParser a

-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
type DirectiveName = String

-- | CSV field name.
type CsvFieldName = String

-- | 1-based CSV column number.
type CsvFieldIndex = Int

-- | Percent symbol followed by a CSV field name or column number. Eg: %date, %1.
type CsvFieldReference = String

-- | One of the standard hledger fields or pseudo-fields that can be assigned to.
-- Eg date, account1, amount, amount1-in, date-format.
type HledgerFieldName = String

-- | A text value to be assigned to a hledger field, possibly
-- containing csv field references to be interpolated.
type FieldTemplate = String

-- | A strptime date parsing pattern, as supported by Data.Time.Format.
type DateFormat = String

-- | A regular expression.
type RegexpPattern = String
|
|
|
|
|
|
|
|
-- | A single test for matching a CSV record, in one way or another.
data Matcher =
    RecordMatcher RegexpPattern                  -- ^ match if this regexp matches the overall CSV record
  | FieldMatcher CsvFieldReference RegexpPattern -- ^ match if this regexp matches the referenced CSV field's value
  deriving (Show, Eq)
|
2020-02-12 06:04:03 +03:00
|
|
|
|
2020-02-12 17:20:40 +03:00
|
|
|
-- | A conditional block: a set of CSV record matchers, and a sequence
-- of rules which will be enabled only if one or more of the matchers
-- succeeds.
--
-- Three types of rule are allowed inside conditional blocks: field
-- assignments, skip, end. (A skip or end rule is stored as if it was
-- a field assignment, and executed in validateCsv. XXX)
data ConditionalBlock = CB {
   cbMatchers    :: [Matcher]
  ,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
  } deriving (Show, Eq)
|
|
|
|
|
2019-09-14 03:51:14 +03:00
|
|
|
-- | Empty rules: no directives, field indexes, assignments or
-- conditional blocks. The rules parser's starting state.
defrules :: CsvRules
defrules = CsvRules {
  rdirectives=[],
  rcsvfieldindexes=[],
  rassignments=[],
  rconditionalblocks=[]
  }
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- *** rules parsers
|
2020-02-28 10:28:33 +03:00
|
|
|
|
|
|
|
{-
|
|
|
|
Grammar for the CSV conversion rules, more or less:
|
|
|
|
|
|
|
|
RULES: RULE*
|
|
|
|
|
|
|
|
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
|
|
|
|
|
|
|
|
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
|
|
|
|
|
|
|
|
FIELD-NAME: QUOTED-FIELD-NAME | BARE-FIELD-NAME
|
|
|
|
|
|
|
|
QUOTED-FIELD-NAME: " (any CHAR except double-quote)+ "
|
|
|
|
|
|
|
|
BARE-FIELD-NAME: any CHAR except space, tab, #, ;
|
|
|
|
|
|
|
|
FIELD-ASSIGNMENT: JOURNAL-FIELD ASSIGNMENT-SEPARATOR FIELD-VALUE
|
|
|
|
|
|
|
|
JOURNAL-FIELD: date | date2 | status | code | description | comment | account1 | account2 | amount | JOURNAL-PSEUDO-FIELD
|
|
|
|
|
|
|
|
JOURNAL-PSEUDO-FIELD: amount-in | amount-out | currency
|
|
|
|
|
|
|
|
ASSIGNMENT-SEPARATOR: SPACE | ( : SPACE? )
|
|
|
|
|
|
|
|
FIELD-VALUE: VALUE (possibly containing CSV-FIELD-REFERENCEs)
|
|
|
|
|
|
|
|
CSV-FIELD-REFERENCE: % CSV-FIELD
|
|
|
|
|
|
|
|
CSV-FIELD: ( FIELD-NAME | FIELD-NUMBER ) (corresponding to a CSV field)
|
|
|
|
|
|
|
|
FIELD-NUMBER: DIGIT+
|
|
|
|
|
|
|
|
CONDITIONAL-BLOCK: if ( FIELD-MATCHER NEWLINE )+ INDENTED-BLOCK
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
FIELD-MATCHER: ( CSV-FIELD-NAME SPACE? )? ( MATCHOP SPACE? )? PATTERNS
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
MATCHOP: ~
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
PATTERNS: ( NEWLINE REGEXP )* REGEXP
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
INDENTED-BLOCK: ( SPACE ( FIELD-ASSIGNMENT | COMMENT ) NEWLINE )+
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
REGEXP: ( NONSPACE CHAR* ) SPACE?
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
VALUE: SPACE? ( CHAR* ) SPACE?
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
COMMENT: SPACE? COMMENT-CHAR VALUE
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
COMMENT-CHAR: # | ;
|
2017-02-06 13:34:18 +03:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
NONSPACE: any CHAR not a SPACE-CHAR
|
2013-08-04 07:47:43 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
BLANK: SPACE?
|
2018-05-22 01:47:56 +03:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
SPACE: SPACE-CHAR+
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
SPACE-CHAR: space | tab
|
|
|
|
|
|
|
|
CHAR: any character except newline
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-28 10:28:33 +03:00
|
|
|
DIGIT: 0-9
|
|
|
|
|
|
|
|
-}
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | Parse a complete rules file: any number of blank/comment lines,
-- directives, field name lists, field assignments and conditional blocks,
-- accumulated into the parser's CsvRules state.
rulesp :: CsvRulesParser CsvRules
rulesp = do
  _ <- many $ choiceInState
    [blankorcommentlinep                                                <?> "blank or comment line"
    ,(directivep        >>= modify' . addDirective)                     <?> "directive"
    ,(fieldnamelistp    >>= modify' . setIndexesAndAssignmentsFromList) <?> "field name list"
    ,(fieldassignmentp  >>= modify' . addAssignment)                    <?> "field assignment"
    ,(conditionalblockp >>= modify' . addConditionalBlock)              <?> "conditional block"
    ]
  eof
  r <- get
  -- the lists were accumulated by prepending; restore file order
  return r{rdirectives=reverse $ rdirectives r
          ,rassignments=reverse $ rassignments r
          ,rconditionalblocks=reverse $ rconditionalblocks r
          }
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A blank line or a comment line.
blankorcommentlinep :: CsvRulesParser ()
blankorcommentlinep = lift (dbgparse 3 "trying blankorcommentlinep") >> choiceInState [blanklinep, commentlinep]
|
2014-11-03 08:52:12 +03:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A line containing only (non-newline) whitespace.
blanklinep :: CsvRulesParser ()
blanklinep = lift (skipMany spacenonewline) >> newline >> return () <?> "blank line"
|
2014-11-03 08:52:12 +03:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A line whose first non-whitespace character is a comment character.
commentlinep :: CsvRulesParser ()
commentlinep = lift (skipMany spacenonewline) >> commentcharp >> lift restofline >> return () <?> "comment line"
|
2014-11-03 08:52:12 +03:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A character that begins a comment line: ; # or *.
commentcharp :: CsvRulesParser Char
commentcharp = oneOf (";#*" :: [Char])
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A rules file directive: one of the keywords in 'directives',
-- optionally followed by a colon and/or whitespace and a value.
directivep :: CsvRulesParser (DirectiveName, String)
directivep = (do
  lift $ dbgparse 3 "trying directive"
  d <- fmap T.unpack $ choiceInState $ map (lift . string . T.pack) directives
  -- a value after ":" and/or whitespace, or no value at all
  v <- (((char ':' >> lift (many spacenonewline)) <|> lift (some spacenonewline)) >> directivevalp)
       <|> (optional (char ':') >> lift (skipMany spacenonewline) >> lift eolof >> return "")
  return (d, v)
  ) <?> "directive"
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-01-02 19:26:13 +03:00
|
|
|
-- | The directive keywords recognised at the top level of a rules file.
directives :: [String]
directives =
  ["date-format"
  ,"separator"
  -- ,"default-account"
  -- ,"default-currency"
  ,"skip"
  ,"newest-first"
  ,"balance-type"
  ]
|
2012-03-11 01:55:48 +04:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A directive's value: everything up to the end of line.
directivevalp :: CsvRulesParser String
directivevalp = anySingle `manyTill` lift eolof
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A "fields" list declaring the CSV field names, comma-separated,
-- each optionally quoted. Names are lower-cased. Missing names between
-- commas become empty strings.
fieldnamelistp :: CsvRulesParser [CsvFieldName]
fieldnamelistp = (do
  lift $ dbgparse 3 "trying fieldnamelist"
  string "fields"
  optional $ char ':'
  lift (skipSome spacenonewline)
  let separator = lift (skipMany spacenonewline) >> char ',' >> lift (skipMany spacenonewline)
  f <- fromMaybe "" <$> optional fieldnamep
  fs <- some $ (separator >> fromMaybe "" <$> optional fieldnamep)
  lift restofline
  return $ map (map toLower) $ f:fs
  ) <?> "field name list"
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A CSV field name, quoted or bare.
fieldnamep :: CsvRulesParser String
fieldnamep = quotedfieldnamep <|> barefieldnamep
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A double-quoted CSV field name: one or more characters other than
-- double quote, newline, :, ;, # or ~, between double quotes.
quotedfieldnamep :: CsvRulesParser String
quotedfieldnamep =
  between (char '"') (char '"') (some $ noneOf ("\"\n:;#~" :: [Char]))
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A bare CSV field name: one or more characters other than
-- space, tab, newline, comma, ;, # or ~.
barefieldnamep :: CsvRulesParser String
barefieldnamep = some $ noneOf (" \t\n,;#~" :: [Char])
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-27 12:00:35 +03:00
|
|
|
-- | An assignment of a value template to a hledger field: the field name,
-- then a separator and value, or end of line for an empty value.
fieldassignmentp :: CsvRulesParser (HledgerFieldName, FieldTemplate)
fieldassignmentp = do
  lift $ dbgparse 3 "trying fieldassignmentp"
  f <- journalfieldnamep
  v <- choiceInState [ assignmentseparatorp >> fieldvalp
                     , lift eolof >> return ""
                     ]
  return (f,v)
    <?> "field assignment"
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | One of the assignable hledger field (or pseudo-field) names,
-- from 'journalfieldnames'.
journalfieldnamep :: CsvRulesParser String
journalfieldnamep = do
  lift (dbgparse 2 "trying journalfieldnamep")
  T.unpack <$> choiceInState (map (lift . string . T.pack) journalfieldnames)
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2018-09-07 20:12:13 +03:00
|
|
|
-- Transaction fields and pseudo fields for CSV conversion.
-- Names must precede any other name they contain, for the parser
-- (amount-in before amount; date2 before date). TODO: fix
journalfieldnames =
  -- numbered variants, for postings 1-9
  concat [[ "account" ++ i
          ,"amount" ++ i ++ "-in"
          ,"amount" ++ i ++ "-out"
          ,"amount" ++ i
          ,"balance" ++ i
          ,"comment" ++ i
          ,"currency" ++ i
          ] | x <- [1..9], let i = show x]
  ++
  -- unnumbered fields and pseudo-fields
  ["amount-in"
  ,"amount-out"
  ,"amount"
  ,"balance"
  ,"code"
  ,"comment"
  ,"currency"
  ,"date2"
  ,"date"
  ,"description"
  ,"status"
  ,"skip" -- skip and end are not really fields, but we list it here to allow conditional rules that skip records
  ,"end"
  ]
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | The separator between a field name and its value: a colon with
-- optional surrounding whitespace, or just whitespace.
assignmentseparatorp :: CsvRulesParser ()
assignmentseparatorp = do
  lift $ dbgparse 3 "trying assignmentseparatorp"
  _ <- choiceInState [ lift (skipMany spacenonewline) >> char ':' >> lift (skipMany spacenonewline)
                     , lift (skipSome spacenonewline)
                     ]
  return ()
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
-- | A field value template: everything up to the end of line.
fieldvalp :: CsvRulesParser String
fieldvalp = do
  lift $ dbgparse 2 "trying fieldvalp"
  anySingle `manyTill` lift eolof
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-26 04:54:16 +03:00
|
|
|
-- A conditional block: one or more matchers, one per line, followed by one or more indented rules.
conditionalblockp :: CsvRulesParser ConditionalBlock
conditionalblockp = do
  lift $ dbgparse 3 "trying conditionalblockp"
  string "if" >> lift (skipMany spacenonewline) >> optional newline
  ms <- some matcherp
  -- the assignments must be indented (preceded by whitespace)
  as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp)
  when (null as) $
    Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
  return $ CB{cbMatchers=ms, cbAssignments=as}
    <?> "conditional block"
|
|
|
|
|
2020-02-26 04:54:16 +03:00
|
|
|
-- A single matcher, on one line: a field matcher if the line starts
-- with a csv field reference, otherwise a whole-record matcher.
matcherp :: CsvRulesParser Matcher
matcherp = choice [try fieldmatcherp, recordmatcherp]
|
|
|
|
|
|
|
|
-- A single whole-record matcher.
-- A pattern on the whole line, not beginning with a csv field reference.
recordmatcherp :: CsvRulesParser Matcher
recordmatcherp = do
  -- fixed: debug label previously said "trying matcherp", which made
  -- --debug output ambiguous between this parser and matcherp
  lift $ dbgparse 2 "trying recordmatcherp"
  -- pos <- currentPos
  -- _ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline)
  -- the rest of the line is the pattern
  r <- regexp
  -- when (null ps) $
  --   Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
  return $ RecordMatcher r
  <?> "record matcher"
|
|
|
|
|
2020-02-26 21:25:36 +03:00
|
|
|
-- | A single matcher for a specific field. A csv field reference
-- (like %date or %1), and a pattern on the rest of the line,
-- optionally space-separated. Eg:
-- %description chez jacques
fieldmatcherp :: CsvRulesParser Matcher
fieldmatcherp = do
  -- fixed: debug label previously said "trying fieldmatcher" (no p),
  -- inconsistent with the other parsers' labels
  lift $ dbgparse 2 "trying fieldmatcherp"
  -- An optional fieldname (default: "all")
  -- f <- fromMaybe "all" `fmap` (optional $ do
  --       f' <- fieldnamep
  --       lift (skipMany spacenonewline)
  --       return f')
  f <- csvfieldreferencep <* lift (skipMany spacenonewline)
  -- optional operator.. just ~ (case insensitive infix regex) for now
  -- _op <- fromMaybe "~" <$> optional matchoperatorp
  -- (a second skipMany spacenonewline here was redundant: the reference
  -- parser above already consumed trailing spaces, and with the operator
  -- parse commented out there is nothing new to skip after)
  r <- regexp
  return $ FieldMatcher f r
  <?> "field matcher"
|
|
|
|
|
|
|
|
-- | Parse a reference to a csv field: a percent sign followed by a
-- field name or column number. The rendered reference keeps the percent
-- sign, with the name quoted if needed.
csvfieldreferencep :: CsvRulesParser CsvFieldReference
csvfieldreferencep = do
  lift $ dbgparse 3 "trying csvfieldreferencep"
  _ <- char '%'
  name <- fieldnamep
  return ('%' : quoteIfNeeded name)
|
|
|
|
|
2020-02-12 17:20:40 +03:00
|
|
|
-- A single regular expression
regexp :: CsvRulesParser RegexpPattern
regexp = do
  lift $ dbgparse 3 "trying regexp"
  -- notFollowedBy matchoperatorp
  -- the pattern must start with a non-space character
  c <- lift nonspace
  -- then runs to end of line; surrounding whitespace is stripped
  cs <- anySingle `manyTill` lift eolof
  return $ strip $ c:cs
|
|
|
|
|
2020-02-26 21:25:36 +03:00
|
|
|
-- -- A match operator, indicating the type of match to perform.
|
|
|
|
-- -- Currently just ~ meaning case insensitive infix regex match.
|
|
|
|
-- matchoperatorp :: CsvRulesParser String
|
|
|
|
-- matchoperatorp = fmap T.unpack $ choiceInState $ map string
|
|
|
|
-- ["~"
|
|
|
|
-- -- ,"!~"
|
|
|
|
-- -- ,"="
|
|
|
|
-- -- ,"!="
|
|
|
|
-- ]
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** reading csv files
|
2020-02-28 10:28:33 +03:00
|
|
|
|
|
|
|
-- | Read a Journal from the given CSV data (and filename, used for error
-- messages), or return an error. Proceed as follows:
--
-- 1. parse CSV conversion rules from the specified rules file, or from
--    the default rules file for the specified CSV file, if it exists,
--    or throw a parse error; if it doesn't exist, use built-in default rules
--
-- 2. parse the CSV data, or throw a parse error
--
-- 3. convert the CSV records to transactions using the rules
--
-- 4. if the rules file didn't exist, create it with the default rules and filename
--
-- 5. return the transactions as a Journal
--
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
 handle (\(e::IOException) -> return $ Left $ show e) $ do

  -- make and throw an IO exception.. which we catch and convert to an Either above ?
  let throwerr = throw . userError

  -- parse the csv rules
  let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
  rulesfileexists <- doesFileExist rulesfile
  rulestext <-
    if rulesfileexists
    then do
      dbg1IO "using conversion rules file" rulesfile
      readFilePortably rulesfile >>= expandIncludes (takeDirectory rulesfile)
    else
      return $ defaultRulesText rulesfile
  rules <- either throwerr return $ parseAndValidateCsvRules rulesfile rulestext
  dbg2IO "rules" rules

  -- parse the skip directive's value, if any
  -- NOTE(review): readDef falls back to throwerr when the value is not
  -- a number, so a bad "skip" value raises the IOException caught above
  let skiplines = case getDirective "skip" rules of
                    Nothing -> 0
                    Just "" -> 1
                    Just s  -> readDef (throwerr $ "could not parse skip value: " ++ show s) s

  -- parse csv
  -- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
  let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
  let separator = fromMaybe ',' (getDirective "separator" rules >>= parseSeparator)
  dbg2IO "separator" separator
  records <- (either throwerr id .
              dbg2 "validateCsv" . validateCsv rules skiplines .
              dbg2 "parseCsv")
             `fmap` parseCsv separator parsecfilename csvdata
  dbg1IO "first 3 csv records" $ take 3 records

  -- identify header lines
  -- let (headerlines, datalines) = identifyHeaderLines records
  --     mfieldnames = lastMay headerlines

  let
    -- convert CSV records to transactions, threading a SourcePos whose
    -- line number is advanced by one per record (for error reporting)
    txns = snd $ mapAccumL
                   (\pos r ->
                      let
                        SourcePos name line col = pos
                        line' = (mkPos . (+1) . unPos) line
                        pos' = SourcePos name line' col
                      in
                        (pos, transactionFromCsvRecord pos' rules r)
                   )
                   (initialPos parsecfilename) records

    -- Ensure transactions are ordered chronologically.
    -- First, if the CSV records seem to be most-recent-first (because
    -- there's an explicit "newest-first" directive, or there's more
    -- than one date and the first date is more recent than the last):
    -- reverse them to get same-date transactions ordered chronologically.
    txns' =
      (if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
      where
        newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
        mseemsnewestfirst = dbg3 "mseemsnewestfirst" $
          case nub $ map tdate txns of
            ds | length ds > 1 -> Just $ head ds > last ds
            _                  -> Nothing
    -- Second, sort by date (a stable sort, preserving the order fixed above).
    txns'' = sortBy (comparing tdate) txns'

  -- if there was no rules file, persist the default rules used
  when (not rulesfileexists) $ do
    dbg1IO "creating conversion rules file" rulesfile
    writeFile rulesfile $ T.unpack rulestext

  return $ Right nulljournal{jtxns=txns''}
|
|
|
|
|
|
|
|
-- | Parse special separator names TAB and SPACE, or return the first
-- character. Return Nothing on empty string
parseSeparator :: String -> Maybe Char
parseSeparator s =
  case map toLower s of
    "space" -> Just ' '
    "tab"   -> Just '\t'
    (c:_)   -> Just c
    []      -> Nothing
|
|
|
|
|
|
|
|
-- | Parse CSV text into raw records using the given separator.
-- When the file path is "-", the data is read from stdin instead of
-- the supplied Text (labelled "(stdin)" in errors).
parseCsv :: Char -> FilePath -> Text -> IO (Either String CSV)
parseCsv separator filePath csvdata =
  case filePath of
    -- <$> replaces the obsolete liftM alias
    "-" -> parseCassava separator "(stdin)" <$> T.getContents
    _   -> return $ parseCassava separator filePath csvdata
|
|
|
|
|
|
|
|
-- | Decode CSV text with cassava-megaparsec, rendering any parse error
-- as a String.
-- Note: the <$> here is over the *function* functor — decodeWith applied
-- to three arguments is a ByteString -> Either function, which is composed
-- with the either-rendering step and then applied (via $) to the encoded
-- content.
parseCassava :: Char -> FilePath -> Text -> Either String CSV
parseCassava separator path content =
  either (Left . errorBundlePretty) (Right . parseResultToCsv) <$>
  CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path $
  BL.fromStrict $ T.encodeUtf8 content
|
|
|
|
|
|
|
|
-- | Cassava decode options with the delimiter set to the given
-- separator character.
decodeOptions :: Char -> Cassava.DecodeOptions
decodeOptions sep =
  Cassava.defaultDecodeOptions { Cassava.decDelimiter = fromIntegral (ord sep) }
|
|
|
|
|
|
|
|
-- | Convert cassava's nested container of UTF-8 encoded fields into
-- our list-of-lists-of-String CSV representation.
parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV
parseResultToCsv result =
  toList $ fmap (map (T.unpack . T.decodeUtf8) . toList) result
|
|
|
|
|
|
|
|
-- | Render CSV records as text: every field double-quoted with embedded
-- double quotes doubled (RFC 4180 style), fields comma-separated,
-- records newline-separated, no trailing newline.
printCSV :: CSV -> String
printCSV = intercalate "\n" . map printRecord
  where
    -- intercalate replaces the hand-rolled concat . intersperse
    -- (intercalate is already used elsewhere in this module)
    printRecord = intercalate "," . map printField
    printField f = "\"" ++ concatMap escape f ++ "\""
    escape '"' = "\"\""
    escape x   = [x]
|
|
|
|
|
|
|
|
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord]
validateCsv _ _ (Left err) = Left err
validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs
  where
    -- drop records that are a single empty field (blank lines)
    filternulls = filter (/=[""])
    -- how many records a conditional "skip"/"end" rule says to drop at
    -- this record, if any; "end" skips everything remaining (maxBound)
    skipCount r =
      case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of
        (Nothing, Nothing) -> Nothing
        (Just _, _) -> Just maxBound
        (Nothing, Just "") -> Just 1
        -- NOTE(review): partial read — a non-numeric conditional skip
        -- value would raise an exception here; confirm upstream validation
        (Nothing, Just x) -> Just (read x)
    -- apply conditional skip/end rules record by record
    applyConditionalSkips [] = []
    applyConditionalSkips (r:rest) =
      case skipCount r of
        Nothing -> r:(applyConditionalSkips rest)
        Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
    -- require every remaining record to have at least two fields
    validate [] = Right []
    validate rs@(_first:_)
      | isJust lessthan2 = let r = fromJust lessthan2 in
          Left $ printf "CSV record %s has less than two fields" (show r)
      | otherwise        = Right rs
      where
        lessthan2 = headMay $ filter ((<2).length) rs
|
|
|
|
|
|
|
|
-- -- | The highest (0-based) field index referenced in the field
|
|
|
|
-- -- definitions, or -1 if no fields are defined.
|
|
|
|
-- maxFieldIndex :: CsvRules -> Int
|
|
|
|
-- maxFieldIndex r = maximumDef (-1) $ catMaybes [
|
|
|
|
-- dateField r
|
|
|
|
-- ,statusField r
|
|
|
|
-- ,codeField r
|
|
|
|
-- ,amountField r
|
|
|
|
-- ,amountInField r
|
|
|
|
-- ,amountOutField r
|
|
|
|
-- ,currencyField r
|
|
|
|
-- ,accountField r
|
|
|
|
-- ,account2Field r
|
|
|
|
-- ,date2Field r
|
|
|
|
-- ]
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** converting csv records to transactions
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2019-10-12 02:36:17 +03:00
|
|
|
-- Render the rules (effective value templates) currently assigned to
-- each standard journal field for this record, one per line, for use
-- in error messages. Fields with no assignment are omitted.
showRules rules record =
  unlines $ catMaybes [ (("the "++fld++" rule is: ")++) <$> getEffectiveAssignment rules record fld | fld <- journalfieldnames]
|
|
|
|
|
2020-02-27 22:19:37 +03:00
|
|
|
-- | Look up the value (template) of a csv rule by rule keyword.
csvRule :: CsvRules -> DirectiveName -> Maybe FieldTemplate
csvRule = flip getDirective
|
|
|
|
|
|
|
|
-- | Look up the final value assigned to a csv rule by rule keyword, taking
-- into account the current record and conditional rules.
-- Generally rules with keywords ("directives") don't have interpolated
-- values, but for now it's possible.
csvRuleValue :: CsvRules -> CsvRecord -> DirectiveName -> Maybe String
csvRuleValue rules record directive =
  renderTemplate rules record <$> csvRule rules directive
|
|
|
|
|
|
|
|
-- | Look up the value template assigned to a hledger field by field
-- list/field assignment rules, taking into account the current record and
-- conditional rules.
-- (A named alias for getEffectiveAssignment.)
hledgerField :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe FieldTemplate
hledgerField = getEffectiveAssignment
|
|
|
|
|
|
|
|
-- | Look up the final value assigned to a hledger field, with csv field
-- references interpolated.
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe String
hledgerFieldValue rules record fieldname =
  renderTemplate rules record <$> hledgerField rules record fieldname
|
|
|
|
|
|
|
|
-- Use the second value when the first is "null" (empty list/string, or
-- Nothing — null is Foldable-polymorphic).
-- NOTE(review): deliberately unannotated; it is used at Maybe String in
-- mkPosting, where Just "" is NOT considered null — confirm before
-- adding a monomorphic signature.
s `withDefault` def = if null s then def else s
|
|
|
|
|
2014-08-01 04:32:42 +04:00
|
|
|
-- | Convert one CSV record to a hledger Transaction, using the given
-- rules and the record's source position (CSV line number).
-- Calls error' with a detailed message on unparseable dates or statuses.
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
  where
    ----------------------------------------------------------------------
    -- 1. Define some helpers:

    rule = csvRule rules :: DirectiveName -> Maybe FieldTemplate
    -- ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
    field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
    fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe String
    parsedate' = parseDateWithCustomOrDefaultFormats (rule "date-format")
    -- build the multi-line error message shown for an unparseable date
    mkdateerror datefield datevalue mdateformat = unlines
      ["error: could not parse \""++datevalue++"\" as a date using date format "
        ++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat
      ,showRecord record
      ,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ field datefield)
      ,"the date-format is: "++fromMaybe "unspecified" mdateformat
      ,"you may need to "
        ++"change your "++datefield++" rule, "
        ++maybe "add a" (const "change your") mdateformat++" date-format rule, "
        ++"or "++maybe "add a" (const "change your") mskip++" skip rule"
      ,"for m/d/y or d/m/y dates, use date-format %-m/%-d/%Y or date-format %-d/%-m/%Y"
      ]
      where
        mskip = rule "skip"

    ----------------------------------------------------------------------
    -- 2. Gather values needed for the transaction itself, by evaluating the
    -- field assignment rules using the CSV record's data, and parsing a bit
    -- more where needed (dates, status).

    mdateformat = rule "date-format"
    date = fromMaybe "" $ fieldval "date"
    -- an unparseable (or missing) date is a fatal error
    date' = fromMaybe (error' $ mkdateerror "date" date mdateformat) $ parsedate' date
    mdate2 = fieldval "date2"
    mdate2' = maybe Nothing (maybe (error' $ mkdateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . parsedate') mdate2
    -- status defaults to Unmarked when no status field is assigned
    status =
      case fieldval "status" of
        Nothing -> Unmarked
        Just s -> either statuserror id $ runParser (statusp <* eof) "" $ T.pack s
          where
            statuserror err = error' $ unlines
              ["error: could not parse \""++s++"\" as a cleared status (should be *, ! or empty)"
              ,"the parse error is: "++customErrorBundlePretty err
              ]
    code = maybe "" singleline $ fieldval "code"
    description = maybe "" singleline $ fieldval "description"
    comment = maybe "" singleline $ fieldval "comment"
    precomment = maybe "" singleline $ fieldval "precomment"

    ----------------------------------------------------------------------
    -- 3. Generate the postings

    -- Make posting 1 if possible, with special support for old syntax to
    -- support pre-1.16 rules.
    posting1 = mkPosting rules record "1"
               ("account1" `withAlias` "account")
               ("amount1" `withAlias` "amount")
               ("amount1-in" `withAlias` "amount-in")
               ("amount1-out" `withAlias` "amount-out")
               ("balance1" `withAlias` "balance")
               "comment1" -- comment1 does not have legacy alias
               t
      where
        -- prefer the new-style field name; using both is a fatal error
        withAlias fld alias =
          case (field fld, field alias) of
            (Just fld, Just alias) -> error' $ unlines
              [ "error: both \"" ++ fld ++ "\" and \"" ++ alias ++ "\" have values."
              , showRecord record
              , showRules rules record
              ]
            (Nothing, Just _) -> alias
            (_, Nothing) -> fld

    -- Make other postings where possible, and gather all that were generated.
    postings = catMaybes $ posting1 : otherpostings
      where
        otherpostings = [mkPostingN i | x<-[2..9], let i = show x]
          where
            mkPostingN n = mkPosting rules record n
                           ("account"++n) ("amount"++n) ("amount"++n++"-in")
                           ("amount"++n++"-out") ("balance"++n) ("comment"++n) t

    -- Auto-generate a second posting or second posting amount,
    -- for compatibility with pre-1.16 rules.
    postings' =
      case postings of
        -- when rules generate just one posting, of a kind that needs to be
        -- balanced, generate the second posting to balance it.
        [p1@(p1',_)] ->
          if ptype p1' == VirtualPosting then [p1] else [p1, p2]
          where
            p2 = (nullposting{paccount=unknownExpenseAccount
                             ,pamount=costOfMixedAmount (-pamount p1')
                             ,ptransaction=Just t}, False)
        -- when rules generate exactly two postings, and only the second has
        -- no amount, give it the balancing amount.
        [p1@(p1',_), p2@(p2',final2)] ->
          if hasAmount p1' && not (hasAmount p2')
          then [p1, (p2'{pamount=costOfMixedAmount(-(pamount p1'))}, final2)]
          else [p1, p2]
        --
        ps -> ps

    -- Finally, wherever default "unknown" accounts were used, refine them
    -- based on the sign of the posting amount if it's now known.
    postings'' = map maybeImprove postings'
      where
        maybeImprove (p,final) = if final then p else improveUnknownAccountName p

    ----------------------------------------------------------------------
    -- 4. Build the transaction (and name it, so the postings can reference it).

    t = nulltransaction{
           tsourcepos        = genericSourcePos sourcepos -- the CSV line number
          ,tdate             = date'
          ,tdate2            = mdate2'
          ,tstatus           = status
          ,tcode             = T.pack code
          ,tdescription      = T.pack description
          ,tcomment          = T.pack comment
          ,tprecedingcomment = T.pack precomment
          ,tpostings         = postings''
          }
|
|
|
|
|
2020-02-27 22:46:36 +03:00
|
|
|
-- | Given CSV rules and a CSV record, generate the corresponding transaction's
-- Nth posting, if sufficient fields have been assigned for it.
-- N is provided as a string.
-- The names of the required fields are provided, allowing more flexibility.
-- The transaction which will contain this posting is also provided,
-- so we can build the usual transaction<->posting cyclic reference.
-- The returned Bool is True when the account name is "final" (explicitly
-- set) and must not be rewritten later by improveUnknownAccountName.
mkPosting ::
  CsvRules -> CsvRecord -> String ->
  HledgerFieldName -> HledgerFieldName -> HledgerFieldName ->
  HledgerFieldName -> HledgerFieldName -> HledgerFieldName ->
  Transaction ->
  Maybe (Posting, Bool)
mkPosting rules record number accountFld amountFld amountInFld amountOutFld balanceFld commentFld t =
  -- if we have figured out an account N, make a posting N
  case maccountAndIsFinal of
    Nothing -> Nothing
    Just (acct, final) ->
      Just (posting{paccount          = accountNameWithoutPostingType acct
                   ,pamount           = fromMaybe missingmixedamt mamount
                   ,ptransaction      = Just t
                   ,pbalanceassertion = mkBalanceAssertion rules record <$> mbalance
                   ,pcomment          = comment
                   ,ptype             = accountNamePostingType acct}
           ,final)
  where
    -- the account name to use for this posting, if any, and whether it is the
    -- default unknown account, which may be improved later, or an explicitly
    -- set account, which may not.
    maccountAndIsFinal :: Maybe (AccountName, Bool) =
      case maccount of
        -- accountN is set to the empty string - no posting will be generated
        Just "" -> Nothing
        -- accountN is set (possibly to "expenses:unknown"! #1192) - mark it final
        Just a -> Just (a, True)
        -- accountN is unset
        Nothing ->
          case (mamount, mbalance) of
            -- amountN is set, or implied by balanceN - set accountN to
            -- the default unknown account ("expenses:unknown") and
            -- allow it to be improved later
            (Just _, _) -> Just (unknownExpenseAccount, False)
            (_, Just _) -> Just (unknownExpenseAccount, False)
            -- amountN is also unset - no posting will be generated
            (Nothing, Nothing) -> Nothing
      where
        maccount = T.pack <$> (fieldval accountFld
                               -- XXX what's this needed for ? Test & document, or drop.
                               -- Also, this the only place we interpolate in a keyword rule, I think.
                               `withDefault` ruleval ("default-account" ++ number))
    -- XXX what's this needed for ? Test & document, or drop.
    mdefaultcurrency = rule "default-currency"
    -- currencyN beats currency beats default-currency beats ""
    currency = fromMaybe (fromMaybe "" mdefaultcurrency) $
               fieldval ("currency"++number) `withDefault` fieldval "currency"
    mamount = chooseAmount rules record currency amountFld amountInFld amountOutFld
    -- the balance assertion amount and (placeholder) position, if a
    -- balanceN field is assigned; whitespace-only values are ignored
    mbalance :: Maybe (Amount, GenericSourcePos) =
      fieldval balanceFld >>= parsebalance currency number
      where
        parsebalance currency n str
          | all isSpace str = Nothing
          | otherwise = Just
              (either (balanceerror n str) id $
               runParser (evalStateT (amountp <* eof) nulljournal) "" $
               T.pack $ (currency++) $ simplifySign str
              ,nullsourcepos) -- XXX parse position to show when assertion fails,
                              -- the csv record's line number would be good
          where
            balanceerror n str err = error' $ unlines
              ["error: could not parse \""++str++"\" as balance"++n++" amount"
              ,showRecord record
              ,showRules rules record
              ,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
              ,"the parse error is: "++customErrorBundlePretty err
              ]
    comment = T.pack $ fromMaybe "" $ fieldval commentFld
    rule = csvRule rules :: DirectiveName -> Maybe FieldTemplate
    ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
    -- field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
    fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe String
|
2020-02-27 11:27:51 +03:00
|
|
|
-- | Default account names to use when needed.
-- Used for postings whose account was not set explicitly; the expense
-- variant may later be swapped for the income variant by
-- improveUnknownAccountName, based on the amount's sign.
unknownExpenseAccount = "expenses:unknown"
unknownIncomeAccount = "income:unknown"
|
|
|
|
|
|
|
|
-- | If this posting has the "expenses:unknown" account name,
-- replace that with "income:unknown" if the amount is negative.
-- The posting's amount should be explicit.
improveUnknownAccountName p
  | isUnknownExpense && isNegative = p{paccount=unknownIncomeAccount}
  | otherwise                      = p
  where
    isUnknownExpense = paccount p == unknownExpenseAccount
    isNegative       = fromMaybe False (isNegativeMixedAmount (pamount p))
|
|
|
|
|
|
|
|
-- | Make a balance assertion for the given amount, with the given parse
-- position (to be shown in assertion failures), with the assertion type
-- possibly set by a balance-type rule.
-- The CSV rules and current record are also provided, to be shown in case
-- balance-type's argument is bad (XXX refactor).
mkBalanceAssertion :: CsvRules -> CsvRecord -> (Amount, GenericSourcePos) -> BalanceAssertion
mkBalanceAssertion rules record (amt, pos) = assrt{baamount=amt, baposition=pos}
  where
    -- choose the assertion flavour from the balance-type directive:
    -- = partial, == total, =* inclusive, ==* total+inclusive
    assrt =
      case getDirective "balance-type" rules of
        Nothing    -> nullassertion
        Just "="   -> nullassertion
        Just "=="  -> nullassertion{batotal=True}
        Just "=*"  -> nullassertion{bainclusive=True}
        Just "==*" -> nullassertion{batotal=True, bainclusive=True}
        Just x     -> error' $ unlines
          [ "balance-type \"" ++ x ++"\" is invalid. Use =, ==, =* or ==*."
          , showRecord record
          , showRules rules record
          ]
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2019-10-16 00:41:17 +03:00
|
|
|
-- Pick the posting amount from the amount / amount-in / amount-out
-- field assignments, negating amount-out; the currency prefix is
-- prepended before parsing. Exactly one of the three may produce a
-- (non-zero, non-empty) value; conflicting combinations are fatal errors.
chooseAmount :: CsvRules -> CsvRecord -> String -> String -> String -> String -> Maybe MixedAmount
chooseAmount rules record currency amountFld amountInFld amountOutFld =
  let
    mamount = getEffectiveAssignment rules record amountFld
    mamountin = getEffectiveAssignment rules record amountInFld
    mamountout = getEffectiveAssignment rules record amountOutFld
    -- render the template, then treat empty or zero results as "no amount"
    parse amt = notZero =<< (parseAmount currency <$> notEmpty =<< (strip . renderTemplate rules record) <$> amt)
  in
    case (parse mamount, parse mamountin, parse mamountout) of
      (Nothing, Nothing, Nothing) -> Nothing
      (Just a, Nothing, Nothing) -> Just a
      (Nothing, Just i, Nothing) -> Just i
      -- amount-out values are negated
      (Nothing, Nothing, Just o) -> Just $ negate o
      (Nothing, Just i, Just o) -> error' $ "both "++amountInFld++" and "++amountOutFld++" have a value\n"
          ++ "  "++amountInFld++": " ++ show i ++ "\n"
          ++ "  "++amountOutFld++": " ++ show o ++ "\n"
          ++ "  record: " ++ showRecord record
      _ -> error' $ "found values for "++amountFld++" and for "++amountInFld++"/"++amountOutFld++"\n"
          ++ "please use either "++amountFld++" or "++amountInFld++"/"++amountOutFld++"\n"
          ++ "  record: " ++ showRecord record
  where
    notZero amt = if isZeroMixedAmount amt then Nothing else Just amt
    notEmpty str = if str=="" then Nothing else Just str

    -- parse a Maybe amount string (the <$> chain maps over the Maybe)
    parseAmount currency amountstr =
      either (amounterror amountstr) (Mixed . (:[]))
      <$> runParser (evalStateT (amountp <* eof) nulljournal) ""
      <$> T.pack
      <$> (currency++)
      <$> simplifySign
      <$> amountstr

    -- fromJust is safe here: amounterror is only called when a parse
    -- was attempted, i.e. amountstr was Just
    amounterror amountstr err = error' $ unlines
      ["error: could not parse \""++fromJust amountstr++"\" as an amount"
      ,showRecord record
      ,showRules rules record
      ,"the default-currency is: "++fromMaybe "unspecified" (getDirective "default-currency" rules)
      ,"the parse error is: "++customErrorBundlePretty err
      ,"you may need to "
       ++"change your amount or currency rules, "
       ++"or add or change your skip rule"
      ]
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2017-05-14 12:17:56 +03:00
|
|
|
-- | An amount string as written in CSV data, which may use a minus sign
-- and/or negating parentheses to indicate sign.
type CsvAmountString = String

-- | Canonicalise the sign in a CSV amount string.
-- Such strings can have a minus sign, negating parentheses,
-- or any two of these (which cancels out).
--
-- >>> simplifySign "1"
-- "1"
-- >>> simplifySign "-1"
-- "-1"
-- >>> simplifySign "(1)"
-- "-1"
-- >>> simplifySign "--1"
-- "1"
-- >>> simplifySign "-(1)"
-- "1"
-- >>> simplifySign "(-1)"
-- "1"
-- >>> simplifySign "((1))"
-- "1"
|
2017-05-14 12:17:56 +03:00
|
|
|
simplifySign :: CsvAmountString -> CsvAmountString
simplifySign str =
  case str of
    -- wrapping parentheses negate; recurse in case more remain
    ('(':rest)     | lastMay rest == Just ')' -> simplifySign (negateStr (init rest))
    -- a minus sign cancels wrapping parentheses
    ('-':'(':rest) | lastMay rest == Just ')' -> simplifySign (init rest)
    -- a double minus cancels out
    ('-':'-':rest) -> rest
    _              -> str
|
2013-03-30 02:56:55 +04:00
|
|
|
|
|
|
|
-- | Toggle a leading minus sign: strip it if present, prepend it otherwise.
negateStr :: String -> String
negateStr str =
  case str of
    '-':rest -> rest
    _        -> '-' : str
|
|
|
|
|
|
|
|
-- | Show a (approximate) recreation of the original CSV record.
showRecord :: CsvRecord -> String
showRecord fields = "the CSV record is: " ++ intercalate "," (show <$> fields)
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2020-02-27 12:00:35 +03:00
|
|
|
-- | Given the conversion rules, a CSV record and a hledger field name, find
-- the value template ultimately assigned to this field, if any, by a field
-- assignment at top level or in a conditional block matching this record.
--
-- Note conditional blocks' patterns are matched against an approximation of the
-- CSV record: all the field values, without enclosing quotes, comma-separated.
--
getEffectiveAssignment :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe FieldTemplate
getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
  where
    -- all active assignments to field f, in order
    -- (the last one wins, via lastMay above)
    assignments = dbg2 "assignments" $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
      where
        -- all top level field assignments
        toplevelassignments = rassignments rules
        -- all field assignments in conditional blocks assigning to field f and active for the current csv record
        conditionalassignments = concatMap cbAssignments $ filter isBlockActive $ blocksAssigning f
          where
            -- all conditional blocks which can potentially assign field f
            blocksAssigning f = filter (any ((==f).fst) . cbAssignments) $ rconditionalblocks rules
            -- does this conditional block match the current csv record ?
            -- (a block is active if ANY of its matchers matches)
            isBlockActive :: ConditionalBlock -> Bool
            isBlockActive CB{..} = any matcherMatches cbMatchers
              where
                -- does this individual matcher match the current csv record ?
                matcherMatches :: Matcher -> Bool
                matcherMatches (RecordMatcher pat) = regexMatchesCI pat wholecsvline
                  where
                    -- a synthetic whole CSV record to match against; note, it has
                    -- no quotes enclosing fields, and is always comma-separated,
                    -- so may differ from the actual record, and may not be valid CSV.
                    wholecsvline = dbg3 "wholecsvline" $ intercalate "," record
                matcherMatches (FieldMatcher csvfieldref pat) = regexMatchesCI pat csvfieldvalue
                  where
                    -- the value of the referenced CSV field to match against.
                    csvfieldvalue = dbg3 "csvfieldvalue" $ replaceCsvFieldReference rules record csvfieldref
|
2013-03-30 02:56:55 +04:00
|
|
|
|
2019-06-15 02:43:12 +03:00
|
|
|
-- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
-- fixed: the character class was "%[A-z0-9_-]+"; the A-z range spans
-- ASCII 65-122 and so also matched the punctuation [ \ ] ^ _ ` between
-- 'Z' and 'a', unintentionally treating e.g. "%^x" as a field reference.
renderTemplate rules record t = regexReplaceBy "%[A-Za-z0-9_-]+" (replaceCsvFieldReference rules record) t
|
|
|
|
|
|
|
|
-- | Replace something that looks like a reference to a csv field ("%date" or "%1)
-- with that field's value. If it doesn't look like a field reference, or if we
-- can't find such a field, leave it unchanged.
replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> String
replaceCsvFieldReference rules record ref =
  case ref of
    '%':fieldname -> fromMaybe ref (csvFieldValue rules record fieldname)
    _             -> ref
|
|
|
|
|
|
|
|
-- | Get the (whitespace-stripped) value of a CSV field, identified by its name or
-- column number, ("date" or "1"), from the given CSV record, if such a field exists.
csvFieldValue :: CsvRules -> CsvRecord -> CsvFieldName -> Maybe String
csvFieldValue rules record fieldname = do
  -- resolve to a 1-based column index: numeric references are read
  -- directly, names are looked up case-insensitively in the rules
  fieldindex <- if | all isDigit fieldname -> readMay fieldname
                   | otherwise -> lookup (map toLower fieldname) $ rcsvfieldindexes rules
  -- fetch and strip the field's value if the record has that many fields
  -- (the redundant bind-then-return of the original is removed)
  strip <$> atMay record (fieldindex-1)
|
|
|
|
|
2020-02-27 23:09:39 +03:00
|
|
|
-- | Parse the date string using the specified date-format, or if unspecified
|
|
|
|
-- the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, leading
|
|
|
|
-- zeroes optional).
|
|
|
|
parseDateWithCustomOrDefaultFormats :: Maybe DateFormat -> String -> Maybe Day
|
2020-02-28 10:31:53 +03:00
|
|
|
parseDateWithCustomOrDefaultFormats mformat s = asum $ map parsewith formats
|
2012-03-11 01:55:48 +04:00
|
|
|
where
|
2015-03-30 02:12:54 +03:00
|
|
|
parsetime =
|
|
|
|
#if MIN_VERSION_time(1,5,0)
|
|
|
|
parseTimeM True
|
|
|
|
#else
|
|
|
|
parseTime
|
|
|
|
#endif
|
|
|
|
parsewith = flip (parsetime defaultTimeLocale) s
|
2013-03-30 02:56:55 +04:00
|
|
|
formats = maybe
|
|
|
|
["%Y/%-m/%-d"
|
|
|
|
,"%Y-%-m-%-d"
|
|
|
|
,"%Y.%-m.%-d"
|
|
|
|
-- ,"%-m/%-d/%Y"
|
|
|
|
-- ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
|
|
|
|
-- ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
|
|
|
|
-- ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
|
|
|
|
-- ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
|
|
|
|
]
|
|
|
|
(:[])
|
|
|
|
mformat
|
|
|
|
|
--- ** tests

-- | Unit tests for the CSV rules file parsers and related helpers.
tests_CsvReader = tests "CsvReader" [
   -- parsing a whole rules file
   tests "parseCsvRules" [
     test "empty file" $
      parseCsvRules "unknown" "" @?= Right defrules
   ]
   -- the top-level rules parser: directives, comments, blank lines, assignments
  ,tests "rulesp" [
     test "trailing comments" $
      parseWithState' defrules rulesp "skip\n# \n#\n" @?= Right defrules{rdirectives = [("skip","")]}

    ,test "trailing blank lines" $
      parseWithState' defrules rulesp "skip\n\n \n" @?= (Right defrules{rdirectives = [("skip","")]})

    ,test "no final newline" $
      parseWithState' defrules rulesp "skip" @?= (Right defrules{rdirectives=[("skip","")]})

    ,test "assignment with empty value" $
      parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
       (Right defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]})
   ]
   -- "if" blocks and their sub-parsers
  ,tests "conditionalblockp" [
     test "space after conditional" $ -- #1120
      parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
       (Right $ CB{cbMatchers=[RecordMatcher "a"],cbAssignments=[("account2","b")]})

    -- %N / %name / %"quoted name" field references
    ,tests "csvfieldreferencep" [
      test "number" $ parseWithState' defrules csvfieldreferencep "%1" @?= (Right "%1")
     ,test "name" $ parseWithState' defrules csvfieldreferencep "%date" @?= (Right "%date")
     ,test "quoted name" $ parseWithState' defrules csvfieldreferencep "%\"csv date\"" @?= (Right "%\"csv date\"")
     ]

    -- whole-record vs. single-field matchers
    ,tests "matcherp" [
       test "recordmatcherp" $
        parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher "A A")

      -- a matcher not beginning with % is a record matcher, even if it
      -- begins with a word that looks like a field name
      ,test "fieldmatcherp.starts-with-%" $
        parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher "description A A")

      ,test "fieldmatcherp" $
        parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher "%description" "A A")

      -- ,test "fieldmatcherp with operator" $
      --  parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A")

      ]

    -- which assignment wins for a given field, from top level or if blocks
    ,tests "getEffectiveAssignment" [
       let rules = defrules{rcsvfieldindexes=[("csvdate",1)],rassignments=[("date","%csvdate")]}

       in test "toplevel" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")

      ,let rules = defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher "%csvdate" "a"] [("date","%csvdate")]]}
       in test "conditional" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")

      ]

   ]
  ]