hledger/hledger-lib/Hledger/Read/CsvReader.hs

1319 lines
54 KiB
Haskell
Raw Normal View History

--- * -*- outline-regexp:"--- \\*"; -*-
--- ** doc
-- In Emacs, use TAB on lines beginning with "--- *" to collapse/expand sections.
{-|
A reader for CSV data, using an extra rules file to help interpret the data.
-}
-- Lots of haddocks in this file are for non-exported types.
-- Here's a command that will render them:
-- stack haddock hledger-lib --fast --no-haddock-deps --haddock-arguments='--ignore-all-exports' --open
--- ** language
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-}
2020-02-12 06:04:03 +03:00
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE ViewPatterns #-}
--- ** exports
module Hledger.Read.CsvReader (
-- * Reader
reader,
2012-05-30 01:00:49 +04:00
-- * Misc.
CSV, CsvRecord, CsvValue,
csvFileFor,
2012-05-30 01:00:49 +04:00
rulesFileFor,
parseRulesFile,
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
printCSV,
-- * Tests
2018-09-06 23:08:26 +03:00
tests_CsvReader,
)
where
--- ** imports
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (fail)
2019-09-14 04:11:40 +03:00
import Control.Exception (IOException, handle, throw)
import Control.Monad (liftM, unless, when)
import Control.Monad.Except (ExceptT, throwError)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
2019-09-14 04:11:40 +03:00
import Control.Monad.Trans.Class (lift)
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
import Data.Bifunctor (first)
import "base-compat-batteries" Data.List.Compat
import qualified Data.List.Split as LS (splitOn)
import Data.Maybe
import Data.MemoUgly (memo)
import Data.Ord
import qualified Data.Set as S
lib: textification: parse stream 10% more allocation, but 35% lower maximum residency, and slightly quicker. hledger -f data/100x100x10.journal stats <<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>> <<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>> hledger -f data/1000x1000x10.journal stats <<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>> <<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>> hledger -f data/10000x1000x10.journal stats <<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>> <<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>> hledger -f data/100000x1000x10.journal stats <<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>> <<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
import Data.Text (Text)
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
import qualified Data.Text as T
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import Data.Time.Calendar (Day)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Safe
import System.Directory (doesFileExist)
import System.FilePath
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
import qualified Data.Csv as Cassava
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
import qualified Data.ByteString as B
2018-09-30 04:32:08 +03:00
import qualified Data.ByteString.Lazy as BL
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
import Data.Foldable
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char
2018-09-30 04:32:08 +03:00
import Text.Megaparsec.Custom
import Text.Printf (printf)
import Hledger.Data
import Hledger.Utils
import Hledger.Read.Common (Reader(..),InputOpts(..),amountp, statusp, genericSourcePos, journalFinalise)
--- ** doctest setup
-- $setup
-- >>> :set -XOverloadedStrings
--- ** some types
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
-- | A whole CSV document: a list of records.
type CSV = [CsvRecord]
-- | One CSV record: a list of field values.
type CsvRecord = [CsvValue]
-- | One CSV field value, held as a String.
type CsvValue = String
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
--- ** reader
csv: merge lucamolteni's cassava/custom separators (squashed) (#829) commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:54:12 2018 +0200 Fix CI 2 commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:30:08 2018 +0200 Fix CI build commit af0719a33b9b72ad244ae80198d881a1f7145e9d Author: Luca Molteni <volothamp@gmail.com> Date: Fri Sep 7 17:19:01 2018 +0200 Fix rebase commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:25:24 2018 +0200 Fixed some GHC warnings commit 1ac43398a359b5925ef71f53347698f1c6c510ef Author: Luca Molteni <volothamp@gmail.com> Date: Fri Aug 10 16:14:49 2018 +0200 Fix .cabal commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:56:20 2018 +0200 Removed to-do list commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 22:53:28 2018 +0200 Better test commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jul 1 15:32:28 2018 +0200 Fix parsing commit 4fc2374b2b81802990da30c96756aab54d77399c Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 22:11:11 2018 +0200 Parsing of separator commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:29:23 2018 +0200 Almost separator in options commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7 Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 14:16:59 2018 +0200 Separator in parseCSV commit 92a8b9f6ba77ea4237f769641e03029ac88542ea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 13:30:41 2018 +0200 separator option commit ec417a81ae625647cf35e61776cdf02bdb2c6aea Author: Luca Molteni <volothamp@gmail.com> Date: Thu Jun 21 10:45:26 2018 +0200 Removed one qualified import commit 
8b2f386c2f780adcd34cff3de7edceacc1d325a7 Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 14:01:12 2018 +0200 Removed string conversions commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f Author: Luca Molteni <volothamp@gmail.com> Date: Wed Jun 20 10:23:20 2018 +0200 custom delimiter in cassava commit 694d48e2bc1ada0037b90367c017f3082f68ed45 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:51:54 2018 +0200 Use Text.getContents - remove UTF-8 compatibility library commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:49:34 2018 +0200 todo list commit 58ec47d3987909f6bace50e3e647e30dadd5bf03 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 17:45:22 2018 +0200 CSV test now has unicode characters commit b7851e94c3f1683b63ec7250a12bcde3b7bed691 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 16:59:39 2018 +0200 Use decode from Text commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1 Author: Luca Molteni <volothamp@gmail.com> Date: Sun Jun 10 13:28:57 2018 +0200 Use Text and Lazy Bytestring commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:30:22 2018 +0200 Use megaparsec error commit f978848ba249ef4f67b855bea5d4e549290c205c Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:22:07 2018 +0200 Renamed qualify and remove Parsec commit 152587fde204c43a55798d212e43f37cd3038c2e Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 15:12:36 2018 +0200 Use cassava mega parsec commit cf281577a3d3a071196484a6fc8485f2ea1f7d67 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 14:01:47 2018 +0200 Removed Data.Vector commit 1272e8e758369d8cc5778029a705b277355a5029 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:16:18 2018 +0200 Removed Parsec ParseError commit ae07f043135a19307fd65b281ade37a74c76acb2 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 12:06:14 2018 +0200 Type sinonim 
for ParsecError commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:16:08 2018 +0200 Replaced with typeclasses commit 1ed46f9c175603611325f3d377004e4b85f29377 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 11:01:33 2018 +0200 Replaced Text/CSV with Cassava commit 362f4111b5854145703174b976fc7acbd71b8783 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 10:34:37 2018 +0200 Use cassava parsin instead of Text/CSV commit 83e678e371618687cf7c15a4e2cfa67f570b6b64 Author: Luca Molteni <volothamp@gmail.com> Date: Sat Jun 9 08:22:51 2018 +0200 Text CSV error messages commit f922df71d274beeacab9fb2530b16c97f005cc08 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:45:20 2018 +0200 Better types commit edd130781c84790a53bff2283e6041eb8232e7cf Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 21:34:59 2018 +0200 Conversion to Text CSV type commit 0799383214483018ad2d977a3c8022414959c2b2 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 16:06:21 2018 +0200 First function with cassava commit e92aeb151ff527b383ff3d0ced7764e81b71af82 Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:47:34 2018 +0200 Added cassava as dependency commit 5ea005c558a3939af7e5f0cd735a9b4da931228e Author: Luca Molteni <volothamp@gmail.com> Date: Fri Jun 8 13:18:47 2018 +0200 Better .gitignore for multi idea modules
2018-09-07 20:12:13 +03:00
-- | The reader for CSV data. It is registered under the format name "csv",
-- claims the csv, tsv and ssv file extensions, and reads via 'parse'.
reader :: MonadIO m => Reader m
reader =
  Reader
    { rFormat     = "csv"
    , rExtensions = ["csv", "tsv", "ssv"]
    , rReadFn     = parse
      -- No parser is provided for including CSV files; forcing this
      -- field raises the error below.
    , rParser     = error' "sorry, CSV files can't be included yet"  -- PARTIAL:
    }
-- | Read a "Journal" from CSV data and finalise it, or throw an error message.
-- The rules file (if any) is taken from the input options.
-- Balance assertions are not checked: finalisation always runs with
-- ignore_assertions_ switched on.
-- XXX (note carried over from the previous comment; not verifiable from this
-- function alone) the provided text may be ignored downstream in favour of
-- re-reading the file path.
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse iopts f t =
  liftIO (readJournalFromCsv (mrules_file_ iopts) f t) >>= either throwError finalise
  where
    -- journalFinalise assumes the journal's items are reversed, as produced
    -- by JournalReader's parser. Here they are already properly ordered,
    -- so they are reversed once more beforehand. XXX inefficient
    finalise pj =
      journalFinalise iopts{ignore_assertions_=True} f t (journalReverse pj)
--- ** reading rules files
--- *** rules utilities
-- Not used by hledger; just for lib users.
-- | Read this file, inline any files it includes (resolved relative to the
-- file's own directory), then parse the result as CSV conversion rules and
-- run the extra validation checks. A parse or validation failure is thrown
-- as an error message in the ExceptT layer.
parseRulesFile :: FilePath -> ExceptT String IO CsvRules
parseRulesFile f = do
  rulestext <- liftIO $ do
    raw <- readFilePortably f
    expandIncludes (takeDirectory f) raw
  case parseAndValidateCsvRules f rulestext of
    Left err    -> throwError err
    Right rules -> return rules
-- | The conventional rules file path for a CSV file path:
-- the same path with ".rules" appended.
rulesFileFor :: FilePath -> FilePath
rulesFileFor csvpath = csvpath ++ ".rules"
-- | Given a CSV rules file path, what would normally be the corresponding
-- CSV file ? This is the path with a trailing ".rules" removed.
-- A path that does not end in ".rules" is returned unchanged, rather than
-- having its last six characters chopped off.
csvFileFor :: FilePath -> FilePath
csvFileFor rulespath =
  -- Match the suffix on the reversed path, so only Prelude functions are needed.
  case reverse rulespath of
    's':'e':'l':'u':'r':'.':reversedstem -> reverse reversedstem
    _                                    -> rulespath
-- | The text of a starter rules file: a header comment naming a file,
-- followed by example rules (some of them commented out).
-- NOTE(review): the file name shown in the header is the argument's base name
-- passed through 'csvFileFor', which expects a rules file path - confirm
-- what callers actually pass as @csvfile@.
defaultRulesText :: FilePath -> Text
defaultRulesText csvfile = T.unlines $ map T.pack $ header : body
  where
    header :: String
    header = "# hledger csv conversion rules for " ++ csvFileFor (takeFileName csvfile)

    body :: [String]
    body =
      [ "# cf http://hledger.org/manual#csv-files"
      , ""
      , "account1 assets:bank:checking"
      , ""
      , "fields date, description, amount1"
      , ""
      , "#skip 1"
      , "#newest-first"
      , ""
      , "#date-format %-d/%-m/%Y"
      , "#date-format %-m/%-d/%Y"
      , "#date-format %Y-%h-%d"
      , ""
      , "#currency $"
      , ""
      , "if ITUNES"
      , " account2 expenses:entertainment"
      , ""
      , "if (TO|FROM) SAVINGS"
      , " account2 assets:bank:savings\n"
      ]
-- | Prepend a (directive name, value) pair to the parsed rules' directives.
addDirective :: (DirectiveName, String) -> CsvRulesParsed -> CsvRulesParsed
addDirective directive rules = rules{rdirectives = directive : rdirectives rules}
-- | Prepend a (hledger field name, value template) pair to the parsed rules'
-- top-level assignments.
addAssignment :: (HledgerFieldName, FieldTemplate) -> CsvRulesParsed -> CsvRulesParsed
addAssignment assignment rules = rules{rassignments = assignment : rassignments rules}
-- | Process a field name list: first record the CSV field indexes, then add
-- the assignments implied by the names.
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRulesParsed -> CsvRulesParsed
setIndexesAndAssignmentsFromList fs =
  addAssignmentsFromList fs . setCsvFieldIndexesFromList fs
-- | Replace the rules' CSV field index table, numbering the given names
-- from 1 in list order.
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRulesParsed -> CsvRulesParsed
setCsvFieldIndexesFromList fs rules = rules{rcsvfieldindexes = numbered}
  where
    numbered = zip fs (enumFrom 1)
-- | For every name in journalfieldnames that also occurs in the given field
-- name list, add an assignment of that field to "%N", where N is the 1-based
-- position of the name's first occurrence in the list.
addAssignmentsFromList :: [CsvFieldName] -> CsvRulesParsed -> CsvRulesParsed
addAssignmentsFromList fs rules0 = foldl' assignIfListed rules0 journalfieldnames
  where
    assignIfListed rules name =
      case elemIndex name fs of
        Nothing  -> rules
        -- elemIndex is 0-based; column references are 1-based.
        Just idx -> addAssignment (name, '%' : show (idx + 1)) rules
-- | Prepend one conditional block to the parsed rules' conditional blocks.
addConditionalBlock :: ConditionalBlock -> CsvRulesParsed -> CsvRulesParsed
addConditionalBlock block rules =
  rules{rconditionalblocks = block : rconditionalblocks rules}
-- | Put several conditional blocks, in the order given, in front of the
-- parsed rules' existing conditional blocks.
addConditionalBlocks :: [ConditionalBlock] -> CsvRulesParsed -> CsvRulesParsed
addConditionalBlocks blocks rules =
  rules{rconditionalblocks = blocks ++ rconditionalblocks rules}
-- | Look up the value of the first directive with this name, if any.
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
getDirective directivename rules = lookup directivename (rdirectives rules)
-- | Lets a plain String serve as a custom parse error component;
-- it is displayed as-is.
instance ShowErrorComponent String where
  showErrorComponent msg = msg
-- | Inline all files referenced by include directives in this hledger CSV
-- rules text, recursively. A line is an include directive when it starts
-- with "include "; the rest of the line, minus leading whitespace, is the
-- file path, taken relative to the given directory. Includes inside an
-- included file are resolved relative to that file's own directory.
-- This is done as a pre-parse step to simplify the CSV rules parser.
expandIncludes :: FilePath -> Text -> IO Text
expandIncludes dir content = T.unlines <$> mapM expandLine (T.lines content)
  where
    expandLine line
      | Just included <- T.stripPrefix "include " line = do
          let path = dir </> dropWhile isSpace (T.unpack included)
          T.readFile path >>= expandIncludes (takeDirectory path)
      | otherwise = return line
-- | Parse this text as CSV conversion rules and run some extra validation
-- checks, returning either the rules or a rendered error message.
-- The file path is used in error messages.
parseAndValidateCsvRules :: FilePath -> T.Text -> Either String CsvRules
parseAndValidateCsvRules rulesfile s = do
  rules <- first customErrorBundlePretty (parseCsvRules rulesfile s)
  first validationError (validateRules rules)
  where
    -- Render a validation message in the same style as a parse error.
    validationError :: String -> String
    validationError msg =
      parseErrorPretty (FancyError 0 (S.singleton $ ErrorFail msg) :: ParseError Text String)
-- | Run the rules parser over this text, starting from the empty rule set
-- 'defrules'. The file path is for error messages.
parseCsvRules :: FilePath -> T.Text -> Either (ParseErrorBundle T.Text CustomErr) CsvRules
-- parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
parseCsvRules rulesfile s = runParser parser rulesfile s
  where
    parser = evalStateT rulesp defrules
-- | Return the rules unchanged if they pass validation, or an error message.
-- The only check made is that the date field has an effective assignment
-- when evaluated against an empty record.
validateRules :: CsvRules -> Either String CsvRules
validateRules rules
  | dateAssigned = Right rules
  | otherwise    = Left "Please specify (at top level) the date field. Eg: date %1\n"
  where
    dateAssigned = isJust (getEffectiveAssignment rules [] "date")
--- *** rules types
-- | A set of data definitions and account-matching patterns sufficient to
-- convert a particular CSV data file into meaningful journal transactions.
-- The type parameter is the type of the 'rblocksassigning' field, which
-- differs between the parsing stage and the finished rules.
data CsvRules' a = CsvRules' {
  rdirectives        :: [(DirectiveName,String)],
    -- ^ top-level rules, as (keyword, value) pairs
  rcsvfieldindexes   :: [(CsvFieldName, CsvFieldIndex)],
    -- ^ csv field names and their column numbers, if declared by a fields list
  rassignments       :: [(HledgerFieldName, FieldTemplate)],
    -- ^ top-level assignments to hledger fields, as (field name, value template) pairs
  rconditionalblocks :: [ConditionalBlock],
    -- ^ conditional blocks, which contain additional assignments/rules to apply to matched csv records
  rblocksassigning :: a -- (String -> [ConditionalBlock])
    -- ^ all conditional blocks which can potentially assign the field with a given name (memoized)
}
-- | Type used by parsers. Directives, assignments and conditional blocks
-- are in the reverse of their order in the file, and rblocksassigning is
-- only a unit placeholder, so this cannot yet be used for processing CSV records.
type CsvRulesParsed = CsvRules' ()
-- | Type used after parsing is done. Directives, assignments and conditional blocks
-- are in the same order as they were in the input file, and rblocksassigning is functional.
-- Ready to be used for CSV record processing.
type CsvRules = CsvRules' (String -> [ConditionalBlock])
-- | Two rule sets are equal when their directives, CSV field indexes,
-- top-level assignments and conditional blocks are all equal.
-- rblocksassigning is a function, so it cannot be compared; it is left out.
-- Conditional blocks are included so that rule sets differing only in
-- their conditional blocks no longer compare equal.
instance Eq CsvRules where
  r1 == r2 = comparable r1 == comparable r2
    where
      comparable r =
        ( rdirectives r
        , rcsvfieldindexes r
        , rassignments r
        , rconditionalblocks r
        )
-- Used for debug output only. rblocksassigning is a function, so it is
-- not shown.
instance Show CsvRules where
  show r = concat
    [ "CsvRules { rdirectives=", show (rdirectives r)
    , ", rcsvfieldindexes=",     show (rcsvfieldindexes r)
    , ", rassignments=",         show (rassignments r)
    , ", rconditionalblocks=",   show (rconditionalblocks r)
    , " }"
    ]
-- | The rules parser type: a SimpleTextParser with the rules parsed so far
-- ('CsvRulesParsed') carried as state.
type CsvRulesParser a = StateT CsvRulesParsed SimpleTextParser a
-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
type DirectiveName    = String

-- | A CSV field name.
type CsvFieldName     = String

-- | A 1-based CSV column number.
type CsvFieldIndex    = Int

-- | A percent symbol followed by a CSV field name or column number. Eg: %date, %1.
type CsvFieldReference = String

-- | One of the standard hledger fields or pseudo-fields that can be assigned to.
-- Eg date, account1, amount, amount1-in, date-format.
type HledgerFieldName = String

-- | A text value to be assigned to a hledger field, possibly
-- containing csv field references to be interpolated.
type FieldTemplate    = String
-- | A strptime date parsing pattern, as supported by Data.Time.Format.
type DateFormat       = String
-- | A regular expression, held as its source text.
type RegexpPattern    = String
-- | A prefix for a matcher test, either & or none (implicit or).
-- 'And' is the & prefix; 'None' means the matcher was written without a prefix.
data MatcherPrefix = And | None
  deriving (Show, Eq)
2020-02-12 17:20:40 +03:00
-- | A single test for matching a CSV record, in one way or another.
-- Every matcher carries the prefix it was written with.
data Matcher =
    RecordMatcher MatcherPrefix RegexpPattern                   -- ^ match if this regexp matches the overall CSV record
  | FieldMatcher MatcherPrefix CsvFieldReference RegexpPattern  -- ^ match if this regexp matches the referenced CSV field's value
  deriving (Show, Eq)
2020-02-12 06:04:03 +03:00
2020-02-12 17:20:40 +03:00
-- | A conditional block: a set of CSV record matchers, and a sequence
-- of rules which will be enabled only if one or more of the matchers
-- succeeds.
--
-- Three types of rule are allowed inside conditional blocks: field
-- assignments, skip, end. (A skip or end rule is stored as if it was
-- a field assignment, and executed in validateCsv. XXX)
data ConditionalBlock = CB {
   cbMatchers    :: [Matcher]
     -- ^ the tests that decide whether this block applies to a record
  ,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
     -- ^ the (field name, value template) rules enabled when it does
  } deriving (Show, Eq)
-- | The empty rule set that rules parsing starts from: no directives, field
-- indexes, assignments or conditional blocks, with a unit placeholder in
-- the rblocksassigning slot.
defrules :: CsvRulesParsed
defrules =
  CsvRules'
    { rdirectives        = []
    , rcsvfieldindexes   = []
    , rassignments       = []
    , rconditionalblocks = []
    , rblocksassigning   = ()
    }
-- | Create CsvRules from the content parsed out of the rules file.
-- The directive, assignment and conditional block lists are reversed here,
-- and the rblocksassigning lookup is built. That lookup returns the
-- conditional blocks having at least one assignment to the given field
-- name. It is memoized only when there are 15 or more conditional blocks.
mkrules :: CsvRulesParsed -> CsvRules
mkrules parsed =
  CsvRules'
    { rdirectives        = reverse (rdirectives parsed)
    , rcsvfieldindexes   = rcsvfieldindexes parsed
    , rassignments       = reverse (rassignments parsed)
    , rconditionalblocks = blocks
    , rblocksassigning   = memoize blocksAssigning
    }
  where
    blocks = reverse (rconditionalblocks parsed)

    blocksAssigning fieldname =
      filter (any ((== fieldname) . fst) . cbAssignments) blocks

    memoize
      | length blocks >= 15 = memo
      | otherwise           = id
-- | The prefix a matcher was written with, whichever kind of matcher it is.
matcherPrefix :: Matcher -> MatcherPrefix
matcherPrefix matcher =
  case matcher of
    RecordMatcher prefix _  -> prefix
    FieldMatcher prefix _ _ -> prefix
-- | Split matchers into consecutive groups. Each group starts with one
-- matcher (whatever its prefix) and also takes every matcher directly
-- after it whose prefix is 'And'. E.g.:
--   A
--   & B
--   C
--   D
--   & E
--   => [[A, B], [C], [D, E]]
groupedMatchers :: [Matcher] -> [[Matcher]]
groupedMatchers matchers =
  case matchers of
    []            -> []
    leader : rest ->
      let (anded, remaining) = span ((== And) . matcherPrefix) rest
      in  (leader : anded) : groupedMatchers remaining
--- *** rules parsers
{-
Grammar for the CSV conversion rules, more or less:
RULES: RULE*
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
FIELD-NAME: QUOTED-FIELD-NAME | BARE-FIELD-NAME
QUOTED-FIELD-NAME: " (any CHAR except double-quote, newline, :, ;, #, ~)+ "
BARE-FIELD-NAME: (any CHAR except space, tab, newline, comma, ;, #, ~)+
FIELD-ASSIGNMENT: JOURNAL-FIELD ASSIGNMENT-SEPARATOR FIELD-VALUE
JOURNAL-FIELD: date | date2 | status | code | description | comment | account1 | account2 | amount | JOURNAL-PSEUDO-FIELD
JOURNAL-PSEUDO-FIELD: amount-in | amount-out | currency
ASSIGNMENT-SEPARATOR: SPACE | ( : SPACE? )
FIELD-VALUE: VALUE (possibly containing CSV-FIELD-REFERENCEs)
CSV-FIELD-REFERENCE: % CSV-FIELD
CSV-FIELD: ( FIELD-NAME | FIELD-NUMBER ) (corresponding to a CSV field)
FIELD-NUMBER: DIGIT+
CONDITIONAL-BLOCK: if ( FIELD-MATCHER NEWLINE )+ INDENTED-BLOCK
FIELD-MATCHER: ( CSV-FIELD-NAME SPACE? )? ( MATCHOP SPACE? )? PATTERNS
MATCHOP: ~
PATTERNS: ( NEWLINE REGEXP )* REGEXP
INDENTED-BLOCK: ( SPACE ( FIELD-ASSIGNMENT | COMMENT ) NEWLINE )+
REGEXP: ( NONSPACE CHAR* ) SPACE?
VALUE: SPACE? ( CHAR* ) SPACE?
COMMENT: SPACE? COMMENT-CHAR VALUE
COMMENT-CHAR: # | ; | *
NONSPACE: any CHAR not a SPACE-CHAR
BLANK: SPACE?
SPACE: SPACE-CHAR+
SPACE-CHAR: space | tab
CHAR: any character except newline
DIGIT: 0-9
-}
-- | Parse a whole rules file: any number of rules followed by end of input.
-- Each rule is accumulated into the parser state, and the final state is
-- converted to CsvRules with 'mkrules'.
rulesp :: CsvRulesParser CsvRules
rulesp = do
  _ <- many rulep
  eof
  mkrules <$> get
  where
    -- One rule of any kind. The alternatives are tried in this order.
    rulep = choice
      [ blankorcommentlinep                                         <?> "blank or comment line"
      , (directivep >>= modify' . addDirective)                     <?> "directive"
      , (fieldnamelistp >>= modify' . setIndexesAndAssignmentsFromList) <?> "field name list"
      , (fieldassignmentp >>= modify' . addAssignment)              <?> "field assignment"
        -- conditionalblockp backtracks because it shares "if" prefix with conditionaltablep.
      , try (conditionalblockp >>= modify' . addConditionalBlock)   <?> "conditional block"
        -- 'reverse' is there to ensure that conditions are added in the order they are listed in the file
      , (conditionaltablep >>= modify' . addConditionalBlocks . reverse) <?> "conditional table"
      ]
-- | Parse one blank line or one comment line, after emitting a
-- level 8 debug trace.
blankorcommentlinep :: CsvRulesParser ()
blankorcommentlinep = do
  lift (dbgparse 8 "trying blankorcommentlinep")
  choiceInState [blanklinep, commentlinep]
-- | Parse a blank line: optional non-newline whitespace, then a newline.
blanklinep :: CsvRulesParser ()
blanklinep =
  ( do
      lift skipNonNewlineSpaces
      _ <- newline
      return ()
  ) <?> "blank line"
-- | Parse a comment line: optional non-newline whitespace, a comment
-- character, then the rest of the line (which is discarded).
commentlinep :: CsvRulesParser ()
commentlinep =
  ( do
      lift skipNonNewlineSpaces
      _ <- commentcharp
      _ <- lift restofline
      return ()
  ) <?> "comment line"
-- | Parse one comment-starting character: a semicolon, hash or asterisk.
commentcharp :: CsvRulesParser Char
commentcharp = oneOf commentchars
  where
    commentchars :: [Char]
    commentchars = ";#*"
-- | Parse a directive line, returning (directive name, value).
-- The name must be one of 'directives'. A value may follow, separated by a
-- colon plus optional spaces, or by one or more spaces. If the name is
-- followed only by an optional colon and spaces, the value is "".
directivep :: CsvRulesParser (DirectiveName, String)
directivep =
  ( do
      lift $ dbgparse 8 "trying directive"
      name  <- T.unpack <$> choiceInState [lift (string (T.pack d)) | d <- directives]
      value <- withvalue <|> withoutvalue
      return (name, value)
  ) <?> "directive"
  where
    separatorp :: CsvRulesParser [Char]
    separatorp = (char ':' >> lift (many spacenonewline)) <|> lift (some spacenonewline)

    withvalue :: CsvRulesParser String
    withvalue = separatorp >> directivevalp

    withoutvalue :: CsvRulesParser String
    withoutvalue = optional (char ':') >> lift skipNonNewlineSpaces >> lift eolof >> return ""
2020-01-02 19:26:13 +03:00
-- | The directive keywords recognised by 'directivep'. They are tried in
-- this order.
directives :: [String]
directives =
  [ "date-format"
  , "separator"
  -- , "default-account"
  -- , "default-currency"
  , "skip"
  , "newest-first"
  , "balance-type"
  ]
-- | Parse a directive's value: every character up to the point where
-- eolof succeeds (presumably end of line or end of file - confirm against
-- eolof's definition).
directivevalp :: CsvRulesParser String
directivevalp = anySingle `manyTill` lift eolof
-- | Parse a field name list: the keyword "fields", an optional colon,
-- some whitespace, then comma-separated field names, of which any may be
-- empty. At least one comma is required. The rest of the line is consumed,
-- and the names are returned lowercased.
fieldnamelistp :: CsvRulesParser [CsvFieldName]
fieldnamelistp =
  ( do
      lift $ dbgparse 8 "trying fieldnamelist"
      _ <- string "fields"
      _ <- optional $ char ':'
      lift skipNonNewlineSpaces1
      firstname  <- optionalnamep
      othernames <- some (commap >> optionalnamep)
      _ <- lift restofline
      return [map toLower name | name <- firstname : othernames]
  ) <?> "field name list"
  where
    -- A comma with optional whitespace on either side.
    commap = lift skipNonNewlineSpaces >> char ',' >> lift skipNonNewlineSpaces
    -- A field name, or "" when none is present at this position.
    optionalnamep = fromMaybe "" <$> optional fieldnamep
-- | Parse a CSV field name, trying the double-quoted form first and then
-- the bare form.
fieldnamep :: CsvRulesParser String
fieldnamep = choice [quotedfieldnamep, barefieldnamep]
-- | Parse a double-quoted field name and return it without the quotes.
-- The name is one or more characters, none of which may be a double quote,
-- newline, colon, semicolon, hash or tilde.
quotedfieldnamep :: CsvRulesParser String
quotedfieldnamep = char '"' *> some (noneOf forbidden) <* char '"'
  where
    forbidden :: [Char]
    forbidden = "\"\n:;#~"
-- | Parse an unquoted field name: one or more characters other than
-- space, tab, newline, comma, semicolon, hash or tilde.
barefieldnamep :: CsvRulesParser String
barefieldnamep = some (noneOf forbidden)
  where
    forbidden = [' ', '\t', '\n', ',', ';', '#', '~']
-- | Parse a field assignment: a hledger field name, then either an
-- assignment separator and a value template, or nothing more on the line
-- (in which case the template is the empty string).
fieldassignmentp :: CsvRulesParser (HledgerFieldName, FieldTemplate)
fieldassignmentp = (do
    lift $ dbgparse 8 "trying fieldassignmentp"
    fieldname <- journalfieldnamep
    template  <- choiceInState
                   [ assignmentseparatorp >> fieldvalp
                   , lift eolof >> return ""
                   ]
    return (fieldname, template)
  ) <?> "field assignment"
-- | Parse one of the known hledger field names, trying the entries of
-- 'journalfieldnames' in list order.
journalfieldnamep :: CsvRulesParser String
journalfieldnamep =
  lift (dbgparse 8 "trying journalfieldnamep") >>
  fmap T.unpack (choiceInState [lift (string (T.pack name)) | name <- journalfieldnames])
-- | The highest posting number for which numbered field names
-- (accountN, amountN, ...) are generated and for which postings are built.
maxpostings :: Int
maxpostings = 99
-- | Transaction fields and pseudo fields for CSV conversion.
-- Names must precede any other name they contain, for the parser
-- (amount-in before amount; date2 before date). TODO: fix
-- The numbered names are generated from 'maxpostings' down to 1, so that
-- longer numbers are tried before their prefixes.
journalfieldnames :: [String]
journalfieldnames =
  concat [[ "account"  ++ i
          ,"amount"   ++ i ++ "-in"
          ,"amount"   ++ i ++ "-out"
          ,"amount"   ++ i
          ,"balance"  ++ i
          ,"comment"  ++ i
          ,"currency" ++ i
          ] | x <- [maxpostings, (maxpostings-1)..1], let i = show x]
  ++
  ["amount-in"
  ,"amount-out"
  ,"amount"
  ,"balance"
  ,"code"
  ,"comment"
  ,"currency"
  ,"date2"
  ,"date"
  ,"description"
  ,"status"
  ,"skip" -- skip and end are not really fields, but we list it here to allow conditional rules that skip records
  ,"end"
  ]
-- | Parse the separator between a field name and its value: either a colon
-- with optional spaces around it, or one or more spaces.
assignmentseparatorp :: CsvRulesParser ()
assignmentseparatorp = do
  lift $ dbgparse 8 "trying assignmentseparatorp"
  _ <- choiceInState [colonseparator, spaceseparator]
  return ()
  where
    colonseparator = lift skipNonNewlineSpaces >> char ':' >> lift skipNonNewlineSpaces
    spaceseparator = lift skipNonNewlineSpaces1
-- | Parse a field assignment's value template: every character up to
-- (and consuming) whatever 'eolof' matches.
fieldvalp :: CsvRulesParser String
fieldvalp = do
  lift $ dbgparse 8 "trying fieldvalp"
  anySingle `manyTill` lift eolof
-- | A conditional block: one or more matchers, one per line, followed by one
-- or more indented rules. Fails with a custom error, positioned at the start
-- of the block, when no assignment rules follow the matchers.
conditionalblockp :: CsvRulesParser ConditionalBlock
conditionalblockp = (do
    lift $ dbgparse 8 "trying conditionalblockp"
    -- "if\nMATCHER" or "if \nMATCHER" or "if MATCHER"
    start <- getOffset
    string "if" >> ( (newline >> return Nothing)
                    <|> (lift skipNonNewlineSpaces1 >> optional newline))
    ms <- some matcherp
    -- each following line must start with whitespace; lines with nothing
    -- after the whitespace yield Nothing and are dropped by catMaybes
    as <- catMaybes <$>
      many (lift skipNonNewlineSpaces1 >>
            choice [ lift eolof >> return Nothing
                   , fmap Just fieldassignmentp
                   ])
    when (null as) $
      customFailure $ parseErrorAt start $ "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
    return $ CB{cbMatchers=ms, cbAssignments=as}
  ) <?> "conditional block"
-- | A conditional table: "if" followed by a separator character, followed by
-- some field names (delimited by that separator), followed by many lines,
-- each of which has: one matcher, followed by field values (as many as there
-- were field names), all delimited by the same separator.
-- The separator is any single character that is neither alphanumeric nor
-- whitespace. Each body line becomes its own 'ConditionalBlock'.
conditionaltablep :: CsvRulesParser [ConditionalBlock]
conditionaltablep = (do
    lift $ dbgparse 8 "trying conditionaltablep"
    start <- getOffset
    string "if"
    sep <- lift $ satisfy (\c -> not (isAlphaNum c || isSpace c))
    fields <- journalfieldnamep `sepBy1` (char sep)
    newline
    body <- flip manyTill (lift eolof) $ do
      off <- getOffset
      m <- matcherp' (char sep >> return ())
      vs <- LS.splitOn [sep] <$> lift restofline
      let nfields = length fields
          nvalues = length vs
          -- say "only" just when values are missing, not when there are too many
          only | nvalues < nfields = "only " :: String
               | otherwise         = ""
      if nvalues /= nfields
        then customFailure $ parseErrorAt off
               (printf "line of conditional table should have %d values, but this one has %s%d\n" nfields only nvalues :: String)
        else return (m,vs)
    when (null body) $
      customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward\n"
    return $ flip map body $ \(m,vs) ->
      CB{cbMatchers=[m], cbAssignments=zip fields vs}
  ) <?> "conditional table"
-- | A single matcher, on one line, whose pattern is terminated by the given
-- end parser. A field matcher is tried first (with backtracking), then a
-- whole-record matcher.
matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher
matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end

-- | A single matcher whose pattern is terminated by 'eolof'.
matcherp :: CsvRulesParser Matcher
matcherp = matcherp' (lift eolof)
-- | A single whole-record matcher.
-- A pattern on the whole line, not beginning with a csv field reference.
-- It consists of an optional matcher prefix followed by a regular expression
-- terminated by the given end parser.
recordmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher
recordmatcherp end = (do
    lift $ dbgparse 8 "trying recordmatcherp"
    -- pos <- currentPos
    -- _ <- optional (matchoperatorp >> lift skipNonNewlineSpaces >> optional newline)
    p <- matcherprefixp
    r <- regexp end
    -- when (null ps) $
    --   Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
    return $ RecordMatcher p r
  ) <?> "record matcher"
-- | A single matcher for a specific field: an optional matcher prefix,
-- a csv field reference (like %date or %1), and a pattern on the rest of
-- the line, optionally space-separated. Eg:
-- %description chez jacques
-- The pattern is terminated by the given end parser.
fieldmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher
fieldmatcherp end = (do
    lift $ dbgparse 8 "trying fieldmatcher"
    prefix   <- matcherprefixp
    fieldref <- csvfieldreferencep
    lift skipNonNewlineSpaces
    -- an optional match operator would go here; for now there is only the
    -- implicit ~ (case insensitive infix regex)
    lift skipNonNewlineSpaces
    pat <- regexp end
    return $ FieldMatcher prefix fieldref pat
  ) <?> "field matcher"
-- | Parse an optional matcher prefix: '&' (followed by optional spaces)
-- gives 'And'; when there is no '&', the result is 'None' and nothing
-- is consumed.
matcherprefixp :: CsvRulesParser MatcherPrefix
matcherprefixp =
  lift (dbgparse 8 "trying matcherprefixp") >> (andprefix <|> return None)
  where
    andprefix = do
      _ <- char '&'
      lift skipNonNewlineSpaces
      return And
-- | Parse a csv field reference: '%' followed by a field name.
-- The result keeps the leading '%', with the name passed through 'quoteIfNeeded'.
csvfieldreferencep :: CsvRulesParser CsvFieldReference
csvfieldreferencep =
  lift (dbgparse 8 "trying csvfieldreferencep")
    >> char '%'
    >> (('%' :) . quoteIfNeeded <$> fieldnamep)
-- | A single regular expression: one character accepted by 'nonspace',
-- then any characters up to the given end parser (which is consumed).
-- The collected text is passed through 'strip' before being returned.
regexp :: CsvRulesParser () -> CsvRulesParser RegexpPattern
regexp end = do
  lift $ dbgparse 8 "trying regexp"
  -- notFollowedBy matchoperatorp
  c <- lift nonspace
  cs <- anySingle `manyTill` end
  return $ strip $ c:cs
-- -- A match operator, indicating the type of match to perform.
-- -- Currently just ~ meaning case insensitive infix regex match.
-- matchoperatorp :: CsvRulesParser String
-- matchoperatorp = fmap T.unpack $ choiceInState $ map string
-- ["~"
-- -- ,"!~"
-- -- ,"="
-- -- ,"!="
-- ]
--- ** reading csv files
-- | Read a Journal from the given CSV data (and filename, used for error
-- messages), or return an error. Proceed as follows:
--
-- 1. parse CSV conversion rules from the specified rules file, or from
--    the default rules file for the specified CSV file, if it exists,
--    or throw a parse error; if it doesn't exist, use built-in default rules
--
-- 2. parse the CSV data, or throw a parse error
--
-- 3. convert the CSV records to transactions using the rules
--
-- 4. if the rules file didn't exist, create it with the default rules and filename
--
-- 5. return the transactions as a Journal
--
-- Reading from stdin ("-") without an explicit rules file is rejected.
-- IOExceptions raised while the action runs are caught and returned as Left.
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
 handle (\(e::IOException) -> return $ Left $ show e) $ do

  -- make and throw an IO exception.. which we catch and convert to an Either above ?
  let throwerr = throw . userError

  -- parse the csv rules
  let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
  rulesfileexists <- doesFileExist rulesfile
  rulestext <-
    if rulesfileexists
    then do
      dbg6IO "using conversion rules file" rulesfile
      readFilePortably rulesfile >>= expandIncludes (takeDirectory rulesfile)
    else
      return $ defaultRulesText rulesfile
  rules <- either throwerr return $ parseAndValidateCsvRules rulesfile rulestext
  dbg6IO "rules" rules

  -- parse the skip directive's value, if any
  let skiplines = case getDirective "skip" rules of
                    Nothing -> 0
                    Just "" -> 1
                    Just s  -> readDef (throwerr $ "could not parse skip value: " ++ show s) s

  -- parse csv
  let
    -- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
    parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
    -- an explicit separator directive wins; otherwise guess from the file extension
    separator =
      case getDirective "separator" rules >>= parseSeparator of
        Just c           -> c
        _ | ext == "ssv" -> ';'
        _ | ext == "tsv" -> '\t'
        _                -> ','
      where
        ext = map toLower $ drop 1 $ takeExtension csvfile
  dbg6IO "using separator" separator
  records <- (either throwerr id .
              dbg7 "validateCsv" . validateCsv rules skiplines .
              dbg7 "parseCsv")
             `fmap` parseCsv separator parsecfilename csvdata
  dbg6IO "first 3 csv records" $ take 3 records

  -- identify header lines
  -- let (headerlines, datalines) = identifyHeaderLines records
  --     mfieldnames = lastMay headerlines

  let
    -- convert CSV records to transactions
    -- The source position is threaded through as the accumulator and advanced
    -- by one line per record, so the Nth record kept by validateCsv is given
    -- line N. (Skipped or dropped lines are not counted, so this is the
    -- record's index rather than its true line in the file.)
    txns = snd $ mapAccumL
                   (\pos r ->
                      let
                        SourcePos name line col = pos
                        line' = (mkPos . (+1) . unPos) line
                        pos' = SourcePos name line' col
                      in
                        (pos', transactionFromCsvRecord pos rules r)
                   )
                   (initialPos parsecfilename) records

    -- Ensure transactions are ordered chronologically.
    -- First, if the CSV records seem to be most-recent-first (because
    -- there's an explicit "newest-first" directive, or there's more
    -- than one date and the first date is more recent than the last):
    -- reverse them to get same-date transactions ordered chronologically.
    txns' =
      (if newestfirst || mdataseemsnewestfirst == Just True then reverse else id) txns
      where
        newestfirst = dbg6 "newestfirst" $ isJust $ getDirective "newest-first" rules
        mdataseemsnewestfirst = dbg6 "mdataseemsnewestfirst" $
          case nub $ map tdate txns of
            ds | length ds > 1 -> Just $ head ds > last ds
            _                  -> Nothing
    -- Second, sort by date.
    txns'' = sortBy (comparing tdate) txns'

  when (not rulesfileexists) $ do
    dbg1IO "creating conversion rules file" rulesfile
    writeFile rulesfile $ T.unpack rulestext

  return $ Right nulljournal{jtxns=txns''}
-- | Parse special separator names TAB and SPACE (case-insensitively), or
-- return the first character of the string unchanged.
-- Return Nothing on empty string.
parseSeparator :: String -> Maybe Char
parseSeparator s =
  -- Only the comparison against the special names is case-insensitive;
  -- a literal separator character keeps its original case.
  case (map toLower s, s) of
    ("space", _) -> Just ' '
    ("tab", _)   -> Just '\t'
    (_, c:_)     -> Just c
    (_, [])      -> Nothing
-- | Parse CSV text using the given separator character. When the file path
-- is "-", the supplied text is ignored and the data is read from standard
-- input instead (and reported as "(stdin)").
parseCsv :: Char -> FilePath -> Text -> IO (Either String CSV)
parseCsv separator filePath csvdata
  | filePath == "-" = parseCassava separator "(stdin)" <$> T.getContents
  | otherwise       = return (parseCassava separator filePath csvdata)
-- | Decode CSV text (UTF-8 encoded to a lazy bytestring first) with the given
-- separator and no header handling. The path is passed to the decoder along
-- with the data; a decoding failure is rendered with 'errorBundlePretty'.
parseCassava :: Char -> FilePath -> Text -> Either String CSV
parseCassava separator path content =
  case CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path bytes of
    Left err      -> Left (errorBundlePretty err)
    Right records -> Right (parseResultToCsv records)
  where
    bytes = BL.fromStrict (T.encodeUtf8 content)
-- | Cassava's default decode options, with the field delimiter replaced by
-- the given separator character.
-- NOTE(review): the character's code point is narrowed with fromIntegral;
-- presumably the delimiter is a single byte, so separators outside that
-- range would not survive intact - confirm against cassava's DecodeOptions.
decodeOptions :: Char -> Cassava.DecodeOptions
decodeOptions separator =
  Cassava.defaultDecodeOptions { Cassava.decDelimiter = delimiter }
  where
    delimiter = fromIntegral (ord separator)
-- | Convert a decoded container of records (each a container of UTF-8
-- encoded fields) into a list of lists of strings.
parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV
parseResultToCsv records = toList (fmap decodeRecord records)
  where
    -- decode every field of one record and collect them in a list
    decodeRecord fields = toList (fmap (T.unpack . T.decodeUtf8) fields)
-- | Render CSV records as text: every field is wrapped in double quotes
-- (with embedded double quotes doubled), fields are joined with commas and
-- records with newlines. There is no trailing newline.
printCSV :: CSV -> String
printCSV records =
  joinWith "\n" [joinWith "," [quote field | field <- record] | record <- records]
  where
    joinWith sep = concat . intersperse sep
    quote field = "\"" ++ concatMap escapeChar field ++ "\""
    escapeChar c
      | c == '"'  = "\"\""
      | otherwise = [c]
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
--
-- A Left input is passed through unchanged. Otherwise, in order:
-- records consisting of a single empty field are removed; the given number
-- of header lines is dropped; records are dropped as directed by any "skip"
-- or "end" assignment in effect for them; and finally an error is returned
-- if any remaining record has fewer than two fields.
validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord]
validateCsv _ _ (Left err) = Left err
validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs
  where
    filternulls = filter (/=[""])
    -- How many records (counting this one) to drop, if any.
    skipCount r =
      case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of
        (Nothing, Nothing) -> Nothing
        (Just _, _)        -> Just maxBound  -- "end": drop everything from here on
        (Nothing, Just "") -> Just 1
        -- report an unparseable count the same way as the skip directive does,
        -- instead of an anonymous "no parse" failure
        (Nothing, Just x)  -> Just (readDef (error' $ "could not parse skip value: " ++ show x) x)
    applyConditionalSkips [] = []
    applyConditionalSkips (r:rest) =
      case skipCount r of
        Nothing  -> r:(applyConditionalSkips rest)
        Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
    validate records =
      case filter ((<2).length) records of
        []      -> Right records
        (bad:_) -> Left $ printf "CSV record %s has less than two fields" (show bad)
-- -- | The highest (0-based) field index referenced in the field
-- -- definitions, or -1 if no fields are defined.
-- maxFieldIndex :: CsvRules -> Int
-- maxFieldIndex r = maximumDef (-1) $ catMaybes [
-- dateField r
-- ,statusField r
-- ,codeField r
-- ,amountField r
-- ,amountInField r
-- ,amountOutField r
-- ,currencyField r
-- ,accountField r
-- ,account2Field r
-- ,date2Field r
-- ]
--- ** converting csv records to transactions
-- | Describe, one per line, the assignment currently in effect for each
-- hledger field name (for this record). Fields with no assignment are omitted.
-- Used in error messages.
showRules rules record = unlines
  [ "the " ++ fld ++ " rule is: " ++ template
  | fld <- journalfieldnames
  , Just template <- [getEffectiveAssignment rules record fld]
  ]
-- | Look up the value (template) of a csv rule by rule keyword.
-- This is 'getDirective' with its arguments in the other order.
csvRule :: CsvRules -> DirectiveName -> Maybe FieldTemplate
csvRule rules directive = getDirective directive rules
-- | Look up the value template assigned to a hledger field by field
-- list/field assignment rules, taking into account the current record and
-- conditional rules. A synonym for 'getEffectiveAssignment'.
hledgerField :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe FieldTemplate
hledgerField rules record fieldname = getEffectiveAssignment rules record fieldname
-- | Look up the final value assigned to a hledger field, with csv field
-- references interpolated: the field's template (see 'hledgerField'),
-- if any, rendered against the current record.
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe String
hledgerFieldValue rules record fieldname =
  renderTemplate rules record <$> hledgerField rules record fieldname
-- | Build a transaction from one CSV record, using the conversion rules.
-- The given source position is recorded on the transaction.
-- PARTIAL: calls error' when the date, secondary date or status can not be
-- parsed (and the amount/balance helpers it calls can do likewise).
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
  where
    ----------------------------------------------------------------------
    -- 1. Define some helpers:

    rule     = csvRule           rules        :: DirectiveName    -> Maybe FieldTemplate
    -- ruleval  = csvRuleValue      rules record :: DirectiveName    -> Maybe String
    field    = hledgerField      rules record :: HledgerFieldName -> Maybe FieldTemplate
    fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe String
    parsedate = parseDateWithCustomOrDefaultFormats (rule "date-format")
    mkdateerror datefield datevalue mdateformat = unlines
      ["error: could not parse \""++datevalue++"\" as a date using date format "
        ++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat
      ,showRecord record
      ,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ field datefield)
      ,"the date-format is: "++fromMaybe "unspecified" mdateformat
      ,"you may need to "
        ++"change your "++datefield++" rule, "
        ++maybe "add a" (const "change your") mdateformat++" date-format rule, "
        ++"or "++maybe "add a" (const "change your") mskip++" skip rule"
      ,"for m/d/y or d/m/y dates, use date-format %-m/%-d/%Y or date-format %-d/%-m/%Y"
      ]
      where
        mskip = rule "skip"

    ----------------------------------------------------------------------
    -- 2. Gather values needed for the transaction itself, by evaluating the
    -- field assignment rules using the CSV record's data, and parsing a bit
    -- more where needed (dates, status).

    mdateformat = rule "date-format"
    date        = fromMaybe "" $ fieldval "date"
    -- PARTIAL:
    date'       = fromMaybe (error' $ mkdateerror "date" date mdateformat) $ parsedate date
    mdate2      = fieldval "date2"
    mdate2'     = maybe Nothing (maybe (error' $ mkdateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . parsedate) mdate2
    status      =
      case fieldval "status" of
        Nothing -> Unmarked
        Just s  -> either statuserror id $ runParser (statusp <* eof) "" $ T.pack s
          where
            statuserror err = error' $ unlines
              ["error: could not parse \""++s++"\" as a cleared status (should be *, ! or empty)"
              ,"the parse error is: "++customErrorBundlePretty err
              ]
    code        = maybe "" singleline $ fieldval "code"
    description = maybe "" singleline $ fieldval "description"
    comment     = maybe "" singleline $ fieldval "comment"
    precomment  = maybe "" singleline $ fieldval "precomment"

    ----------------------------------------------------------------------
    -- 3. Generate the postings for which an account has been assigned
    -- (possibly indirectly due to an amount or balance assignment)

    p1IsVirtual = (accountNamePostingType . T.pack <$> fieldval "account1") == Just VirtualPosting
    ps = [p | n <- [1..maxpostings]
         ,let comment  = T.pack $ fromMaybe "" $ fieldval ("comment"++show n)
         ,let currency = fromMaybe "" (fieldval ("currency"++show n) <|> fieldval "currency")
         ,let mamount  = getAmount rules record currency p1IsVirtual n
         ,let mbalance = getBalance rules record currency n
         ,Just (acct,isfinal) <- [getAccount rules record mamount mbalance n]  -- skips Nothings
         -- a defaulted "unknown expense" account becomes "unknown income"
         -- when the amount is negative
         ,let acct' | not isfinal && acct==unknownExpenseAccount &&
                        fromMaybe False (mamount >>= isNegativeMixedAmount) = unknownIncomeAccount
                    | otherwise = acct
         ,let p = nullposting{paccount          = accountNameWithoutPostingType acct'
                             ,pamount           = fromMaybe missingmixedamt mamount
                             ,ptransaction      = Just t
                             ,pbalanceassertion = mkBalanceAssertion rules record <$> mbalance
                             ,pcomment          = comment
                             ,ptype             = accountNamePostingType acct
                             }
         ]

    ----------------------------------------------------------------------
    -- 4. Build the transaction (and name it, so the postings can reference it).

    t = nulltransaction{
           tsourcepos        = genericSourcePos sourcepos  -- the CSV line number
          ,tdate             = date'
          ,tdate2            = mdate2'
          ,tstatus           = status
          ,tcode             = T.pack code
          ,tdescription      = T.pack description
          ,tcomment          = T.pack comment
          ,tprecedingcomment = T.pack precomment
          ,tpostings         = ps
          }
-- | Figure out the amount specified for posting N, if any.
-- A currency symbol to prepend to the amount, if any, is provided,
-- and whether posting 1 requires balancing or not.
-- This looks for a non-empty amount value assigned to "amountN", "amountN-in", or "amountN-out".
-- For postings 1 or 2 it also looks at "amount", "amount-in", "amount-out".
-- If more than one of these has a value, it looks for one that is non-zero.
-- If there's multiple non-zeros, or no non-zeros but multiple zeros, it throws an error.
getAmount :: CsvRules -> CsvRecord -> String -> Bool -> Int -> Maybe MixedAmount
getAmount rules record currency p1IsVirtual n =
  -- Warning, many tricky corner cases here.
  -- docs: hledger_csv.m4.md #### amount
  -- tests: tests/csv.test ~ 13, 31-34
  let
    unnumberedfieldnames = ["amount","amount-in","amount-out"]

    -- amount field names which can affect this posting
    fieldnames = map (("amount"++show n)++) ["","-in","-out"]
                 -- For posting 1, also recognise the old amount/amount-in/amount-out names.
                 -- For posting 2, the same but only if posting 1 needs balancing.
                 ++ if n==1 || n==2 && not p1IsVirtual then unnumberedfieldnames else []

    -- assignments to any of these field names with non-empty values
    assignments = [(f,a') | f <- fieldnames
                          , Just v@(_:_) <- [strip . renderTemplate rules record <$> hledgerField rules record f]
                          , let a = parseAmount rules record currency v
                          -- With amount/amount-in/amount-out, in posting 2,
                          -- flip the sign and convert to cost, as they did before 1.17
                          , let a' = if f `elem` unnumberedfieldnames && n==2 then mixedAmountCost (-a) else a
                          ]

    -- if any of the numbered field names are present, discard all the unnumbered ones
    assignments' | any isnumbered assignments = filter isnumbered assignments
                 | otherwise                  = assignments
      where
        isnumbered (f,_) = any (flip elem ['0'..'9']) f

    -- if there's more than one value and only some are zeros, discard the zeros
    assignments''
      | length assignments' > 1 && not (null nonzeros) = nonzeros
      | otherwise                                      = assignments'
      where nonzeros = filter (not . mixedAmountLooksZero . snd) assignments'

  in case -- dbg0 ("amounts for posting "++show n)
          assignments'' of
      [] -> Nothing
      [(f,a)] | "-out" `isSuffixOf` f -> Just (-a)  -- for -out fields, flip the sign
      [(_,a)] -> Just a
      fs      -> error' $ unlines $ [  -- PARTIAL:
         "multiple non-zero amounts or multiple zero amounts assigned,"
        ,"please ensure just one. (https://hledger.org/csv.html#amount)"
        ," " ++ showRecord record
        ," for posting: " ++ show n
        ]
        ++ [" assignment: " ++ f ++ " " ++
            fromMaybe "" (hledgerField rules record f) ++
            "\t=> value: " ++ showMixedAmount a -- XXX not sure this is showing all the right info
           | (f,a) <- fs]

  where
    -- | Given a non-empty amount string to parse, along with a possibly
    -- non-empty currency symbol to prepend, parse as a hledger amount (as
    -- in journal format), or raise an error.
    -- The CSV rules and record are provided for the error message.
    parseAmount :: CsvRules -> CsvRecord -> String -> String -> MixedAmount
    parseAmount rules record currency amountstr =
      either mkerror (Mixed . (:[])) $  -- PARTIAL:
      runParser (evalStateT (amountp <* eof) nulljournal) "" $
      T.pack $ (currency++) $ simplifySign amountstr
      where
        mkerror e = error' $ unlines
          ["error: could not parse \""++amountstr++"\" as an amount"
          ,showRecord record
          ,showRules rules record
          -- ,"the default-currency is: "++fromMaybe "unspecified" (getDirective "default-currency" rules)
          ,"the parse error is: "++customErrorBundlePretty e
          ,"you may need to "
           ++"change your amount*, balance*, or currency* rules, "
           ++"or add or change your skip rule"
          ]
-- | Figure out the expected balance (assertion or assignment) specified for posting N,
-- if any (and its parse position).
-- The value assigned to "balanceN" is used, or for posting 1 also the old
-- field name "balance". A value that is empty after stripping gives Nothing.
-- The currency symbol, if any, is prepended before parsing.
-- PARTIAL: calls error' if the value can not be parsed as an amount.
getBalance :: CsvRules -> CsvRecord -> String -> Int -> Maybe (Amount, GenericSourcePos)
getBalance rules record currency n =
  (fieldval ("balance"++show n)
    -- for posting 1, also recognise the old field name
    <|> if n==1 then fieldval "balance" else Nothing)
  >>= parsebalance currency n . strip
  where
    parsebalance currency n s
      | null s = Nothing
      | otherwise = Just
          (either (mkerror n s) id $
            runParser (evalStateT (amountp <* eof) nulljournal) "" $
            T.pack $ (currency++) $ simplifySign s
          ,nullsourcepos)  -- XXX parse position to show when assertion fails,
                           -- the csv record's line number would be good
      where
        mkerror n s e = error' $ unlines
          ["error: could not parse \""++s++"\" as balance"++show n++" amount"
          ,showRecord record
          ,showRules rules record
          -- ,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
          ,"the parse error is: "++customErrorBundlePretty e
          ]
    -- mdefaultcurrency = rule "default-currency"
    fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe String
-- | Make a balance assertion for the given amount, with the given parse
-- position (to be shown in assertion failures), with the assertion type
-- possibly set by a balance-type rule.
-- The CSV rules and current record are also provided, to be shown in case
-- balance-type's argument is bad (XXX refactor).
mkBalanceAssertion :: CsvRules -> CsvRecord -> (Amount, GenericSourcePos) -> BalanceAssertion
mkBalanceAssertion rules record (amt, pos) =
  (assertionFor (getDirective "balance-type" rules)) {baamount = amt, baposition = pos}
  where
    -- the assertion flavour selected by the balance-type directive, if any
    assertionFor Nothing      = nullassertion
    assertionFor (Just "=")   = nullassertion
    assertionFor (Just "==")  = nullassertion{batotal=True}
    assertionFor (Just "=*")  = nullassertion{bainclusive=True}
    assertionFor (Just "==*") = nullassertion{batotal=True, bainclusive=True}
    assertionFor (Just other) = error' $ unlines  -- PARTIAL:
      [ "balance-type \"" ++ other ++"\" is invalid. Use =, ==, =* or ==*."
      , showRecord record
      , showRules rules record
      ]
-- | Figure out the account name specified for posting N, if any.
-- And whether it is the default unknown account (which may be
-- improved later) or an explicitly set account (which may not).
getAccount :: CsvRules -> CsvRecord -> Maybe MixedAmount -> Maybe (Amount, GenericSourcePos) -> Int -> Maybe (AccountName, Bool)
getAccount rules record mamount mbalance n =
  case hledgerFieldValue rules record ("account" ++ show n) of
    -- accountN is set to the empty string - no posting will be generated
    Just ""   -> Nothing
    -- accountN is set (possibly to "expenses:unknown"! #1192) - mark it final
    Just name -> Just (T.pack name, True)
    -- accountN is unset
    Nothing
      -- amountN is set, or implied by balanceN - set accountN to
      -- the default unknown account ("expenses:unknown") and
      -- allow it to be improved later
      | isJust mamount || isJust mbalance -> Just (unknownExpenseAccount, False)
      -- amountN is also unset - no posting will be generated
      | otherwise                         -> Nothing
-- | Default account name to use for a posting when no account was assigned.
unknownExpenseAccount :: AccountName
unknownExpenseAccount = "expenses:unknown"

-- | Default account name which replaces 'unknownExpenseAccount' when the
-- posting's amount is negative.
unknownIncomeAccount :: AccountName
unknownIncomeAccount = "income:unknown"
2017-05-14 12:17:56 +03:00
-- | An amount as it appears in a CSV field, before parsing.
type CsvAmountString = String

-- | Canonicalise the sign in a CSV amount string.
-- Such strings can have a minus sign, negating parentheses, or any
-- combination of these; each pair of negations cancels out, so the result
-- carries at most a single leading minus sign.
-- A parenthesis only counts as negating when it encloses the rest of the
-- string (an opening one at the start matched by a closing one at the end).
--
-- >>> simplifySign "1"
-- "1"
-- >>> simplifySign "-1"
-- "-1"
-- >>> simplifySign "(1)"
-- "-1"
-- >>> simplifySign "--1"
-- "1"
-- >>> simplifySign "-(1)"
-- "1"
-- >>> simplifySign "(-1)"
-- "1"
-- >>> simplifySign "((1))"
-- "1"
-- >>> simplifySign "--(1)"
-- "-1"
-- >>> simplifySign "----1"
-- "1"
simplifySign :: CsvAmountString -> CsvAmountString
simplifySign amt = case amt of
  -- (x)  ==>  negated x
  '(':rest     | endsWithParen rest -> simplifySign $ negateStr $ init rest
  -- -(x) ==>  x
  '-':'(':rest | endsWithParen rest -> simplifySign $ init rest
  -- --x  ==>  x ; keep simplifying, since x may itself carry more signs
  '-':'-':rest                      -> simplifySign rest
  _                                 -> amt
  where
    endsWithParen s = not (null s) && last s == ')'

-- | Flip the sign of an amount string: drop a leading minus sign if there
-- is one, otherwise prepend one.
negateStr :: String -> String
negateStr ('-':s) = s
negateStr s       = '-':s
-- | Render a CSV record for display in error messages: a label followed by
-- each field value shown as a quoted string, comma-separated. This only
-- approximates the original CSV text.
showRecord :: CsvRecord -> String
showRecord fields = "record values: " ++ intercalate "," [show v | v <- fields]
-- | Find the value template that finally applies to the given hledger field
-- for the given CSV record, if any. Candidates are the top-level field
-- assignments followed by the assignments in conditional blocks matching
-- this record; the last candidate wins.
--
-- Note that record-level patterns in conditional blocks are matched against
-- an approximation of the CSV record: all the field values, without
-- enclosing quotes, joined with commas.
--
getEffectiveAssignment :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe FieldTemplate
getEffectiveAssignment rules record f = lastMay [template | (_, template) <- candidates]
  where
    -- every assignment to f that is active for this record, in order:
    -- top-level assignments first, then those from active conditional blocks
    candidates = dbg7 "assignments" $ filter ((==f).fst) $ rassignments rules ++ fromActiveBlocks

    -- the assignments of all conditional blocks which assign to f and
    -- match the current record
    fromActiveBlocks = concatMap cbAssignments $ filter blockMatches $ rblocksassigning rules f

    -- a block is active when, in at least one of its matcher groups,
    -- every matcher matches the current record
    blockMatches :: ConditionalBlock -> Bool
    blockMatches block = any (all matches) $ groupedMatchers $ cbMatchers block

    -- does a single matcher match the current record ?
    matches :: Matcher -> Bool
    matches m = case m of
      RecordMatcher _ pat ->
        let
          pat' = dbg7 "regex" pat
          -- A synthetic whole-record line to match against. It can differ
          -- from the original CSV data:
          -- - whitespace around field values is preserved
          -- - quotes enclosing field values are gone
          -- - the field separator is always a comma
          -- so a field containing a comma looks like two fields.
          wholeline = dbg7 "wholecsvline" $ intercalate "," record
        in regexMatchesCI pat' wholeline
      FieldMatcher _ csvfieldref pat ->
        let
          -- the value of the CSV field this matcher refers to
          fieldvalue = dbg7 "csvfieldvalue" $ replaceCsvFieldReference rules record csvfieldref
        in regexMatchesCI pat fieldvalue
2020-06-17 04:48:45 +03:00
-- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values.
--
-- A reference is a percent sign followed by one or more ASCII letters,
-- digits, underscores or hyphens. The character class spells out both
-- letter ranges: a single "A-z" range would also cover the punctuation
-- between 'Z' and 'a' (brackets, backslash, caret, backtick), so that in a
-- template like "[%date]" the closing bracket would be swallowed into the
-- reference and the field would never be interpolated.
-- References that do not name a known CSV field are left as they are.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
renderTemplate rules record t = regexReplaceBy "%[A-Za-z0-9_-]+" (replaceCsvFieldReference rules record) t
-- | Replace something that looks like a reference to a CSV field ("%date" or
-- "%1") with that field's value. Anything that does not start with a percent
-- sign, or that refers to a field we cannot find, is returned unchanged.
replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> String
replaceCsvFieldReference rules record ref =
  case ref of
    '%':name -> fromMaybe ref $ csvFieldValue rules record name
    _        -> ref
-- | Look up a CSV field in the given record, by name ("date") or by 1-based
-- column number ("1"), and return its value with surrounding whitespace
-- stripped. Gives Nothing when the name is unknown or the record has no
-- such column.
csvFieldValue :: CsvRules -> CsvRecord -> CsvFieldName -> Maybe String
csvFieldValue rules record fieldname =
  strip <$> (column >>= \i -> atMay record (i-1))
  where
    -- the 1-based column number: read directly from an all-digits name,
    -- otherwise found via a lower-cased lookup among the declared field names
    column
      | all isDigit fieldname = readMay fieldname
      | otherwise             = lookup (map toLower fieldname) $ rcsvfieldindexes rules
-- | Parse a date string with the given date-format if there is one,
-- otherwise try each of the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD,
-- YYYY.MM.DD, leading zeroes optional) in turn. The first successful parse
-- is returned; Nothing if no format fits.
parseDateWithCustomOrDefaultFormats :: Maybe DateFormat -> String -> Maybe Day
parseDateWithCustomOrDefaultFormats mformat s =
  asum [parseTimeM True defaultTimeLocale fmt s | fmt <- candidates]
  where
    candidates = case mformat of
      -- an explicit format replaces the defaults entirely
      Just fmt -> [fmt]
      Nothing  ->
        [ "%Y/%-m/%-d"
        , "%Y-%-m-%-d"
        , "%Y.%-m.%-d"
        ]
--- ** tests
2018-09-06 23:08:26 +03:00
-- Unit tests for the CSV rules parsers and for getEffectiveAssignment.
-- Each parser or function under test has its own group, and all groups are
-- direct children of "CsvReader".
tests_CsvReader = tests "CsvReader" [

   tests "parseCsvRules" [
     test "empty file" $
      parseCsvRules "unknown" "" @?= Right (mkrules defrules)
   ]

  ,tests "rulesp" [
     test "trailing comments" $
      parseWithState' defrules rulesp "skip\n# \n#\n" @?= Right (mkrules $ defrules{rdirectives = [("skip","")]})

    ,test "trailing blank lines" $
      parseWithState' defrules rulesp "skip\n\n \n" @?= (Right (mkrules $ defrules{rdirectives = [("skip","")]}))

    ,test "no final newline" $
      parseWithState' defrules rulesp "skip" @?= (Right (mkrules $ defrules{rdirectives=[("skip","")]}))

    ,test "assignment with empty value" $
      parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
        (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None "foo"],cbAssignments=[("account2","foo")]}]}))
   ]

  ,tests "conditionalblockp" [
     test "space after conditional" $ -- #1120
      parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
        (Right $ CB{cbMatchers=[RecordMatcher None "a"],cbAssignments=[("account2","b")]})
   ]

  ,tests "csvfieldreferencep" [
     test "number" $ parseWithState' defrules csvfieldreferencep "%1" @?= (Right "%1")
    ,test "name" $ parseWithState' defrules csvfieldreferencep "%date" @?= (Right "%date")
    ,test "quoted name" $ parseWithState' defrules csvfieldreferencep "%\"csv date\"" @?= (Right "%\"csv date\"")
   ]

  ,tests "matcherp" [

     test "recordmatcherp" $
      parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None "A A")

    ,test "recordmatcherp.starts-with-&" $
      parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And "A A")

     -- the input here has no leading %, so it is expected to parse as a
     -- record matcher, not a field matcher
    ,test "fieldmatcherp.starts-with-%" $
      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None "description A A")

    ,test "fieldmatcherp" $
      parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" "A A")

    ,test "fieldmatcherp.starts-with-&" $
      parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" "A A")

    -- ,test "fieldmatcherp with operator" $
    --   parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A")
   ]

  ,tests "getEffectiveAssignment" [
     -- an assignment at top level always applies
     let rules = mkrules $ defrules {rcsvfieldindexes=[("csvdate",1)],rassignments=[("date","%csvdate")]}
     in test "toplevel" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")

     -- an assignment in a conditional block applies when its matcher matches
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a"] [("date","%csvdate")]]}
     in test "conditional" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")

     -- two unprefixed matchers: the first one matching is enough
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher None "%description" "b"] [("date","%csvdate")]]}
     in test "conditional-with-or-a" $ getEffectiveAssignment rules ["a"] "date" @?= (Just "%csvdate")

     -- two unprefixed matchers: the second one matching is enough
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher None "%description" "b"] [("date","%csvdate")]]}
     in test "conditional-with-or-b" $ getEffectiveAssignment rules ["_", "b"] "date" @?= (Just "%csvdate")

     -- an And matcher combined with the matcher before it, both matching
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher And "%description" "b"] [("date","%csvdate")]]}
     in test "conditional.with-and" $ getEffectiveAssignment rules ["a", "b"] "date" @?= (Just "%csvdate")

     -- the and-ed pair does not match, but the following alternative does
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher And "%description" "b", FieldMatcher None "%description" "c"] [("date","%csvdate")]]}
     in test "conditional.with-and-or" $ getEffectiveAssignment rules ["_", "c"] "date" @?= (Just "%csvdate")
   ]

 ]