mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-28 21:02:04 +03:00
convert: new rules file format, more docs
This commit is contained in:
parent
43e327d05b
commit
2607082e9e
@ -1,98 +1,249 @@
|
||||
{-|
|
||||
|
||||
Convert account data in CSV format (eg downloaded from a bank) to ledger
|
||||
format, and print it on stdout. See the manual for more details.
|
||||
|
||||
-}
|
||||
|
||||
module Commands.Convert where
|
||||
import Data.List.Split (splitOn)
|
||||
import Options -- (Opt,Debug)
|
||||
import Ledger.Types (Ledger,AccountName)
|
||||
import Ledger.Utils (strip)
|
||||
import System.IO (stderr, hPutStr)
|
||||
import Options (Opt(Debug))
|
||||
import Ledger.Types (Ledger,AccountName,LedgerTransaction(..),Posting(..),PostingType(..))
|
||||
import Ledger.Utils (strip, spacenonewline, restofline)
|
||||
import Ledger.Parse (someamount, emptyCtx, ledgeraccountname)
|
||||
import Ledger.Amount (nullmixedamt)
|
||||
import System.IO (stderr)
|
||||
import Text.CSV (parseCSVFromFile, printCSV)
|
||||
import Text.Printf (printf)
|
||||
import Text.Printf (hPrintf)
|
||||
import Text.RegexPR (matchRegexPR)
|
||||
import Data.Maybe
|
||||
import Ledger.Dates (firstJust, showDate)
|
||||
import Ledger.Dates (firstJust, showDate, parsedate)
|
||||
import Locale (defaultTimeLocale)
|
||||
import Data.Time.Format (parseTime)
|
||||
import Control.Monad (when)
|
||||
import Safe (readMay, readDef)
|
||||
import Control.Monad (when, guard)
|
||||
import Safe (readDef, readMay)
|
||||
import System.FilePath.Posix (takeBaseName)
|
||||
import Text.ParserCombinators.Parsec
|
||||
|
||||
|
||||
convert :: [Opt] -> [String] -> Ledger -> IO ()
|
||||
convert opts args _ = do
|
||||
when (length args /= 3) (error "please specify a csv file, base account, and import rules file.")
|
||||
let [csvfile,baseacct,rulesfile] = args
|
||||
rulesstr <- readFile rulesfile
|
||||
(fieldpositions,rules) <- parseRules rulesstr
|
||||
parse <- parseCSVFromFile csvfile
|
||||
let records = case parse of
|
||||
when (length args /= 2) (error "please specify a csv data file and conversion rules file.")
|
||||
let debug = Debug `elem` opts
|
||||
[csvfile,rulesfile] = args
|
||||
csvparse <- parseCSVFromFile csvfile
|
||||
let records = case csvparse of
|
||||
Left e -> error $ show e
|
||||
Right rs -> reverse rs
|
||||
mapM_ (print_ledger_txn (Debug `elem` opts) (baseacct,fieldpositions,rules)) records
|
||||
Right rs -> reverse $ filter (/= [""]) rs
|
||||
rulesstr <- readFile rulesfile
|
||||
let rules = case parseCsvRules (takeBaseName csvfile) rulesstr of
|
||||
Left e -> error $ show e
|
||||
Right r -> r
|
||||
when debug $ hPrintf stderr "using csv conversion rules file %s\n" rulesfile
|
||||
when debug $ hPrintf stderr "%s\n" (show rules)
|
||||
mapM_ (printTxn debug rules) records
|
||||
|
||||
{- |
|
||||
A set of data definitions and account-matching patterns sufficient to
|
||||
convert a particular CSV data file into meaningful ledger transactions. See above.
|
||||
-}
|
||||
data CsvRules = CsvRules {
|
||||
dateField :: Maybe FieldPosition,
|
||||
statusField :: Maybe FieldPosition,
|
||||
codeField :: Maybe FieldPosition,
|
||||
descriptionField :: Maybe FieldPosition,
|
||||
amountField :: Maybe FieldPosition,
|
||||
currencyField :: Maybe FieldPosition,
|
||||
baseCurrency :: Maybe String,
|
||||
baseAccount :: AccountName,
|
||||
accountRules :: [AccountRule]
|
||||
} deriving (Show)
|
||||
|
||||
type Rule = (
|
||||
[(String, Maybe String)] -- list of patterns and optional replacements
|
||||
,AccountName -- account name to use for a matched transaction
|
||||
nullrules = CsvRules {
|
||||
dateField=Nothing,
|
||||
statusField=Nothing,
|
||||
codeField=Nothing,
|
||||
descriptionField=Nothing,
|
||||
amountField=Nothing,
|
||||
currencyField=Nothing,
|
||||
baseCurrency=Nothing,
|
||||
baseAccount="unknown",
|
||||
accountRules=[]
|
||||
}
|
||||
|
||||
type FieldPosition = Int
|
||||
|
||||
type AccountRule = (
|
||||
[(String, Maybe String)] -- list of regex match patterns with optional replacements
|
||||
,AccountName -- account name to use for a transaction matching this rule
|
||||
)
|
||||
|
||||
parseRules :: String -> IO ([Maybe Int],[Rule])
|
||||
parseRules s = do
|
||||
let ls = map strip $ lines s
|
||||
let paras = splitOn [""] ls
|
||||
let fieldpositions = map readMay $ splitOn "," $ head $ head paras
|
||||
let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras]
|
||||
return (fieldpositions,rules)
|
||||
type CsvRecord = [String]
|
||||
|
||||
parsePatRepl :: String -> (String, Maybe String)
|
||||
parsePatRepl l = case splitOn "=" l of
|
||||
(p:r:_) -> (p, Just r)
|
||||
_ -> (l, Nothing)
|
||||
-- rules file parser
|
||||
|
||||
print_ledger_txn :: Bool -> (String,[Maybe Int],[Rule]) -> [String] -> IO ()
|
||||
print_ledger_txn _ (_,[],_) _ = return ()
|
||||
print_ledger_txn _ (('#':_),_,_) _ = return ()
|
||||
print_ledger_txn debug (baseacct,fieldpositions,rules) csvrecord
|
||||
| length csvrecord < maximum (map (fromMaybe 0) fieldpositions) + 1 = return ()
|
||||
| otherwise =
|
||||
do
|
||||
when debug $ hPutStr stderr $ printCSV [csvrecord]
|
||||
let date = maybe "" (csvrecord !!) (fieldpositions !! 0)
|
||||
number = maybe "" (csvrecord !!) (fieldpositions !! 1)
|
||||
description = maybe "" (csvrecord !!) (fieldpositions !! 2)
|
||||
amount = maybe "" (csvrecord !!) (fieldpositions !! 3)
|
||||
amount' = strnegate amount where strnegate ('-':s) = s
|
||||
strnegate s = '-':s
|
||||
unknownacct | (readDef 0 amount' :: Double) < 0 = "income:unknown"
|
||||
parseCsvRules :: String -> String -> Either ParseError CsvRules
|
||||
parseCsvRules basefilename s = runParser csvrulesP nullrules{baseAccount=basefilename} "" s
|
||||
|
||||
csvrulesP :: GenParser Char CsvRules CsvRules
|
||||
csvrulesP = do
|
||||
optional blanklines
|
||||
many definitions
|
||||
r <- getState
|
||||
ars <- many accountrule
|
||||
optional blanklines
|
||||
eof
|
||||
return r{accountRules=ars}
|
||||
|
||||
-- | Real independent parser choice, even when alternative matches share a prefix.
|
||||
choice' parsers = choice $ map try (init parsers) ++ [last parsers]
|
||||
|
||||
definitions :: GenParser Char CsvRules ()
|
||||
definitions = do
|
||||
choice' [
|
||||
datefield
|
||||
,statusfield
|
||||
,codefield
|
||||
,descriptionfield
|
||||
,amountfield
|
||||
,currencyfield
|
||||
,basecurrency
|
||||
,baseaccount
|
||||
] <?> "definition"
|
||||
return ()
|
||||
|
||||
datefield = do
|
||||
string "date-field"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{dateField=readMay v}
|
||||
|
||||
codefield = do
|
||||
string "code-field"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{codeField=readMay v}
|
||||
|
||||
statusfield = do
|
||||
string "status-field"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{statusField=readMay v}
|
||||
|
||||
descriptionfield = do
|
||||
string "description-field"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{descriptionField=readMay v}
|
||||
|
||||
amountfield = do
|
||||
string "amount-field"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{amountField=readMay v}
|
||||
|
||||
currencyfield = do
|
||||
string "currency-field"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{currencyField=readMay v}
|
||||
|
||||
basecurrency = do
|
||||
string "currency"
|
||||
many1 spacenonewline
|
||||
v <- restofline
|
||||
r <- getState
|
||||
setState r{baseCurrency=Just v}
|
||||
|
||||
baseaccount = do
|
||||
string "base-account"
|
||||
many1 spacenonewline
|
||||
v <- ledgeraccountname
|
||||
optional newline
|
||||
r <- getState
|
||||
setState r{baseAccount=v}
|
||||
|
||||
accountrule :: GenParser Char CsvRules AccountRule
|
||||
accountrule = do
|
||||
blanklines
|
||||
pats <- many1 matchreplacepattern
|
||||
guard $ length pats >= 2
|
||||
let pats' = init pats
|
||||
acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
|
||||
return (pats',acct)
|
||||
|
||||
blanklines = many1 blankline >> return ()
|
||||
|
||||
blankline = many spacenonewline >> newline >> return () <?> "blank line"
|
||||
|
||||
matchreplacepattern = do
|
||||
matchpat <- many1 (noneOf "=\n")
|
||||
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
|
||||
newline
|
||||
return (matchpat,replpat)
|
||||
|
||||
printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
|
||||
printTxn debug rules rec = do
|
||||
when debug $ hPrintf stderr "csv: %s" (printCSV [rec])
|
||||
putStr $ show $ transactionFromCsvRecord rules rec
|
||||
|
||||
-- csv record conversion
|
||||
|
||||
transactionFromCsvRecord :: CsvRules -> CsvRecord -> LedgerTransaction
|
||||
transactionFromCsvRecord rules fields =
|
||||
let
|
||||
date = parsedate $ normaliseDate $ maybe "1900/1/1" (fields !!) (dateField rules)
|
||||
status = maybe False (null . strip . (fields !!)) (statusField rules)
|
||||
code = maybe "" (fields !!) (codeField rules)
|
||||
desc = maybe "" (fields !!) (descriptionField rules)
|
||||
comment = ""
|
||||
precomment = ""
|
||||
amountstr = maybe "" (fields !!) (amountField rules)
|
||||
amountstr' = strnegate amountstr where strnegate ('-':s) = s
|
||||
strnegate s = '-':s
|
||||
currency = maybe (fromMaybe "" $ baseCurrency rules) (fields !!) (currencyField rules)
|
||||
amountstr'' = currency ++ amountstr'
|
||||
amountparse = runParser someamount emptyCtx "" amountstr''
|
||||
amount = either (const nullmixedamt) id amountparse
|
||||
unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
|
||||
| otherwise = "expenses:unknown"
|
||||
(acct,desc) = choose_acct_desc rules (unknownacct,description)
|
||||
printf "%s%s %s\n" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc
|
||||
printf " %-30s %15s\n" acct (printf "$%s" amount' :: String)
|
||||
printf " %s\n\n" baseacct
|
||||
(acct,newdesc) = identify (accountRules rules) unknownacct desc
|
||||
in
|
||||
LedgerTransaction {
|
||||
ltdate=date,
|
||||
lteffectivedate=Nothing,
|
||||
ltstatus=status,
|
||||
ltcode=code,
|
||||
ltdescription=newdesc,
|
||||
ltcomment=comment,
|
||||
ltpreceding_comment_lines=precomment,
|
||||
ltpostings=[
|
||||
Posting {
|
||||
pstatus=False,
|
||||
paccount=acct,
|
||||
pamount=amount,
|
||||
pcomment="",
|
||||
ptype=RegularPosting
|
||||
},
|
||||
Posting {
|
||||
pstatus=False,
|
||||
paccount=baseAccount rules,
|
||||
pamount=(-amount),
|
||||
pcomment="",
|
||||
ptype=RegularPosting
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
choose_acct_desc :: [Rule] -> (String,String) -> (String,String)
|
||||
choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc)
|
||||
| otherwise = (a,d)
|
||||
where
|
||||
matchingrules = filter ismatch rules :: [Rule]
|
||||
where ismatch = any (isJust . flip matchregex desc . fst) . fst
|
||||
(prs,a) = head matchingrules
|
||||
mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs
|
||||
(m,repl) = head mrs
|
||||
matched = fst $ fst $ fromJust m
|
||||
d = fromMaybe matched repl
|
||||
|
||||
matchregex = matchRegexPR . ("(?i)" ++)
|
||||
|
||||
fixdate :: String -> String
|
||||
fixdate s = maybe "0000/00/00" showDate $
|
||||
-- | Convert some date string with unknown format to YYYY/MM/DD.
|
||||
normaliseDate :: String -> String
|
||||
normaliseDate s = maybe "0000/00/00" showDate $
|
||||
firstJust
|
||||
[parseTime defaultTimeLocale "%Y/%m/%e" s
|
||||
-- can't parse a month without leading 0, try adding onee
|
||||
-- can't parse a month without leading 0, try adding one
|
||||
,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
|
||||
,parseTime defaultTimeLocale "%Y-%m-%e" s
|
||||
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
|
||||
@ -102,3 +253,19 @@ fixdate s = maybe "0000/00/00" showDate $
|
||||
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
|
||||
]
|
||||
|
||||
-- | Apply account matching rules to a transaction description to obtain
|
||||
-- the most appropriate account and a new description.
|
||||
identify :: [AccountRule] -> String -> String -> (String,String)
|
||||
identify rules defacct desc | null matchingrules = (defacct,desc)
|
||||
| otherwise = (acct,newdesc)
|
||||
where
|
||||
matchingrules = filter ismatch rules :: [AccountRule]
|
||||
where ismatch = any (isJust . flip matchregex desc . fst) . fst
|
||||
(prs,acct) = head matchingrules
|
||||
mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs
|
||||
(m,repl) = head mrs
|
||||
matched = fst $ fst $ fromJust m
|
||||
newdesc = fromMaybe matched repl
|
||||
|
||||
matchregex = matchRegexPR . ("(?i)" ++)
|
||||
|
||||
|
@ -20,7 +20,7 @@ timeprogname = "hours"
|
||||
usagehdr =
|
||||
"Usage: hledger [OPTIONS] [COMMAND [PATTERNS]]\n" ++
|
||||
" hours [OPTIONS] [COMMAND [PATTERNS]]\n" ++
|
||||
" hledger convert CSVFILE ACCOUNTNAME RULESFILE\n" ++
|
||||
" hledger convert CSVFILE RULESFILE\n" ++
|
||||
"\n" ++
|
||||
"hledger uses your ~/.ledger or $LEDGER file (or another specified with -f),\n" ++
|
||||
"while hours uses your ~/.timelog or $TIMELOG file.\n" ++
|
||||
|
73
README
73
README
@ -157,26 +157,35 @@ Commands
|
||||
Convert
|
||||
,,,,,,,
|
||||
|
||||
The convert command converts CSV (comma-separated value) files downloaded
|
||||
from a bank into ledger format. Doing a bulk import in this way can be a
|
||||
easier than entering each transaction by hand. The downside is you no
|
||||
longer have your own data with which to detect errors by the bank.
|
||||
The convert command reads a CSV_ file you have downloaded from your bank,
|
||||
and prints out the transactions in ledger format, suitable for adding to
|
||||
your ledger. This can be a lot quicker than entering every transaction by
|
||||
hand. (The downside is that you are less likely to notice if your bank makes an
|
||||
error, so keep an eye on them!)
|
||||
|
||||
Usage is different from the other commands, typically it looks like this::
|
||||
Use it like this::
|
||||
|
||||
hledger convert FILE.csv BASEACCOUNT FILE.rules >FILE.ledger
|
||||
(review FILE.ledger, then copy and paste into main ledger)
|
||||
hledger convert FILE.csv FILE.rules >FILE.ledger
|
||||
|
||||
Ie convert the csv data and save the output into a similarly named ledger file.
|
||||
(It's not required, but convenient to use the same base filename for the
|
||||
csv, rules and output files as above.)
|
||||
This will convert the csv data in FILE.csv using the conversion hints in
|
||||
FILE.rules, and save the output into a temporary ledger file. Then you
|
||||
should review FILE.ledger for problems; update the rules and convert again
|
||||
if needed; and finally copy/paste transactions which are new into your
|
||||
main ledger.
|
||||
|
||||
BASEACCOUNT is the source account for these transactions, eg
|
||||
``assets:bank:checking``.
|
||||
rules file
|
||||
''''''''''
|
||||
|
||||
FILE.rules contains some rules to help convert the data. Here's an example::
|
||||
A .rules file contains some data definitions and some rules for assigning
|
||||
destination accounts to transactions. Typically you will have one csv file
|
||||
and corresponding rules file per bank account. Here's an example rules
|
||||
file for converting the csv download from a Wells Fargo checking account::
|
||||
|
||||
0,-,4,1
|
||||
base-account assets:bank:checking
|
||||
date-field 0
|
||||
description-field 4
|
||||
amount-field 1
|
||||
currency $
|
||||
|
||||
SPECTRUM
|
||||
expenses:health:gym
|
||||
@ -188,21 +197,33 @@ FILE.rules contains some rules to help convert the data. Here's an example::
|
||||
(TO|FROM) SAVINGS
|
||||
assets:bank:savings
|
||||
|
||||
It must contain:
|
||||
This says:
|
||||
|
||||
- paragraphs separated by one blank line.
|
||||
- the ledger account corresponding to this csv file is assets:bank:checking
|
||||
- the first csv field is the date, the second is the amount, the fifth is the description
|
||||
- prepend a dollar sign to the amount field
|
||||
- if description contains SPECTRUM (case-insensitive), the transaction is a gym expense
|
||||
- if description contains ITUNES or BLKBSTR, the transaction is an entertainment expense; also rewrite BLKBSTR as BLOCKBUSTER
|
||||
- if description contains TO SAVINGS or FROM SAVINGS, the transaction is a savings transfer
|
||||
|
||||
- The first paragraph is a single line of four comma-separated fields,
|
||||
which are numbers indicating the (0-based) csv field positions
|
||||
corresponding to the transaction date, code/number, description, and amount.
|
||||
If a field does not exist in the csv, use - to specify a default value.
|
||||
Notes:
|
||||
|
||||
- Other paragraphs consist of one or more regular expression patterns, one
|
||||
per line, followed by a line specifying the account to use when a
|
||||
transaction's description matches any of these patterns. Patterns may
|
||||
optionally have a replacement pattern specified after =, otherwise the
|
||||
matching part is used.
|
||||
- Definitions must come first, one per line, all in one paragraph. Each
|
||||
is a name and a value separated by whitespace. Supported names are:
|
||||
base-account, date-field, status-field, code-field, description-field,
|
||||
amount-field, currency-field, currency. All are optional and will
|
||||
use defaults if not specified.
|
||||
|
||||
- The remainder of the file is account-assigning rules. Each is a
|
||||
paragraph consisting of one or more description-matching patterns
|
||||
(case-insensitive regular expressions), one per line, followed by the
|
||||
account name to use when the transaction's description matches any of
|
||||
these patterns.
|
||||
|
||||
- A match pattern may optionally be followed by = and a replacement
|
||||
pattern, which will become the ledger transaction's description.
|
||||
Otherwise the matched part of the csv description is used. (To preserve
|
||||
the full csv description, use .* before and after the match pattern.)
|
||||
|
||||
Smart dates
|
||||
...........
|
||||
@ -377,3 +398,5 @@ Other differences
|
||||
.. _c++ ledger's manual: http://joyful.com/repos/ledger/doc/ledger.html
|
||||
.. _binaries: http://hledger.org/binaries/
|
||||
.. _Haskell Platform: http://hackage.haskell.org/platform/
|
||||
.. _CSV: http://en.wikipedia.org/wiki/Comma-separated_values
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user