convert: find/create the rules file automatically, allow comment lines

This commit is contained in:
Simon Michael 2009-12-03 23:34:23 +00:00
parent 2607082e9e
commit 1c8a0aed08
3 changed files with 69 additions and 20 deletions

View File

@ -5,6 +5,7 @@ format, and print it on stdout. See the manual for more details.
module Commands.Convert where
import Options (Opt(Debug))
import Version (versionstr)
import Ledger.Types (Ledger,AccountName,LedgerTransaction(..),Posting(..),PostingType(..))
import Ledger.Utils (strip, spacenonewline, restofline)
import Ledger.Parse (someamount, emptyCtx, ledgeraccountname)
@ -19,27 +20,61 @@ import Locale (defaultTimeLocale)
import Data.Time.Format (parseTime)
import Control.Monad (when, guard)
import Safe (readDef, readMay)
import System.FilePath.Posix (takeBaseName)
import System.Posix.Files (fileExist)
import System.FilePath.Posix (takeBaseName, replaceExtension)
import Text.ParserCombinators.Parsec
convert :: [Opt] -> [String] -> Ledger -> IO ()
convert opts args _ = do
when (length args /= 2) (error "please specify a csv data file and conversion rules file.")
let debug = Debug `elem` opts
[csvfile,rulesfile] = args
when (null args) $ error "please specify a csv data file."
let csvfile = head args
csvparse <- parseCSVFromFile csvfile
let records = case csvparse of
Left e -> error $ show e
Right rs -> reverse $ filter (/= [""]) rs
let debug = Debug `elem` opts
rulesfile = rulesFileFor csvfile
exists <- fileExist rulesfile
if (not exists) then do
hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile
writeFile rulesfile initialRulesFileContent
else
hPrintf stderr "using conversion rules file %s\n" rulesfile
rulesstr <- readFile rulesfile
let rules = case parseCsvRules (takeBaseName csvfile) rulesstr of
let rules = case parseCsvRules rulesfile rulesstr of
Left e -> error $ show e
Right r -> r
when debug $ hPrintf stderr "using csv conversion rules file %s\n" rulesfile
when debug $ hPrintf stderr "%s\n" (show rules)
when debug $ hPrintf stderr "rules: %s\n" (show rules)
mapM_ (printTxn debug rules) records
rulesFileFor :: FilePath -> FilePath
rulesFileFor csvfile = replaceExtension csvfile ".rules"
initialRulesFileContent :: String
initialRulesFileContent =
"# csv conversion rules file generated by hledger "++versionstr++"\n" ++
"# Add rules to this file for more accurate conversion, see\n"++
"# http://hledger.org/MANUAL.html#convert\n" ++
"\n" ++
"base-account assets:bank:checking\n" ++
"date-field 0\n" ++
"description-field 4\n" ++
"amount-field 1\n" ++
"currency $\n" ++
"\n" ++
"# account-assigning rules\n" ++
"\n" ++
"SPECTRUM\n" ++
"expenses:health:gym\n" ++
"\n" ++
"ITUNES\n" ++
"BLKBSTR=BLOCKBUSTER\n" ++
"expenses:entertainment\n" ++
"\n" ++
"(TO|FROM) SAVINGS\n" ++
"assets:bank:savings\n"
{- |
A set of data definitions and account-matching patterns sufficient to
convert a particular CSV data file into meaningful ledger transactions. See above.
@ -79,16 +114,16 @@ type CsvRecord = [String]
-- rules file parser
parseCsvRules :: String -> String -> Either ParseError CsvRules
parseCsvRules basefilename s = runParser csvrulesP nullrules{baseAccount=basefilename} "" s
parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
csvrulesP :: GenParser Char CsvRules CsvRules
csvrulesP = do
optional blanklines
csvrulesfile :: GenParser Char CsvRules CsvRules
csvrulesfile = do
many blankorcommentline
many definitions
r <- getState
ars <- many accountrule
optional blanklines
many blankorcommentline
eof
return r{accountRules=ars}
@ -106,6 +141,7 @@ definitions = do
,currencyfield
,basecurrency
,baseaccount
,commentline
] <?> "definition"
return ()
@ -169,17 +205,26 @@ baseaccount = do
accountrule :: GenParser Char CsvRules AccountRule
accountrule = do
blanklines
many blankorcommentline
pats <- many1 matchreplacepattern
guard $ length pats >= 2
let pats' = init pats
acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
many commentline
return (pats',acct)
blanklines = many1 blankline >> return ()
blankline = many spacenonewline >> newline >> return () <?> "blank line"
commentchar = oneOf ";#"
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
blankorcommentline = choice' [blankline, commentline]
matchreplacepattern = do
notFollowedBy commentchar
matchpat <- many1 (noneOf "=\n")
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
newline

View File

@ -20,7 +20,7 @@ timeprogname = "hours"
usagehdr =
"Usage: hledger [OPTIONS] [COMMAND [PATTERNS]]\n" ++
" hours [OPTIONS] [COMMAND [PATTERNS]]\n" ++
" hledger convert CSVFILE RULESFILE\n" ++
" hledger convert CSVFILE\n" ++
"\n" ++
"hledger uses your ~/.ledger or $LEDGER file (or another specified with -f),\n" ++
"while hours uses your ~/.timelog or $TIMELOG file.\n" ++

16
README
View File

@ -165,13 +165,13 @@ error, so keep an eye on them!)
Use it like this::
hledger convert FILE.csv FILE.rules >FILE.ledger
hledger convert FILE.csv >FILE.ledger
This will convert the csv data in FILE.csv using the conversion hints in
FILE.rules, and save the output into a temporary ledger file. Then you
should review FILE.ledger for problems; update the rules and convert again
if needed; and finally copy/paste transactions which are new into your
main ledger.
This will convert the csv data in FILE.csv, using conversion rules defined
in FILE.rules, and save the output into a temporary ledger file. If
FILE.rules does not exist it will be created. Then you should review
FILE.ledger for problems; update the rules and convert again if needed;
and finally copy/paste transactions which are new into your main ledger.
rules file
''''''''''
@ -187,6 +187,8 @@ file for converting the csv download from a Wells Fargo checking account::
amount-field 1
currency $
# account-assigning rules
SPECTRUM
expenses:health:gym
@ -208,6 +210,8 @@ This says:
Notes:
- Lines beginning with # or ; are ignored (but avoid using inside an account rule)
- Definitions must come first, one per line, all in one paragraph. Each
is a name and a value separated by whitespace. Supported names are:
base-account, date-field, status-field, code-field, description-field,