mirror of
https://github.com/simonmichael/hledger.git
synced 2024-11-10 14:16:41 +03:00
d39040c634
When generating a new posting as a multiple of an existing posting, support conversion to a different commodity. For example, postings in hours can be used to generate postings in USD. Automatic transactions generated from rewrite rules use the commodity, amount style, and transaction price if the rewrite defines a commodity.
847 lines
32 KiB
Haskell
847 lines
32 KiB
Haskell
--- * doc
|
|
-- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users,
|
|
-- (add-hook 'haskell-mode-hook
|
|
-- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t))
|
|
-- 'orgstruct-mode)
|
|
-- and press TAB on nodes to expand/collapse.
|
|
|
|
{-|
|
|
|
|
Some common parsers and helpers used by several readers.
|
|
Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
|
|
|
|
-}
|
|
|
|
--- * module
|
|
{-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-}
|
|
|
|
module Hledger.Read.Common
|
|
where
|
|
--- * imports
|
|
import Prelude ()
|
|
import Prelude.Compat hiding (readFile)
|
|
import Control.Monad.Compat
|
|
import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError)
|
|
import Control.Monad.State.Strict
|
|
import Data.Char (isNumber)
|
|
import Data.Functor.Identity
|
|
import Data.List.Compat
|
|
import Data.List.NonEmpty (NonEmpty(..))
|
|
import Data.List.Split (wordsBy)
|
|
import Data.Maybe
|
|
import Data.Monoid
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
import Data.Time.Calendar
|
|
import Data.Time.LocalTime
|
|
import Safe
|
|
import System.Time (getClockTime)
|
|
import Text.Megaparsec hiding (parse,State)
|
|
import Text.Megaparsec.Text
|
|
|
|
import Hledger.Data
|
|
import Hledger.Utils
|
|
|
|
-- $setup
|
|
|
|
--- * parsing utils
|
|
|
|
-- | Run a string parser with no state in the identity monad.
|
|
runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char Dec) a
|
|
runTextParser p t = runParser p "" t
|
|
rtp = runTextParser
|
|
|
|
-- | Run a journal parser with a null journal-parsing state.
|
|
runJournalParser, rjp :: Monad m => TextParser m a -> Text -> m (Either (ParseError Char Dec) a)
|
|
runJournalParser p t = runParserT p "" t
|
|
rjp = runJournalParser
|
|
|
|
-- | Run an error-raising journal parser with a null journal-parsing state.
|
|
runErroringJournalParser, rejp :: Monad m => ErroringJournalParser m a -> Text -> m (Either String a)
|
|
runErroringJournalParser p t =
|
|
runExceptT $
|
|
runJournalParser (evalStateT p mempty)
|
|
t >>=
|
|
either (throwError . parseErrorPretty) return
|
|
rejp = runErroringJournalParser
|
|
|
|
genericSourcePos :: SourcePos -> GenericSourcePos
|
|
genericSourcePos p = GenericSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p) (fromIntegral . unPos $ sourceColumn p)
|
|
|
|
journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos
|
|
journalSourcePos p p' = JournalSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p, fromIntegral $ line')
|
|
where line'
|
|
| (unPos $ sourceColumn p') == 1 = unPos (sourceLine p') - 1
|
|
| otherwise = unPos $ sourceLine p' -- might be at end of file withat last new-line
|
|
|
|
|
|
-- | Given a megaparsec ParsedJournal parser, balance assertion flag, file
|
|
-- path and file content: parse and post-process a Journal, or give an error.
|
|
parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> Bool
|
|
-> FilePath -> Text -> ExceptT String IO Journal
|
|
parseAndFinaliseJournal parser assrt f txt = do
|
|
t <- liftIO getClockTime
|
|
y <- liftIO getCurrentYear
|
|
ep <- runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt
|
|
case ep of
|
|
Right pj -> case journalFinalise t f txt assrt pj of
|
|
Right j -> return j
|
|
Left e -> throwError e
|
|
Left e -> throwError $ parseErrorPretty e
|
|
|
|
parseAndFinaliseJournal' :: JournalParser ParsedJournal -> Bool -> FilePath -> Text -> ExceptT String IO Journal
|
|
parseAndFinaliseJournal' parser assrt f txt = do
|
|
t <- liftIO getClockTime
|
|
y <- liftIO getCurrentYear
|
|
let ep = runParser (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt
|
|
case ep of
|
|
Right pj -> case journalFinalise t f txt assrt pj of
|
|
Right j -> return j
|
|
Left e -> throwError e
|
|
Left e -> throwError $ parseErrorPretty e
|
|
|
|
setYear :: Year -> JournalStateParser m ()
|
|
setYear y = modify' (\j -> j{jparsedefaultyear=Just y})
|
|
|
|
getYear :: JournalStateParser m (Maybe Year)
|
|
getYear = fmap jparsedefaultyear get
|
|
|
|
setDefaultCommodityAndStyle :: (CommoditySymbol,AmountStyle) -> JournalStateParser m ()
|
|
setDefaultCommodityAndStyle cs = modify' (\j -> j{jparsedefaultcommodity=Just cs})
|
|
|
|
getDefaultCommodityAndStyle :: JournalStateParser m (Maybe (CommoditySymbol,AmountStyle))
|
|
getDefaultCommodityAndStyle = jparsedefaultcommodity `fmap` get
|
|
|
|
pushAccount :: AccountName -> JournalStateParser m ()
|
|
pushAccount acct = modify' (\j -> j{jaccounts = acct : jaccounts j})
|
|
|
|
pushParentAccount :: AccountName -> JournalStateParser m ()
|
|
pushParentAccount acct = modify' (\j -> j{jparseparentaccounts = acct : jparseparentaccounts j})
|
|
|
|
popParentAccount :: JournalStateParser m ()
|
|
popParentAccount = do
|
|
j <- get
|
|
case jparseparentaccounts j of
|
|
[] -> unexpected (Tokens ('E' :| "nd of apply account block with no beginning"))
|
|
(_:rest) -> put j{jparseparentaccounts=rest}
|
|
|
|
getParentAccount :: JournalStateParser m AccountName
|
|
getParentAccount = fmap (concatAccountNames . reverse . jparseparentaccounts) get
|
|
|
|
addAccountAlias :: MonadState Journal m => AccountAlias -> m ()
|
|
addAccountAlias a = modify' (\(j@Journal{..}) -> j{jparsealiases=a:jparsealiases})
|
|
|
|
getAccountAliases :: MonadState Journal m => m [AccountAlias]
|
|
getAccountAliases = fmap jparsealiases get
|
|
|
|
clearAccountAliases :: MonadState Journal m => m ()
|
|
clearAccountAliases = modify' (\(j@Journal{..}) -> j{jparsealiases=[]})
|
|
|
|
-- getTransactionCount :: MonadState Journal m => m Integer
|
|
-- getTransactionCount = fmap jparsetransactioncount get
|
|
--
|
|
-- setTransactionCount :: MonadState Journal m => Integer -> m ()
|
|
-- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i})
|
|
--
|
|
-- -- | Increment the transaction index by one and return the new value.
|
|
-- incrementTransactionCount :: MonadState Journal m => m Integer
|
|
-- incrementTransactionCount = do
|
|
-- modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1})
|
|
-- getTransactionCount
|
|
|
|
journalAddFile :: (FilePath,Text) -> Journal -> Journal
|
|
journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]}
|
|
-- append, unlike the other fields, even though we do a final reverse,
|
|
-- to compensate for additional reversal due to including/monoid-concatting
|
|
|
|
-- -- | Terminate parsing entirely, returning the given error message
|
|
-- -- with the current parse position prepended.
|
|
-- parserError :: String -> ErroringJournalParser a
|
|
-- parserError s = do
|
|
-- pos <- getPosition
|
|
-- parserErrorAt pos s
|
|
|
|
-- | Terminate parsing entirely, returning the given error message
|
|
-- with the given parse position prepended.
|
|
parserErrorAt :: Monad m => SourcePos -> String -> ErroringJournalParser m a
|
|
parserErrorAt pos s = throwError $ sourcePosPretty pos ++ ":\n" ++ s
|
|
|
|
--- * parsers
|
|
--- ** transaction bits
|
|
|
|
statusp :: TextParser m ClearedStatus
|
|
statusp =
|
|
choice'
|
|
[ many spacenonewline >> char '*' >> return Cleared
|
|
, many spacenonewline >> char '!' >> return Pending
|
|
, return Uncleared
|
|
]
|
|
<?> "cleared status"
|
|
|
|
codep :: TextParser m String
|
|
codep = try (do { some spacenonewline; char '(' <?> "codep"; anyChar `manyTill` char ')' } ) <|> return ""
|
|
|
|
descriptionp :: JournalStateParser m String
|
|
descriptionp = many (noneOf (";\n" :: [Char]))
|
|
|
|
--- ** dates
|
|
|
|
-- | Parse a date in YYYY/MM/DD format.
|
|
-- Hyphen (-) and period (.) are also allowed as separators.
|
|
-- The year may be omitted if a default year has been set.
|
|
-- Leading zeroes may be omitted.
|
|
datep :: JournalStateParser m Day
|
|
datep = do
|
|
-- hacky: try to ensure precise errors for invalid dates
|
|
-- XXX reported error position is not too good
|
|
-- pos <- genericSourcePos <$> getPosition
|
|
datestr <- do
|
|
c <- digitChar
|
|
cs <- lift $ many $ choice' [digitChar, datesepchar]
|
|
return $ c:cs
|
|
let sepchars = nub $ sort $ filter (`elem` datesepchars) datestr
|
|
when (length sepchars /= 1) $ fail $ "bad date, different separators used: " ++ datestr
|
|
let dateparts = wordsBy (`elem` datesepchars) datestr
|
|
currentyear <- getYear
|
|
[y,m,d] <- case (dateparts,currentyear) of
|
|
([m,d],Just y) -> return [show y,m,d]
|
|
([_,_],Nothing) -> fail $ "partial date "++datestr++" found, but the current year is unknown"
|
|
([y,m,d],_) -> return [y,m,d]
|
|
_ -> fail $ "bad date: " ++ datestr
|
|
let maybedate = fromGregorianValid (read y) (read m) (read d)
|
|
case maybedate of
|
|
Nothing -> fail $ "bad date: " ++ datestr
|
|
Just date -> return date
|
|
<?> "full or partial date"
|
|
|
|
-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
|
|
-- Hyphen (-) and period (.) are also allowed as date separators.
|
|
-- The year may be omitted if a default year has been set.
|
|
-- Seconds are optional.
|
|
-- The timezone is optional and ignored (the time is always interpreted as a local time).
|
|
-- Leading zeroes may be omitted (except in a timezone).
|
|
datetimep :: JournalStateParser m LocalTime
|
|
datetimep = do
|
|
day <- datep
|
|
lift $ some spacenonewline
|
|
h <- some digitChar
|
|
let h' = read h
|
|
guard $ h' >= 0 && h' <= 23
|
|
char ':'
|
|
m <- some digitChar
|
|
let m' = read m
|
|
guard $ m' >= 0 && m' <= 59
|
|
s <- optional $ char ':' >> some digitChar
|
|
let s' = case s of Just sstr -> read sstr
|
|
Nothing -> 0
|
|
guard $ s' >= 0 && s' <= 59
|
|
{- tz <- -}
|
|
optional $ do
|
|
plusminus <- oneOf ("-+" :: [Char])
|
|
d1 <- digitChar
|
|
d2 <- digitChar
|
|
d3 <- digitChar
|
|
d4 <- digitChar
|
|
return $ plusminus:d1:d2:d3:d4:""
|
|
-- ltz <- liftIO $ getCurrentTimeZone
|
|
-- let tz' = maybe ltz (fromMaybe ltz . parseTime defaultTimeLocale "%z") tz
|
|
-- return $ localTimeToUTC tz' $ LocalTime day $ TimeOfDay h' m' (fromIntegral s')
|
|
return $ LocalTime day $ TimeOfDay h' m' (fromIntegral s')
|
|
|
|
secondarydatep :: Day -> JournalStateParser m Day
|
|
secondarydatep primarydate = do
|
|
char '='
|
|
-- kludgy way to use primary date for default year
|
|
let withDefaultYear d p = do
|
|
y <- getYear
|
|
let (y',_,_) = toGregorian d in setYear y'
|
|
r <- p
|
|
when (isJust y) $ setYear $ fromJust y -- XXX
|
|
-- mapM setYear <$> y
|
|
return r
|
|
withDefaultYear primarydate datep
|
|
|
|
-- |
|
|
-- >> parsewith twoorthreepartdatestringp "2016/01/2"
|
|
-- Right "2016/01/2"
|
|
-- twoorthreepartdatestringp = do
|
|
-- n1 <- some digitChar
|
|
-- c <- datesepchar
|
|
-- n2 <- some digitChar
|
|
-- mn3 <- optional $ char c >> some digitChar
|
|
-- return $ n1 ++ c:n2 ++ maybe "" (c:) mn3
|
|
|
|
--- ** account names
|
|
|
|
-- | Parse an account name, then apply any parent account prefix and/or account aliases currently in effect.
|
|
modifiedaccountnamep :: JournalStateParser m AccountName
|
|
modifiedaccountnamep = do
|
|
parent <- getParentAccount
|
|
aliases <- getAccountAliases
|
|
a <- lift accountnamep
|
|
return $
|
|
accountNameApplyAliases aliases $
|
|
-- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference
|
|
joinAccountNames parent
|
|
a
|
|
|
|
-- | Parse an account name. Account names start with a non-space, may
|
|
-- have single spaces inside them, and are terminated by two or more
|
|
-- spaces (or end of input). Also they have one or more components of
|
|
-- at least one character, separated by the account separator char.
|
|
-- (This parser will also consume one following space, if present.)
|
|
accountnamep :: TextParser m AccountName
|
|
accountnamep = do
|
|
astr <- do
|
|
c <- nonspace
|
|
cs <- striptrailingspace <$> many (nonspace <|> singlespace)
|
|
return $ c:cs
|
|
let a = T.pack astr
|
|
when (accountNameFromComponents (accountNameComponents a) /= a)
|
|
(fail $ "account name seems ill-formed: "++astr)
|
|
return a
|
|
where
|
|
singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}})
|
|
striptrailingspace "" = ""
|
|
striptrailingspace s = if last s == ' ' then init s else s
|
|
|
|
-- accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace
|
|
-- <?> "account name character (non-bracket, non-parenthesis, non-whitespace)"
|
|
|
|
--- ** amounts
|
|
|
|
-- | Parse whitespace then an amount, with an optional left or right
|
|
-- currency symbol and optional price, or return the special
|
|
-- "missing" marker amount.
|
|
spaceandamountormissingp :: Monad m => JournalStateParser m MixedAmount
|
|
spaceandamountormissingp =
|
|
try (do
|
|
lift $ some spacenonewline
|
|
(Mixed . (:[])) `fmap` amountp <|> return missingmixedamt
|
|
) <|> return missingmixedamt
|
|
|
|
#ifdef TESTS
|
|
assertParseEqual' :: (Show a, Eq a) => (Either ParseError a) -> a -> Assertion
|
|
assertParseEqual' parse expected = either (assertFailure.show) (`is'` expected) parse
|
|
|
|
is' :: (Eq a, Show a) => a -> a -> Assertion
|
|
a `is'` e = assertEqual e a
|
|
|
|
test_spaceandamountormissingp = do
|
|
assertParseEqual' (parseWithState mempty spaceandamountormissingp " $47.18") (Mixed [usd 47.18])
|
|
assertParseEqual' (parseWithState mempty spaceandamountormissingp "$47.18") missingmixedamt
|
|
assertParseEqual' (parseWithState mempty spaceandamountormissingp " ") missingmixedamt
|
|
assertParseEqual' (parseWithState mempty spaceandamountormissingp "") missingmixedamt
|
|
#endif
|
|
|
|
-- | Parse a single-commodity amount, with optional symbol on the left or
|
|
-- right, optional unit or total price, and optional (ignored)
|
|
-- ledger-style balance assertion or fixed lot price declaration.
|
|
amountp :: Monad m => JournalStateParser m Amount
|
|
amountp = try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp
|
|
|
|
#ifdef TESTS
|
|
test_amountp = do
|
|
assertParseEqual' (parseWithState mempty amountp "$47.18") (usd 47.18)
|
|
assertParseEqual' (parseWithState mempty amountp "$1.") (usd 1 `withPrecision` 0)
|
|
-- ,"amount with unit price" ~: do
|
|
assertParseEqual'
|
|
(parseWithState mempty amountp "$10 @ €0.5")
|
|
(usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1))
|
|
-- ,"amount with total price" ~: do
|
|
assertParseEqual'
|
|
(parseWithState mempty amountp "$10 @@ €5")
|
|
(usd 10 `withPrecision` 0 @@ (eur 5 `withPrecision` 0))
|
|
#endif
|
|
|
|
-- | Parse an amount from a string, or get an error.
|
|
amountp' :: String -> Amount
|
|
amountp' s =
|
|
case runParser (evalStateT (amountp <* eof) mempty) "" (T.pack s) of
|
|
Right amt -> amt
|
|
Left err -> error' $ show err -- XXX should throwError
|
|
|
|
-- | Parse a mixed amount from a string, or get an error.
|
|
mamountp' :: String -> MixedAmount
|
|
mamountp' = Mixed . (:[]) . amountp'
|
|
|
|
signp :: TextParser m String
|
|
signp = do
|
|
sign <- optional $ oneOf ("+-" :: [Char])
|
|
return $ case sign of Just '-' -> "-"
|
|
_ -> ""
|
|
|
|
multiplierp :: TextParser m Bool
|
|
multiplierp = do
|
|
multiplier <- optional $ oneOf ("*" :: [Char])
|
|
return $ case multiplier of Just '*' -> True
|
|
_ -> False
|
|
|
|
leftsymbolamountp :: Monad m => JournalStateParser m Amount
|
|
leftsymbolamountp = do
|
|
sign <- lift signp
|
|
m <- lift multiplierp
|
|
c <- lift commoditysymbolp
|
|
sp <- lift $ many spacenonewline
|
|
(q,prec,mdec,mgrps) <- lift numberp
|
|
let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
|
p <- priceamountp
|
|
let applysign = if sign=="-" then negate else id
|
|
return $ applysign $ Amount c q p s m
|
|
<?> "left-symbol amount"
|
|
|
|
rightsymbolamountp :: Monad m => JournalStateParser m Amount
|
|
rightsymbolamountp = do
|
|
m <- lift multiplierp
|
|
(q,prec,mdec,mgrps) <- lift numberp
|
|
sp <- lift $ many spacenonewline
|
|
c <- lift commoditysymbolp
|
|
p <- priceamountp
|
|
let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
|
return $ Amount c q p s m
|
|
<?> "right-symbol amount"
|
|
|
|
nosymbolamountp :: Monad m => JournalStateParser m Amount
|
|
nosymbolamountp = do
|
|
m <- lift multiplierp
|
|
(q,prec,mdec,mgrps) <- lift numberp
|
|
p <- priceamountp
|
|
-- apply the most recently seen default commodity and style to this commodityless amount
|
|
defcs <- getDefaultCommodityAndStyle
|
|
let (c,s) = case defcs of
|
|
Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec})
|
|
Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
|
|
return $ Amount c q p s m
|
|
<?> "no-symbol amount"
|
|
|
|
commoditysymbolp :: TextParser m CommoditySymbol
|
|
commoditysymbolp = (quotedcommoditysymbolp <|> simplecommoditysymbolp) <?> "commodity symbol"
|
|
|
|
quotedcommoditysymbolp :: TextParser m CommoditySymbol
|
|
quotedcommoditysymbolp = do
|
|
char '"'
|
|
s <- some $ noneOf (";\n\"" :: [Char])
|
|
char '"'
|
|
return $ T.pack s
|
|
|
|
simplecommoditysymbolp :: TextParser m CommoditySymbol
|
|
simplecommoditysymbolp = T.pack <$> some (noneOf nonsimplecommoditychars)
|
|
|
|
priceamountp :: Monad m => JournalStateParser m Price
|
|
priceamountp =
|
|
try (do
|
|
lift (many spacenonewline)
|
|
char '@'
|
|
try (do
|
|
char '@'
|
|
lift (many spacenonewline)
|
|
a <- amountp -- XXX can parse more prices ad infinitum, shouldn't
|
|
return $ TotalPrice a)
|
|
<|> (do
|
|
lift (many spacenonewline)
|
|
a <- amountp -- XXX can parse more prices ad infinitum, shouldn't
|
|
return $ UnitPrice a))
|
|
<|> return NoPrice
|
|
|
|
partialbalanceassertionp :: Monad m => JournalStateParser m (Maybe Amount)
|
|
partialbalanceassertionp =
|
|
try (do
|
|
lift (many spacenonewline)
|
|
char '='
|
|
lift (many spacenonewline)
|
|
a <- amountp -- XXX should restrict to a simple amount
|
|
return $ Just $ a)
|
|
<|> return Nothing
|
|
|
|
-- balanceassertion :: Monad m => TextParser m (Maybe MixedAmount)
|
|
-- balanceassertion =
|
|
-- try (do
|
|
-- lift (many spacenonewline)
|
|
-- string "=="
|
|
-- lift (many spacenonewline)
|
|
-- a <- amountp -- XXX should restrict to a simple amount
|
|
-- return $ Just $ Mixed [a])
|
|
-- <|> return Nothing
|
|
|
|
-- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices
|
|
fixedlotpricep :: Monad m => JournalStateParser m (Maybe Amount)
|
|
fixedlotpricep =
|
|
try (do
|
|
lift (many spacenonewline)
|
|
char '{'
|
|
lift (many spacenonewline)
|
|
char '='
|
|
lift (many spacenonewline)
|
|
a <- amountp -- XXX should restrict to a simple amount
|
|
lift (many spacenonewline)
|
|
char '}'
|
|
return $ Just a)
|
|
<|> return Nothing
|
|
|
|
-- | Parse a string representation of a number for its value and display
|
|
-- attributes.
|
|
--
|
|
-- Some international number formats are accepted, eg either period or comma
|
|
-- may be used for the decimal point, and the other of these may be used for
|
|
-- separating digit groups in the integer part. See
|
|
-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
|
|
--
|
|
-- This returns: the parsed numeric value, the precision (number of digits
|
|
-- seen following the decimal point), the decimal point character used if any,
|
|
-- and the digit group style if any.
|
|
--
|
|
numberp :: TextParser m (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
|
|
numberp = do
|
|
-- a number is an optional sign followed by a sequence of digits possibly
|
|
-- interspersed with periods, commas, or both
|
|
-- ptrace "numberp"
|
|
sign <- signp
|
|
parts <- some $ choice' [some digitChar, some $ char ',', some $ char '.']
|
|
dbg8 "numberp parsed" (sign,parts) `seq` return ()
|
|
|
|
-- check the number is well-formed and identify the decimal point and digit
|
|
-- group separator characters used, if any
|
|
let (numparts, puncparts) = partition numeric parts
|
|
(ok, mdecimalpoint, mseparator) =
|
|
case (numparts, puncparts) of
|
|
([],_) -> (False, Nothing, Nothing) -- no digits, not ok
|
|
(_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok
|
|
(_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point
|
|
(_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok
|
|
(_,_:_:_) -> -- two or more punctuations
|
|
let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point
|
|
in if any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
|
|
|| any (s/=) ss -- separator chars vary, not ok
|
|
|| head parts == s -- number begins with a separator char, not ok
|
|
then (False, Nothing, Nothing)
|
|
else if s == d
|
|
then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators
|
|
else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point
|
|
unless ok $ fail $ "number seems ill-formed: "++concat parts
|
|
|
|
-- get the digit group sizes and digit group style if any
|
|
let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts
|
|
(intparts, fracpart) = (filter numeric intparts', filter numeric fracparts')
|
|
groupsizes = reverse $ case map length intparts of
|
|
(a:b:cs) | a < b -> b:cs
|
|
gs -> gs
|
|
mgrps = (`DigitGroups` groupsizes) <$> mseparator
|
|
|
|
-- put the parts back together without digit group separators, get the precision and parse the value
|
|
let int = concat $ "":intparts
|
|
frac = concat $ "":fracpart
|
|
precision = length frac
|
|
int' = if null int then "0" else int
|
|
frac' = if null frac then "0" else frac
|
|
quantity = read $ sign++int'++"."++frac' -- this read should never fail
|
|
|
|
return $ dbg8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps)
|
|
<?> "numberp"
|
|
where
|
|
numeric = isNumber . headDef '_'
|
|
|
|
-- test_numberp = do
|
|
-- let s `is` n = assertParseEqual (parseWithState mempty numberp s) n
|
|
-- assertFails = assertBool . isLeft . parseWithState mempty numberp
|
|
-- assertFails ""
|
|
-- "0" `is` (0, 0, '.', ',', [])
|
|
-- "1" `is` (1, 0, '.', ',', [])
|
|
-- "1.1" `is` (1.1, 1, '.', ',', [])
|
|
-- "1,000.1" `is` (1000.1, 1, '.', ',', [3])
|
|
-- "1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2])
|
|
-- "1,000,000" `is` (1000000, 0, '.', ',', [3,3])
|
|
-- "1." `is` (1, 0, '.', ',', [])
|
|
-- "1," `is` (1, 0, ',', '.', [])
|
|
-- ".1" `is` (0.1, 1, '.', ',', [])
|
|
-- ",1" `is` (0.1, 1, ',', '.', [])
|
|
-- assertFails "1,000.000,1"
|
|
-- assertFails "1.000,000.1"
|
|
-- assertFails "1,000.000.1"
|
|
-- assertFails "1,,1"
|
|
-- assertFails "1..1"
|
|
-- assertFails ".1,"
|
|
-- assertFails ",1."
|
|
|
|
--- ** comments
|
|
|
|
multilinecommentp :: JournalStateParser m ()
|
|
multilinecommentp = do
|
|
string "comment" >> lift (many spacenonewline) >> newline
|
|
go
|
|
where
|
|
go = try (eof <|> (string "end comment" >> newline >> return ()))
|
|
<|> (anyLine >> go)
|
|
anyLine = anyChar `manyTill` newline
|
|
|
|
emptyorcommentlinep :: JournalStateParser m ()
|
|
emptyorcommentlinep = do
|
|
lift (many spacenonewline) >> (commentp <|> (lift (many spacenonewline) >> newline >> return ""))
|
|
return ()
|
|
|
|
-- | Parse a possibly multi-line comment following a semicolon.
|
|
followingcommentp :: JournalStateParser m Text
|
|
followingcommentp =
|
|
-- ptrace "followingcommentp"
|
|
do samelinecomment <- lift (many spacenonewline) >> (try semicoloncommentp <|> (newline >> return ""))
|
|
newlinecomments <- many (try (lift (some spacenonewline) >> semicoloncommentp))
|
|
return $ T.unlines $ samelinecomment:newlinecomments
|
|
|
|
-- | Parse a possibly multi-line comment following a semicolon, and
|
|
-- any tags and/or posting dates within it. Posting dates can be
|
|
-- expressed with "date"/"date2" tags and/or bracketed dates. The
|
|
-- dates are parsed in full here so that errors are reported in the
|
|
-- right position. Missing years can be inferred if a default date is
|
|
-- provided.
|
|
--
|
|
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]"
|
|
-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
|
|
--
|
|
-- Year unspecified and no default provided -> unknown year error, at correct position:
|
|
-- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line"
|
|
-- Left ...1:22...partial date 3/4 found, but the current year is unknown...
|
|
--
|
|
-- Date tag value contains trailing text - forgot the comma, confused:
|
|
-- the syntaxes ? We'll accept the leading date anyway
|
|
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6"
|
|
-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
|
|
--
|
|
followingcommentandtagsp :: MonadIO m => Maybe Day
|
|
-> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day)
|
|
followingcommentandtagsp mdefdate = do
|
|
-- pdbg 0 "followingcommentandtagsp"
|
|
|
|
-- Parse a single or multi-line comment, starting on this line or the next one.
|
|
-- Save the starting position and preserve all whitespace for the subsequent re-parsing,
|
|
-- to get good error positions.
|
|
startpos <- getPosition
|
|
commentandwhitespace :: String <- do
|
|
let semicoloncommentp' = (:) <$> char ';' <*> anyChar `manyTill` eolof
|
|
sp1 <- lift (many spacenonewline)
|
|
l1 <- try (lift semicoloncommentp') <|> (newline >> return "")
|
|
ls <- lift . many $ try ((++) <$> some spacenonewline <*> semicoloncommentp')
|
|
return $ unlines $ (sp1 ++ l1) : ls
|
|
let comment = T.pack $ unlines $ map (lstrip . dropWhile (==';') . strip) $ lines commentandwhitespace
|
|
-- pdbg 0 $ "commentws:"++show commentandwhitespace
|
|
-- pdbg 0 $ "comment:"++show comment
|
|
|
|
-- Reparse the comment for any tags.
|
|
tags <- case runTextParser (setPosition startpos >> tagsp) $ T.pack commentandwhitespace of
|
|
Right ts -> return ts
|
|
Left e -> throwError $ parseErrorPretty e
|
|
-- pdbg 0 $ "tags: "++show tags
|
|
|
|
-- Reparse the comment for any posting dates. Use the transaction date for defaults, if provided.
|
|
epdates <- liftIO $ rejp (setPosition startpos >> postingdatesp mdefdate) $ T.pack commentandwhitespace
|
|
pdates <- case epdates of
|
|
Right ds -> return ds
|
|
Left e -> throwError e
|
|
-- pdbg 0 $ "pdates: "++show pdates
|
|
let mdate = headMay $ map snd $ filter ((=="date").fst) pdates
|
|
mdate2 = headMay $ map snd $ filter ((=="date2").fst) pdates
|
|
|
|
return (comment, tags, mdate, mdate2)
|
|
|
|
commentp :: JournalStateParser m Text
|
|
commentp = commentStartingWithp commentchars
|
|
|
|
commentchars :: [Char]
|
|
commentchars = "#;*"
|
|
|
|
semicoloncommentp :: JournalStateParser m Text
|
|
semicoloncommentp = commentStartingWithp ";"
|
|
|
|
commentStartingWithp :: [Char] -> JournalStateParser m Text
|
|
commentStartingWithp cs = do
|
|
-- ptrace "commentStartingWith"
|
|
oneOf cs
|
|
lift (many spacenonewline)
|
|
l <- anyChar `manyTill` (lift eolof)
|
|
optional newline
|
|
return $ T.pack l
|
|
|
|
--- ** tags
|
|
|
|
-- | Extract any tags (name:value ended by comma or newline) embedded in a string.
|
|
--
|
|
-- >>> commentTags "a b:, c:c d:d, e"
|
|
-- [("b",""),("c","c d:d")]
|
|
--
|
|
-- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c"
|
|
-- [("b","c")]
|
|
--
|
|
-- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")]
|
|
--
|
|
-- >>> commentTags "\na b:, \nd:e, f"
|
|
-- [("b",""),("d","e")]
|
|
--
|
|
commentTags :: Text -> [Tag]
|
|
commentTags s =
|
|
case runTextParser tagsp s of
|
|
Right r -> r
|
|
Left _ -> [] -- shouldn't happen
|
|
|
|
-- | Parse all tags found in a string.
|
|
tagsp :: Parser [Tag]
|
|
tagsp = -- do
|
|
-- pdbg 0 $ "tagsp"
|
|
many (try (nontagp >> tagp))
|
|
|
|
-- | Parse everything up till the first tag.
|
|
--
|
|
-- >>> rtp nontagp "\na b:, \nd:e, f"
|
|
-- Right "\na "
|
|
nontagp :: Parser String
|
|
nontagp = -- do
|
|
-- pdbg 0 "nontagp"
|
|
-- anyChar `manyTill` (lookAhead (try (tagorbracketeddatetagsp Nothing >> return ()) <|> eof))
|
|
anyChar `manyTill` lookAhead (try (void tagp) <|> eof)
|
|
-- XXX costly ?
|
|
|
|
-- | Tags begin with a colon-suffixed tag name (a word beginning with
|
|
-- a letter) and are followed by a tag value (any text up to a comma
|
|
-- or newline, whitespace-stripped).
|
|
--
|
|
-- >>> rtp tagp "a:b b , c AuxDate: 4/2"
|
|
-- Right ("a","b b")
|
|
--
|
|
tagp :: Parser Tag
|
|
tagp = do
|
|
-- pdbg 0 "tagp"
|
|
n <- tagnamep
|
|
v <- tagvaluep
|
|
return (n,v)
|
|
|
|
-- |
|
|
-- >>> rtp tagnamep "a:"
|
|
-- Right "a"
|
|
tagnamep :: Parser Text
|
|
tagnamep = -- do
|
|
-- pdbg 0 "tagnamep"
|
|
T.pack <$> some (noneOf (": \t\n" :: [Char])) <* char ':'
|
|
|
|
tagvaluep :: TextParser m Text
|
|
tagvaluep = do
|
|
-- ptrace "tagvalue"
|
|
v <- anyChar `manyTill` (void (try (char ',')) <|> eolof)
|
|
return $ T.pack $ strip $ reverse $ dropWhile (==',') $ reverse $ strip v
|
|
|
|
--- ** posting dates
|
|
|
|
-- | Parse all posting dates found in a string. Posting dates can be
|
|
-- expressed with date/date2 tags and/or bracketed dates. The dates
|
|
-- are parsed fully to give useful errors. Missing years can be
|
|
-- inferred only if a default date is provided.
|
|
--
|
|
postingdatesp :: Monad m => Maybe Day -> ErroringJournalParser m [(TagName,Day)]
|
|
postingdatesp mdefdate = do
|
|
-- pdbg 0 $ "postingdatesp"
|
|
let p = ((:[]) <$> datetagp mdefdate) <|> bracketeddatetagsp mdefdate
|
|
nonp =
|
|
many (notFollowedBy p >> anyChar)
|
|
-- anyChar `manyTill` (lookAhead (try (p >> return ()) <|> eof))
|
|
concat <$> many (try (nonp >> p))
|
|
|
|
--- ** date tags
|
|
|
|
-- | Date tags are tags with name "date" or "date2". Their value is
|
|
-- parsed as a date, using the provided default date if any for
|
|
-- inferring a missing year if needed. Any error in date parsing is
|
|
-- reported and terminates parsing.
|
|
--
|
|
-- >>> rejp (datetagp Nothing) "date: 2000/1/2 "
|
|
-- Right ("date",2000-01-02)
|
|
--
|
|
-- >>> rejp (datetagp (Just $ fromGregorian 2001 2 3)) "date2:3/4"
|
|
-- Right ("date2",2001-03-04)
|
|
--
|
|
-- >>> rejp (datetagp Nothing) "date: 3/4"
|
|
-- Left ...1:9...partial date 3/4 found, but the current year is unknown...
|
|
--
|
|
datetagp :: Monad m => Maybe Day -> ErroringJournalParser m (TagName,Day)
|
|
datetagp mdefdate = do
|
|
-- pdbg 0 "datetagp"
|
|
string "date"
|
|
n <- T.pack . fromMaybe "" <$> optional (string "2")
|
|
char ':'
|
|
startpos <- getPosition
|
|
v <- lift tagvaluep
|
|
-- re-parse value as a date.
|
|
j <- get
|
|
let ep :: Either (ParseError Char Dec) Day
|
|
ep = parseWithState'
|
|
j{jparsedefaultyear=first3.toGregorian <$> mdefdate}
|
|
-- The value extends to a comma, newline, or end of file.
|
|
-- It seems like ignoring any extra stuff following a date
|
|
-- gives better errors here.
|
|
(do
|
|
setPosition startpos
|
|
datep) -- <* eof)
|
|
v
|
|
case ep
|
|
of Left e -> throwError $ parseErrorPretty e
|
|
Right d -> return ("date"<>n, d)
|
|
|
|
--- ** bracketed dates
|
|
|
|
-- tagorbracketeddatetagsp :: Monad m => Maybe Day -> TextParser u m [Tag]
|
|
-- tagorbracketeddatetagsp mdefdate =
|
|
-- bracketeddatetagsp mdefdate <|> ((:[]) <$> tagp)
|
|
|
|
-- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as
|
|
-- "date" and/or "date2" tags. Anything that looks like an attempt at
|
|
-- this (a square-bracketed sequence of 0123456789/-.= containing at
|
|
-- least one digit and one date separator) is also parsed, and will
|
|
-- throw an appropriate error.
|
|
--
|
|
-- The dates are parsed in full here so that errors are reported in
|
|
-- the right position. A missing year in DATE can be inferred if a
|
|
-- default date is provided. A missing year in DATE2 will be inferred
|
|
-- from DATE.
|
|
--
|
|
-- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]"
|
|
-- Right [("date",2016-01-02),("date2",2016-03-04)]
|
|
--
|
|
-- >>> rejp (bracketeddatetagsp Nothing) "[1]"
|
|
-- Left ...not a bracketed date...
|
|
--
|
|
-- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/32]"
|
|
-- Left ...1:11:...bad date: 2016/1/32...
|
|
--
|
|
-- >>> rejp (bracketeddatetagsp Nothing) "[1/31]"
|
|
-- Left ...1:6:...partial date 1/31 found, but the current year is unknown...
|
|
--
|
|
-- >>> rejp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
|
|
-- Left ...1:15:...bad date, different separators...
|
|
--
|
|
bracketeddatetagsp :: Monad m => Maybe Day -> ErroringJournalParser m [(TagName, Day)]
|
|
bracketeddatetagsp mdefdate = do
|
|
-- pdbg 0 "bracketeddatetagsp"
|
|
char '['
|
|
startpos <- getPosition
|
|
let digits = "0123456789"
|
|
s <- some (oneOf $ '=':digits++datesepchars)
|
|
char ']'
|
|
unless (any (`elem` s) digits && any (`elem` datesepchars) s) $
|
|
fail "not a bracketed date"
|
|
|
|
-- looks sufficiently like a bracketed date, now we
|
|
-- re-parse as dates and throw any errors
|
|
j <- get
|
|
let ep :: Either (ParseError Char Dec) (Maybe Day, Maybe Day)
|
|
ep = parseWithState'
|
|
j{jparsedefaultyear=first3.toGregorian <$> mdefdate}
|
|
(do
|
|
setPosition startpos
|
|
md1 <- optional datep
|
|
maybe (return ()) (setYear.first3.toGregorian) md1
|
|
md2 <- optional $ char '=' >> datep
|
|
eof
|
|
return (md1,md2)
|
|
)
|
|
(T.pack s)
|
|
case ep
|
|
of Left e -> throwError $ parseErrorPretty e
|
|
Right (md1,md2) -> return $ catMaybes
|
|
[("date",) <$> md1, ("date2",) <$> md2]
|
|
|