--- * doc -- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users, -- (add-hook 'haskell-mode-hook -- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t)) -- 'orgstruct-mode) -- and press TAB on nodes to expand/collapse. {-| Some common parsers and helpers used by several readers. Some of these might belong in Hledger.Read.JournalReader or Hledger.Read. -} --- * module {-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} module Hledger.Read.Common ( Reader (..), InputOpts (..), definputopts, rawOptsToInputOpts, -- * parsing utilities runTextParser, rtp, runJournalParser, rjp, runErroringJournalParser, rejp, genericSourcePos, journalSourcePos, generateAutomaticPostings, parseAndFinaliseJournal, parseAndFinaliseJournal', -- TODO unused ? check addons setYear, getYear, setDefaultCommodityAndStyle, getDefaultCommodityAndStyle, getDefaultAmountStyle, getAmountStyle, pushAccount, pushParentAccount, popParentAccount, getParentAccount, addAccountAlias, getAccountAliases, clearAccountAliases, journalAddFile, parserErrorAt, -- * parsers -- ** transaction bits statusp, codep, descriptionp, -- ** dates datep, datetimep, secondarydatep, -- ** account names modifiedaccountnamep, accountnamep, -- ** amounts spaceandamountormissingp, amountp, amountp', mamountp', commoditysymbolp, priceamountp, partialbalanceassertionp, fixedlotpricep, numberp, fromRawNumber, rawnumberp, -- ** comments multilinecommentp, emptyorcommentlinep, followingcommentp, followingcommentandtagsp, -- ** tags commentTags, tagsp, -- ** bracketed dates bracketeddatetagsp ) where --- * imports import Prelude () import Prelude.Compat hiding (readFile) import Control.Monad.Compat import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError) import Control.Monad.State.Strict import Data.Bifunctor import Data.Char import Data.Data import Data.Decimal (DecimalRaw (Decimal), Decimal) import Data.Default import Data.Functor.Identity import Data.List.Compat import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe #if !(MIN_VERSION_base(4,11,0)) import Data.Monoid #endif import qualified Data.Map as M import qualified Data.Semigroup as Sem import Data.Text (Text) import qualified Data.Text as T import Data.Time.Calendar import Data.Time.LocalTime import Data.Void (Void) import System.Time (getClockTime) import Text.Megaparsec import Text.Megaparsec.Char import Text.Megaparsec.Char.Lexer (decimal) import Hledger.Data import Hledger.Utils import qualified Hledger.Query as Q (Query(Any)) -- | A hledger journal reader is a triple of storage format name, a -- detector of that format, and a parser from that format to Journal. data Reader = Reader { -- The canonical name of the format handled by this reader rFormat :: StorageFormat -- The file extensions recognised as containing this format ,rExtensions :: [String] -- A text parser for this format, accepting input options, file -- path for error messages and file contents, producing an exception-raising IO -- action that returns a journal or error message. ,rParser :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal -- Experimental readers are never tried automatically. ,rExperimental :: Bool } instance Show Reader where show r = rFormat r ++ " reader" -- $setup -- | Various options to use when reading journal files. -- Similar to CliOptions.inputflags, simplifies the journal-reading functions. data InputOpts = InputOpts { -- files_ :: [FilePath] mformat_ :: Maybe StorageFormat -- ^ a file/storage format to try, unless overridden -- by a filename prefix. Nothing means try all. ,mrules_file_ :: Maybe FilePath -- ^ a conversion rules file to use (when reading CSV) ,aliases_ :: [String] -- ^ account name aliases to apply ,anon_ :: Bool -- ^ do light anonymisation/obfuscation of the data ,ignore_assertions_ :: Bool -- ^ don't check balance assertions ,new_ :: Bool -- ^ read only new transactions since this file was last read ,new_save_ :: Bool -- ^ save latest new transactions state for next time ,pivot_ :: String -- ^ use the given field's value as the account name ,auto_ :: Bool -- ^ generate automatic postings when journal is parsed } deriving (Show, Data) --, Typeable) instance Default InputOpts where def = definputopts definputopts :: InputOpts definputopts = InputOpts def def def def def def True def def rawOptsToInputOpts :: RawOpts -> InputOpts rawOptsToInputOpts rawopts = InputOpts{ -- files_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "file" rawopts mformat_ = Nothing ,mrules_file_ = maybestringopt "rules-file" rawopts ,aliases_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "alias" rawopts ,anon_ = boolopt "anon" rawopts ,ignore_assertions_ = boolopt "ignore-assertions" rawopts ,new_ = boolopt "new" rawopts ,new_save_ = True ,pivot_ = stringopt "pivot" rawopts ,auto_ = boolopt "auto" rawopts } --- * parsing utilities -- | Run a string parser with no state in the identity monad. runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char Void) a runTextParser p t = runParser p "" t rtp = runTextParser -- XXX odd, why doesn't this take a JournalParser ? -- | Run a journal parser with a null journal-parsing state. runJournalParser, rjp :: Monad m => TextParser m a -> Text -> m (Either (ParseError Char Void) a) runJournalParser p t = runParserT p "" t rjp = runJournalParser -- | Run an error-raising journal parser with a null journal-parsing state. runErroringJournalParser, rejp :: Monad m => ErroringJournalParser m a -> Text -> m (Either String a) runErroringJournalParser p t = runExceptT $ runJournalParser (evalStateT p mempty) t >>= either (throwError . parseErrorPretty) return rejp = runErroringJournalParser genericSourcePos :: SourcePos -> GenericSourcePos genericSourcePos p = GenericSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p) (fromIntegral . unPos $ sourceColumn p) journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos journalSourcePos p p' = JournalSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p, fromIntegral $ line') where line' | (unPos $ sourceColumn p') == 1 = unPos (sourceLine p') - 1 | otherwise = unPos $ sourceLine p' -- might be at end of file withat last new-line -- | Generate Automatic postings and add them to the current journal. generateAutomaticPostings :: Journal -> Journal generateAutomaticPostings j = j { jtxns = map modifier $ jtxns j } where modifier = foldr (flip (.) . runModifierTransaction') id mtxns runModifierTransaction' = fmap txnTieKnot . runModifierTransaction Q.Any mtxns = jmodifiertxns j -- | Given a megaparsec ParsedJournal parser, input options, file -- path and file content: parse and post-process a Journal, or give an error. parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal parseAndFinaliseJournal parser iopts f txt = do t <- liftIO getClockTime y <- liftIO getCurrentYear ep <- runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt case ep of Right pj -> let pj' = if auto_ iopts then generateAutomaticPostings pj else pj in case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of Right j -> return j Left e -> throwError e Left e -> throwError $ parseErrorPretty e parseAndFinaliseJournal' :: JournalParser Identity ParsedJournal -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal parseAndFinaliseJournal' parser iopts f txt = do t <- liftIO getClockTime y <- liftIO getCurrentYear let ep = runParser (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt case ep of Right pj -> let pj' = if auto_ iopts then generateAutomaticPostings pj else pj in case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of Right j -> return j Left e -> throwError e Left e -> throwError $ parseErrorPretty e setYear :: Year -> JournalParser m () setYear y = modify' (\j -> j{jparsedefaultyear=Just y}) getYear :: JournalParser m (Maybe Year) getYear = fmap jparsedefaultyear get setDefaultCommodityAndStyle :: (CommoditySymbol,AmountStyle) -> JournalParser m () setDefaultCommodityAndStyle cs = modify' (\j -> j{jparsedefaultcommodity=Just cs}) getDefaultCommodityAndStyle :: JournalParser m (Maybe (CommoditySymbol,AmountStyle)) getDefaultCommodityAndStyle = jparsedefaultcommodity `fmap` get -- | Get amount style associated with default currency. -- -- Returns 'AmountStyle' used to defined by a latest default commodity directive -- prior to current position within this file or its parents. getDefaultAmountStyle :: JournalParser m (Maybe AmountStyle) getDefaultAmountStyle = fmap snd <$> getDefaultCommodityAndStyle -- | Lookup currency-specific amount style. -- -- Returns 'AmountStyle' used in commodity directive within current journal -- prior to current position or in its parents files. getAmountStyle :: CommoditySymbol -> JournalParser m (Maybe AmountStyle) getAmountStyle commodity = do specificStyle <- maybe Nothing cformat . M.lookup commodity . jcommodities <$> get defaultStyle <- fmap snd <$> getDefaultCommodityAndStyle let effectiveStyle = listToMaybe $ catMaybes [specificStyle, defaultStyle] return effectiveStyle pushAccount :: AccountName -> JournalParser m () pushAccount acct = modify' (\j -> j{jaccounts = (acct, Nothing) : jaccounts j}) pushParentAccount :: AccountName -> JournalParser m () pushParentAccount acct = modify' (\j -> j{jparseparentaccounts = acct : jparseparentaccounts j}) popParentAccount :: JournalParser m () popParentAccount = do j <- get case jparseparentaccounts j of [] -> unexpected (Tokens ('E' :| "nd of apply account block with no beginning")) (_:rest) -> put j{jparseparentaccounts=rest} getParentAccount :: JournalParser m AccountName getParentAccount = fmap (concatAccountNames . reverse . jparseparentaccounts) get addAccountAlias :: MonadState Journal m => AccountAlias -> m () addAccountAlias a = modify' (\(j@Journal{..}) -> j{jparsealiases=a:jparsealiases}) getAccountAliases :: MonadState Journal m => m [AccountAlias] getAccountAliases = fmap jparsealiases get clearAccountAliases :: MonadState Journal m => m () clearAccountAliases = modify' (\(j@Journal{..}) -> j{jparsealiases=[]}) -- getTransactionCount :: MonadState Journal m => m Integer -- getTransactionCount = fmap jparsetransactioncount get -- -- setTransactionCount :: MonadState Journal m => Integer -> m () -- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i}) -- -- -- | Increment the transaction index by one and return the new value. -- incrementTransactionCount :: MonadState Journal m => m Integer -- incrementTransactionCount = do -- modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1}) -- getTransactionCount journalAddFile :: (FilePath,Text) -> Journal -> Journal journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]} -- append, unlike the other fields, even though we do a final reverse, -- to compensate for additional reversal due to including/monoid-concatting -- -- | Terminate parsing entirely, returning the given error message -- -- with the current parse position prepended. -- parserError :: String -> ErroringJournalParser a -- parserError s = do -- pos <- getPosition -- parserErrorAt pos s -- | Terminate parsing entirely, returning the given error message -- with the given parse position prepended. parserErrorAt :: Monad m => SourcePos -> String -> ErroringJournalParser m a parserErrorAt pos s = throwError $ sourcePosPretty pos ++ ":\n" ++ s --- * parsers --- ** transaction bits statusp :: TextParser m Status statusp = choice' [ skipMany spacenonewline >> char '*' >> return Cleared , skipMany spacenonewline >> char '!' >> return Pending , return Unmarked ] "cleared status" codep :: TextParser m Text codep = try codep' <|> pure "" where codep' = do skipSome spacenonewline between (char '(' "codep") (char ')') $ takeWhileP Nothing (/= ')') descriptionp :: JournalParser m Text descriptionp = takeWhileP Nothing $ \c -> c /= ';' && c /= '\n' --- ** dates -- | Parse a date in YYYY/MM/DD format. -- Hyphen (-) and period (.) are also allowed as separators. -- The year may be omitted if a default year has been set. -- Leading zeroes may be omitted. datep :: JournalParser m Day datep = do myear <- getYear lift $ datep' myear datep' :: Maybe Year -> TextParser m Day datep' mYear = do d1 <- decimal "year or month" sep <- satisfy isDateSepChar "date separator" d2 <- decimal "month or day" fullDate d1 sep d2 <|> partialDate mYear d1 sep d2 "full or partial date" where fullDate :: Integer -> Char -> Integer -> TextParser m Day fullDate year sep1 month = do sep2 <- satisfy isDateSepChar "date separator" day <- decimal "day" let dateStr = show year ++ [sep1] ++ show month ++ [sep2] ++ show day when (sep1 /= sep2) $ fail $ "invalid date (mixing date separators is not allowed): " ++ dateStr case fromGregorianValid year (fromIntegral month) day of Nothing -> fail $ "well-formed but invalid date: " ++ dateStr Just date -> pure date partialDate :: Maybe Year -> Integer -> Char -> Integer -> TextParser m Day partialDate mYear month sep day = case mYear of Just year -> case fromGregorianValid year (fromIntegral month) (fromIntegral day) of Nothing -> fail $ "well-formed but invalid date: " ++ dateStr Just date -> pure date where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day Nothing -> fail $ "partial date "++dateStr++" found, but the current year is unknown" where dateStr = show month ++ [sep] ++ show day -- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format. -- Hyphen (-) and period (.) are also allowed as date separators. -- The year may be omitted if a default year has been set. -- Seconds are optional. -- The timezone is optional and ignored (the time is always interpreted as a local time). -- Leading zeroes may be omitted (except in a timezone). datetimep :: JournalParser m LocalTime datetimep = do day <- datep lift $ skipSome spacenonewline h <- some digitChar let h' = read h guard $ h' >= 0 && h' <= 23 char ':' m <- some digitChar let m' = read m guard $ m' >= 0 && m' <= 59 s <- optional $ char ':' >> some digitChar let s' = case s of Just sstr -> read sstr Nothing -> 0 guard $ s' >= 0 && s' <= 59 {- tz <- -} optional $ do plusminus <- oneOf ("-+" :: [Char]) d1 <- digitChar d2 <- digitChar d3 <- digitChar d4 <- digitChar return $ plusminus:d1:d2:d3:d4:"" -- ltz <- liftIO $ getCurrentTimeZone -- let tz' = maybe ltz (fromMaybe ltz . parseTime defaultTimeLocale "%z") tz -- return $ localTimeToUTC tz' $ LocalTime day $ TimeOfDay h' m' (fromIntegral s') return $ LocalTime day $ TimeOfDay h' m' (fromIntegral s') secondarydatep :: Day -> JournalParser m Day secondarydatep primarydate = do char '=' -- kludgy way to use primary date for default year let withDefaultYear d p = do y <- getYear let (y',_,_) = toGregorian d in setYear y' r <- p when (isJust y) $ setYear $ fromJust y -- XXX -- mapM setYear <$> y return r withDefaultYear primarydate datep -- | -- >> parsewith twoorthreepartdatestringp "2016/01/2" -- Right "2016/01/2" -- twoorthreepartdatestringp = do -- n1 <- some digitChar -- c <- datesepchar -- n2 <- some digitChar -- mn3 <- optional $ char c >> some digitChar -- return $ n1 ++ c:n2 ++ maybe "" (c:) mn3 --- ** account names -- | Parse an account name, then apply any parent account prefix and/or account aliases currently in effect. modifiedaccountnamep :: JournalParser m AccountName modifiedaccountnamep = do parent <- getParentAccount aliases <- getAccountAliases a <- lift accountnamep return $ accountNameApplyAliases aliases $ -- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference joinAccountNames parent a -- | Parse an account name. Account names start with a non-space, may -- have single spaces inside them, and are terminated by two or more -- spaces (or end of input). Also they have one or more components of -- at least one character, separated by the account separator char. -- (This parser will also consume one following space, if present.) accountnamep :: TextParser m AccountName accountnamep = do firstPart <- part otherParts <- many $ try $ singleSpace *> part let account = T.unwords $ firstPart : otherParts when (accountNameFromComponents (accountNameComponents account) /= account) (fail $ "account name seems ill-formed: " ++ T.unpack account) pure account where part = takeWhile1P Nothing (not . isSpace) singleSpace = void spacenonewline *> notFollowedBy spacenonewline --- ** amounts -- | Parse whitespace then an amount, with an optional left or right -- currency symbol and optional price, or return the special -- "missing" marker amount. spaceandamountormissingp :: Monad m => JournalParser m MixedAmount spaceandamountormissingp = try (do lift $ skipSome spacenonewline (Mixed . (:[])) `fmap` amountp <|> return missingmixedamt ) <|> return missingmixedamt #ifdef TESTS assertParseEqual' :: (Show a, Eq a) => (Either ParseError a) -> a -> Assertion assertParseEqual' parse expected = either (assertFailure.show) (`is'` expected) parse is' :: (Eq a, Show a) => a -> a -> Assertion a `is'` e = assertEqual e a test_spaceandamountormissingp = do assertParseEqual' (parseWithState mempty spaceandamountormissingp " $47.18") (Mixed [usd 47.18]) assertParseEqual' (parseWithState mempty spaceandamountormissingp "$47.18") missingmixedamt assertParseEqual' (parseWithState mempty spaceandamountormissingp " ") missingmixedamt assertParseEqual' (parseWithState mempty spaceandamountormissingp "") missingmixedamt #endif -- | Parse a single-commodity amount, with optional symbol on the left or -- right, optional unit or total price, and optional (ignored) -- ledger-style balance assertion or fixed lot price declaration. amountp :: Monad m => JournalParser m Amount amountp = try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp #ifdef TESTS test_amountp = do assertParseEqual' (parseWithState mempty amountp "$47.18") (usd 47.18) assertParseEqual' (parseWithState mempty amountp "$1.") (usd 1 `withPrecision` 0) -- ,"amount with unit price" ~: do assertParseEqual' (parseWithState mempty amountp "$10 @ €0.5") (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1)) -- ,"amount with total price" ~: do assertParseEqual' (parseWithState mempty amountp "$10 @@ €5") (usd 10 `withPrecision` 0 @@ (eur 5 `withPrecision` 0)) #endif -- | Parse an amount from a string, or get an error. amountp' :: String -> Amount amountp' s = case runParser (evalStateT (amountp <* eof) mempty) "" (T.pack s) of Right amt -> amt Left err -> error' $ show err -- XXX should throwError -- | Parse a mixed amount from a string, or get an error. mamountp' :: String -> MixedAmount mamountp' = Mixed . (:[]) . amountp' signp :: TextParser m String signp = do sign <- optional $ oneOf ("+-" :: [Char]) return $ case sign of Just '-' -> "-" _ -> "" multiplierp :: TextParser m Bool multiplierp = do multiplier <- optional $ oneOf ("*" :: [Char]) return $ case multiplier of Just '*' -> True _ -> False -- | This is like skipMany but it returns True if at least one element -- was skipped. This is helpful if you’re just using many to check if -- the resulting list is empty or not. skipMany' :: MonadPlus m => m a -> m Bool skipMany' p = go False where go !isNull = do more <- option False (True <$ p) if more then go True else pure isNull leftsymbolamountp :: Monad m => JournalParser m Amount leftsymbolamountp = do sign <- lift signp m <- lift multiplierp c <- lift commoditysymbolp suggestedStyle <- getAmountStyle c commodityspaced <- lift $ skipMany' spacenonewline (q,prec,mdec,mgrps) <- lift $ numberp suggestedStyle let s = amountstyle{ascommodityside=L, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} p <- priceamountp let applysign = if sign=="-" then negate else id return $ applysign $ Amount c q p s m "left-symbol amount" rightsymbolamountp :: Monad m => JournalParser m Amount rightsymbolamountp = do m <- lift multiplierp sign <- lift signp rawnum <- lift $ rawnumberp expMod <- lift . option id $ try exponentp commodityspaced <- lift $ skipMany' spacenonewline c <- lift commoditysymbolp suggestedStyle <- getAmountStyle c let (q0,prec0,mdec,mgrps) = fromRawNumber suggestedStyle (sign == "-") rawnum (q, prec) = expMod (q0, prec0) p <- priceamountp let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} return $ Amount c q p s m "right-symbol amount" nosymbolamountp :: Monad m => JournalParser m Amount nosymbolamountp = do m <- lift multiplierp suggestedStyle <- getDefaultAmountStyle (q,prec,mdec,mgrps) <- lift $ numberp suggestedStyle p <- priceamountp -- apply the most recently seen default commodity and style to this commodityless amount defcs <- getDefaultCommodityAndStyle let (c,s) = case defcs of Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec}) Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}) return $ Amount c q p s m "no-symbol amount" commoditysymbolp :: TextParser m CommoditySymbol commoditysymbolp = (quotedcommoditysymbolp <|> simplecommoditysymbolp) "commodity symbol" quotedcommoditysymbolp :: TextParser m CommoditySymbol quotedcommoditysymbolp = between (char '"') (char '"') $ takeWhile1P Nothing $ \c -> c /= ';' && c /= '\n' && c /= '\"' simplecommoditysymbolp :: TextParser m CommoditySymbol simplecommoditysymbolp = takeWhile1P Nothing (not . isNonsimpleCommodityChar) priceamountp :: Monad m => JournalParser m Price priceamountp = try (do lift (skipMany spacenonewline) char '@' try (do char '@' lift (skipMany spacenonewline) a <- amountp -- XXX can parse more prices ad infinitum, shouldn't return $ TotalPrice a) <|> (do lift (skipMany spacenonewline) a <- amountp -- XXX can parse more prices ad infinitum, shouldn't return $ UnitPrice a)) <|> return NoPrice partialbalanceassertionp :: Monad m => JournalParser m BalanceAssertion partialbalanceassertionp = try (do lift (skipMany spacenonewline) sourcepos <- genericSourcePos <$> lift getPosition char '=' lift (skipMany spacenonewline) a <- amountp -- XXX should restrict to a simple amount return $ Just (a, sourcepos)) <|> return Nothing -- balanceassertion :: Monad m => TextParser m (Maybe MixedAmount) -- balanceassertion = -- try (do -- lift (skipMany spacenonewline) -- string "==" -- lift (skipMany spacenonewline) -- a <- amountp -- XXX should restrict to a simple amount -- return $ Just $ Mixed [a]) -- <|> return Nothing -- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices fixedlotpricep :: Monad m => JournalParser m (Maybe Amount) fixedlotpricep = try (do lift (skipMany spacenonewline) char '{' lift (skipMany spacenonewline) char '=' lift (skipMany spacenonewline) a <- amountp -- XXX should restrict to a simple amount lift (skipMany spacenonewline) char '}' return $ Just a) <|> return Nothing -- | Parse a string representation of a number for its value and display -- attributes. -- -- Some international number formats are accepted, eg either period or comma -- may be used for the decimal point, and the other of these may be used for -- separating digit groups in the integer part. See -- http://en.wikipedia.org/wiki/Decimal_separator for more examples. -- -- This returns: the parsed numeric value, the precision (number of digits -- seen following the decimal point), the decimal point character used if any, -- and the digit group style if any. -- numberp :: Maybe AmountStyle -> TextParser m (Quantity, Int, Maybe Char, Maybe DigitGroupStyle) numberp suggestedStyle = do -- a number is an optional sign followed by a sequence of digits possibly -- interspersed with periods, commas, or both -- ptrace "numberp" sign <- signp raw <- rawnumberp dbg8 "numberp suggestedStyle" suggestedStyle `seq` return () let num@(q, prec, decSep, groups) = dbg8 "numberp quantity,precision,mdecimalpoint,mgrps" (fromRawNumber suggestedStyle (sign == "-") raw) option num . try $ do when (isJust groups) $ fail "groups and exponent are not mixable" (q', prec') <- exponentp <*> pure (q, prec) return (q', prec', decSep, groups) "numberp" exponentp :: TextParser m ((Quantity, Int) -> (Quantity, Int)) exponentp = do char' 'e' exp <- liftM read $ (++) <$> signp <*> some digitChar return $ bimap (* 10^^exp) (max 0 . subtract exp) "exponentp" -- | Interpret a raw number as a decimal number, and identify the decimal -- point charcter and digit separating scheme. There is only one ambiguous -- case: when there is just a single separator between two digit groups. -- Disambiguate using an amount style, if provided; otherwise, assume that -- the separator is a decimal point. -- -- Returns: -- - the decimal number -- - the precision (number of digits after the decimal point) -- - the decimal point character, if any -- - the digit group style, if any (digit group character and sizes of digit groups) fromRawNumber :: Maybe AmountStyle -> Bool -> RawNumber -> (Quantity, Int, Maybe Char, Maybe DigitGroupStyle) fromRawNumber suggestedStyle negated raw = case raw of LeadingDecimalPt decPt digitGrp -> let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber digitGrp) precision = digitGroupLength digitGrp in (quantity, precision, Just decPt, Nothing) TrailingDecimalPt digitGrp decPt -> let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber digitGrp) precision = 0 in (quantity, precision, Just decPt, Nothing) NoSeparators digitGrp -> let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber digitGrp) precision = 0 in (quantity, precision, Nothing, Nothing) AmbiguousNumber digitGrp1 sep digitGrp2 -> -- If present, use the suggested style to disambiguate; -- otherwise, assume that the separator is a decimal point where possible. if isDecimalPointChar sep && maybe True (sep `isValidDecimalBy`) suggestedStyle then -- Assuming that the separator is a decimal point let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber $ digitGrp1 <> digitGrp2) precision = digitGroupLength digitGrp2 in (quantity, precision, Just sep, Nothing) else -- Assuming that the separator is digit separator let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber $ digitGrp1 <> digitGrp2) precision = 0 digitGroupStyle = Just $ DigitGroups sep (groupSizes $ [digitGrp1, digitGrp2]) in (quantity, precision, Nothing, digitGroupStyle) DigitSeparators digitSep digitGrps -> let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber $ mconcat digitGrps) precision = 0 digitGroupStyle = Just $ DigitGroups digitSep (groupSizes digitGrps) in (quantity, precision, Nothing, digitGroupStyle) BothSeparators digitSep digitGrps decPt decimalGrp -> let quantity = sign $ Decimal (fromIntegral precision) (digitGroupNumber $ mconcat digitGrps <> decimalGrp) precision = digitGroupLength decimalGrp digitGroupStyle = Just $ DigitGroups digitSep (groupSizes digitGrps) in (quantity, precision, Just decPt, digitGroupStyle) where sign :: Decimal -> Decimal sign = if negated then negate else id -- Outputs digit group sizes from least significant to most significant groupSizes :: [DigitGrp] -> [Int] groupSizes digitGrps = reverse $ case map digitGroupLength digitGrps of (a:b:cs) | a < b -> b:cs gs -> gs isValidDecimalBy :: Char -> AmountStyle -> Bool isValidDecimalBy c = \case AmountStyle{asdecimalpoint = Just d} -> d == c AmountStyle{asdigitgroups = Just (DigitGroups g _)} -> g /= c AmountStyle{asprecision = 0} -> False _ -> True -- | Parse and interpret the structure of a number as far as possible -- without external hints. Numbers are digit strings, possibly separated -- into digit groups by one of two types of separators. (1) Numbers may -- optionally have a decimal point, which may be either a period or comma. -- (2) Numbers may optionally contain digit group separators, which must -- all be either a period, a comma, or a space. -- -- It is our task to deduce the identities of the decimal point and digit -- separator characters, based on the allowed syntax. For instance, we -- make use of the fact that a decimal point can occur at most once and -- must succeed all digit group separators. -- -- >>> parseTest rawnumberp "1,234,567.89" -- BothSeparators ',' ["1","234","567"] '.' "89" -- >>> parseTest rawnumberp "1 000" -- AmbiguousNumber "1" ' ' "000" -- rawnumberp :: TextParser m RawNumber rawnumberp = label "rawnumberp" $ do rawNumber <- leadingDecimalPt <|> leadingDigits -- Guard against mistyped numbers notFollowedBy $ satisfy isDecimalPointChar <|> (char ' ' >> digitChar) return $ dbg8 "rawnumberp" rawNumber where leadingDecimalPt :: TextParser m RawNumber leadingDecimalPt = LeadingDecimalPt <$> satisfy isDecimalPointChar <*> pdigitgroup leadingDigits :: TextParser m RawNumber leadingDigits = do grp1 <- pdigitgroup withSeparators grp1 <|> trailingDecimalPt grp1 <|> pure (NoSeparators grp1) withSeparators :: DigitGrp -> TextParser m RawNumber withSeparators grp1 = do (sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> pdigitgroup grps <- many $ try $ char sep *> pdigitgroup let digitGroups = grp1 : grp2 : grps withDecimalPt sep digitGroups <|> pure (withoutDecimalPt grp1 sep grp2 grps) withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber withDecimalPt digitSep digitGroups = do decimalPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep decimalDigitGroup <- option mempty pdigitgroup pure $ BothSeparators digitSep digitGroups decimalPt decimalDigitGroup withoutDecimalPt :: DigitGrp -> Char -> DigitGrp -> [DigitGrp] -> RawNumber withoutDecimalPt grp1 sep grp2 grps | null grps = AmbiguousNumber grp1 sep grp2 | otherwise = DigitSeparators sep (grp1:grp2:grps) trailingDecimalPt :: DigitGrp -> TextParser m RawNumber trailingDecimalPt grp1 = do decimalPt <- satisfy isDecimalPointChar pure $ TrailingDecimalPt grp1 decimalPt isDecimalPointChar :: Char -> Bool isDecimalPointChar c = c == '.' || c == ',' isDigitSeparatorChar :: Char -> Bool isDigitSeparatorChar c = isDecimalPointChar c || c == ' ' data DigitGrp = DigitGrp { digitGroupLength :: Int, digitGroupNumber :: Integer } deriving (Eq) instance Show DigitGrp where show (DigitGrp len num) | len > 0 = "\"" ++ padding ++ numStr ++ "\"" | otherwise = "\"\"" where numStr = show num padding = replicate (len - length numStr) '0' instance Sem.Semigroup DigitGrp where DigitGrp l1 n1 <> DigitGrp l2 n2 = DigitGrp (l1 + l2) (n1 * 10^l2 + n2) instance Monoid DigitGrp where mempty = DigitGrp 0 0 mappend = (Sem.<>) pdigitgroup :: TextParser m DigitGrp pdigitgroup = label "digit group" $ makeGroup <$> takeWhile1P (Just "digit") isDigit where makeGroup = uncurry DigitGrp . foldl' step (0, 0) . T.unpack step (!l, !a) c = (l+1, a*10 + fromIntegral (digitToInt c)) data RawNumber = LeadingDecimalPt Char DigitGrp -- .50 | TrailingDecimalPt DigitGrp Char -- 100. | NoSeparators DigitGrp -- 100 | AmbiguousNumber DigitGrp Char DigitGrp -- 1,000 | DigitSeparators Char [DigitGrp] -- 1,000,000 | BothSeparators Char [DigitGrp] Char DigitGrp -- 1,000.50 deriving (Show, Eq) -- test_numberp = do -- let s `is` n = assertParseEqual (parseWithState mempty numberp s) n -- assertFails = assertBool . isLeft . parseWithState mempty numberp -- assertFails "" -- "0" `is` (0, 0, '.', ',', []) -- "1" `is` (1, 0, '.', ',', []) -- "1.1" `is` (1.1, 1, '.', ',', []) -- "1,000.1" `is` (1000.1, 1, '.', ',', [3]) -- "1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2]) -- "1,000,000" `is` (1000000, 0, '.', ',', [3,3]) -- "1." `is` (1, 0, '.', ',', []) -- "1," `is` (1, 0, ',', '.', []) -- ".1" `is` (0.1, 1, '.', ',', []) -- ",1" `is` (0.1, 1, ',', '.', []) -- assertFails "1,000.000,1" -- assertFails "1.000,000.1" -- assertFails "1,000.000.1" -- assertFails "1,,1" -- assertFails "1..1" -- assertFails ".1," -- assertFails ",1." --- ** comments multilinecommentp :: TextParser m () multilinecommentp = startComment *> anyLine `skipManyTill` endComment where startComment = string "comment" >> emptyLine endComment = eof <|> (string "end comment" >> emptyLine) emptyLine = void $ skipMany spacenonewline *> newline anyLine = takeWhileP Nothing (\c -> c /= '\n') *> newline emptyorcommentlinep :: TextParser m () emptyorcommentlinep = do skipMany spacenonewline void linecommentp <|> void newline -- | Parse a possibly multi-line comment following a semicolon. followingcommentp :: TextParser m Text followingcommentp = T.unlines . map snd <$> followingcommentlinesp followingcommentlinesp :: TextParser m [(SourcePos, Text)] followingcommentlinesp = do skipMany spacenonewline samelineComment@(_, samelineCommentText) <- try commentp <|> (,) <$> (getPosition <* eolof) <*> pure "" newlineComments <- many $ try $ do skipSome spacenonewline -- leading whitespace is required commentp if T.null samelineCommentText && null newlineComments then pure [] else pure $ samelineComment : newlineComments -- | Parse a possibly multi-line comment following a semicolon, and -- any tags and/or posting dates within it. Posting dates can be -- expressed with "date"/"date2" tags and/or bracketed dates. The -- dates are parsed in full here so that errors are reported in the -- right position. Missing years can be inferred if a default date is -- provided. -- -- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]" -- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) -- -- Year unspecified and no default provided -> unknown year error, at correct position: -- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line" -- Left ...1:22...partial date 3/4 found, but the current year is unknown... -- -- Date tag value contains trailing text - forgot the comma, confused: -- the syntaxes ? We'll accept the leading date anyway -- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6" -- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) -- followingcommentandtagsp :: Monad m => Maybe Day -> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day) followingcommentandtagsp mdefdate = do -- pdbg 0 "followingcommentandtagsp" commentLines <- lift followingcommentlinesp -- pdbg 0 $ "commentws:" ++ show commentLines -- Reparse the comment for any tags. tagsWithPositions <- case traverse (runTextParserAt tagswithvaluepositions) commentLines of Right tss -> pure $ concat tss Left e -> throwError $ parseErrorPretty e -- Extract date-tag style posting dates from the tags. -- Use the transaction date for defaults, if provided. let isDateLabel txt = txt == "date" || txt == "date2" isDateTag = isDateLabel . fst . snd tagDates <- case traverse tagDate $ filter isDateTag tagsWithPositions of Right ds -> pure ds Left e -> throwError $ parseErrorPretty e -- Reparse the comment for any bracketed style posting dates. -- Use the transaction date for defaults, if provided. bracketedDates <- case traverse (runTextParserAt (bracketedpostingdatesp mdefdate)) commentLines of Right dss -> pure $ concat dss Left e -> throwError $ parseErrorPretty e let pdates = tagDates ++ bracketedDates mdate = fmap snd $ find ((=="date") .fst) pdates mdate2 = fmap snd $ find ((=="date2").fst) pdates -- pdbg 0 $ "allDates: "++show pdates let strippedComment = T.unlines $ map (T.strip . snd) commentLines tags = map snd tagsWithPositions -- pdbg 0 $ "comment:"++show strippedComment pure (strippedComment, tags, mdate, mdate2) where runTextParserAt parser (pos, txt) = runTextParser (setPosition pos *> parser) txt tagDate :: (SourcePos, Tag) -> Either (ParseError Char Void) (TagName, Day) tagDate (pos, (name, value)) = fmap (name,) $ runTextParserAt (datep' myear) (pos, value) where myear = fmap (first3 . toGregorian) mdefdate -- A transaction/posting comment must start with a semicolon. This parser -- discards the leading whitespace of the comment and returns the source -- position of the comment's first non-whitespace character. commentp :: TextParser m (SourcePos, Text) commentp = commentStartingWithp (==';') -- A line (file-level) comment can start with a semicolon, hash, or star -- (allowing org nodes). This parser discards the leading whitespace of -- the comment and returns the source position of the comment's first -- non-whitespace character. linecommentp :: TextParser m (SourcePos, Text) linecommentp = commentStartingWithp $ \c -> c == ';' || c == '#' || c == '*' commentStartingWithp :: (Char -> Bool) -> TextParser m (SourcePos, Text) commentStartingWithp f = do -- ptrace "commentStartingWith" satisfy f skipMany spacenonewline startPos <- getPosition content <- takeWhileP Nothing (\c -> c /= '\n') optional newline return (startPos, content) --- ** tags -- | Extract any tags (name:value ended by comma or newline) embedded in a string. -- -- >>> commentTags "a b:, c:c d:d, e" -- [("b",""),("c","c d:d")] -- -- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c" -- [("b","c")] -- -- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")] -- -- >>> commentTags "\na b:, \nd:e, f" -- [("b",""),("d","e")] -- -- >>> commentTags ":value" -- [] -- commentTags :: Text -> [Tag] commentTags s = either (const []) id $ runTextParser tagsp s -- | Parse all tags found in a string. tagsp :: SimpleTextParser [Tag] tagsp = map snd <$> tagswithvaluepositions tagswithvaluepositions :: SimpleTextParser [(SourcePos, Tag)] tagswithvaluepositions = do -- pdbg 0 $ "tagsp" -- If we parse in a single pass, we cannot know whether some text -- belongs to a tag label until we have reached a colon (in which case -- it does) or whitespace (in which case it does not). Therefore, we -- hold on to the text until we reach such a break point, and then -- decide what to do. potentialTagName <- tillNextBreak atSpaceChar <|> atColon potentialTagName <|> atEof where isBreak :: Char -> Bool isBreak c = isSpace c || c == ':' tillNextBreak :: SimpleTextParser Text tillNextBreak = takeWhileP Nothing (not . isBreak) tagValue :: SimpleTextParser Text tagValue = T.strip <$> takeWhileP Nothing (not . commaOrNewline) where commaOrNewline c = c == ',' || c == '\n' atSpaceChar :: SimpleTextParser [(SourcePos, Tag)] atSpaceChar = skipSome spaceChar *> tagswithvaluepositions atColon :: Text -> SimpleTextParser [(SourcePos, Tag)] atColon tagName = do char ':' if T.null tagName then tagswithvaluepositions else do pos <- getPosition tagVal <- tagValue let tag = (pos, (tagName, tagVal)) tags <- tagswithvaluepositions pure $ tag : tags atEof :: SimpleTextParser [(SourcePos, Tag)] atEof = eof *> pure [] --- ** posting dates -- | Parse all bracketed posting dates found in a string. The dates are -- parsed fully to give useful errors. Missing years can be inferred only -- if a default date is provided. -- bracketedpostingdatesp :: Maybe Day -> SimpleTextParser [(TagName,Day)] bracketedpostingdatesp mdefdate = do -- pdbg 0 $ "bracketedpostingdatesp" skipMany $ notChar '[' fmap concat $ sepEndBy (bracketeddatetagsp mdefdate <|> char '[' *> pure []) (skipMany $ notChar '[') --- ** bracketed dates -- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as -- "date" and/or "date2" tags. Anything that looks like an attempt at -- this (a square-bracketed sequence of 0123456789/-.= containing at -- least one digit and one date separator) is also parsed, and will -- throw an appropriate error. -- -- The dates are parsed in full here so that errors are reported in -- the right position. A missing year in DATE can be inferred if a -- default date is provided. A missing year in DATE2 will be inferred -- from DATE. -- -- >>> first parseErrorPretty $ rtp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]" -- Right [("date",2016-01-02),("date2",2016-03-04)] -- -- >>> first parseErrorPretty $ rtp (bracketeddatetagsp Nothing) "[1]" -- Left ...not a bracketed date... -- -- >>> first parseErrorPretty $ rtp (bracketeddatetagsp Nothing) "[2016/1/32]" -- Left ...1:11:...well-formed but invalid date: 2016/1/32... -- -- >>> first parseErrorPretty $ rtp (bracketeddatetagsp Nothing) "[1/31]" -- Left ...1:6:...partial date 1/31 found, but the current year is unknown... -- -- >>> first parseErrorPretty $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" -- Left ...1:13:...expecting month or day... -- bracketeddatetagsp :: Maybe Day -> SimpleTextParser [(TagName, Day)] bracketeddatetagsp mdefdate = do -- pdbg 0 "bracketeddatetagsp" try $ do s <- lookAhead $ between (char '[') (char ']') $ takeWhile1P Nothing isBracketedDateChar unless (T.any isDigit s && T.any isDateSepChar s) $ fail "not a bracketed date" -- Looks sufficiently like a bracketed date to commit to parsing a date between (char '[') (char ']') $ do let myear1 = fmap readYear mdefdate md1 <- optional $ datep' myear1 let myear2 = fmap readYear md1 <|> myear1 md2 <- optional $ char '=' *> datep' myear2 pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2] where readYear = first3 . toGregorian isBracketedDateChar c = isDigit c || isDateSepChar c || c == '='