Merge pull request #804 from awjchen/parseErrors

Display the line on which a parse error occurs
This commit is contained in:
Simon Michael 2018-06-11 14:31:30 -07:00 committed by GitHub
commit a7ca636942
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 657 additions and 358 deletions

View File

@ -91,7 +91,6 @@ import Data.Time.Calendar
import Data.Time.Calendar.OrdinalDate
import Data.Time.Clock
import Data.Time.LocalTime
import Data.Void (Void)
import Safe (headMay, lastMay, readMay)
import Text.Megaparsec
import Text.Megaparsec.Char
@ -313,7 +312,7 @@ earliest (Just d1) (Just d2) = Just $ min d1 d2
-- | Parse a period expression to an Interval and overall DateSpan using
-- the provided reference date, or return a parse error.
parsePeriodExpr :: Day -> Text -> Either (ParseError Char Void) (Interval, DateSpan)
parsePeriodExpr :: Day -> Text -> Either (ParseError Char CustomErr) (Interval, DateSpan)
parsePeriodExpr refdate s = parsewith (periodexpr refdate <* eof) (T.toLower s)
maybePeriod :: Day -> Text -> Maybe (Interval,DateSpan)
@ -373,13 +372,13 @@ fixSmartDateStr :: Day -> Text -> String
fixSmartDateStr d s = either
(\e->error' $ printf "could not parse date %s %s" (show s) (show e))
id
$ (fixSmartDateStrEither d s :: Either (ParseError Char Void) String)
$ (fixSmartDateStrEither d s :: Either (ParseError Char CustomErr) String)
-- | A safe version of fixSmartDateStr.
fixSmartDateStrEither :: Day -> Text -> Either (ParseError Char Void) String
fixSmartDateStrEither :: Day -> Text -> Either (ParseError Char CustomErr) String
fixSmartDateStrEither d = either Left (Right . showDate) . fixSmartDateStrEither' d
fixSmartDateStrEither' :: Day -> Text -> Either (ParseError Char Void) Day
fixSmartDateStrEither' :: Day -> Text -> Either (ParseError Char CustomErr) Day
fixSmartDateStrEither' d s = case parsewith smartdateonly (T.toLower s) of
Right sd -> Right $ fixSmartDate d sd
Left e -> Left e

View File

@ -183,6 +183,7 @@ instance NFData PostingType
type TagName = Text
type TagValue = Text
type Tag = (TagName, TagValue) -- ^ A tag name and (possibly empty) value.
type DateTag = (TagName, Day)
-- | The status of a transaction or posting, recorded with a status mark
-- (nothing, !, or *). What these mean is ultimately user defined.

View File

@ -14,6 +14,7 @@ Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
--- * module
{-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PackageImports #-}
@ -28,8 +29,6 @@ module Hledger.Read.Common (
rtp,
runJournalParser,
rjp,
runErroringJournalParser,
rejp,
genericSourcePos,
journalSourcePos,
generateAutomaticPostings,
@ -49,7 +48,6 @@ module Hledger.Read.Common (
getAccountAliases,
clearAccountAliases,
journalAddFile,
parserErrorAt,
-- * parsers
-- ** transaction bits
@ -82,12 +80,10 @@ module Hledger.Read.Common (
-- ** comments
multilinecommentp,
emptyorcommentlinep,
followingcommentp,
followingcommentandtagsp,
-- ** tags
commentTags,
tagsp,
followingcommentp,
transactioncommentp,
postingcommentp,
-- ** bracketed dates
bracketeddatetagsp
@ -97,8 +93,9 @@ where
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (readFile)
import "base-compat-batteries" Control.Monad.Compat
import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError)
import Control.Monad.Except (ExceptT(..), throwError)
import Control.Monad.State.Strict
import Data.Bifunctor (bimap, second)
import Data.Char
import Data.Data
import Data.Decimal (DecimalRaw (Decimal), Decimal)
@ -113,11 +110,11 @@ import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.LocalTime
import Data.Void (Void)
import System.Time (getClockTime)
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer (decimal)
import Text.Megaparsec.Custom
import Hledger.Data
import Hledger.Utils
@ -184,21 +181,15 @@ rawOptsToInputOpts rawopts = InputOpts{
--- * parsing utilities
-- | Run a string parser with no state in the identity monad.
runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char Void) a
runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char CustomErr) a
runTextParser p t = runParser p "" t
rtp = runTextParser
-- | Run a journal parser with a null journal-parsing state.
runJournalParser, rjp :: Monad m => JournalParser m a -> Text -> m (Either (ParseError Char Void) a)
runJournalParser, rjp :: Monad m => JournalParser m a -> Text -> m (Either (ParseError Char CustomErr) a)
runJournalParser p t = runParserT (evalStateT p mempty) "" t
rjp = runJournalParser
-- | Run an error-raising journal parser with a null journal-parsing state.
runErroringJournalParser, rejp :: Monad m => ErroringJournalParser m a -> Text -> m (Either String a)
runErroringJournalParser p t = runExceptT $
runJournalParser p t >>= either (throwError . parseErrorPretty) return
rejp = runErroringJournalParser
genericSourcePos :: SourcePos -> GenericSourcePos
genericSourcePos p = GenericSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p) (fromIntegral . unPos $ sourceColumn p)
@ -219,19 +210,19 @@ generateAutomaticPostings j = j { jtxns = map modifier $ jtxns j }
-- | Given a megaparsec ParsedJournal parser, input options, file
-- path and file content: parse and post-process a Journal, or give an error.
parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts
parseAndFinaliseJournal :: JournalParser IO ParsedJournal -> InputOpts
-> FilePath -> Text -> ExceptT String IO Journal
parseAndFinaliseJournal parser iopts f txt = do
t <- liftIO getClockTime
y <- liftIO getCurrentYear
ep <- runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt
ep <- liftIO $ runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt
case ep of
Right pj ->
let pj' = if auto_ iopts then generateAutomaticPostings pj else pj in
case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of
Right j -> return j
Left e -> throwError e
Left e -> throwError $ parseErrorPretty e
Left e -> throwError $ customParseErrorPretty txt e
parseAndFinaliseJournal' :: JournalParser Identity ParsedJournal -> InputOpts
-> FilePath -> Text -> ExceptT String IO Journal
@ -319,18 +310,6 @@ journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]}
-- append, unlike the other fields, even though we do a final reverse,
-- to compensate for additional reversal due to including/monoid-concatting
-- -- | Terminate parsing entirely, returning the given error message
-- -- with the current parse position prepended.
-- parserError :: String -> ErroringJournalParser a
-- parserError s = do
-- pos <- getPosition
-- parserErrorAt pos s
-- | Terminate parsing entirely, returning the given error message
-- with the given parse position prepended.
parserErrorAt :: Monad m => SourcePos -> String -> ErroringJournalParser m a
parserErrorAt pos s = throwError $ sourcePosPretty pos ++ ":\n" ++ s
--- * parsers
--- ** transaction bits
@ -348,7 +327,7 @@ codep = option "" $ try $ do
skipSome spacenonewline
between (char '(') (char ')') $ takeWhileP Nothing (/= ')')
descriptionp :: JournalParser m Text
descriptionp :: TextParser m Text
descriptionp = takeWhileP Nothing (not . semicolonOrNewline)
where semicolonOrNewline c = c == ';' || c == '\n'
@ -365,38 +344,47 @@ datep = do
datep' :: Maybe Year -> TextParser m Day
datep' mYear = do
startPos <- getPosition
d1 <- decimal <?> "year or month"
sep <- satisfy isDateSepChar <?> "date separator"
d2 <- decimal <?> "month or day"
fullDate d1 sep d2 <|> partialDate mYear d1 sep d2
fullDate startPos d1 sep d2 <|> partialDate startPos mYear d1 sep d2
<?> "full or partial date"
where
fullDate :: Integer -> Char -> Int -> TextParser m Day
fullDate year sep1 month = do
fullDate :: SourcePos -> Integer -> Char -> Int -> TextParser m Day
fullDate startPos year sep1 month = do
sep2 <- satisfy isDateSepChar <?> "date separator"
day <- decimal <?> "day"
endPos <- getPosition
let dateStr = show year ++ [sep1] ++ show month ++ [sep2] ++ show day
when (sep1 /= sep2) $ fail $
when (sep1 /= sep2) $ parseErrorAtRegion startPos endPos $
"invalid date (mixing date separators is not allowed): " ++ dateStr
case fromGregorianValid year month day of
Nothing -> fail $ "well-formed but invalid date: " ++ dateStr
Nothing -> parseErrorAtRegion startPos endPos $
"well-formed but invalid date: " ++ dateStr
Just date -> pure $! date
partialDate :: Maybe Year -> Integer -> Char -> Int -> TextParser m Day
partialDate mYear month sep day = case mYear of
Just year ->
case fromGregorianValid year (fromIntegral month) day of
Nothing -> fail $ "well-formed but invalid date: " ++ dateStr
Just date -> pure $! date
where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day
partialDate
:: SourcePos -> Maybe Year -> Integer -> Char -> Int -> TextParser m Day
partialDate startPos mYear month sep day = do
endPos <- getPosition
case mYear of
Just year ->
case fromGregorianValid year (fromIntegral month) day of
Nothing -> parseErrorAtRegion startPos endPos $
"well-formed but invalid date: " ++ dateStr
Just date -> pure $! date
where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day
Nothing -> fail $
"partial date "++dateStr++" found, but the current year is unknown"
where dateStr = show month ++ [sep] ++ show day
Nothing -> parseErrorAtRegion startPos endPos $
"partial date "++dateStr++" found, but the current year is unknown"
where dateStr = show month ++ [sep] ++ show day
{-# INLINABLE datep' #-}
-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
-- Hyphen (-) and period (.) are also allowed as date separators.
@ -469,7 +457,7 @@ accountnamep = do
-- | Parse whitespace then an amount, with an optional left or right
-- currency symbol and optional price, or return the special
-- "missing" marker amount.
spaceandamountormissingp :: Monad m => JournalParser m MixedAmount
spaceandamountormissingp :: JournalParser m MixedAmount
spaceandamountormissingp =
option missingmixedamt $ try $ do
lift $ skipSome spacenonewline
@ -492,13 +480,13 @@ test_spaceandamountormissingp = do
-- | Parse a single-commodity amount, with optional symbol on the left or
-- right, optional unit or total price, and optional (ignored)
-- ledger-style balance assertion or fixed lot price declaration.
amountp :: Monad m => JournalParser m Amount
amountp :: JournalParser m Amount
amountp = do
amount <- amountwithoutpricep
price <- priceamountp
pure $ amount { aprice = price }
amountwithoutpricep :: Monad m => JournalParser m Amount
amountwithoutpricep :: JournalParser m Amount
amountwithoutpricep =
try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp
@ -545,7 +533,7 @@ skipMany' p = go False
then go True
else pure isNull
leftsymbolamountp :: Monad m => JournalParser m Amount
leftsymbolamountp :: JournalParser m Amount
leftsymbolamountp = do
sign <- lift signp
m <- lift multiplierp
@ -557,7 +545,7 @@ leftsymbolamountp = do
return $ Amount c (sign q) NoPrice s m
<?> "left-symbol amount"
rightsymbolamountp :: Monad m => JournalParser m Amount
rightsymbolamountp :: JournalParser m Amount
rightsymbolamountp = do
m <- lift multiplierp
sign <- lift signp
@ -576,7 +564,7 @@ rightsymbolamountp = do
return $ Amount c (sign q) NoPrice s m
<?> "right-symbol amount"
nosymbolamountp :: Monad m => JournalParser m Amount
nosymbolamountp :: JournalParser m Amount
nosymbolamountp = do
m <- lift multiplierp
suggestedStyle <- getDefaultAmountStyle
@ -601,7 +589,7 @@ quotedcommoditysymbolp =
simplecommoditysymbolp :: TextParser m CommoditySymbol
simplecommoditysymbolp = takeWhile1P Nothing (not . isNonsimpleCommodityChar)
priceamountp :: Monad m => JournalParser m Price
priceamountp :: JournalParser m Price
priceamountp = option NoPrice $ try $ do
lift (skipMany spacenonewline)
char '@'
@ -612,7 +600,7 @@ priceamountp = option NoPrice $ try $ do
pure $ priceConstructor priceAmount
partialbalanceassertionp :: Monad m => JournalParser m BalanceAssertion
partialbalanceassertionp :: JournalParser m BalanceAssertion
partialbalanceassertionp = optional $ try $ do
lift (skipMany spacenonewline)
sourcepos <- genericSourcePos <$> lift getPosition
@ -632,7 +620,7 @@ partialbalanceassertionp = optional $ try $ do
-- <|> return Nothing
-- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices
fixedlotpricep :: Monad m => JournalParser m (Maybe Amount)
fixedlotpricep :: JournalParser m (Maybe Amount)
fixedlotpricep = optional $ try $ do
lift (skipMany spacenonewline)
char '{'
@ -885,211 +873,235 @@ data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
multilinecommentp :: TextParser m ()
multilinecommentp = startComment *> anyLine `skipManyTill` endComment
where
startComment = string "comment" >> skipLine
endComment = eof <|> string "end comment" *> skipLine
startComment = string "comment" *> trailingSpaces
endComment = eof <|> string "end comment" *> trailingSpaces
skipLine = void $ skipMany spacenonewline *> newline
anyLine = takeWhileP Nothing (\c -> c /= '\n') *> newline
trailingSpaces = skipMany spacenonewline <* newline
anyLine = void $ takeWhileP Nothing (\c -> c /= '\n') *> newline
{-# INLINABLE multilinecommentp #-}
emptyorcommentlinep :: TextParser m ()
emptyorcommentlinep = do
skipMany spacenonewline
void linecommentp <|> void newline
skiplinecommentp <|> void newline
where
-- A line (file-level) comment can start with a semicolon, hash, or star
-- (allowing org nodes).
skiplinecommentp :: TextParser m ()
skiplinecommentp = do
satisfy $ \c -> c == ';' || c == '#' || c == '*'
void $ takeWhileP Nothing (\c -> c /= '\n')
optional newline
pure ()
-- | Parse a possibly multi-line comment following a semicolon.
followingcommentp :: TextParser m Text
followingcommentp = T.unlines . map snd <$> followingcommentlinesp
{-# INLINABLE emptyorcommentlinep #-}
followingcommentlinesp :: TextParser m [(SourcePos, Text)]
followingcommentlinesp = do
skipMany spacenonewline
samelineComment@(_, samelineCommentText)
<- try commentp <|> (,) <$> (getPosition <* eolof) <*> pure ""
newlineComments <- many $ try $ do
skipSome spacenonewline -- leading whitespace is required
commentp
if T.null samelineCommentText && null newlineComments
then pure []
else pure $ samelineComment : newlineComments
-- | Parse a possibly multi-line comment following a semicolon, and
-- any tags and/or posting dates within it. Posting dates can be
-- expressed with "date"/"date2" tags and/or bracketed dates. The
-- dates are parsed in full here so that errors are reported in the
-- right position. Missing years can be inferred if a default date is
-- provided.
-- A parser combinator for parsing (possibly multiline) comments
-- following journal items.
--
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]"
-- Several journal items may be followed by comments, which begin with
-- semicolons and extend to the end of the line. Such comments may span
-- multiple lines, but comment lines below the journal item must be
-- preceeded by leading whitespace.
--
-- This parser combinator accepts a parser that consumes all input up
-- until the next newline. This parser should extract the "content" from
-- comments. The resulting parser returns this content plus the raw text
-- of the comment itself.
followingcommentp' :: (Monoid a) => TextParser m a -> TextParser m (Text, a)
followingcommentp' contentp = do
skipMany spacenonewline
sameLine <- try headerp *> match' contentp <|> pure ("", mempty)
_ <- eolof
lowerLines <- many $
try (skipSome spacenonewline *> headerp) *> match' contentp <* eolof
let (textLines, results) = unzip $ sameLine : lowerLines
strippedCommentText = T.unlines $ map T.strip textLines
result = mconcat results
pure (strippedCommentText, result)
where
headerp = char ';' *> skipMany spacenonewline
{-# INLINABLE followingcommentp' #-}
-- | Parse the text of a (possibly multiline) comment following a journal
-- item.
followingcommentp :: TextParser m Text
followingcommentp =
fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n'))
{-# INLINABLE followingcommentp #-}
-- | Parse a transaction comment and extract its tags.
--
-- The first line of a transaction may be followed by comments, which
-- begin with semicolons and extend to the end of the line. Transaction
-- comments may span multiple lines, but comment lines below the
-- transaction must be preceeded by leading whitespace.
--
-- 2000/1/1 ; a transaction comment starting on the same line ...
-- ; extending to the next line
-- account1 $1
-- account2
--
-- Tags are name-value pairs.
--
-- >>> let getTags (_,tags) = tags
-- >>> let parseTags = fmap getTags . rtp transactioncommentp
--
-- >>> parseTags "; name1: val1, name2:all this is value2"
-- Right [("name1","val1"),("name2","all this is value2")]
--
-- A tag's name must be immediately followed by a colon, without
-- separating whitespace. The corresponding value consists of all the text
-- following the colon up until the next colon or newline, stripped of
-- leading and trailing whitespace.
--
transactioncommentp :: TextParser m (Text, [Tag])
transactioncommentp = followingcommentp' commenttagsp
{-# INLINABLE transactioncommentp #-}
commenttagsp :: TextParser m [Tag]
commenttagsp = do
tagName <- fmap (last . T.split isSpace)
$ takeWhileP Nothing (\c -> c /= ':' && c /= '\n')
atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF
where
atColon :: Text -> TextParser m [Tag]
atColon name = char ':' *> do
if T.null name
then commenttagsp
else do
skipMany spacenonewline
val <- tagValue
let tag = (name, val)
(tag:) <$> commenttagsp
tagValue :: TextParser m Text
tagValue = do
val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n')
_ <- optional $ char ','
pure val
{-# INLINABLE commenttagsp #-}
-- | Parse a posting comment and extract its tags and dates.
--
-- Postings may be followed by comments, which begin with semicolons and
-- extend to the end of the line. Posting comments may span multiple
-- lines, but comment lines below the posting must be preceeded by
-- leading whitespace.
--
-- 2000/1/1
-- account1 $1 ; a posting comment starting on the same line ...
-- ; extending to the next line
--
-- account2
-- ; a posting comment beginning on the next line
--
-- Tags are name-value pairs.
--
-- >>> let getTags (_,tags,_,_) = tags
-- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing)
--
-- >>> parseTags "; name1: val1, name2:all this is value2"
-- Right [("name1","val1"),("name2","all this is value2")]
--
-- A tag's name must be immediately followed by a colon, without
-- separating whitespace. The corresponding value consists of all the text
-- following the colon up until the next colon or newline, stripped of
-- leading and trailing whitespace.
--
-- Posting dates may be expressed with "date"/"date2" tags or with
-- bracketed date syntax. Posting dates will inherit their year from the
-- transaction date if the year is not specified. We throw parse errors on
-- invalid dates.
--
-- >>> let getDates (_,_,d1,d2) = (d1, d2)
-- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000))
--
-- >>> parseDates "; date: 1/2, date2: 1999/12/31"
-- Right (Just 2000-01-02,Just 1999-12-31)
-- >>> parseDates "; [1/2=1999/12/31]"
-- Right (Just 2000-01-02,Just 1999-12-31)
--
-- Example: tags, date tags, and bracketed dates
-- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]"
-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
--
-- Year unspecified and no default provided -> unknown year error, at correct position:
-- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line"
-- Left ...1:22...partial date 3/4 found, but the current year is unknown...
--
-- Date tag value contains trailing text - forgot the comma, confused:
-- the syntaxes ? We'll accept the leading date anyway
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6"
-- Example: extraction of dates from date tags ignores trailing text
-- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6"
-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
--
followingcommentandtagsp
:: Monad m
=> Maybe Day
-> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day)
followingcommentandtagsp mdefdate = do
-- pdbg 0 "followingcommentandtagsp"
postingcommentp
:: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day)
postingcommentp mYear = do
(commentText, (tags, dateTags)) <-
followingcommentp' (commenttagsanddatesp mYear)
let mdate = fmap snd $ find ((=="date") .fst) dateTags
mdate2 = fmap snd $ find ((=="date2").fst) dateTags
pure (commentText, tags, mdate, mdate2)
{-# INLINABLE postingcommentp #-}
commentLines <- lift followingcommentlinesp
-- pdbg 0 $ "commentws:" ++ show commentLines
-- Reparse the comment for any tags.
tagsWithPositions <- case
traverse (runTextParserAt tagswithvaluepositions) commentLines of
Right tss -> pure $ concat tss
Left e -> throwError $ parseErrorPretty e
-- Extract date-tag style posting dates from the tags.
-- Use the transaction date for defaults, if provided.
let isDateLabel txt = txt == "date" || txt == "date2"
isDateTag = isDateLabel . fst . snd
tagDates <- case traverse tagDate $ filter isDateTag tagsWithPositions of
Right ds -> pure ds
Left e -> throwError $ parseErrorPretty e
-- Reparse the comment for any bracketed style posting dates.
-- Use the transaction date for defaults, if provided.
bracketedDates <- case
traverse (runTextParserAt (bracketedpostingdatesp mdefdate))
commentLines of
Right dss -> pure $ concat dss
Left e -> throwError $ parseErrorPretty e
let pdates = tagDates ++ bracketedDates
mdate = fmap snd $ find ((=="date") .fst) pdates
mdate2 = fmap snd $ find ((=="date2").fst) pdates
-- pdbg 0 $ "allDates: "++show pdates
let strippedComment = T.unlines $ map (T.strip . snd) commentLines
tags = map snd tagsWithPositions
-- pdbg 0 $ "comment:"++show strippedComment
pure (strippedComment, tags, mdate, mdate2)
commenttagsanddatesp
:: Maybe Year -> TextParser m ([Tag], [DateTag])
commenttagsanddatesp mYear = do
(txt, dateTags) <- match $ readUpTo ':'
-- next char is either ':' or '\n' (or EOF)
let tagName = last (T.split isSpace txt)
(fmap.second) (dateTags++) (atColon tagName) <|> pure ([], dateTags) -- if not ':', then either '\n' or EOF
where
runTextParserAt parser (pos, txt) =
runTextParser (setPosition pos *> parser) txt
readUpTo :: Char -> TextParser m [DateTag]
readUpTo end = do
void $ takeWhileP Nothing (\c -> c /= end && c /= '\n' && c /= '[')
-- if not '[' then ':' or '\n' or EOF
atBracket (readUpTo end) <|> pure []
tagDate :: (SourcePos, Tag)
-> Either (ParseError Char Void) (TagName, Day)
tagDate (pos, (name, value)) =
fmap (name,) $ runTextParserAt (datep' myear) (pos, value)
where myear = fmap (first3 . toGregorian) mdefdate
atBracket :: TextParser m [DateTag] -> TextParser m [DateTag]
atBracket cont = do
-- Uses the fact that bracketed date-tags cannot contain newlines
dateTags <- option [] $ lookAhead (bracketeddatetagsp mYear)
_ <- char '['
dateTags' <- cont
pure $ dateTags ++ dateTags'
-- A transaction/posting comment must start with a semicolon. This parser
-- discards the leading whitespace of the comment and returns the source
-- position of the comment's first non-whitespace character.
commentp :: TextParser m (SourcePos, Text)
commentp = commentStartingWithp (==';')
atColon :: Text -> TextParser m ([Tag], [DateTag])
atColon name = char ':' *> do
skipMany spacenonewline
(tags, dateTags) <- case name of
"" -> pure ([], [])
"date" -> dateValue name
"date2" -> dateValue name
_ -> tagValue name
_ <- optional $ char ','
bimap (tags++) (dateTags++) <$> commenttagsanddatesp mYear
-- A line (file-level) comment can start with a semicolon, hash, or star
-- (allowing org nodes). This parser discards the leading whitespace of
-- the comment and returns the source position of the comment's first
-- non-whitespace character.
linecommentp :: TextParser m (SourcePos, Text)
linecommentp =
commentStartingWithp $ \c -> c == ';' || c == '#' || c == '*'
dateValue :: Text -> TextParser m ([Tag], [DateTag])
dateValue name = do
(txt, (date, dateTags)) <- match' $ do
date <- datep' mYear
dateTags <- readUpTo ','
pure (date, dateTags)
let val = T.strip txt
pure $ ( [(name, val)]
, (name, date) : dateTags )
commentStartingWithp :: (Char -> Bool) -> TextParser m (SourcePos, Text)
commentStartingWithp f = do
-- ptrace "commentStartingWith"
satisfy f
skipMany spacenonewline
startPos <- getPosition
content <- takeWhileP Nothing (\c -> c /= '\n')
optional newline
return (startPos, content)
tagValue :: Text -> TextParser m ([Tag], [DateTag])
tagValue name = do
(txt, dateTags) <- match' $ readUpTo ','
let val = T.strip txt
pure $ ( [(name, val)]
, dateTags )
--- ** tags
{-# INLINABLE commenttagsanddatesp #-}
-- | Extract any tags (name:value ended by comma or newline) embedded in a string.
--
-- >>> commentTags "a b:, c:c d:d, e"
-- [("b",""),("c","c d:d")]
--
-- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c"
-- [("b","c")]
--
-- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")]
--
-- >>> commentTags "\na b:, \nd:e, f"
-- [("b",""),("d","e")]
--
-- >>> commentTags ":value"
-- []
--
commentTags :: Text -> [Tag]
commentTags s = either (const []) id $ runTextParser tagsp s
-- | Parse all tags found in a string.
tagsp :: SimpleTextParser [Tag]
tagsp = map snd <$> tagswithvaluepositions
tagswithvaluepositions :: SimpleTextParser [(SourcePos, Tag)]
tagswithvaluepositions = do
-- pdbg 0 $ "tagsp"
-- If we parse in a single pass, we cannot know whether some text
-- belongs to a tag label until we have reached a colon (in which case
-- it does) or whitespace (in which case it does not). Therefore, we
-- hold on to the text until we reach such a break point, and then
-- decide what to do.
potentialTagName <- tillNextBreak
atSpaceChar <|> atColon potentialTagName <|> atEof
where
isBreak :: Char -> Bool
isBreak c = isSpace c || c == ':'
tillNextBreak :: SimpleTextParser Text
tillNextBreak = takeWhileP Nothing (not . isBreak)
tagValue :: SimpleTextParser Text
tagValue = T.strip <$> takeWhileP Nothing (not . commaOrNewline)
where commaOrNewline c = c == ',' || c == '\n'
atSpaceChar :: SimpleTextParser [(SourcePos, Tag)]
atSpaceChar = skipSome spaceChar *> tagswithvaluepositions
atColon :: Text -> SimpleTextParser [(SourcePos, Tag)]
atColon tagName = do
char ':'
if T.null tagName
then tagswithvaluepositions
else do
pos <- getPosition
tagVal <- tagValue
let tag = (pos, (tagName, tagVal))
tags <- tagswithvaluepositions
pure $ tag : tags
atEof :: SimpleTextParser [(SourcePos, Tag)]
atEof = eof *> pure []
--- ** posting dates
-- | Parse all bracketed posting dates found in a string. The dates are
-- parsed fully to give useful errors. Missing years can be inferred only
-- if a default date is provided.
--
bracketedpostingdatesp :: Maybe Day -> SimpleTextParser [(TagName,Day)]
bracketedpostingdatesp mdefdate = do
-- pdbg 0 $ "bracketedpostingdatesp"
skipMany $ notChar '['
concat <$> sepEndBy (bracketeddatetagsp mdefdate <|> char '[' *> pure [])
(skipMany $ notChar '[')
--- ** bracketed dates
@ -1119,8 +1131,9 @@ bracketedpostingdatesp mdefdate = do
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
-- Left ...1:13:...expecting month or day...
--
bracketeddatetagsp :: Maybe Day -> SimpleTextParser [(TagName, Day)]
bracketeddatetagsp mdefdate = do
bracketeddatetagsp
:: Maybe Year -> TextParser m [(TagName, Day)]
bracketeddatetagsp mYear1 = do
-- pdbg 0 "bracketeddatetagsp"
try $ do
s <- lookAhead
@ -1131,14 +1144,24 @@ bracketeddatetagsp mdefdate = do
-- Looks sufficiently like a bracketed date to commit to parsing a date
between (char '[') (char ']') $ do
let myear1 = fmap readYear mdefdate
md1 <- optional $ datep' myear1
md1 <- optional $ datep' mYear1
let myear2 = fmap readYear md1 <|> myear1
md2 <- optional $ char '=' *> datep' myear2
let mYear2 = fmap readYear md1 <|> mYear1
md2 <- optional $ char '=' *> datep' mYear2
pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2]
where
readYear = first3 . toGregorian
isBracketedDateChar c = isDigit c || isDateSepChar c || c == '='
{-# INLINABLE bracketeddatetagsp #-}
--- ** helper parsers
-- A version of `match` that is strict in the returned text
match' :: TextParser m a -> TextParser m (Text, a)
match' p = do
(!txt, p) <- match p
pure (txt, p)

View File

@ -45,7 +45,6 @@ import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Time.Calendar (Day)
import Data.Void (Void)
#if MIN_VERSION_time(1,5,0)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
#else
@ -404,7 +403,7 @@ parseAndValidateCsvRules rulesfile s = do
makeParseError f s = FancyError (fromList [initialPos f]) (S.singleton $ ErrorFail s)
-- | Parse this text as CSV conversion rules. The file path is for error messages.
parseCsvRules :: FilePath -> T.Text -> Either (ParseError Char Void) CsvRules
parseCsvRules :: FilePath -> T.Text -> Either (ParseError Char CustomErr) CsvRules
-- parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
parseCsvRules rulesfile s =
runParser (evalStateT rulesp rules) rulesfile s

View File

@ -42,8 +42,6 @@ module Hledger.Read.JournalReader (
parseAndFinaliseJournal,
runJournalParser,
rjp,
runErroringJournalParser,
rejp,
-- * Parsers used elsewhere
getParentAccount,
@ -75,7 +73,7 @@ import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (readFile)
import qualified Control.Exception as C
import Control.Monad
import Control.Monad.Except (ExceptT(..), runExceptT, throwError)
import Control.Monad.Except (ExceptT(..))
import Control.Monad.State.Strict
import qualified Data.Map.Strict as M
import Data.Text (Text)
@ -84,7 +82,6 @@ import Data.List
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.LocalTime
import Data.Void (Void)
import Safe
import Test.HUnit
#ifdef TESTS
@ -93,6 +90,7 @@ import Text.Megaparsec.Error
#endif
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char
import Text.Megaparsec.Custom
import Text.Printf
import System.FilePath
@ -136,10 +134,10 @@ aliasesFromOpts = map (\a -> fromparse $ runParser accountaliasp ("--alias "++qu
-- | A journal parser. Accumulates and returns a "ParsedJournal",
-- which should be finalised/validated before use.
--
-- >>> rejp (journalp <* eof) "2015/1/1\n a 0\n"
-- >>> rjp (journalp <* eof) "2015/1/1\n a 0\n"
-- Right Journal with 1 transactions, 1 accounts
--
journalp :: MonadIO m => ErroringJournalParser m ParsedJournal
journalp :: MonadIO m => JournalParser m ParsedJournal
journalp = do
many addJournalItemP
eof
@ -147,7 +145,7 @@ journalp = do
-- | A side-effecting parser; parses any kind of journal item
-- and updates the parse state accordingly.
addJournalItemP :: MonadIO m => ErroringJournalParser m ()
addJournalItemP :: MonadIO m => JournalParser m ()
addJournalItemP =
-- all journal line types can be distinguished by the first
-- character, can use choice without backtracking
@ -166,7 +164,7 @@ addJournalItemP =
-- | Parse any journal directive and update the parse state accordingly.
-- Cf http://hledger.org/manual.html#directives,
-- http://ledger-cli.org/3.0/doc/ledger3.html#Command-Directives
directivep :: MonadIO m => ErroringJournalParser m ()
directivep :: MonadIO m => JournalParser m ()
directivep = (do
optional $ char '!'
choice [
@ -186,40 +184,44 @@ directivep = (do
]
) <?> "directive"
includedirectivep :: MonadIO m => ErroringJournalParser m ()
includedirectivep :: MonadIO m => JournalParser m ()
includedirectivep = do
string "include"
lift (skipSome spacenonewline)
filename <- lift restofline
parentpos <- getPosition
parentj <- get
filename <- T.unpack <$> takeWhileP Nothing (/= '\n') -- don't consume newline yet
-- save parent state
parentParserState <- getParserState
parentj <- get
let childj = newJournalWithParseStateFrom parentj
(ej :: Either String ParsedJournal) <-
liftIO $ runExceptT $ do
let curdir = takeDirectory (sourceName parentpos)
filepath <- expandPath curdir filename `orRethrowIOError` (show parentpos ++ " locating " ++ filename)
txt <- readFilePortably filepath `orRethrowIOError` (show parentpos ++ " reading " ++ filepath)
(ej1::Either (ParseError Char Void) ParsedJournal) <-
runParserT
(evalStateT
(choiceInState
[journalp
,timeclockfilep
,timedotfilep
-- can't include a csv file yet, that reader is special
])
childj)
filepath txt
either
(throwError
. ((show parentpos ++ " in included file " ++ show filename ++ ":\n") ++)
. parseErrorPretty)
(return . journalAddFile (filepath, txt))
ej1
case ej of
Left e -> throwError e
Right childj -> modify' (\parentj -> childj <> parentj)
-- discard child's parse info, prepend its (reversed) list data, combine other fields
parentpos <- getPosition
-- read child input
let curdir = takeDirectory (sourceName parentpos)
filepath <- lift $ expandPath curdir filename `orRethrowIOError` (show parentpos ++ " locating " ++ filename)
childInput <- lift $ readFilePortably filepath `orRethrowIOError` (show parentpos ++ " reading " ++ filepath)
-- set child state
setInput childInput
pushPosition $ initialPos filepath
put childj
-- parse include file
let parsers = [ journalp
, timeclockfilep
, timedotfilep
] -- can't include a csv file yet, that reader is special
updatedChildj <- journalAddFile (filepath, childInput) <$>
region (withSource childInput) (choiceInState parsers)
-- restore parent state, prepending the child's parse info
setParserState parentParserState
put $ updatedChildj <> parentj
-- discard child's parse info, prepend its (reversed) list data, combine other fields
void newline
newJournalWithParseStateFrom :: Journal -> Journal
newJournalWithParseStateFrom j = mempty{
@ -234,11 +236,12 @@ newJournalWithParseStateFrom j = mempty{
-- | Lift an IO action into the exception monad, rethrowing any IO
-- error with the given message prepended.
orRethrowIOError :: IO a -> String -> ExceptT String IO a
orRethrowIOError io msg =
ExceptT $
(Right <$> io)
`C.catch` \(e::C.IOException) -> return $ Left $ printf "%s:\n%s" msg (show e)
orRethrowIOError :: MonadIO m => IO a -> String -> TextParser m a
orRethrowIOError io msg = do
eResult <- liftIO $ (Right <$> io) `C.catch` \(e::C.IOException) -> pure $ Left $ printf "%s:\n%s" msg (show e)
case eResult of
Right res -> pure res
Left errMsg -> fail errMsg
accountdirectivep :: JournalParser m ()
accountdirectivep = do
@ -248,12 +251,7 @@ accountdirectivep = do
macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar)
let macode :: Maybe AccountCode = read <$> macode'
newline
_tags <- many $ do
startpos <- getPosition
l <- indentedlinep
case runTextParser (setPosition startpos >> tagsp) $ T.pack l of
Right ts -> return ts
Left _e -> return [] -- TODO throwError $ parseErrorPretty e
skipMany indentedlinep
modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j})
@ -262,28 +260,30 @@ indentedlinep = lift (skipSome spacenonewline) >> (rstrip <$> lift restofline)
-- | Parse a one-line or multi-line commodity directive.
--
-- >>> Right _ <- rejp commoditydirectivep "commodity $1.00"
-- >>> Right _ <- rejp commoditydirectivep "commodity $\n format $1.00"
-- >>> Right _ <- rejp commoditydirectivep "commodity $\n\n" -- a commodity with no format
-- >>> Right _ <- rejp commoditydirectivep "commodity $1.00\n format $1.00" -- both, what happens ?
commoditydirectivep :: Monad m => ErroringJournalParser m ()
commoditydirectivep = try commoditydirectiveonelinep <|> commoditydirectivemultilinep
-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00"
-- >>> Right _ <- rjp commoditydirectivep "commodity $\n format $1.00"
-- >>> Right _ <- rjp commoditydirectivep "commodity $\n\n" -- a commodity with no format
-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00\n format $1.00" -- both, what happens ?
commoditydirectivep :: JournalParser m ()
commoditydirectivep = commoditydirectiveonelinep <|> commoditydirectivemultilinep
-- | Parse a one-line commodity directive.
--
-- >>> Right _ <- rejp commoditydirectiveonelinep "commodity $1.00"
-- >>> Right _ <- rejp commoditydirectiveonelinep "commodity $1.00 ; blah\n"
commoditydirectiveonelinep :: Monad m => ErroringJournalParser m ()
-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00"
-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00 ; blah\n"
commoditydirectiveonelinep :: JournalParser m ()
commoditydirectiveonelinep = do
string "commodity"
lift (skipSome spacenonewline)
pos <- getPosition
Amount{acommodity,astyle} <- amountp
(pos, Amount{acommodity,astyle}) <- try $ do
string "commodity"
lift (skipSome spacenonewline)
pos <- getPosition
amount <- amountp
pure $ (pos, amount)
lift (skipMany spacenonewline)
_ <- lift followingcommentp
let comm = Commodity{csymbol=acommodity, cformat=Just $ dbg2 "style from commodity directive" astyle}
if asdecimalpoint astyle == Nothing
then parserErrorAt pos pleaseincludedecimalpoint
then parseErrorAt pos pleaseincludedecimalpoint
else modify' (\j -> j{jcommodities=M.insert acommodity comm $ jcommodities j})
pleaseincludedecimalpoint :: String
@ -291,8 +291,8 @@ pleaseincludedecimalpoint = "to avoid ambiguity, please include a decimal point
-- | Parse a multi-line commodity directive, containing 0 or more format subdirectives.
--
-- >>> Right _ <- rejp commoditydirectivemultilinep "commodity $ ; blah \n format $1.00 ; blah"
commoditydirectivemultilinep :: Monad m => ErroringJournalParser m ()
-- >>> Right _ <- rjp commoditydirectivemultilinep "commodity $ ; blah \n format $1.00 ; blah"
commoditydirectivemultilinep :: JournalParser m ()
commoditydirectivemultilinep = do
string "commodity"
lift (skipSome spacenonewline)
@ -306,7 +306,7 @@ commoditydirectivemultilinep = do
-- | Parse a format (sub)directive, throwing a parse error if its
-- symbol does not match the one given.
formatdirectivep :: Monad m => CommoditySymbol -> ErroringJournalParser m AmountStyle
formatdirectivep :: CommoditySymbol -> JournalParser m AmountStyle
formatdirectivep expectedsym = do
string "format"
lift (skipSome spacenonewline)
@ -316,9 +316,9 @@ formatdirectivep expectedsym = do
if acommodity==expectedsym
then
if asdecimalpoint astyle == Nothing
then parserErrorAt pos pleaseincludedecimalpoint
then parseErrorAt pos pleaseincludedecimalpoint
else return $ dbg2 "style from format subdirective" astyle
else parserErrorAt pos $
else parseErrorAt pos $
printf "commodity directive symbol \"%s\" and format directive symbol \"%s\" should be the same" expectedsym acommodity
keywordp :: String -> JournalParser m ()
@ -403,7 +403,7 @@ defaultyeardirectivep = do
failIfInvalidYear y
setYear y'
defaultcommoditydirectivep :: Monad m => ErroringJournalParser m ()
defaultcommoditydirectivep :: JournalParser m ()
defaultcommoditydirectivep = do
char 'D' <?> "default commodity"
lift (skipSome spacenonewline)
@ -411,10 +411,10 @@ defaultcommoditydirectivep = do
Amount{acommodity,astyle} <- amountp
lift restofline
if asdecimalpoint astyle == Nothing
then parserErrorAt pos pleaseincludedecimalpoint
then parseErrorAt pos pleaseincludedecimalpoint
else setDefaultCommodityAndStyle (acommodity, astyle)
marketpricedirectivep :: Monad m => JournalParser m MarketPrice
marketpricedirectivep :: JournalParser m MarketPrice
marketpricedirectivep = do
char 'P' <?> "market price"
lift (skipMany spacenonewline)
@ -434,7 +434,7 @@ ignoredpricecommoditydirectivep = do
lift restofline
return ()
commodityconversiondirectivep :: Monad m => JournalParser m ()
commodityconversiondirectivep :: JournalParser m ()
commodityconversiondirectivep = do
char 'C' <?> "commodity conversion"
lift (skipSome spacenonewline)
@ -448,7 +448,7 @@ commodityconversiondirectivep = do
--- ** transactions
modifiertransactionp :: MonadIO m => ErroringJournalParser m ModifierTransaction
modifiertransactionp :: JournalParser m ModifierTransaction
modifiertransactionp = do
char '=' <?> "modifier transaction"
lift (skipMany spacenonewline)
@ -457,17 +457,17 @@ modifiertransactionp = do
return $ ModifierTransaction valueexpr postings
-- | Parse a periodic transaction
periodictransactionp :: MonadIO m => ErroringJournalParser m PeriodicTransaction
periodictransactionp :: JournalParser m PeriodicTransaction
periodictransactionp = do
char '~' <?> "periodic transaction"
lift (skipMany spacenonewline)
periodexpr <- T.strip <$> descriptionp
periodexpr <- lift $ T.strip <$> descriptionp
_ <- lift followingcommentp
postings <- postingsp Nothing
return $ PeriodicTransaction periodexpr postings
-- | Parse a (possibly unbalanced) transaction.
transactionp :: MonadIO m => ErroringJournalParser m Transaction
transactionp :: JournalParser m Transaction
transactionp = do
-- ptrace "transactionp"
pos <- getPosition
@ -476,10 +476,10 @@ transactionp = do
lookAhead (lift spacenonewline <|> newline) <?> "whitespace or newline"
status <- lift statusp <?> "cleared status"
code <- lift codep <?> "transaction code"
description <- T.strip <$> descriptionp
comment <- lift followingcommentp
let tags = commentTags comment
postings <- postingsp (Just date)
description <- lift $ T.strip <$> descriptionp
(comment, tags) <- lift transactioncommentp
let year = first3 $ toGregorian date
postings <- postingsp (Just year)
pos' <- getPosition
let sourcepos = journalSourcePos pos pos'
return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings ""
@ -581,18 +581,18 @@ test_transactionp = do
-- Parse the following whitespace-beginning lines as postings, posting
-- tags, and/or comments (inferring year, if needed, from the given date).
postingsp :: MonadIO m => Maybe Day -> ErroringJournalParser m [Posting]
postingsp mdate = many (postingp mdate) <?> "postings"
postingsp :: Maybe Year -> JournalParser m [Posting]
postingsp mTransactionYear = many (postingp mTransactionYear) <?> "postings"
-- linebeginningwithspaces :: Monad m => JournalParser m String
-- linebeginningwithspaces :: JournalParser m String
-- linebeginningwithspaces = do
-- sp <- lift (skipSome spacenonewline)
-- c <- nonspace
-- cs <- lift restofline
-- return $ sp ++ (c:cs) ++ "\n"
postingp :: MonadIO m => Maybe Day -> ErroringJournalParser m Posting
postingp mtdate = do
postingp :: Maybe Year -> JournalParser m Posting
postingp mTransactionYear = do
-- pdbg 0 "postingp"
(status, account) <- try $ do
lift (skipSome spacenonewline)
@ -605,7 +605,7 @@ postingp mtdate = do
massertion <- partialbalanceassertionp
_ <- fixedlotpricep
lift (skipMany spacenonewline)
(comment,tags,mdate,mdate2) <- followingcommentandtagsp mtdate
(comment,tags,mdate,mdate2) <- lift $ postingcommentp mTransactionYear
return posting
{ pdate=mdate
, pdate2=mdate2

View File

@ -83,7 +83,7 @@ reader = Reader
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse = parseAndFinaliseJournal timeclockfilep
timeclockfilep :: ErroringJournalParser IO ParsedJournal
timeclockfilep :: MonadIO m => JournalParser m ParsedJournal
timeclockfilep = do many timeclockitemp
eof
j@Journal{jparsetimeclockentries=es} <- get

View File

@ -1,34 +1,56 @@
{-# LANGUAGE CPP, TypeFamilies #-}
module Hledger.Utils.Parse where
import Control.Monad.Except
module Hledger.Utils.Parse (
SimpleStringParser,
SimpleTextParser,
TextParser,
JournalParser,
choice',
choiceInState,
surroundedBy,
parsewith,
parsewithString,
parseWithState,
parseWithState',
fromparse,
parseerror,
showDateParseError,
nonspace,
isNonNewlineSpace,
spacenonewline,
restofline,
eolof,
-- * re-exports
CustomErr
)
where
import Control.Monad.State.Strict (StateT, evalStateT)
import Data.Char
import Data.Functor.Identity (Identity(..))
import Data.List
import Data.Text (Text)
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Custom
import Text.Printf
import Hledger.Data.Types
import Hledger.Utils.UTF8IOCompat (error')
-- | A parser of string to some type.
type SimpleStringParser a = Parsec Void String a
type SimpleStringParser a = Parsec CustomErr String a
-- | A parser of strict text to some type.
type SimpleTextParser = Parsec Void Text -- XXX an "a" argument breaks the CsvRulesParser declaration somehow
type SimpleTextParser = Parsec CustomErr Text -- XXX an "a" argument breaks the CsvRulesParser declaration somehow
-- | A parser of text in some monad.
type TextParser m a = ParsecT Void Text m a
type TextParser m a = ParsecT CustomErr Text m a
-- | A parser of text in some monad, with a journal as state.
type JournalParser m a = StateT Journal (ParsecT Void Text m) a
-- | A parser of text in some monad, with a journal as state, that can throw an error string mid-parse.
type ErroringJournalParser m a = StateT Journal (ParsecT Void Text (ExceptT String m)) a
type JournalParser m a = StateT Journal (ParsecT CustomErr Text m) a
-- | Backtracking choice, use this when alternatives share a prefix.
-- Consumes no input if all choices fail.
@ -37,7 +59,7 @@ choice' = choice . map try
-- | Backtracking choice, use this when alternatives share a prefix.
-- Consumes no input if all choices fail.
choiceInState :: [StateT s (ParsecT Void Text m) a] -> StateT s (ParsecT Void Text m) a
choiceInState :: [StateT s (ParsecT CustomErr Text m) a] -> StateT s (ParsecT CustomErr Text m) a
choiceInState = choice . map try
surroundedBy :: Applicative m => m openclose -> m a -> m a
@ -49,7 +71,7 @@ parsewith p = runParser p ""
parsewithString :: Parsec e String a -> String -> Either (ParseError Char e) a
parsewithString p = runParser p ""
parseWithState :: Monad m => st -> StateT st (ParsecT Void Text m) a -> Text -> m (Either (ParseError Char Void) a)
parseWithState :: Monad m => st -> StateT st (ParsecT CustomErr Text m) a -> Text -> m (Either (ParseError Char CustomErr) a)
parseWithState ctx p s = runParserT (evalStateT p ctx) "" s
parseWithState'
@ -78,7 +100,7 @@ nonspace = satisfy (not . isSpace)
isNonNewlineSpace :: Char -> Bool
isNonNewlineSpace c = c /= '\n' && isSpace c
spacenonewline :: (Stream s, Char ~ Token s) => ParsecT Void s m Char
spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char
spacenonewline = satisfy isNonNewlineSpace
restofline :: TextParser m String

View File

@ -0,0 +1,248 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
module Text.Megaparsec.Custom (
-- * Custom parse error type
CustomErr,
-- * Throwing custom parse errors
parseErrorAt,
parseErrorAtRegion,
withSource,
-- * Pretty-printing custom parse errors
customParseErrorPretty
)
where
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (readFile)
import Data.Foldable (asum, toList)
import qualified Data.List.NonEmpty as NE
import Data.Proxy (Proxy (Proxy))
import qualified Data.Set as S
import Data.Text (Text)
import Data.Void (Void)
import Text.Megaparsec
--- * Custom parse error type
-- | A custom error type for the parser. The type is specialized to
-- parsers of 'Text' streams.
data CustomErr
-- | Fail with a message at a specific source position interval. The
-- interval must be contained within a single line.
= ErrorFailAt SourcePos -- Starting position
Pos -- Ending position (column; same line as start)
String -- Error message
-- | Attach a source file to a parse error (for error reporting from
-- include files, e.g. with the 'region' parser combinator)
| ErrorWithSource Text -- Source file contents
(ParseError Char CustomErr) -- The original
deriving (Show, Eq, Ord)
-- We require an 'Ord' instance for 'CustomError' so that they may be
-- stored in a 'Set'. The actual instance is inconsequential, so we just
-- derive it, but this requires an (orphan) instance for 'ParseError'.
-- Hopefully this does not cause any trouble.
deriving instance (Ord c, Ord e) => Ord (ParseError c e)
instance ShowErrorComponent CustomErr where
showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
showErrorComponent (ErrorWithSource _ e) = parseErrorTextPretty e
--- * Throwing custom parse errors
-- | Fail at a specific source position.
parseErrorAt :: MonadParsec CustomErr s m => SourcePos -> String -> m a
parseErrorAt pos msg = customFailure (ErrorFailAt pos (sourceColumn pos) msg)
{-# INLINABLE parseErrorAt #-}
-- | Fail at a specific source interval (within a single line). The
-- interval is inclusive on the left and exclusive on the right; that is,
-- it spans from the start position to just before (and not including) the
-- end position.
parseErrorAtRegion
:: MonadParsec CustomErr s m
=> SourcePos -- ^ Start position
-> SourcePos -- ^ End position
-> String -- ^ Error message
-> m a
parseErrorAtRegion startPos endPos msg =
let startCol = sourceColumn startPos
endCol' = mkPos $ subtract 1 $ unPos $ sourceColumn endPos
endCol = if startCol <= endCol'
&& sourceLine startPos == sourceLine endPos
then endCol' else startCol
in customFailure (ErrorFailAt startPos endCol msg)
{-# INLINABLE parseErrorAtRegion #-}
-- | Attach a source file to a parse error. Intended for use with the
-- 'region' parser combinator.
withSource :: Text -> ParseError Char CustomErr -> ParseError Char CustomErr
withSource s e =
FancyError (errorPos e) $ S.singleton $ ErrorCustom $ ErrorWithSource s e
--- * Pretty-printing custom parse errors
-- | Pretty-print our custom parse errors and display the line on which
-- the parse error occured. Use this instead of 'parseErrorPretty'.
--
-- If any custom errors are present, arbitrarily take the first one (since
-- only one custom error should be used at a time).
customParseErrorPretty :: Text -> ParseError Char CustomErr -> String
customParseErrorPretty source err = case findCustomError err of
Nothing -> customParseErrorPretty' source err pos1
Just (ErrorWithSource customSource customErr) ->
customParseErrorPretty customSource customErr
Just (ErrorFailAt sourcePos col errMsg) ->
let newPositionStack = sourcePos NE.:| NE.tail (errorPos err)
errorIntervalLength = mkPos $ max 1 $
unPos col - unPos (sourceColumn sourcePos) + 1
newErr :: ParseError Char Void
newErr = FancyError newPositionStack (S.singleton (ErrorFail errMsg))
in customParseErrorPretty' source newErr errorIntervalLength
where
findCustomError :: ParseError Char CustomErr -> Maybe CustomErr
findCustomError err = case err of
FancyError _ errSet ->
finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet
_ -> Nothing
finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b
finds f = asum . map f . toList
--- * Modified Megaparsec source
-- The below code has been copied from Megaparsec (v.6.4.1,
-- Text.Megaparsec.Error) and modified to suit our needs. These changes are
-- indicated by square brackets. The following copyright notice, conditions,
-- and disclaimer apply to all code below this point.
--
-- Copyright © 20152018 Megaparsec contributors<br>
-- Copyright © 2007 Paolo Martini<br>
-- Copyright © 19992000 Daan Leijen
--
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- * Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- * Redistributions in binary form must reproduce the above copyright notice,
-- this list of conditions and the following disclaimer in the documentation
-- and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
-- NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
-- OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-- EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-- | Pretty-print a 'ParseError Char CustomErr' and display the line on
-- which the parse error occurred. The rendered 'String' always ends with
-- a newline.
customParseErrorPretty'
:: ( ShowToken (Token s)
, LineToken (Token s)
, ShowErrorComponent e
, Stream s )
=> s -- ^ Original input stream
-> ParseError (Token s) e -- ^ Parse error to render
-> Pos -- ^ Length of error interval [added]
-> String -- ^ Result of rendering
customParseErrorPretty' = customParseErrorPretty_ defaultTabWidth
customParseErrorPretty_
:: forall s e.
( ShowToken (Token s)
, LineToken (Token s)
, ShowErrorComponent e
, Stream s )
=> Pos -- ^ Tab width
-> s -- ^ Original input stream
-> ParseError (Token s) e -- ^ Parse error to render
-> Pos -- ^ Length of error interval [added]
-> String -- ^ Result of rendering
customParseErrorPretty_ w s e l =
sourcePosStackPretty (errorPos e) <> ":\n" <>
padding <> "|\n" <>
lineNumber <> " | " <> rline <> "\n" <>
padding <> "| " <> rpadding <> highlight <> "\n" <> -- [added `highlight`]
parseErrorTextPretty e
where
epos = NE.head (errorPos e) -- [changed from NE.last to NE.head]
lineNumber = (show . unPos . sourceLine) epos
padding = replicate (length lineNumber + 1) ' '
rpadding = replicate (unPos (sourceColumn epos) - 1) ' '
highlight = replicate (unPos l) '^' -- [added]
rline =
case rline' of
[] -> "<empty line>"
xs -> expandTab w xs
rline' = fmap tokenAsChar . chunkToTokens (Proxy :: Proxy s) $
selectLine (sourceLine epos) s
-- | Select a line from input stream given its number.
selectLine
:: forall s. (LineToken (Token s), Stream s)
=> Pos -- ^ Number of line to select
-> s -- ^ Input stream
-> Tokens s -- ^ Selected line
selectLine l = go pos1
where
go !n !s =
if n == l
then fst (takeWhile_ notNewline s)
else go (n <> pos1) (stripNewline $ snd (takeWhile_ notNewline s))
notNewline = not . tokenIsNewline
stripNewline s =
case take1_ s of
Nothing -> s
Just (_, s') -> s'
-- | Replace tab characters with given number of spaces.
expandTab
:: Pos
-> String
-> String
expandTab w' = go 0
where
go 0 [] = []
go 0 ('\t':xs) = go w xs
go 0 (x:xs) = x : go 0 xs
go !n xs = ' ' : go (n - 1) xs
w = unPos w'

View File

@ -2,7 +2,7 @@
--
-- see: https://github.com/sol/hpack
--
-- hash: 4e9f93f0ca43f594b381f1e1e03e67ce3379bd4830b260e6f7dc1596b946993f
-- hash: b808d840bfd7de5e860adb6ac41ec6bcee061cebcff87b4a1b87d2a46c58b0bf
name: hledger-lib
version: 1.9.99
@ -95,6 +95,7 @@ library
Hledger.Utils.UTF8IOCompat
Text.Tabular.AsciiWide
other-modules:
Text.Megaparsec.Custom
Paths_hledger_lib
hs-source-dirs:
./.
@ -187,6 +188,7 @@ test-suite doctests
Hledger.Utils.Text
Hledger.Utils.Tree
Hledger.Utils.UTF8IOCompat
Text.Megaparsec.Custom
Text.Tabular.AsciiWide
Paths_hledger_lib
hs-source-dirs:
@ -283,6 +285,7 @@ test-suite easytests
Hledger.Utils.Text
Hledger.Utils.Tree
Hledger.Utils.UTF8IOCompat
Text.Megaparsec.Custom
Text.Tabular.AsciiWide
Paths_hledger_lib
hs-source-dirs:
@ -379,6 +382,7 @@ test-suite hunittests
Hledger.Utils.Text
Hledger.Utils.Tree
Hledger.Utils.UTF8IOCompat
Text.Megaparsec.Custom
Text.Tabular.AsciiWide
Paths_hledger_lib
hs-source-dirs:

View File

@ -175,8 +175,8 @@ rewrite opts@CliOpts{rawopts_=rawopts,reportopts_=ropts} j@Journal{jtxns=ts} = d
outputFromOpts rawopts opts{reportopts_=ropts{query_=""}} j j'
postingp' :: T.Text -> IO Posting
postingp' t = runErroringJournalParser (postingp Nothing <* eof) t' >>= \case
Left err -> fail err
postingp' t = runJournalParser (postingp Nothing <* eof) t' >>= \case
Left err -> fail $ parseErrorPretty' t' err
Right p -> return p
where t' = " " <> t <> "\n" -- inject space and newline for proper parsing

View File

@ -8,6 +8,9 @@
$ hledger -f - print
>2
hledger: -:1:5:
|
1 | 2018
| ^
unexpected newline
expecting date separator or the rest of year or month

View File

@ -23,7 +23,7 @@ end comment
b 0
; date: 3.32
>>>2 /10:19/
>>>2 /10:16/
>>>=1
# 3. Ledger's bracketed date syntax is also supported: `[DATE]`,