lib: add a copy of the ledger4 parser

This adds some or all of these as new dependencies for hledger-lib:
parsers, parsec, attoparsec, trifecta
This commit is contained in:
Simon Michael 2016-11-12 10:50:57 -08:00
parent a64dea651e
commit f5ee020b88
5 changed files with 352 additions and 2 deletions

View File

@ -53,6 +53,9 @@ flag oldtime
default: False
library
hs-source-dirs:
other/ledger-parse
, .
ghc-options: -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-type-defaults -fno-warn-orphans
build-depends:
base >=4.3 && <5
@ -81,6 +84,9 @@ library
, uglymemo
, utf8-string >=0.3.5 && <1.1
, HUnit
, parsers >= 0.5
, system-filepath
, trifecta >= 0.91
, parsec
, semigroups
if impl(ghc <7.6)
@ -135,13 +141,16 @@ library
Hledger.Utils.Tree
Hledger.Utils.UTF8IOCompat
other-modules:
Ledger.Parser.Text
Paths_hledger_lib
default-language: Haskell2010
test-suite doctests
type: exitcode-stdio-1.0
hs-source-dirs:
tests
other/ledger-parse
, .
, tests
ghc-options: -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-type-defaults -fno-warn-orphans
main-is: doctests.hs
build-depends:
@ -171,18 +180,65 @@ test-suite doctests
, uglymemo
, utf8-string >=0.3.5 && <1.1
, HUnit
, parsers >= 0.5
, system-filepath
, trifecta >= 0.91
, doctest >=0.8
, Glob >=0.7
if impl(ghc <7.6)
build-depends:
ghc-prim
other-modules:
Ledger.Parser.Text
Hledger
Hledger.Data
Hledger.Data.Account
Hledger.Data.AccountName
Hledger.Data.Amount
Hledger.Data.Commodity
Hledger.Data.Dates
Hledger.Data.Journal
Hledger.Data.Ledger
Hledger.Data.Period
Hledger.Data.Posting
Hledger.Data.RawOptions
Hledger.Data.StringFormat
Hledger.Data.Timeclock
Hledger.Data.Transaction
Hledger.Data.Types
Hledger.Query
Hledger.Read
Hledger.Read.Common
Hledger.Read.CsvReader
Hledger.Read.JournalReader
Hledger.Read.TimeclockReader
Hledger.Read.TimedotReader
Hledger.Reports
Hledger.Reports.BalanceHistoryReport
Hledger.Reports.BalanceReport
Hledger.Reports.EntriesReport
Hledger.Reports.MultiBalanceReports
Hledger.Reports.PostingsReport
Hledger.Reports.ReportOptions
Hledger.Reports.TransactionsReports
Hledger.Utils
Hledger.Utils.Debug
Hledger.Utils.Parse
Hledger.Utils.Regex
Hledger.Utils.String
Hledger.Utils.Test
Hledger.Utils.Text
Hledger.Utils.Tree
Hledger.Utils.UTF8IOCompat
default-language: Haskell2010
test-suite hunittests
type: exitcode-stdio-1.0
main-is: hunittests.hs
hs-source-dirs:
tests
other/ledger-parse
, .
, tests
ghc-options: -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-type-defaults -fno-warn-orphans
build-depends:
base >=4.3 && <5
@ -211,6 +267,9 @@ test-suite hunittests
, uglymemo
, utf8-string >=0.3.5 && <1.1
, HUnit
, parsers >= 0.5
, system-filepath
, trifecta >= 0.91
, hledger-lib
, test-framework
, test-framework-hunit
@ -224,4 +283,46 @@ test-suite hunittests
else
build-depends:
time >=1.5
other-modules:
Ledger.Parser.Text
Hledger
Hledger.Data
Hledger.Data.Account
Hledger.Data.AccountName
Hledger.Data.Amount
Hledger.Data.Commodity
Hledger.Data.Dates
Hledger.Data.Journal
Hledger.Data.Ledger
Hledger.Data.Period
Hledger.Data.Posting
Hledger.Data.RawOptions
Hledger.Data.StringFormat
Hledger.Data.Timeclock
Hledger.Data.Transaction
Hledger.Data.Types
Hledger.Query
Hledger.Read
Hledger.Read.Common
Hledger.Read.CsvReader
Hledger.Read.JournalReader
Hledger.Read.TimeclockReader
Hledger.Read.TimedotReader
Hledger.Reports
Hledger.Reports.BalanceHistoryReport
Hledger.Reports.BalanceReport
Hledger.Reports.EntriesReport
Hledger.Reports.MultiBalanceReports
Hledger.Reports.PostingsReport
Hledger.Reports.ReportOptions
Hledger.Reports.TransactionsReports
Hledger.Utils
Hledger.Utils.Debug
Hledger.Utils.Parse
Hledger.Utils.Regex
Hledger.Utils.String
Hledger.Utils.Test
Hledger.Utils.Text
Hledger.Utils.Tree
Hledger.Utils.UTF8IOCompat
default-language: Haskell2010

View File

@ -0,0 +1,19 @@
opyright (c) 2012 John Wiegley
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -0,0 +1,219 @@
{-# LANGUAGE OverloadedStrings #-}
module Ledger.Parser.Text
( parseJournalFile
, RawJournal(..)
, RawEntity(..)
, RawEntityInSitu(..)
, RawPosting(..)
, RawTransaction(..)
, RawAutoTxn(..)
, RawPeriodTxn(..)
-- , main
) where
import Control.Applicative
import Data.ByteString as B hiding (pack, unpack, singleton,
zipWith, concat)
import Data.Maybe
import qualified Data.Text.Encoding as E
import Filesystem.Path.CurrentOS hiding (concat)
import Prelude hiding (FilePath, readFile, until)
import Text.Parser.Combinators
import Text.Parser.LookAhead
import Text.Parser.Token
import Text.Trifecta
import Text.Trifecta.Delta
-- import Control.DeepSeq
-- import Criterion
-- import Criterion.Main
infixl 4 <$!>
(<$!>) :: TokenParsing m => (a -> b) -> m a -> m b
f <$!> ma = ($!) <$> pure f <*> ma
data RawJournal = RawJournal [RawEntity]
deriving (Show, Eq)
data RawEntity = Whitespace String
| FileComment String
| Directive { directiveChar :: Maybe Char
, directiveName :: !String
, directiveArg :: Maybe String }
| RawTransactionEntity RawTransaction
| RawAutoTxnEntity RawAutoTxn
| RawPeriodTxnEntity RawPeriodTxn
| EndOfFile
deriving (Show, Eq)
data RawEntityInSitu = RawEntityInSitu { rawEntityIndex :: !Int
, rawEntityStartPos :: !Rendering
, rawEntity :: !RawEntity
, rawEntityEndPos :: !Rendering }
instance Show RawEntityInSitu where
show x = show (rawEntity x) ++ "\n"
data RawPosting = RawPosting { rawPostState :: Maybe Char
, rawPostAccount :: !String
, rawPostAmount :: Maybe String
, rawPostNote :: Maybe String }
| RawPostingNote !String
deriving (Show, Eq)
data RawTransaction = RawTransaction { rawTxnDate :: !String
, rawTxnDateAux :: Maybe String
, rawTxnState :: Maybe Char
, rawTxnCode :: Maybe String
, rawTxnDesc :: !String
, rawTxnNote :: Maybe String
, rawTxnPosts :: ![RawPosting] }
deriving (Show, Eq)
data RawAutoTxn = RawAutoTxn { rawATxnQuery :: !String
, rawATxnPosts :: ![RawPosting] }
deriving (Show, Eq)
data RawPeriodTxn = RawPeriodTxn { rawPTxnPeriod :: !String
, rawPTxnPosts :: ![RawPosting] }
deriving (Show, Eq)
txnDateParser :: TokenParsing m => m String
txnDateParser = some (digit <|> oneOf "/-." <|> letter)
<?> "transaction date"
longSep :: CharParsing m => m ()
longSep = () <$ (try (char ' ' *> char ' ') <|> tab)
noteParser :: (LookAheadParsing m, CharParsing m) => m String
noteParser = char ';' *> manyTill anyChar (try (lookAhead endOfLine))
<?> "note"
longSepOrEOL :: (LookAheadParsing m, CharParsing m) => m ()
longSepOrEOL = try (lookAhead (longSep <|> endOfLine))
longSepOrEOLIf :: (LookAheadParsing m, CharParsing m) => m p -> m ()
longSepOrEOLIf p = try (lookAhead ((() <$ longSep <* p) <|> endOfLine))
until :: CharParsing m => m () -> m String
until end = (:) <$> noneOf "\r\n" <*> manyTill anyChar end
tokenP :: TokenParsing m => m p -> m p
tokenP p = p <* skipMany spaceChars
postingParser :: (LookAheadParsing m, TokenParsing m) => m RawPosting
postingParser =
(RawPosting <$!> (some spaceChars *>
optional (tokenP (char '*' <|> char '!')))
<*> tokenP (until longSepOrEOL)
<*> optional (tokenP (until (longSepOrEOLIf (char ';'))))
<*> (optional noteParser <* endOfLine)
<?> "posting")
<|>
(RawPostingNote <$!> (concat <$!>
some ((++) <$!> (some spaceChars *> noteParser)
<*> ((:[]) <$> endOfLineChar)))
<?> "posting note")
spaceChars :: CharParsing m => m ()
spaceChars = () <$ oneOf " \t"
regularTxnParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
regularTxnParser = RawTransactionEntity <$!> go
where go = RawTransaction
<$!> txnDateParser
<*> optional (char '=' *> txnDateParser)
<*> (many spaceChars *>
optional (tokenP (char '*' <|> char '!')))
<*> optional
(tokenP (parens (many (noneOf ")\r\n"))))
<*> tokenP (until (longSepOrEOLIf (char ';')))
<*> optional noteParser
<*> (endOfLine *> some postingParser)
<?> "regular transaction"
automatedTxnParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
automatedTxnParser = RawAutoTxnEntity <$!> go
where go = RawAutoTxn
<$!> (tokenP (char '=') *>
manyTill anyChar (try (lookAhead endOfLine)))
<*> (endOfLine *> some postingParser)
<?> "automated transaction"
periodicTxnParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
periodicTxnParser = RawPeriodTxnEntity <$!> go
where go = RawPeriodTxn
<$!> (tokenP (char '~') *>
manyTill anyChar (try (lookAhead endOfLine)))
<*> (endOfLine *> some postingParser)
<?> "periodic transaction"
transactionParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
transactionParser = regularTxnParser
<|> automatedTxnParser
<|> periodicTxnParser
<?> "transaction"
directiveParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
directiveParser =
Directive <$!> optional (oneOf "@!")
<*> ((:) <$!> letter <*> tokenP (many alphaNum))
<*> (optional
((:) <$!> noneOf "\r\n"
<*> manyTill anyChar (try (lookAhead endOfLine)))
<* endOfLine)
<?> "directive"
endOfLine :: CharParsing m => m ()
endOfLine = () <$ endOfLineChar
endOfLineChar :: CharParsing m => m Char
endOfLineChar = skipOptional (char '\r') *> char '\n'
commentParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
commentParser = FileComment
<$!> (concat <$!>
some ((++) <$!> noteParser
<*> ((:[]) <$> endOfLineChar)))
<?> "comment"
whitespaceParser :: TokenParsing m => m RawEntity
whitespaceParser = Whitespace <$!> some space <?> "whitespace"
entityParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity
entityParser = directiveParser
<|> commentParser
<|> whitespaceParser
<|> transactionParser
<?> "journal"
rendCaret :: DeltaParsing m => m Rendering
rendCaret = addCaret <$!> position <*> rend
journalParser :: (LookAheadParsing m, DeltaParsing m) => m [RawEntityInSitu]
journalParser =
many (RawEntityInSitu <$!> pure 0 <*> rendCaret <*> entityParser <*> rendCaret)
parseJournalFile :: FilePath -> ByteString -> Result [RawEntityInSitu]
parseJournalFile file contents =
let filepath = either id id $ toText file
start = Directed (E.encodeUtf8 filepath) 0 0 0 0
in zipWith (\e i -> e { rawEntityIndex = i})
<$> parseByteString journalParser start contents
<*> pure [1..]
-- testme :: IO (Result [RawEntityInSitu])
-- testme =
-- let file = "/Users/johnw/Documents/Finances/ledger.dat"
-- in parseJournalFile (fromText (T.pack file)) <$> B.readFile file
-- instance NFData RawEntityInSitu
-- instance NFData (Result a)
-- main = do let file = "/Users/johnw/Documents/Finances/ledger.dat"
-- bs <- B.readFile file
-- defaultMain [
-- bench "main" $ nf (parseJournalFile (fromText (T.pack file))) bs ]
-- Text.hs ends here

View File

@ -0,0 +1,4 @@
This is the parser code from John W's
https://github.com/ledger/ledger4/tree/master/ledger-parse ,
revision 8fb414c + updates for latest parsers lib.
Later, perhaps it will be a published lib and we can remove this copy.

View File

@ -65,6 +65,10 @@ dependencies:
- uglymemo
- utf8-string >=0.3.5 && <1.1
- HUnit
# for ledger-parse:
- parsers >= 0.5
- system-filepath
- trifecta >= 0.91
ghc-options:
- -Wall
- -fno-warn-unused-do-bind
@ -72,6 +76,9 @@ ghc-options:
- -fno-warn-missing-signatures
- -fno-warn-type-defaults
- -fno-warn-orphans
source-dirs:
- other/ledger-parse
- .
library:
exposed-modules:
- Hledger