diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal index 5eb773692..7cc24ca32 100644 --- a/hledger-lib/hledger-lib.cabal +++ b/hledger-lib/hledger-lib.cabal @@ -53,6 +53,9 @@ flag oldtime default: False library + hs-source-dirs: + other/ledger-parse + , . ghc-options: -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-type-defaults -fno-warn-orphans build-depends: base >=4.3 && <5 @@ -81,6 +84,9 @@ library , uglymemo , utf8-string >=0.3.5 && <1.1 , HUnit + , parsers >= 0.5 + , system-filepath + , trifecta >= 0.91 , parsec , semigroups if impl(ghc <7.6) @@ -135,13 +141,16 @@ library Hledger.Utils.Tree Hledger.Utils.UTF8IOCompat other-modules: + Ledger.Parser.Text Paths_hledger_lib default-language: Haskell2010 test-suite doctests type: exitcode-stdio-1.0 hs-source-dirs: - tests + other/ledger-parse + , . + , tests ghc-options: -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-type-defaults -fno-warn-orphans main-is: doctests.hs build-depends: @@ -171,18 +180,65 @@ test-suite doctests , uglymemo , utf8-string >=0.3.5 && <1.1 , HUnit + , parsers >= 0.5 + , system-filepath + , trifecta >= 0.91 , doctest >=0.8 , Glob >=0.7 if impl(ghc <7.6) build-depends: ghc-prim + other-modules: + Ledger.Parser.Text + Hledger + Hledger.Data + Hledger.Data.Account + Hledger.Data.AccountName + Hledger.Data.Amount + Hledger.Data.Commodity + Hledger.Data.Dates + Hledger.Data.Journal + Hledger.Data.Ledger + Hledger.Data.Period + Hledger.Data.Posting + Hledger.Data.RawOptions + Hledger.Data.StringFormat + Hledger.Data.Timeclock + Hledger.Data.Transaction + Hledger.Data.Types + Hledger.Query + Hledger.Read + Hledger.Read.Common + Hledger.Read.CsvReader + Hledger.Read.JournalReader + Hledger.Read.TimeclockReader + Hledger.Read.TimedotReader + Hledger.Reports + Hledger.Reports.BalanceHistoryReport + Hledger.Reports.BalanceReport + Hledger.Reports.EntriesReport + Hledger.Reports.MultiBalanceReports + Hledger.Reports.PostingsReport + Hledger.Reports.ReportOptions + Hledger.Reports.TransactionsReports + Hledger.Utils + Hledger.Utils.Debug + Hledger.Utils.Parse + Hledger.Utils.Regex + Hledger.Utils.String + Hledger.Utils.Test + Hledger.Utils.Text + Hledger.Utils.Tree + Hledger.Utils.UTF8IOCompat default-language: Haskell2010 test-suite hunittests type: exitcode-stdio-1.0 main-is: hunittests.hs hs-source-dirs: - tests + other/ledger-parse + , . + , tests ghc-options: -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-type-defaults -fno-warn-orphans build-depends: base >=4.3 && <5 @@ -211,6 +267,9 @@ test-suite hunittests , uglymemo , utf8-string >=0.3.5 && <1.1 , HUnit + , parsers >= 0.5 + , system-filepath + , trifecta >= 0.91 , hledger-lib , test-framework , test-framework-hunit @@ -224,4 +283,46 @@ test-suite hunittests else build-depends: time >=1.5 + other-modules: + Ledger.Parser.Text + Hledger + Hledger.Data + Hledger.Data.Account + Hledger.Data.AccountName + Hledger.Data.Amount + Hledger.Data.Commodity + Hledger.Data.Dates + Hledger.Data.Journal + Hledger.Data.Ledger + Hledger.Data.Period + Hledger.Data.Posting + Hledger.Data.RawOptions + Hledger.Data.StringFormat + Hledger.Data.Timeclock + Hledger.Data.Transaction + Hledger.Data.Types + Hledger.Query + Hledger.Read + Hledger.Read.Common + Hledger.Read.CsvReader + Hledger.Read.JournalReader + Hledger.Read.TimeclockReader + Hledger.Read.TimedotReader + Hledger.Reports + Hledger.Reports.BalanceHistoryReport + Hledger.Reports.BalanceReport + Hledger.Reports.EntriesReport + Hledger.Reports.MultiBalanceReports + Hledger.Reports.PostingsReport + Hledger.Reports.ReportOptions + Hledger.Reports.TransactionsReports + Hledger.Utils + Hledger.Utils.Debug + Hledger.Utils.Parse + Hledger.Utils.Regex + Hledger.Utils.String + Hledger.Utils.Test + Hledger.Utils.Text + Hledger.Utils.Tree + Hledger.Utils.UTF8IOCompat default-language: Haskell2010 diff --git a/hledger-lib/other/ledger-parse/LICENSE b/hledger-lib/other/ledger-parse/LICENSE new file mode 100644 index 000000000..c98c70072 --- /dev/null +++ b/hledger-lib/other/ledger-parse/LICENSE @@ -0,0 +1,19 @@ +opyright (c) 2012 John Wiegley + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/hledger-lib/other/ledger-parse/Ledger/Parser/Text.hs b/hledger-lib/other/ledger-parse/Ledger/Parser/Text.hs new file mode 100644 index 000000000..1d114eaf6 --- /dev/null +++ b/hledger-lib/other/ledger-parse/Ledger/Parser/Text.hs @@ -0,0 +1,219 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Ledger.Parser.Text + ( parseJournalFile + , RawJournal(..) + , RawEntity(..) + , RawEntityInSitu(..) + , RawPosting(..) + , RawTransaction(..) + , RawAutoTxn(..) + , RawPeriodTxn(..) + -- , main + ) where + +import Control.Applicative +import Data.ByteString as B hiding (pack, unpack, singleton, + zipWith, concat) +import Data.Maybe +import qualified Data.Text.Encoding as E +import Filesystem.Path.CurrentOS hiding (concat) +import Prelude hiding (FilePath, readFile, until) +import Text.Parser.Combinators +import Text.Parser.LookAhead +import Text.Parser.Token +import Text.Trifecta +import Text.Trifecta.Delta +-- import Control.DeepSeq +-- import Criterion +-- import Criterion.Main + +infixl 4 <$!> + +(<$!>) :: TokenParsing m => (a -> b) -> m a -> m b +f <$!> ma = ($!) <$> pure f <*> ma + +data RawJournal = RawJournal [RawEntity] + deriving (Show, Eq) + +data RawEntity = Whitespace String + | FileComment String + | Directive { directiveChar :: Maybe Char + , directiveName :: !String + , directiveArg :: Maybe String } + | RawTransactionEntity RawTransaction + | RawAutoTxnEntity RawAutoTxn + | RawPeriodTxnEntity RawPeriodTxn + | EndOfFile + deriving (Show, Eq) + +data RawEntityInSitu = RawEntityInSitu { rawEntityIndex :: !Int + , rawEntityStartPos :: !Rendering + , rawEntity :: !RawEntity + , rawEntityEndPos :: !Rendering } + +instance Show RawEntityInSitu where + show x = show (rawEntity x) ++ "\n" + +data RawPosting = RawPosting { rawPostState :: Maybe Char + , rawPostAccount :: !String + , rawPostAmount :: Maybe String + , rawPostNote :: Maybe String } + | RawPostingNote !String + deriving (Show, Eq) + +data RawTransaction = RawTransaction { rawTxnDate :: !String + , rawTxnDateAux :: Maybe String + , rawTxnState :: Maybe Char + , rawTxnCode :: Maybe String + , rawTxnDesc :: !String + , rawTxnNote :: Maybe String + , rawTxnPosts :: ![RawPosting] } + deriving (Show, Eq) + +data RawAutoTxn = RawAutoTxn { rawATxnQuery :: !String + , rawATxnPosts :: ![RawPosting] } + deriving (Show, Eq) + +data RawPeriodTxn = RawPeriodTxn { rawPTxnPeriod :: !String + , rawPTxnPosts :: ![RawPosting] } + deriving (Show, Eq) + +txnDateParser :: TokenParsing m => m String +txnDateParser = some (digit <|> oneOf "/-." <|> letter) + "transaction date" + +longSep :: CharParsing m => m () +longSep = () <$ (try (char ' ' *> char ' ') <|> tab) + +noteParser :: (LookAheadParsing m, CharParsing m) => m String +noteParser = char ';' *> manyTill anyChar (try (lookAhead endOfLine)) + "note" + +longSepOrEOL :: (LookAheadParsing m, CharParsing m) => m () +longSepOrEOL = try (lookAhead (longSep <|> endOfLine)) + +longSepOrEOLIf :: (LookAheadParsing m, CharParsing m) => m p -> m () +longSepOrEOLIf p = try (lookAhead ((() <$ longSep <* p) <|> endOfLine)) + +until :: CharParsing m => m () -> m String +until end = (:) <$> noneOf "\r\n" <*> manyTill anyChar end + +tokenP :: TokenParsing m => m p -> m p +tokenP p = p <* skipMany spaceChars + +postingParser :: (LookAheadParsing m, TokenParsing m) => m RawPosting +postingParser = + (RawPosting <$!> (some spaceChars *> + optional (tokenP (char '*' <|> char '!'))) + <*> tokenP (until longSepOrEOL) + <*> optional (tokenP (until (longSepOrEOLIf (char ';')))) + <*> (optional noteParser <* endOfLine) + "posting") + <|> + (RawPostingNote <$!> (concat <$!> + some ((++) <$!> (some spaceChars *> noteParser) + <*> ((:[]) <$> endOfLineChar))) + "posting note") + +spaceChars :: CharParsing m => m () +spaceChars = () <$ oneOf " \t" + +regularTxnParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +regularTxnParser = RawTransactionEntity <$!> go + where go = RawTransaction + <$!> txnDateParser + <*> optional (char '=' *> txnDateParser) + <*> (many spaceChars *> + optional (tokenP (char '*' <|> char '!'))) + <*> optional + (tokenP (parens (many (noneOf ")\r\n")))) + <*> tokenP (until (longSepOrEOLIf (char ';'))) + <*> optional noteParser + <*> (endOfLine *> some postingParser) + "regular transaction" + +automatedTxnParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +automatedTxnParser = RawAutoTxnEntity <$!> go + where go = RawAutoTxn + <$!> (tokenP (char '=') *> + manyTill anyChar (try (lookAhead endOfLine))) + <*> (endOfLine *> some postingParser) + "automated transaction" + +periodicTxnParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +periodicTxnParser = RawPeriodTxnEntity <$!> go + where go = RawPeriodTxn + <$!> (tokenP (char '~') *> + manyTill anyChar (try (lookAhead endOfLine))) + <*> (endOfLine *> some postingParser) + "periodic transaction" + +transactionParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +transactionParser = regularTxnParser + <|> automatedTxnParser + <|> periodicTxnParser + "transaction" + +directiveParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +directiveParser = + Directive <$!> optional (oneOf "@!") + <*> ((:) <$!> letter <*> tokenP (many alphaNum)) + <*> (optional + ((:) <$!> noneOf "\r\n" + <*> manyTill anyChar (try (lookAhead endOfLine))) + <* endOfLine) + "directive" + +endOfLine :: CharParsing m => m () +endOfLine = () <$ endOfLineChar + +endOfLineChar :: CharParsing m => m Char +endOfLineChar = skipOptional (char '\r') *> char '\n' + +commentParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +commentParser = FileComment + <$!> (concat <$!> + some ((++) <$!> noteParser + <*> ((:[]) <$> endOfLineChar))) + "comment" + +whitespaceParser :: TokenParsing m => m RawEntity +whitespaceParser = Whitespace <$!> some space "whitespace" + +entityParser :: (LookAheadParsing m, TokenParsing m) => m RawEntity +entityParser = directiveParser + <|> commentParser + <|> whitespaceParser + <|> transactionParser + "journal" + +rendCaret :: DeltaParsing m => m Rendering +rendCaret = addCaret <$!> position <*> rend + +journalParser :: (LookAheadParsing m, DeltaParsing m) => m [RawEntityInSitu] +journalParser = + many (RawEntityInSitu <$!> pure 0 <*> rendCaret <*> entityParser <*> rendCaret) + +parseJournalFile :: FilePath -> ByteString -> Result [RawEntityInSitu] +parseJournalFile file contents = + let filepath = either id id $ toText file + start = Directed (E.encodeUtf8 filepath) 0 0 0 0 + in zipWith (\e i -> e { rawEntityIndex = i}) + <$> parseByteString journalParser start contents + <*> pure [1..] + +-- testme :: IO (Result [RawEntityInSitu]) +-- testme = +-- let file = "/Users/johnw/Documents/Finances/ledger.dat" +-- in parseJournalFile (fromText (T.pack file)) <$> B.readFile file + +-- instance NFData RawEntityInSitu +-- instance NFData (Result a) + +-- main = do let file = "/Users/johnw/Documents/Finances/ledger.dat" +-- bs <- B.readFile file +-- defaultMain [ +-- bench "main" $ nf (parseJournalFile (fromText (T.pack file))) bs ] + +-- Text.hs ends here diff --git a/hledger-lib/other/ledger-parse/README b/hledger-lib/other/ledger-parse/README new file mode 100644 index 000000000..b57f6c8f5 --- /dev/null +++ b/hledger-lib/other/ledger-parse/README @@ -0,0 +1,4 @@ +This is the parser code from John W's +https://github.com/ledger/ledger4/tree/master/ledger-parse , +revision 8fb414c + updates for latest parsers lib. +Later, perhaps it will be a published lib and we can remove this copy. diff --git a/hledger-lib/package.yaml b/hledger-lib/package.yaml index 545827863..c8251f8d3 100644 --- a/hledger-lib/package.yaml +++ b/hledger-lib/package.yaml @@ -65,6 +65,10 @@ dependencies: - uglymemo - utf8-string >=0.3.5 && <1.1 - HUnit +# for ledger-parse: +- parsers >= 0.5 +- system-filepath +- trifecta >= 0.91 ghc-options: - -Wall - -fno-warn-unused-do-bind @@ -72,6 +76,9 @@ ghc-options: - -fno-warn-missing-signatures - -fno-warn-type-defaults - -fno-warn-orphans +source-dirs: +- other/ledger-parse +- . library: exposed-modules: - Hledger