mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-28 12:54:07 +03:00
lib: simplify format detection, avoid ledger reader by default
When we don't know a file's format, instead of choosing a subset of readers based on content sniffing, now we just try them all. Also, LedgerReader is now used only as a last resort, as it's not yet competitive with JournalReader.
This commit is contained in:
parent
59ce4c987b
commit
b6ff170688
@ -319,12 +319,19 @@ type StorageFormat = String
|
||||
-- | A hledger journal reader is a triple of storage format name, a
|
||||
-- list of file extensions recognised for that format, and a parser from that format to Journal.
|
||||
data Reader = Reader {
|
||||
-- name of the format this reader handles
|
||||
|
||||
-- The canonical name of the format handled by this reader
|
||||
rFormat :: StorageFormat
|
||||
-- quickly check if this reader can probably handle the given file path and file content
|
||||
,rDetector :: FilePath -> Text -> Bool
|
||||
-- parse the given string, using the given parse rules file if any, returning a journal or error aware of the given file path
|
||||
|
||||
-- The file extensions recognised as containing this format
|
||||
,rExtensions :: [String]
|
||||
|
||||
-- A text parser for this format, accepting an optional rules file,
|
||||
-- assertion-checking flag, and file path for error messages,
|
||||
-- producing an exception-raising IO action that returns a journal
|
||||
-- or error message.
|
||||
,rParser :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal
|
||||
|
||||
}
|
||||
|
||||
instance Show Reader where show r = rFormat r ++ " reader"
|
||||
|
@ -12,9 +12,7 @@ readJournalFiles
|
||||
readJournalFile
|
||||
requireJournalFileExists
|
||||
readJournal
|
||||
readersFor
|
||||
readerForStorageFormat
|
||||
readersForPathAndData
|
||||
findReader
|
||||
tryReaders
|
||||
@
|
||||
|
||||
@ -34,20 +32,9 @@ module Hledger.Read (
|
||||
|
||||
-- * Journal parsing
|
||||
readJournal,
|
||||
readersFor,
|
||||
readerForStorageFormat,
|
||||
readersForPathAndData,
|
||||
tryReaders,
|
||||
readJournal',
|
||||
readFormatNames,
|
||||
|
||||
-- * Re-exported
|
||||
-- accountnamep,
|
||||
-- amountp,
|
||||
-- amountp',
|
||||
-- mamountp',
|
||||
-- numberp,
|
||||
-- codep,
|
||||
accountaliasp,
|
||||
postingp,
|
||||
module Hledger.Read.Common,
|
||||
@ -64,10 +51,11 @@ import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Safe
|
||||
import System.Directory (doesFileExist, getHomeDirectory)
|
||||
import System.Environment (getEnv)
|
||||
import System.Exit (exitFailure)
|
||||
import System.FilePath ((</>))
|
||||
import System.FilePath ((</>), takeExtension)
|
||||
import System.IO (stderr)
|
||||
import Test.HUnit
|
||||
import Text.Printf
|
||||
@ -85,20 +73,16 @@ import Prelude hiding (getContents, writeFile)
|
||||
import Hledger.Utils.UTF8IOCompat (writeFile)
|
||||
|
||||
|
||||
-- The available data file readers, each one handling a particular data
|
||||
-- format. The first is also used as the default for unknown formats.
|
||||
-- The available journal readers, each one handling a particular data format.
|
||||
readers :: [Reader]
|
||||
readers = [
|
||||
JournalReader.reader
|
||||
,LedgerReader.reader
|
||||
,TimeclockReader.reader
|
||||
,TimedotReader.reader
|
||||
,CsvReader.reader
|
||||
,LedgerReader.reader
|
||||
]
|
||||
|
||||
readFormatNames :: [StorageFormat]
|
||||
readFormatNames = map rFormat readers
|
||||
|
||||
journalEnvVar = "LEDGER_FILE"
|
||||
journalEnvVar2 = "LEDGER"
|
||||
journalDefaultFilename = ".hledger.journal"
|
||||
@ -192,44 +176,28 @@ tests_readJournal' = [
|
||||
|
||||
-- | @readJournal mformat mrulesfile assrt mpath t@
|
||||
--
|
||||
-- Read a journal from this string, trying whatever readers seem appropriate:
|
||||
--
|
||||
-- - if a format is specified, try that reader only
|
||||
--
|
||||
-- - or if one or more readers recognises the file path and data, try those
|
||||
--
|
||||
-- - otherwise, try them all.
|
||||
--
|
||||
-- A CSV conversion rules file may also be specified for use by the CSV reader.
|
||||
-- Also there is a flag specifying whether to check or ignore balance assertions in the journal.
|
||||
-- Try to read a Journal from some text.
|
||||
-- If a format is specified (mformat) and a reader with that name exists, try only that reader.
|
||||
-- Otherwise if the file path is provided (mpath), and it specifies a format, try only that reader.
|
||||
-- Otherwise try all readers in turn until one succeeds, or return the first error if none of them succeed.
|
||||
-- A CSV conversion rules file may be specified (mrulesfile) for use by the CSV reader.
|
||||
-- If the assrt flag is true, also check and enforce balance assertions in the journal.
|
||||
readJournal :: Maybe StorageFormat -> Maybe FilePath -> Bool -> Maybe FilePath -> Text -> IO (Either String Journal)
|
||||
readJournal mformat mrulesfile assrt mpath t =
|
||||
let rs = readersFor (mformat, mpath, t)
|
||||
in tryReaders rs mrulesfile assrt mpath t
|
||||
let rs = maybe readers (:[]) $ findReader mformat mpath
|
||||
in tryReaders rs mrulesfile assrt mpath t
|
||||
|
||||
-- | @readersFor (format,path,t)@
|
||||
-- | @findReader mformat mpath@
|
||||
--
|
||||
-- Which readers are worth trying for this (possibly unspecified) format, filepath, and data ?
|
||||
readersFor :: (Maybe StorageFormat, Maybe FilePath, Text) -> [Reader]
|
||||
readersFor (format,path,t) =
|
||||
dbg1 ("possible readers for "++show (format,path,textElideRight 30 t)) $
|
||||
case format of
|
||||
Just f -> case readerForStorageFormat f of Just r -> [r]
|
||||
Nothing -> []
|
||||
Nothing -> case path of Nothing -> readers
|
||||
Just p -> case readersForPathAndData (p,t) of [] -> readers
|
||||
rs -> rs
|
||||
|
||||
-- | Find the (first) reader which can handle the given format, if any.
|
||||
readerForStorageFormat :: StorageFormat -> Maybe Reader
|
||||
readerForStorageFormat s | null rs = Nothing
|
||||
| otherwise = Just $ head rs
|
||||
where
|
||||
rs = filter ((s==).rFormat) readers :: [Reader]
|
||||
|
||||
-- | Find the readers which think they can handle the given file path and data, if any.
|
||||
readersForPathAndData :: (FilePath,Text) -> [Reader]
|
||||
readersForPathAndData (f,t) = filter (\r -> dbg1 ("try "++rFormat r++" format") $ (rDetector r) f t) readers
|
||||
-- Find the reader for the given format (mformat), if any.
|
||||
-- Or if no format is provided, find the first reader that handles the
|
||||
-- file name's extension, if any.
|
||||
findReader :: Maybe StorageFormat -> Maybe FilePath -> Maybe Reader
|
||||
findReader Nothing Nothing = Nothing
|
||||
findReader (Just fmt) _ = headMay [r | r <- readers, fmt == rFormat r]
|
||||
findReader Nothing (Just path) = headMay [r | r <- readers, ext `elem` rExtensions r]
|
||||
where
|
||||
ext = drop 1 $ takeExtension path
|
||||
|
||||
-- | @tryReaders readers mrulesfile assrt path t@
|
||||
--
|
||||
|
@ -64,18 +64,11 @@ import Hledger.Read.Common (amountp, statusp, genericSourcePos)
|
||||
|
||||
|
||||
reader :: Reader
|
||||
reader = Reader format detect parse
|
||||
|
||||
format :: String
|
||||
format = "csv"
|
||||
|
||||
-- | Does the given file path and data look like something this reader can handle ?
|
||||
detect :: FilePath -> Text -> Bool
|
||||
detect f excerpt
|
||||
-- file name known: try this reader if it has any of these extensions
|
||||
| f /= "-" = takeExtension f `elem` ['.':format]
|
||||
-- file name unknown: try this reader if excerpt contains two or more commas
|
||||
| otherwise = T.length (T.filter (==',') excerpt) >= 2
|
||||
reader = Reader
|
||||
{rFormat = "csv"
|
||||
,rExtensions = ["csv"]
|
||||
,rParser = parse
|
||||
}
|
||||
|
||||
-- | Parse and post-process a "Journal" from CSV data, or give an error.
|
||||
-- XXX currently ignores the string and reads from the file path
|
||||
|
@ -106,21 +106,11 @@ import Hledger.Utils
|
||||
--- * reader
|
||||
|
||||
reader :: Reader
|
||||
reader = Reader format detect parse
|
||||
|
||||
format :: String
|
||||
format = "journal"
|
||||
|
||||
-- | Does the given file path and data look like something this reader can handle ?
|
||||
detect :: FilePath -> Text -> Bool
|
||||
detect f _
|
||||
-- file name known: try this reader if it has any of these extensions
|
||||
| f /= "-" = takeExtension f `elem` ['.':format, ".j", ".hledger", ".ledger", ".l"]
|
||||
-- file name unknown: always try this reader
|
||||
| otherwise = True
|
||||
-- file name unknown: try this reader if we can see something like a journal entry
|
||||
-- (digits in column 0 with the next line indented)
|
||||
-- otherwise = regexMatches "(^|\n)[0-9]+.*\n[ \t]+" $ T.unpack excerpt
|
||||
reader = Reader
|
||||
{rFormat = "journal"
|
||||
,rExtensions = ["journal", "j", "hledger", "ledger"]
|
||||
,rParser = parse
|
||||
}
|
||||
|
||||
-- | Parse and post-process a "Journal" from hledger's journal file
|
||||
-- format, or give an error.
|
||||
|
@ -14,19 +14,11 @@ where
|
||||
--- * imports
|
||||
import Prelude ()
|
||||
import Prelude.Compat hiding (readFile)
|
||||
-- import qualified Control.Exception as C
|
||||
import Control.Monad
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad.Except (ExceptT(..), throwError)
|
||||
-- import Control.Monad.State.Strict
|
||||
-- import qualified Data.Map.Strict as M
|
||||
import Data.Maybe
|
||||
-- import Data.List
|
||||
import Data.Text (Text, pack)
|
||||
import Data.Text.Encoding (encodeUtf8)
|
||||
-- import qualified Data.Text as T
|
||||
-- import Data.Time.Calendar
|
||||
-- import Data.Time.LocalTime
|
||||
-- import Safe
|
||||
import Test.HUnit
|
||||
-- #ifdef TESTS
|
||||
@ -35,7 +27,6 @@ import Test.HUnit
|
||||
-- #endif
|
||||
import Text.Megaparsec (eof)
|
||||
-- import Text.Printf
|
||||
import System.FilePath
|
||||
import System.Time
|
||||
import qualified Filesystem.Path.CurrentOS as F
|
||||
|
||||
@ -51,20 +42,14 @@ import Text.Trifecta.Result (Result(..))
|
||||
--- * reader
|
||||
|
||||
reader :: Reader
|
||||
reader = Reader format detect parse
|
||||
reader = Reader
|
||||
{rFormat = "ledger"
|
||||
,rExtensions = []
|
||||
,rParser = parse
|
||||
}
|
||||
|
||||
format :: String
|
||||
format = "ledger"
|
||||
|
||||
-- | Does the given file path and data look like something this reader can handle ?
|
||||
detect :: FilePath -> Text -> Bool
|
||||
detect f _
|
||||
-- file name known: try this reader if it has any of these extensions
|
||||
| f /= "-" = takeExtension f `elem` ['.':format, ".l"]
|
||||
-- file name unknown: don't try this reader
|
||||
| otherwise = False
|
||||
|
||||
-- | Parse and post-process a "Journal" from ledger's journal format, or give an error.
|
||||
-- | Generate an action that parses and post-processes a "Journal" from a
|
||||
-- C++ Ledger journal, or raises an error.
|
||||
parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal
|
||||
parse _mrulespath assrt path txt = do
|
||||
let
|
||||
|
@ -61,7 +61,6 @@ import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Test.HUnit
|
||||
import Text.Megaparsec hiding (parse)
|
||||
import System.FilePath
|
||||
|
||||
import Hledger.Data
|
||||
-- XXX too much reuse ?
|
||||
@ -70,18 +69,11 @@ import Hledger.Utils
|
||||
|
||||
|
||||
reader :: Reader
|
||||
reader = Reader format detect parse
|
||||
|
||||
format :: String
|
||||
format = "timeclock"
|
||||
|
||||
-- | Does the given file path and data look like something this reader can handle ?
|
||||
detect :: FilePath -> Text -> Bool
|
||||
detect f excerpt
|
||||
-- file name known: try this reader if it has any of these extensions
|
||||
| f /= "-" = takeExtension f `elem` ['.':format]
|
||||
-- file name unknown: try this reader if a line starts with "i " or "o " in excerpt
|
||||
| otherwise = regexMatches "(^|\n)[io] " $ T.unpack excerpt
|
||||
reader = Reader
|
||||
{rFormat = "timeclock"
|
||||
,rExtensions = ["timeclock"]
|
||||
,rParser = parse
|
||||
}
|
||||
|
||||
-- | Parse and post-process a "Journal" from timeclock.el's timeclock
|
||||
-- format, saving the provided file path and the current time, or give an
|
||||
|
@ -41,10 +41,8 @@ import Data.Char (isSpace)
|
||||
import Data.List (foldl')
|
||||
import Data.Maybe
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Test.HUnit
|
||||
import Text.Megaparsec hiding (parse)
|
||||
import System.FilePath
|
||||
|
||||
import Hledger.Data
|
||||
import Hledger.Read.Common
|
||||
@ -56,18 +54,11 @@ import Hledger.Utils hiding (ptrace)
|
||||
ptrace = return
|
||||
|
||||
reader :: Reader
|
||||
reader = Reader format detect parse
|
||||
|
||||
format :: String
|
||||
format = "timedot"
|
||||
|
||||
-- | Does the given file path and data look like something this reader can handle ?
|
||||
detect :: FilePath -> Text -> Bool
|
||||
detect f excerpt
|
||||
-- file name known: try this reader if it has any of these extensions
|
||||
| f /= "-" = takeExtension f `elem` ['.':format]
|
||||
-- file name unknown: try this reader if a line starts with a number in excerpt
|
||||
| otherwise = regexMatches "(^|\n)[0-9]" $ T.unpack excerpt
|
||||
reader = Reader
|
||||
{rFormat = "timedot"
|
||||
,rExtensions = ["timedot"]
|
||||
,rParser = parse
|
||||
}
|
||||
|
||||
-- | Parse and post-process a "Journal" from the timedot format, or give an error.
|
||||
parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal
|
||||
|
Loading…
Reference in New Issue
Block a user