mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-29 13:22:27 +03:00
imp: csv: new timezone rule; convert zoned date-times to local dates (#1936)
Previously, CSV date-times in a different time zone from yours (with or without explicit time zones in the CSV) could give off-by-one dates, because the CSV time zone was ignored. Now: (1) you can use the `timezone` rule to declare which other time zone a CSV is implicitly using; and (2) CSV date-times with a time zone - whether declared by rule or parsed with %Z - are localised to the system time zone (or to another one set with the TZ environment variable).
This commit is contained in:
parent
7d1b1cadce
commit
3b24d9465b
@ -56,8 +56,8 @@ import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Builder as TB
|
||||
import Data.Time.Calendar (Day)
|
||||
import Data.Time.Format (parseTimeM, defaultTimeLocale)
|
||||
import Data.Time ( Day, TimeZone, UTCTime, LocalTime, ZonedTime(ZonedTime),
|
||||
defaultTimeLocale, getCurrentTimeZone, localDay, parseTimeM, utcToLocalTime, localTimeToUTC, zonedTimeToUTC)
|
||||
import Safe (atMay, headMay, lastMay, readMay)
|
||||
import System.Directory (doesFileExist)
|
||||
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
|
||||
@ -460,6 +460,7 @@ directives =
|
||||
-- ,"default-account"
|
||||
-- ,"default-currency"
|
||||
,"skip"
|
||||
,"timezone"
|
||||
,"newest-first"
|
||||
, "balance-type"
|
||||
]
|
||||
@ -703,6 +704,13 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
||||
Just "" -> return 1
|
||||
Just s -> maybe (throwError $ "could not parse skip value: " ++ show s) return . readMay $ T.unpack s
|
||||
|
||||
mtzin <- case getDirective "timezone" rules of
|
||||
Nothing -> return Nothing
|
||||
Just s ->
|
||||
maybe (throwError $ "could not parse time zone: " ++ T.unpack s) (return.Just) $
|
||||
parseTimeM False defaultTimeLocale "%Z" $ T.unpack s
|
||||
tzout <- liftIO getCurrentTimeZone
|
||||
|
||||
-- parse csv
|
||||
let
|
||||
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
||||
@ -733,9 +741,14 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
||||
line' = (mkPos . (+1) . unPos) line
|
||||
pos' = SourcePos name line' col
|
||||
in
|
||||
(pos', transactionFromCsvRecord pos rules r)
|
||||
(pos', transactionFromCsvRecord timesarezoned mtzin tzout pos rules r)
|
||||
)
|
||||
(initialPos parsecfilename) records
|
||||
where
|
||||
timesarezoned =
|
||||
case csvRule rules "date-format" of
|
||||
Just f | any (`T.isInfixOf` f) ["%Z","%z","%EZ","%Ez"] -> True
|
||||
_ -> False
|
||||
|
||||
-- Ensure transactions are ordered chronologically.
|
||||
-- First, if the CSV records seem to be most-recent-first (because
|
||||
@ -856,8 +869,8 @@ hledgerField = getEffectiveAssignment
|
||||
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
|
||||
hledgerFieldValue rules record = fmap (renderTemplate rules record) . hledgerField rules record
|
||||
|
||||
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
|
||||
transactionFromCsvRecord sourcepos rules record = t
|
||||
transactionFromCsvRecord :: Bool -> Maybe TimeZone -> TimeZone -> SourcePos -> CsvRules -> CsvRecord -> Transaction
|
||||
transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = t
|
||||
where
|
||||
----------------------------------------------------------------------
|
||||
-- 1. Define some helpers:
|
||||
@ -866,7 +879,8 @@ transactionFromCsvRecord sourcepos rules record = t
|
||||
-- ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
|
||||
field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
|
||||
fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe Text
|
||||
parsedate = parseDateWithCustomOrDefaultFormats (rule "date-format")
|
||||
mdateformat = rule "date-format"
|
||||
parsedate = parseDateWithCustomOrDefaultFormats timesarezoned mtzin tzout mdateformat
|
||||
mkdateerror datefield datevalue mdateformat' = T.unpack $ T.unlines
|
||||
["error: could not parse \""<>datevalue<>"\" as a date using date format "
|
||||
<>maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" (T.pack . show) mdateformat'
|
||||
@ -887,7 +901,6 @@ transactionFromCsvRecord sourcepos rules record = t
|
||||
-- field assignment rules using the CSV record's data, and parsing a bit
|
||||
-- more where needed (dates, status).
|
||||
|
||||
mdateformat = rule "date-format"
|
||||
date = fromMaybe "" $ fieldval "date"
|
||||
-- PARTIAL:
|
||||
date' = fromMaybe (error' $ mkdateerror "date" date mdateformat) $ parsedate date
|
||||
@ -1320,11 +1333,45 @@ csvFieldValue rules record fieldname = do
|
||||
|
||||
-- | Parse the date string using the specified date-format, or if unspecified
|
||||
-- the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, leading
|
||||
-- zeroes optional).
|
||||
parseDateWithCustomOrDefaultFormats :: Maybe DateFormat -> Text -> Maybe Day
|
||||
parseDateWithCustomOrDefaultFormats mformat s = asum $ map parsewith' formats
|
||||
-- zeroes optional). The parsed time is read in the zone given by the format
|
||||
-- (when timesarezoned), else in mtzin, else in tzout; it is then localised to tzout to get the date.
|
||||
parseDateWithCustomOrDefaultFormats :: Bool -> Maybe TimeZone -> TimeZone -> Maybe DateFormat -> Text -> Maybe Day
|
||||
parseDateWithCustomOrDefaultFormats timesarezoned mtzin tzout mformat s = localdate <$> mutctime
|
||||
-- this time code can probably be simpler, I'm just happy to get out alive
|
||||
where
|
||||
parsewith' = flip (parseTimeM True defaultTimeLocale) (T.unpack s)
|
||||
localdate :: UTCTime -> Day =
|
||||
localDay .
|
||||
dbg7 ("time in output timezone "++show tzout) .
|
||||
utcToLocalTime tzout
|
||||
mutctime :: Maybe UTCTime = asum $ map parseWithFormat formats
|
||||
|
||||
parseWithFormat :: String -> Maybe UTCTime
|
||||
parseWithFormat fmt =
|
||||
if timesarezoned
|
||||
then
|
||||
dbg7 "zoned CSV time, expressed as UTC" $
|
||||
parseTimeM True defaultTimeLocale fmt $ T.unpack s :: Maybe UTCTime
|
||||
else
|
||||
-- parse as a local day and time; then if an input timezone is provided,
|
||||
-- assume it's in that, otherwise assume it's in the output timezone;
|
||||
-- then convert to UTC like the above
|
||||
let
|
||||
mlocaltime =
|
||||
fmap (dbg7 "unzoned CSV time") $
|
||||
parseTimeM True defaultTimeLocale fmt $ T.unpack s :: Maybe LocalTime
|
||||
localTimeAsZonedTime tz lt = ZonedTime lt tz
|
||||
in
|
||||
case mtzin of
|
||||
Just tzin ->
|
||||
(dbg7 ("unzoned CSV time, declared as "++show tzin++ ", expressed as UTC") .
|
||||
localTimeToUTC tzin)
|
||||
<$> mlocaltime
|
||||
Nothing ->
|
||||
(dbg7 ("unzoned CSV time, treated as "++show tzout++ ", expressed as UTC") .
|
||||
zonedTimeToUTC .
|
||||
localTimeAsZonedTime tzout)
|
||||
<$> mlocaltime
|
||||
|
||||
formats = map T.unpack $ maybe
|
||||
["%Y/%-m/%-d"
|
||||
,"%Y-%-m-%-d"
|
||||
|
@ -4592,8 +4592,9 @@ date-format DATEFMT
|
||||
```
|
||||
This is a helper for the `date` (and `date2`) fields.
|
||||
If your CSV dates are not formatted like `YYYY-MM-DD`, `YYYY/MM/DD` or `YYYY.MM.DD`,
|
||||
you'll need to add a date-format rule describing them with a
|
||||
strptime date parsing pattern, which must parse the CSV date value completely.
|
||||
you'll need to add a date-format rule describing them with a strptime-style date parsing pattern -
|
||||
see <https://hackage.haskell.org/package/time/docs/Data-Time-Format.html#v:formatTime>.
|
||||
The pattern must parse the CSV date value completely.
|
||||
Some examples:
|
||||
``` rules
|
||||
# MM/DD/YY
|
||||
@ -4613,14 +4614,33 @@ date-format %Y-%h-%d
|
||||
# Note the time and junk must be fully parsed, though only the date is used.
|
||||
date-format %-m/%-d/%Y %l:%M %p some other junk
|
||||
```
|
||||
For the supported strptime syntax, see:\
|
||||
<https://hackage.haskell.org/package/time/docs/Data-Time-Format.html#v:formatTime>
|
||||
|
||||
Note that although you can parse date-times which include a time zone,
|
||||
that time zone is ignored; it will not change the date that is parsed.
|
||||
This means when reading CSV data with times not in your local time zone,
|
||||
dates can be "off by one".
|
||||
### `timezone`
|
||||
|
||||
```rules
|
||||
timezone TIMEZONE
|
||||
```
|
||||
|
||||
When a CSV file contains date-times that are implicitly in some time zone
|
||||
other than yours, but which carry no explicit time zone information,
|
||||
you can use this rule to declare the CSV's native time zone,
|
||||
which helps prevent off-by-one dates.
|
||||
|
||||
When the CSV date-times do contain time zone information,
|
||||
you don't need this rule; instead, use `%Z` in `date-format`
|
||||
(or `%z`, `%EZ`, `%Ez`; see the formatTime link above).
|
||||
|
||||
In either of these cases, hledger will do a time-zone-aware conversion,
|
||||
localising the CSV date-times to your current system time zone.
|
||||
If you prefer to localise to some other time zone, eg for reproducibility,
|
||||
you can (on unix at least) set the output timezone with the TZ environment variable, eg:
|
||||
```shell
|
||||
$ TZ=HST hledger print -f foo.csv # or TZ=HST hledger import foo.csv
|
||||
```
|
||||
|
||||
`timezone` currently does not understand timezone names, except
|
||||
"UTC", "GMT", "EST", "EDT", "CST", "CDT", "MST", "MDT", "PST", or "PDT".
|
||||
For others, use numeric format: +HHMM or -HHMM.
|
||||
|
||||
### `decimal-mark`
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user