hledger/hledger-lib/Hledger/Utils.hs

295 lines
9.0 KiB
Haskell
Raw Normal View History

{-|
2022-11-04 22:51:25 +03:00
Utilities used throughout hledger, or needed low in the module hierarchy.
These are the bottom of hledger's module graph.
-}
2022-11-04 22:51:25 +03:00
module Hledger.Utils (
2022-11-05 08:12:19 +03:00
-- * Functions
applyN,
mapM',
sequence',
curry2,
uncurry2,
curry3,
uncurry3,
curry4,
uncurry4,
-- * Lists
2022-11-05 08:12:19 +03:00
maximum',
maximumStrict,
minimumStrict,
splitAtElement,
2022-11-05 08:12:19 +03:00
sumStrict,
-- * Trees
treeLeaves,
-- * Tuples
first3,
second3,
third3,
first4,
second4,
third4,
fourth4,
first5,
second5,
third5,
fourth5,
fifth5,
first6,
second6,
third6,
fourth6,
fifth6,
sixth6,
-- * Misc
2023-05-18 19:20:00 +03:00
multicol,
numDigitsInt,
makeHledgerClassyLenses,
-- * Other
2022-11-04 22:51:25 +03:00
module Hledger.Utils.Debug,
module Hledger.Utils.Parse,
module Hledger.Utils.IO,
2022-11-04 22:51:25 +03:00
module Hledger.Utils.Regex,
module Hledger.Utils.String,
module Hledger.Utils.Text,
2022-11-05 08:12:19 +03:00
-- * Tests
tests_Utils,
module Hledger.Utils.Test,
2022-11-04 22:51:25 +03:00
)
where
import Data.Char (toLower)
2023-05-18 19:20:00 +03:00
import Data.List (intersperse)
import Data.List.Extra (chunksOf, foldl', foldl1', uncons, unsnoc)
import qualified Data.Set as Set
2023-05-18 19:20:00 +03:00
import qualified Data.Text as T (pack, unpack)
2022-11-04 22:51:25 +03:00
import Data.Tree (foldTree, Tree (Node, subForest))
import Language.Haskell.TH (DecsQ, Name, mkName, nameBase)
import Lens.Micro ((&), (.~))
import Lens.Micro.TH (DefName(TopName), lensClass, lensField, makeLensesWith, classyRules)
2014-10-29 03:21:33 +03:00
import Hledger.Utils.Debug
2015-08-19 23:47:26 +03:00
import Hledger.Utils.Parse
import Hledger.Utils.IO
import Hledger.Utils.Regex
2015-08-19 23:47:26 +03:00
import Hledger.Utils.String
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
import Hledger.Utils.Text
2015-08-19 23:47:26 +03:00
import Hledger.Utils.Test
2008-10-10 11:39:20 +04:00
2008-12-06 10:15:19 +03:00
2022-11-05 08:12:19 +03:00
-- Functions
-- | Apply a function the specified number of times,
-- which should be > 0 (otherwise does nothing).
-- Possibly uses O(n) stack ?
applyN :: Int -> (a -> a) -> a -> a
applyN n f | n < 1 = id
| otherwise = (!! n) . iterate f
-- from protolude, compare
-- applyN :: Int -> (a -> a) -> a -> a
-- applyN n f = X.foldr (.) identity (X.replicate n f)
-- | Like mapM but uses sequence'.
{-# INLINABLE mapM' #-}
mapM' :: Monad f => (a -> f b) -> [a] -> f [b]
mapM' f = sequence' . map f
-- | This is a version of sequence based on difference lists. It is
-- slightly faster but we mostly use it because it uses the heap
-- instead of the stack. This has the advantage that Neil Mitchells
-- trick of limiting the stack size to discover space leaks doesnt
-- show this as a false positive.
{-# INLINABLE sequence' #-}
sequence' :: Monad f => [f a] -> f [a]
sequence' ms = do
h <- go id ms
return (h [])
where
go h [] = return h
go h (m:ms') = do
x <- m
go (h . (x :)) ms'
2019-08-19 04:09:27 +03:00
curry2 :: ((a, b) -> c) -> a -> b -> c
curry2 f x y = f (x, y)
uncurry2 :: (a -> b -> c) -> (a, b) -> c
uncurry2 f (x, y) = f x y
curry3 :: ((a, b, c) -> d) -> a -> b -> c -> d
curry3 f x y z = f (x, y, z)
uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 f (x, y, z) = f x y z
curry4 :: ((a, b, c, d) -> e) -> a -> b -> c -> d -> e
curry4 f w x y z = f (w, x, y, z)
uncurry4 :: (a -> b -> c -> d -> e) -> (a, b, c, d) -> e
uncurry4 f (w, x, y, z) = f w x y z
-- Lists
2022-11-05 08:12:19 +03:00
-- | Total version of maximum, for integral types, giving 0 for an empty list.
maximum' :: Integral a => [a] -> a
maximum' [] = 0
maximum' xs = maximumStrict xs
-- | Strict version of maximum that doesnt leak space
{-# INLINABLE maximumStrict #-}
maximumStrict :: Ord a => [a] -> a
maximumStrict = foldl1' max
-- | Strict version of minimum that doesnt leak space
{-# INLINABLE minimumStrict #-}
minimumStrict :: Ord a => [a] -> a
minimumStrict = foldl1' min
splitAtElement :: Eq a => a -> [a] -> [[a]]
splitAtElement x l =
case l of
[] -> []
e:es | e==x -> split es
es -> split es
where
split es = let (first,rest) = break (x==) es
in first : splitAtElement x rest
2022-11-05 08:12:19 +03:00
-- | Strict version of sum that doesnt leak space
{-# INLINABLE sumStrict #-}
sumStrict :: Num a => [a] -> a
sumStrict = foldl' (+) 0
-- Trees
-- | Get the leaves of this tree as a list.
-- The topmost node ("root" in hledger account trees) is not counted as a leaf.
treeLeaves :: Tree a -> [a]
treeLeaves Node{subForest=[]} = []
treeLeaves t = foldTree (\a bs -> (if null bs then (a:) else id) $ concat bs) t
-- Tuples
first3 (x,_,_) = x
second3 (_,x,_) = x
third3 (_,_,x) = x
first4 (x,_,_,_) = x
second4 (_,x,_,_) = x
third4 (_,_,x,_) = x
fourth4 (_,_,_,x) = x
first5 (x,_,_,_,_) = x
second5 (_,x,_,_,_) = x
third5 (_,_,x,_,_) = x
fourth5 (_,_,_,x,_) = x
fifth5 (_,_,_,_,x) = x
first6 (x,_,_,_,_,_) = x
second6 (_,x,_,_,_,_) = x
third6 (_,_,x,_,_,_) = x
fourth6 (_,_,_,x,_,_) = x
fifth6 (_,_,_,_,x,_) = x
sixth6 (_,_,_,_,_,x) = x
-- Misc
2023-05-18 19:20:00 +03:00
-- | Convert a list of strings to a multi-line multi-column list
-- fitting within the given width. Not wide character aware.
multicol :: Int -> [String] -> String
multicol _ [] = []
multicol width strs =
let
maxwidth = maximum' $ map length strs
numcols = min (length strs) (width `div` (maxwidth+2))
itemspercol = length strs `div` numcols
colitems = chunksOf itemspercol strs
cols = map unlines colitems
sep = " "
in
T.unpack $ textConcatBottomPadded $ map T.pack $ intersperse sep cols
-- | Find the number of digits of an 'Int'.
{-# INLINE numDigitsInt #-}
numDigitsInt :: Integral a => Int -> a
numDigitsInt n
| n == minBound = 19 -- negate minBound is out of the range of Int
| n < 0 = go (negate n)
| otherwise = go n
where
go a | a < 10 = 1
| a < 100 = 2
| a < 1000 = 3
| a < 10000 = 4
| a >= 10000000000000000 = 16 + go (a `quot` 10000000000000000)
| a >= 100000000 = 8 + go (a `quot` 100000000)
| otherwise = 4 + go (a `quot` 10000)
-- | Make classy lenses for Hledger options fields.
-- This is intended to be used with BalancingOpts, InputOpt, ReportOpts,
-- ReportSpec, and CliOpts.
-- When run on X, it will create a typeclass named HasX (except for ReportOpts,
-- which will be named HasReportOptsNoUpdate) containing all the lenses for that type.
-- If the field name starts with an underscore, the lens name will be created
-- by stripping the underscore from the front on the name. If the field name ends with
-- an underscore, the field name ends with an underscore, the lens name will be
-- mostly created by stripping the underscore, but a few names for which this
-- would create too many conflicts instead have a second underscore appended.
-- ReportOpts fields for which updating them requires updating the query in
-- ReportSpec are instead names by dropping the trailing underscore and
-- appending NoUpdate to the name, e.g. querystring_ -> querystringNoUpdate.
--
-- There are a few reasons for the complicated rules.
-- - We have some legacy field names ending in an underscore (e.g. value_)
-- which we want to temporarily accommodate, before eventually switching to
-- a more modern style (e.g. _rsReportOpts)
-- - Certain fields in ReportOpts need to update the enclosing ReportSpec when
-- they are updated, and it is a common programming error to forget to do
-- this. We append NoUpdate to those lenses which will not update the
-- enclosing field, and reserve the shorter name for manually define lenses
-- (or at least something lens-like) which will update the ReportSpec.
-- cf. the lengthy discussion here and in surrounding comments:
-- https://github.com/simonmichael/hledger/pull/1545#issuecomment-881974554
makeHledgerClassyLenses :: Name -> DecsQ
makeHledgerClassyLenses x = flip makeLensesWith x $ classyRules
& lensField .~ (\_ _ n -> fieldName $ nameBase n)
& lensClass .~ (className . nameBase)
where
fieldName n | Just ('_', name) <- uncons n = [TopName (mkName name)]
| Just (name, '_') <- unsnoc n,
name `Set.member` queryFields = [TopName (mkName $ name ++ "NoUpdate")]
| Just (name, '_') <- unsnoc n,
name `Set.member` commonFields = [TopName (mkName $ name ++ "__")]
| Just (name, '_') <- unsnoc n = [TopName (mkName name)]
| otherwise = []
-- Fields which would cause too many conflicts if we exposed lenses with these names.
commonFields = Set.fromList
[ "empty", "drop", "color", "transpose" -- ReportOpts
, "anon", "new", "auto" -- InputOpts
, "rawopts", "file", "debug", "width" -- CliOpts
]
-- When updating some fields of ReportOpts within a ReportSpec, we need to
-- update the rsQuery term as well. To do this we implement a special
-- HasReportOpts class with some special behaviour. We therefore give the
-- basic lenses a special NoUpdate name to avoid conflicts.
className "ReportOpts" = Just (mkName "HasReportOptsNoUpdate", mkName "reportOptsNoUpdate")
className (x':xs) = Just (mkName ("Has" ++ x':xs), mkName (toLower x' : xs))
className [] = Nothing
-- Fields of ReportOpts which need to update the Query when they are updated.
queryFields = Set.fromList ["period", "statuses", "depth", "date2", "real", "querystring"]
tests_Utils = testGroup "Utils" [
2018-09-06 23:08:26 +03:00
tests_Text
2018-09-04 01:54:29 +03:00
]