tools: add sample journals with wide characters

generatejournal now has --chinese and --mixed options, which are used to
generate some additional small sample journals.
This commit is contained in:
Simon Michael 2015-10-10 11:51:07 -07:00
parent 939f7184c9
commit e6d8a9d1bc
3 changed files with 72 additions and 22 deletions

View File

@ -794,7 +794,17 @@ ghci-web: \
$(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages)
stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs
samplejournals: data/sample.journal data/100x100x10.journal data/1000x1000x10.journal data/1000x10000x10.journal data/10000x1000x10.journal data/10000x10000x10.journal data/100000x1000x10.journal \
samplejournals: \
data/sample.journal \
data/100x100x10.journal \
data/1000x1000x10.journal \
data/1000x10000x10.journal \
data/10000x1000x10.journal \
data/10000x10000x10.journal \
data/100000x1000x10.journal \
data/ascii.journal \
data/chinese.journal \
data/mixed.journal \
$(call def-help,samplejournals, regenerate standard sample journals in data/ )
data/sample.journal:
@ -818,6 +828,15 @@ data/10000x10000x10.journal: tools/generatejournal
data/100000x1000x10.journal: tools/generatejournal
tools/generatejournal 100000 1000 10 >$@
data/ascii.journal: tools/generatejournal
tools/generatejournal 3 5 5 >$@
data/chinese.journal: tools/generatejournal
tools/generatejournal 3 5 5 --chinese >$@
data/mixed.journal: tools/generatejournal
tools/generatejournal 3 5 5 --mixed >$@
###############################################################################
$(call def-help-subsection,DOCUMENTATION:)

View File

@ -477,7 +477,7 @@ Finally, for quick, fine-grained performance measurements when troubleshooting o
### Generate sample journal files
Synthetic data files like `data/100x100x10.journal` useful for benchmarks and testing.
Synthetic data files like `data/100x100x10.journal` are useful for benchmarks and testing.
The numbers describe the number of transactions, number of accounts, and maximum account depth respectively.
They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs).
They should be built automatically as needed; if not, you can use the `make samplejournals` rule:
@ -493,6 +493,9 @@ tools/generatejournal 1000 10000 10 >data/1000x10000x10.journal
tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal
tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal
tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal
tools/generatejournal 3 5 5 >data/ascii.journal
tools/generatejournal 3 5 5 --chinese >data/chinese.journal
tools/generatejournal 3 5 5 --mixed >data/mixed.journal
```
### Run developer tests

View File

@ -1,30 +1,34 @@
#!/usr/bin/env runhaskell
{-
generateledger.hs NUMTXNS NUMACCTS ACCTDEPTH
Outputs a dummy ledger file with the specified number of transactions,
number of accounts, and account tree depth. Useful for
testing/profiling/benchmarking.
generatejournal.hs NUMTXNS NUMACCTS ACCTDEPTH [--chinese|--mixed]
Outputs a dummy journal file with the specified number of
transactions, number of accounts, and account tree depth. By default
it uses only ascii characters, with --chinese it uses wide chinese
characters, or with --mixed it uses both. These files are used for
testing, benchmarking, profiling, etc.
-}
module Main
where
import System.Environment
import Control.Monad
import Data.Time.LocalTime
import Data.Char
import Data.List
import Data.Time.Calendar
import Text.Printf
import Data.Time.LocalTime
import Numeric
import System.Environment
import Text.Printf
-- import Hledger.Utils.Debug
main = do
args <- getArgs
rawargs <- getArgs
let (opts,args) = partition (isPrefixOf "-") rawargs
let [numtxns, numaccts, acctdepth] = map read args :: [Int]
today <- getCurrentDay
let (year,_,_) = toGregorian today
let d = fromGregorian (year-1) 1 1
let dates = iterate (addDays 1) d
let accts = pair $ cycle $ take numaccts $ uniqueacctnames acctdepth
let accts = pair $ cycle $ take numaccts $ uniqueAccountNames opts acctdepth
mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts
return ()
@ -35,17 +39,41 @@ showtxn txnno date acct1 acct2 =
d = show date
amt = 1::Int
uniqueacctnames :: Int -> [String]
uniqueacctnames depth = uniqueacctnames' depth uniquenames
where uniquenames = map hex [1..] where hex = flip showHex ""
-- Generate an endless list of unique account names, nested up to the
-- given depth.
--
-- The flat supply of unique name components is selected by opts:
-- "--mixed" alternates hex and wide-character names, "--chinese" uses only
-- wide-character names, and otherwise only hex names are used ("--mixed"
-- wins if both flags are present). The supply is consumed depth components
-- at a time, and each chunk ["a", "b", "c"] becomes the account names
-- "a", "a:b", "a:b:c".
--
-- A depth below 1 is treated as 1: taking zero components per chunk would
-- never yield a name, so a caller asking for even one name would hang.
uniqueAccountNames :: [String] -> Int -> [String]
uniqueAccountNames opts depth =
  mkacctnames uniquenames
  where
    -- number of name components consumed per chunk; always at least one
    chunksize = max 1 depth

    -- names is expected to be infinite, so no base case is needed here
    mkacctnames names = mkacctnamestodepth some ++ mkacctnames rest
      where
        (some, rest) = splitAt chunksize names

    -- mkacctnamestodepth ["a", "b", "c"] = ["a","a:b","a:b:c"]
    mkacctnamestodepth :: [String] -> [String]
    mkacctnamestodepth []     = []
    mkacctnamestodepth (a:as) = a : map ((a ++ ":") ++) (mkacctnamestodepth as)

    uniquenames
      | "--mixed" `elem` opts   = concat $ zipWith (\a b -> [a, b]) uniqueNamesHex uniqueNamesWide
      | "--chinese" `elem` opts = uniqueNamesWide
      | otherwise               = uniqueNamesHex
uniqueacctnames' depth uniquenames = group some ++ uniqueacctnames' depth rest
where (some, rest) = splitAt depth uniquenames
-- Endless supply of unique ascii names: the positive integers written in
-- lowercase hexadecimal ("1", "2", ... "9", "a", ... "f", "10", ...).
uniqueNamesHex :: [String]
uniqueNamesHex = [showHex i "" | i <- [1 :: Integer ..]]
-- Endless supply of wide-character names: every length-1 sequence of
-- wideChars, then every length-2 sequence, and so on.
uniqueNamesWide :: [String]
uniqueNamesWide = concatMap (\size -> sequences size wideChars) [1 ..]
-- Get the sequences of specified size starting at each element of a list,
-- cycling it if needed to fill the last sequences. If the list's elements
-- are unique, then the sequences will be too.
--
--   sequences 2 "abc" = ["ab","bc","ca"]
--
-- An empty list yields no sequences. A size of zero or less yields one
-- empty sequence per element.
sequences :: Int -> [a] -> [[a]]
sequences n l =
  -- Only non-empty suffixes start a sequence, which also guarantees that
  -- `cycle l` is never evaluated on an empty list. Appending the cycled
  -- list lets sequences near the end wrap around to the front; laziness
  -- means it is only touched when the suffix is shorter than n.
  [take n (suffix ++ cycle l) | suffix@(_:_) <- tails l]
-- The wide (chinese) characters used to build account names:
-- code points 0x3400 through 0x4db0 inclusive.
wideChars :: [Char]
wideChars = ['\x3400' .. '\x4db0']
-- group ["a", "b", "c"] = ["a","a:b","a:b:c"]
group :: [String] -> [String]
group [] = []
group (a:as) = a : map ((a++":")++) (group as)
pair :: [a] -> [(a,a)]
pair [] = []