From e6d8a9d1bcec5b027c4da23dff2a7e890b940f22 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Sat, 10 Oct 2015 11:51:07 -0700 Subject: [PATCH] tools: add sample journals with wide characters generatejournal now has --chinese and --mixed options, which are used to generate some additional small sample journals. --- Makefile | 21 ++++++++++++- doc/developer-guide.md | 5 ++- tools/generatejournal.hs | 68 ++++++++++++++++++++++++++++------------ 3 files changed, 72 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index e10d8bc57..8510bcd9f 100644 --- a/Makefile +++ b/Makefile @@ -794,7 +794,17 @@ ghci-web: \ $(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages) stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs -samplejournals: data/sample.journal data/100x100x10.journal data/1000x1000x10.journal data/1000x10000x10.journal data/10000x1000x10.journal data/10000x10000x10.journal data/100000x1000x10.journal \ +samplejournals: \ + data/sample.journal \ + data/100x100x10.journal \ + data/1000x1000x10.journal \ + data/1000x10000x10.journal \ + data/10000x1000x10.journal \ + data/10000x10000x10.journal \ + data/100000x1000x10.journal \ + data/ascii.journal \ + data/chinese.journal \ + data/mixed.journal \ $(call def-help,samplejournals, regenerate standard sample journals in data/ ) data/sample.journal: @@ -818,6 +828,15 @@ data/10000x10000x10.journal: tools/generatejournal data/100000x1000x10.journal: tools/generatejournal tools/generatejournal 100000 1000 10 >$@ +data/ascii.journal: tools/generatejournal + tools/generatejournal 3 5 5 >$@ + +data/chinese.journal: tools/generatejournal + tools/generatejournal 3 5 5 --chinese >$@ + +data/mixed.journal: tools/generatejournal + tools/generatejournal 3 5 5 --mixed >$@ + ############################################################################### $(call def-help-subsection,DOCUMENTATION:) diff --git a/doc/developer-guide.md b/doc/developer-guide.md index 96599ab15..d23697910 100644 --- a/doc/developer-guide.md +++ b/doc/developer-guide.md @@ -477,7 +477,7 @@ Finally, for quick, fine-grained performance measurements when troubleshooting o ### Generate sample journal files -Synthetic data files like `data/100x100x10.journal` useful for benchmarks and testing. +Synthetic data files like `data/100x100x10.journal` are useful for benchmarks and testing. The numbers describe the number of transactions, number of accounts, and maximum account depth respectively. They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs). They should be built as needed, if not you can use `make samplejournals` rule: @@ -493,6 +493,9 @@ tools/generatejournal 1000 10000 10 >data/1000x10000x10.journal tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal +tools/generatejournal 3 5 5 >data/ascii.journal +tools/generatejournal 3 5 5 --chinese >data/chinese.journal +tools/generatejournal 3 5 5 --mixed >data/mixed.journal ``` ### Run developer tests diff --git a/tools/generatejournal.hs b/tools/generatejournal.hs index 1df3563e1..a1e4be5d4 100755 --- a/tools/generatejournal.hs +++ b/tools/generatejournal.hs @@ -1,30 +1,34 @@ #!/usr/bin/env runhaskell {- -generateledger.hs NUMTXNS NUMACCTS ACCTDEPTH - -Outputs a dummy ledger file with the specified number of transactions, -number of accounts, and account tree depth. Useful for -testing/profiling/benchmarking. +generatejournal.hs NUMTXNS NUMACCTS ACCTDEPTH [--chinese|--mixed] +Outputs a dummy journal file with the specified number of +transactions, number of accounts, and account tree depth. By default +it uses only ascii characters, with --chinese it uses wide chinese +characters, or with --mixed it uses both. These files are used for +testing, benchmarking, profiling, etc. -} module Main where -import System.Environment -import Control.Monad -import Data.Time.LocalTime +import Data.Char +import Data.List import Data.Time.Calendar -import Text.Printf +import Data.Time.LocalTime import Numeric +import System.Environment +import Text.Printf +-- import Hledger.Utils.Debug main = do - args <- getArgs + rawargs <- getArgs + let (opts,args) = partition (isPrefixOf "-") rawargs let [numtxns, numaccts, acctdepth] = map read args :: [Int] today <- getCurrentDay let (year,_,_) = toGregorian today let d = fromGregorian (year-1) 1 1 let dates = iterate (addDays 1) d - let accts = pair $ cycle $ take numaccts $ uniqueacctnames acctdepth + let accts = pair $ cycle $ take numaccts $ uniqueAccountNames opts acctdepth mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts return () @@ -35,17 +39,41 @@ showtxn txnno date acct1 acct2 = d = show date amt = 1::Int -uniqueacctnames :: Int -> [String] -uniqueacctnames depth = uniqueacctnames' depth uniquenames - where uniquenames = map hex [1..] where hex = flip showHex "" +uniqueAccountNames :: [String] -> Int -> [String] +uniqueAccountNames opts depth = + mkacctnames uniquenames + where + mkacctnames names = mkacctnamestodepth some ++ mkacctnames rest + where + (some, rest) = splitAt depth names + -- mkacctnamestodepth ["a", "b", "c"] = ["a","a:b","a:b:c"] + mkacctnamestodepth :: [String] -> [String] + mkacctnamestodepth [] = [] + mkacctnamestodepth (a:as) = a : map ((a++":")++) (mkacctnamestodepth as) + uniquenames + | "--mixed" `elem` opts = concat $ zipWith (\a b -> [a,b]) uniqueNamesHex uniqueNamesWide + | "--chinese" `elem` opts = uniqueNamesWide + | otherwise = uniqueNamesHex -uniqueacctnames' depth uniquenames = group some ++ uniqueacctnames' depth rest - where (some, rest) = splitAt depth uniquenames +uniqueNamesHex = map hex [1..] where hex = flip showHex "" + +uniqueNamesWide = concat [sequences n wideChars | n <- [1..]] + +-- Get the sequences of specified size starting at each element of a list, +-- cycling it if needed to fill the last sequence. If the list's elements +-- are unique, then the sequences will be too. +sequences :: Show a => Int -> [a] -> [[a]] +sequences n l = go l + where + go [] = [] + go l' = s : go (tail l') + where + s' = take n l' + s | length s' == n = s' + | otherwise = take n (l' ++ cycle l) + +wideChars = map chr [0x3400..0x4db0] --- group ["a", "b", "c"] = ["a","a:b","a:b:c"] -group :: [String] -> [String] -group [] = [] -group (a:as) = a : map ((a++":")++) (group as) pair :: [a] -> [(a,a)] pair [] = []