tools: add sample journals with wide characters

generatejournal now has --chinese and --mixed options, which are used to
generate some additional small sample journals.
This commit is contained in:
Simon Michael 2015-10-10 11:51:07 -07:00
parent 939f7184c9
commit e6d8a9d1bc
3 changed files with 72 additions and 22 deletions

View File

@ -794,7 +794,17 @@ ghci-web: \
$(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages)
stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs
samplejournals: data/sample.journal data/100x100x10.journal data/1000x1000x10.journal data/1000x10000x10.journal data/10000x1000x10.journal data/10000x10000x10.journal data/100000x1000x10.journal \
samplejournals: \
data/sample.journal \
data/100x100x10.journal \
data/1000x1000x10.journal \
data/1000x10000x10.journal \
data/10000x1000x10.journal \
data/10000x10000x10.journal \
data/100000x1000x10.journal \
data/ascii.journal \
data/chinese.journal \
data/mixed.journal \
$(call def-help,samplejournals, regenerate standard sample journals in data/ )
@ -818,6 +828,15 @@ data/10000x10000x10.journal: tools/generatejournal
data/100000x1000x10.journal: tools/generatejournal
tools/generatejournal 100000 1000 10 >$@
data/ascii.journal: tools/generatejournal
tools/generatejournal 3 5 5 >$@
data/chinese.journal: tools/generatejournal
tools/generatejournal 3 5 5 --chinese >$@
data/mixed.journal: tools/generatejournal
tools/generatejournal 3 5 5 --mixed >$@
$(call def-help-subsection,DOCUMENTATION:)

View File

@ -477,7 +477,7 @@ Finally, for quick, fine-grained performance measurements when troubleshooting o
### Generate sample journal files
Synthetic data files like `data/100x100x10.journal` useful for benchmarks and testing.
Synthetic data files like `data/100x100x10.journal` are useful for benchmarks and testing.
The numbers describe the number of transactions, number of accounts, and maximum account depth respectively.
They are generated by `tools/generatejournal.hs`.
They should be built as needed; if not, you can use the `make samplejournals` rule:
@ -493,6 +493,9 @@ tools/generatejournal 1000 10000 10 >data/1000x10000x10.journal
tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal
tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal
tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal
tools/generatejournal 3 5 5 >data/ascii.journal
tools/generatejournal 3 5 5 --chinese >data/chinese.journal
tools/generatejournal 3 5 5 --mixed >data/mixed.journal
### Run developer tests

View File

@ -1,30 +1,34 @@
#!/usr/bin/env runhaskell
Outputs a dummy ledger file with the specified number of transactions,
number of accounts, and account tree depth. Useful for
generatejournal.hs NUMTXNS NUMACCTS ACCTDEPTH [--chinese|--mixed]
Outputs a dummy journal file with the specified number of
transactions, number of accounts, and account tree depth. By default
it uses only ascii characters, with --chinese it uses wide chinese
characters, or with --mixed it uses both. These files are used for
testing, benchmarking, profiling, etc.
module Main
import System.Environment
import Control.Monad
import Data.Time.LocalTime
import Data.Char
import Data.List
import Data.Time.Calendar
import Text.Printf
import Data.Time.LocalTime
import Numeric
import System.Environment
import Text.Printf
-- import Hledger.Utils.Debug
main = do
args <- getArgs
rawargs <- getArgs
let (opts,args) = partition (isPrefixOf "-") rawargs
let [numtxns, numaccts, acctdepth] = map read args :: [Int]
today <- getCurrentDay
let (year,_,_) = toGregorian today
let d = fromGregorian (year-1) 1 1
let dates = iterate (addDays 1) d
let accts = pair $ cycle $ take numaccts $ uniqueacctnames acctdepth
let accts = pair $ cycle $ take numaccts $ uniqueAccountNames opts acctdepth
mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts
return ()
@ -35,17 +39,41 @@ showtxn txnno date acct1 acct2 =
d = show date
amt = 1::Int
uniqueacctnames :: Int -> [String]
uniqueacctnames depth = uniqueacctnames' depth uniquenames
where uniquenames = map hex [1..] where hex = flip showHex ""
uniqueAccountNames :: [String] -> Int -> [String]
uniqueAccountNames opts depth =
mkacctnames uniquenames
mkacctnames names = mkacctnamestodepth some ++ mkacctnames rest
(some, rest) = splitAt depth names
-- mkacctnamestodepth ["a", "b", "c"] = ["a","a:b","a:b:c"]
mkacctnamestodepth :: [String] -> [String]
mkacctnamestodepth [] = []
mkacctnamestodepth (a:as) = a : map ((a++":")++) (mkacctnamestodepth as)
| "--mixed" `elem` opts = concat $ zipWith (\a b -> [a,b]) uniqueNamesHex uniqueNamesWide
| "--chinese" `elem` opts = uniqueNamesWide
| otherwise = uniqueNamesHex
uniqueacctnames' depth uniquenames = group some ++ uniqueacctnames' depth rest
where (some, rest) = splitAt depth uniquenames
uniqueNamesHex = map hex [1..] where hex = flip showHex ""
uniqueNamesWide = concat [sequences n wideChars | n <- [1..]]
-- Get the sequences of specified size starting at each element of a list,
-- cycling it if needed to fill the last sequence. If the list's elements
-- are unique, then the sequences will be too.
sequences :: Show a => Int -> [a] -> [[a]]
sequences n l = go l
go [] = []
go l' = s : go (tail l')
s' = take n l'
s | length s' == n = s'
| otherwise = take n (l' ++ cycle l)
wideChars = map chr [0x3400..0x4db0]
-- group ["a", "b", "c"] = ["a","a:b","a:b:c"]
group :: [String] -> [String]
group [] = []
group (a:as) = a : map ((a++":")++) (group as)
pair :: [a] -> [(a,a)]
pair [] = []