tools: add sample journals with wide characters

generatejournal now has --chinese and --mixed options, which are used to
generate some additional small sample journals.
This commit is contained in:
Simon Michael 2015-10-10 11:51:07 -07:00
parent 939f7184c9
commit e6d8a9d1bc
3 changed files with 72 additions and 22 deletions

View File

@ -794,7 +794,17 @@ ghci-web: \
$(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages) $(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages)
stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs
samplejournals: data/sample.journal data/100x100x10.journal data/1000x1000x10.journal data/1000x10000x10.journal data/10000x1000x10.journal data/10000x10000x10.journal data/100000x1000x10.journal \ samplejournals: \
data/sample.journal \
data/100x100x10.journal \
data/1000x1000x10.journal \
data/1000x10000x10.journal \
data/10000x1000x10.journal \
data/10000x10000x10.journal \
data/100000x1000x10.journal \
data/ascii.journal \
data/chinese.journal \
data/mixed.journal \
$(call def-help,samplejournals, regenerate standard sample journals in data/ ) $(call def-help,samplejournals, regenerate standard sample journals in data/ )
data/sample.journal: data/sample.journal:
@ -818,6 +828,15 @@ data/10000x10000x10.journal: tools/generatejournal
data/100000x1000x10.journal: tools/generatejournal data/100000x1000x10.journal: tools/generatejournal
tools/generatejournal 100000 1000 10 >$@ tools/generatejournal 100000 1000 10 >$@
data/ascii.journal: tools/generatejournal
tools/generatejournal 3 5 5 >$@
data/chinese.journal: tools/generatejournal
tools/generatejournal 3 5 5 --chinese >$@
data/mixed.journal: tools/generatejournal
tools/generatejournal 3 5 5 --mixed >$@
############################################################################### ###############################################################################
$(call def-help-subsection,DOCUMENTATION:) $(call def-help-subsection,DOCUMENTATION:)

View File

@ -477,7 +477,7 @@ Finally, for quick, fine-grained performance measurements when troubleshooting o
### Generate sample journal files ### Generate sample journal files
Synthetic data files like `data/100x100x10.journal` useful for benchmarks and testing. Synthetic data files like `data/100x100x10.journal` are useful for benchmarks and testing.
The numbers describe the number of transactions, number of accounts, and maximum account depth respectively. The numbers describe the number of transactions, number of accounts, and maximum account depth respectively.
They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs). They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs).
They should be built as needed; if not, you can use the `make samplejournals` rule: They should be built as needed; if not, you can use the `make samplejournals` rule:
@ -493,6 +493,9 @@ tools/generatejournal 1000 10000 10 >data/1000x10000x10.journal
tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal
tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal
tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal
tools/generatejournal 3 5 5 >data/ascii.journal
tools/generatejournal 3 5 5 --chinese >data/chinese.journal
tools/generatejournal 3 5 5 --mixed >data/mixed.journal
``` ```
### Run developer tests ### Run developer tests

View File

@ -1,30 +1,34 @@
#!/usr/bin/env runhaskell #!/usr/bin/env runhaskell
{- {-
generateledger.hs NUMTXNS NUMACCTS ACCTDEPTH generatejournal.hs NUMTXNS NUMACCTS ACCTDEPTH [--chinese|--mixed]
Outputs a dummy ledger file with the specified number of transactions,
number of accounts, and account tree depth. Useful for
testing/profiling/benchmarking.
Outputs a dummy journal file with the specified number of
transactions, number of accounts, and account tree depth. By default
it uses only ascii characters, with --chinese it uses wide chinese
characters, or with --mixed it uses both. These files are used for
testing, benchmarking, profiling, etc.
-} -}
module Main module Main
where where
import System.Environment import Data.Char
import Control.Monad import Data.List
import Data.Time.LocalTime
import Data.Time.Calendar import Data.Time.Calendar
import Text.Printf import Data.Time.LocalTime
import Numeric import Numeric
import System.Environment
import Text.Printf
-- import Hledger.Utils.Debug
main = do main = do
args <- getArgs rawargs <- getArgs
let (opts,args) = partition (isPrefixOf "-") rawargs
let [numtxns, numaccts, acctdepth] = map read args :: [Int] let [numtxns, numaccts, acctdepth] = map read args :: [Int]
today <- getCurrentDay today <- getCurrentDay
let (year,_,_) = toGregorian today let (year,_,_) = toGregorian today
let d = fromGregorian (year-1) 1 1 let d = fromGregorian (year-1) 1 1
let dates = iterate (addDays 1) d let dates = iterate (addDays 1) d
let accts = pair $ cycle $ take numaccts $ uniqueacctnames acctdepth let accts = pair $ cycle $ take numaccts $ uniqueAccountNames opts acctdepth
mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts
return () return ()
@ -35,17 +39,41 @@ showtxn txnno date acct1 acct2 =
d = show date d = show date
amt = 1::Int amt = 1::Int
uniqueacctnames :: Int -> [String] uniqueAccountNames :: [String] -> Int -> [String]
uniqueacctnames depth = uniqueacctnames' depth uniquenames uniqueAccountNames opts depth =
where uniquenames = map hex [1..] where hex = flip showHex "" mkacctnames uniquenames
where
mkacctnames names = mkacctnamestodepth some ++ mkacctnames rest
where
(some, rest) = splitAt depth names
-- mkacctnamestodepth ["a", "b", "c"] = ["a","a:b","a:b:c"]
mkacctnamestodepth :: [String] -> [String]
mkacctnamestodepth [] = []
mkacctnamestodepth (a:as) = a : map ((a++":")++) (mkacctnamestodepth as)
uniquenames
| "--mixed" `elem` opts = concat $ zipWith (\a b -> [a,b]) uniqueNamesHex uniqueNamesWide
| "--chinese" `elem` opts = uniqueNamesWide
| otherwise = uniqueNamesHex
uniqueacctnames' depth uniquenames = group some ++ uniqueacctnames' depth rest uniqueNamesHex = map hex [1..] where hex = flip showHex ""
where (some, rest) = splitAt depth uniquenames
-- An endless supply of names made of wide characters: first every
-- one-character sequence drawn from wideChars, then every two-character
-- sequence, and so on with ever longer sequences.
uniqueNamesWide :: [String]
uniqueNamesWide = concatMap (`sequences` wideChars) [1..]
-- | Get the sequences of the specified size starting at each element of a
-- list, wrapping around to the start of the list as often as needed to fill
-- the sequences near the end. If the list's elements are unique, then the
-- sequences will be too.
--
-- > sequences 2 "abc" == ["ab","bc","ca"]
--
-- An empty list yields no sequences; a size of zero or less yields one empty
-- sequence per element. No class constraint is placed on the element type.
sequences :: Int -> [a] -> [[a]]
sequences n l = go l
  where
    go [] = []
    go l'@(_:rest) = s : go rest
      where
        s' = take n l'
        s | length s' == n = s'
          -- Too few elements remain: top up from the start of the full list.
          | otherwise      = take n (l' ++ cycle l)
-- The characters with code points U+3400 through U+4DB0 inclusive, used as
-- the raw material for wide-character names.
wideChars :: [Char]
wideChars = ['\x3400' .. '\x4db0']
-- group ["a", "b", "c"] = ["a","a:b","a:b:c"]
group :: [String] -> [String]
group [] = []
group (a:as) = a : map ((a++":")++) (group as)
pair :: [a] -> [(a,a)] pair :: [a] -> [(a,a)]
pair [] = [] pair [] = []