mirror of
https://github.com/qnikst/ghc-timings-report.git
synced 2024-10-03 23:39:35 +03:00
Initial layout
This commit is contained in:
commit
630c1b6861
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
dist-newstyle/
|
||||
tmp/
|
||||
cabal.project.local
|
||||
stack.yaml
|
||||
package.yaml
|
5
CHANGELOG.md
Normal file
5
CHANGELOG.md
Normal file
@ -0,0 +1,5 @@
|
||||
# Revision history for ghc-timings
|
||||
|
||||
## 0.0.0.1 -- 2020-03-09
|
||||
|
||||
* Initial layout
|
101
Main.hs
Normal file
101
Main.hs
Normal file
@ -0,0 +1,101 @@
|
||||
{-# LANGUAGE TransformListComp #-}
|
||||
module Main where
|
||||
|
||||
import Control.Monad
|
||||
import Control.Monad.Trans.Resource
|
||||
import qualified Data.ByteString.Lazy as BSL
|
||||
import qualified Data.Binary.Builder as Builder
|
||||
import Data.Conduit
|
||||
import Data.Conduit.Combinators as CL
|
||||
import Data.Conduit.List
|
||||
import Data.Set as Set
|
||||
import Data.Csv as Csv
|
||||
import Data.Csv.Builder as Csv
|
||||
import Data.Foldable
|
||||
import Data.Functor
|
||||
import Data.Function
|
||||
import Data.Traversable
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import Data.Aeson
|
||||
import Data.Either
|
||||
import Data.List
|
||||
import Data.Maybe as M
|
||||
import GhcBuildPhase
|
||||
import GhcFile
|
||||
import GHC.Exts
|
||||
import qualified Data.Vector as V
|
||||
import System.Environment
|
||||
import System.FilePath
|
||||
import Prelude hiding (mapM_, print)
|
||||
import qualified Prelude
|
||||
|
||||
-- | Entry point: walk a @dist-newstyle@-like tree, parse every
-- @.dump-timings@ file found, and emit per-module JSON/CSV reports plus a
-- per-package summary CSV into the 'output' directory.
--
-- Usage: @ghc-timings DIR@ — exactly one argument is expected; the pattern
-- match below fails (crashes) on any other argument count.
main :: IO ()
main = do
  [dir] <- getArgs

  files <- findDumpTimings dir

  -- Classify each discovered path: paths that match the cabal v2 build
  -- layout ("/build/<host-os>/<ghc>/<package>/build/<module...>", relative
  -- to dir) become 'GhcFile' records via RecordWildCards; everything else
  -- is collected as a failure and only warned about below.
  let ( files_failed,
        files_parsed)
        = partitionEithers $ files <&> \file ->
            case stripPrefix dir file of
              Nothing -> Left file
              Just x -> case splitDirectories x of
                ("/": "build": hostOs: ghcVersion: packageName: "build": modulePath) -> Right GhcFile{..}
                _ -> Left file

  unless (Prelude.null files_failed) $ do
    Prelude.putStrLn "Warning, some files are failed to be parsed"
    Prelude.print files_failed

  -- Output all files in json form for later analysis.
  -- NOTE(review): assumes the 'output' directory ("./tmp") already exists —
  -- these writes fail otherwise; TODO confirm / createDirectoryIfMissing.
  results <- for files_parsed $ \f -> do
    steps <- fmap parsePhases $ T.readFile (rebuildFilePath dir f)
    encodeFile (output </> rebuildPlainPath f <.> "json") steps
    let bs = encodeDefaultOrderedByName steps
    BSL.writeFile (output </> rebuildPlainPath f <.> "csv") bs
    pure (f, steps)

  -- Group parsed phases as package -> (module path -> phases).
  -- Map.fromListWith (<>) merges the per-module singleton maps of one package.
  let stats_by_package = Map.fromListWith (<>)
        [ (packageName, Map.singleton (joinPath modulePath) steps)
        | (GhcFile{..}, steps) <- results
        ]
  encodeFile (output </> "stats_by_package" <.> "json") stats_by_package

  -- One CSV per package: a row per module with the total time plus one
  -- column per phase name occurring anywhere in the package.
  for_ (Map.toList stats_by_package) $ \(package, stat) -> do
    -- Distinct phase names across all modules of this package; Set gives
    -- a stable (sorted) column order.
    let headers = Set.toList $ Set.fromList
          [ T.encodeUtf8 phaseName
          | (_, steps) <- Map.toList stat
          , Phase{..} <- steps
          ]
    -- TransformListComp: the trailing 'then sortWith by (Down total)'
    -- sorts the rows by descending total compile time.
    let bs = Csv.encodeHeader (V.fromList ("module": "total": headers))
          <> mconcat
             [ Csv.encodeRecord
                 $ module_name
                 : show total
                 : Prelude.map (\n -> maybe "" show $ Map.lookup n by_phase) headers
             | (module_name, steps) <- Map.toList stat
             , let total = Prelude.sum [phaseTime | Phase{..} <- steps]
               -- Sum times of repeated phases within one module.
             , let by_phase = Map.fromListWith (+)
                     [(T.encodeUtf8 phaseName, phaseTime)
                     | Phase{..} <- steps
                     ]
             , then sortWith by (Down total)
             ]
    BSL.writeFile (output </> package <.> "csv")
      $ Builder.toLazyByteString bs
    -- Prelude.print byPackage
  where
    -- Destination directory for all generated reports.
    output = "./tmp"
|
||||
|
||||
-- | Find all files that are related to the dump timings.
--
-- Walks the directory tree under @input@ (without following symlinks —
-- the 'False' argument) and keeps every path ending in @.dump-timings@.
--
-- XXX: this method is not effective enough as it eagerly builds a list of FilePath
findDumpTimings :: String -> IO [FilePath]
findDumpTimings input =
  runResourceT $ runConduit $ sourceDirectoryDeep False input
    -- 'isSuffixOf' from Data.List replaces the hand-rolled
    -- reverse/isPrefixOf equivalent.
    .| CL.filter (".dump-timings" `isSuffixOf`)
    .| consume
|
114
README.markdown
Normal file
114
README.markdown
Normal file
@ -0,0 +1,114 @@
|
||||
The idea of this tool is to produce a report about program compilation, so
you will know where GHC spends time and what it does. This way you
can verify your ideas about how to make compilation faster without
touching GHC, and it leads to a better understanding of where time is
spent and whether that is reasonable. If improved, this tool can be
used as guidance for improving a codebase, and possibly GHC itself, to get
better timings without sacrificing functionality. At least I hope so.
|
||||
|
||||
For the author, this tool, even in its simplest form, made it possible to
find a way to speed up compilation by 20% just by reorganizing the code
structure of the project.
|
||||
|
||||
NOTE: this tool is at a very early stage, and I work on it in my
free time, basically during official holidays only. As a result I target
only my own use case, so not all configurations are supported, but I will gladly
apply any merge requests that make the lives of other users easier.
|
||||
|
||||
## How to use.
|
||||
|
||||
1. Download the tool:
|
||||
|
||||
```haskell
|
||||
hub clone https://github.com/qnikst/ghc-timing-report
|
||||
```
|
||||
|
||||
2. Build it:
|
||||
|
||||
```bash
|
||||
cabal v2-build
|
||||
```
|
||||
|
||||
At this point I don't suggest you to install the tool because
|
||||
at such an early stage it will likely require manual configuration
|
||||
a lot.
|
||||
|
||||
3. Configure your project in order to generate timing files:
|
||||
|
||||
```bash
|
||||
cabal v2-configure --ghc-options=-ddump-timings --ghc-options=-ddump-to-file
|
||||
```
|
||||
|
||||
`-ddump-timings` tells GHC to generate timings report, `-ddump-to-file` tells GHC
|
||||
to store those reports to files.
|
||||
|
||||
4. Running:
|
||||
|
||||
```bash
|
||||
cabal v2-run ghc-timings /Users/qnikst/workspace/another-project/dist-newstyle
|
||||
```
|
||||
|
||||
In the `tmp` folder you'll get all the reports.
|
||||
|
||||
# Report files.
|
||||
|
||||
(Note this section will likely always be outdated despite all efforts to keep it up to date,
|
||||
sorry for that)
|
||||
|
||||
The generated reports keep a ton of useful data. For each module in all the
projects you'll see files:
|
||||
|
||||
```
|
||||
<host-os>--<ghc-version>--<package-version>--<module>.dump-timings.json
|
||||
<host-os>--<ghc-version>--<package-version>--<module>.dump-timings.csv
|
||||
```
|
||||
|
||||
Each contains a table for the module with the name of the phase, the module, the number of allocations, and the time
spent on that phase.
|
||||
|
||||
And report files:
|
||||
|
||||
```
|
||||
<package-version>.csv
|
||||
```
|
||||
|
||||
That summarizes information for the package. File keeps list of modules, and total compilation time
|
||||
for module and total time for each phase. If you import `<package-version>.csv` file to some
|
||||
package that works with tables like numbers, you'll see something like this:
|
||||
|
||||
![screen1](https://github.com/qnikst/ghc-timing-report/screenshot1.png)
|
||||
|
||||
|
||||
# Project ideology.
|
||||
|
||||
Here I want to share a bit about how I work on this project, as it's not a usual one for me.
I don't know where this project will lead; it has proved to be useful, at least
for me, from the very early development steps. But I don't know yet how to do good analysis,
or whether the data will be useful or not. So I use the following guidance:
|
||||
|
||||
a. Write in a cheap and dirty way so you get your results as fast as possible; this allows
   me to actually see and verify whether I can make any use of the data. It is not worth
   spending much time on the design if you'll throw it away because it's not useful at all.
   But once code is proven to work and to be useful, it is worth stabilizing it by proper refactoring.
   This also means that I sometimes do not use the most efficient and nicest tricks, like the foldl
   package, unless I have a good understanding of their use, so I don't spend much time
   learning them. But I'd like to be pushed in directions that can improve the
   pipeline in a cheap way.
|
||||
|
||||
b. Despite writing in a dirty way, it is worth keeping types that describe the problem at
   each step, to make refactoring simpler.
|
||||
|
||||
c. There are many tools that can be used for data visualization and analysis, so it is worth
   storing all intermediate data in a machine-readable format. This way it's possible
   not to be tied to Haskell. (Though I hope.)
|
||||
|
||||
|
||||
# Notes.
|
||||
|
||||
1. It's a pity that we don't have memory residency statistics per module, because allocations
   tell us nothing and are basically just another "time" report. So for now I avoid working with
   them in statistics reports.
2. I've tried to build a table where I output all the phases, but such a table became too
   big, so "Numbers", which I was using for working with the data, refused to open it. So I've
   abandoned that idea unless I find a way to represent the data in a compact way.
|
||||
|
3
cabal.project
Normal file
3
cabal.project
Normal file
@ -0,0 +1,3 @@
|
||||
packages: .
|
||||
optional-packages:
|
||||
vendor/**/*.cabal
|
43
ghc-timings.cabal
Normal file
43
ghc-timings.cabal
Normal file
@ -0,0 +1,43 @@
|
||||
cabal-version: >=1.10
|
||||
name: ghc-timings-report
|
||||
version: 0.1.0.0
|
||||
synopsis: Get statistical report about how long files were compiled.
|
||||
description: Simple package that can gather information about compilation
|
||||
time for later analysis.
|
||||
bug-reports: https://github.com/qnikst/ghc-timings-report
|
||||
license: MIT
|
||||
author: Alexander Vershilov
|
||||
maintainer: alexander.vershilov@gmail.com
|
||||
-- copyright:
|
||||
-- category:
|
||||
build-type: Simple
|
||||
extra-source-files: CHANGELOG.md
|
||||
|
||||
executable ghc-timings
|
||||
main-is: Main.hs
|
||||
other-modules: GhcFile
|
||||
GhcBuildPhase
|
||||
hs-source-dirs: src
|
||||
.
|
||||
default-extensions: DerivingStrategies
|
||||
DeriveGeneric
|
||||
DeriveAnyClass
|
||||
DerivingVia
|
||||
OverloadedStrings
|
||||
RecordWildCards
|
||||
ViewPatterns
|
||||
-- other-extensions:
|
||||
build-depends: base >=4.13 && <4.14,
|
||||
aeson,
|
||||
binary,
|
||||
bytestring,
|
||||
cassava,
|
||||
conduit >= 1.3,
|
||||
containers,
|
||||
resourcet,
|
||||
filepath,
|
||||
text,
|
||||
text-show,
|
||||
vector
|
||||
ghc-options: -Wall -Werror
|
||||
default-language: Haskell2010
|
BIN
screenshot1.png
Normal file
BIN
screenshot1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 293 KiB |
47
src/GhcBuildPhase.hs
Normal file
47
src/GhcBuildPhase.hs
Normal file
@ -0,0 +1,47 @@
|
||||
{-# LANGUAGE StrictData #-}
|
||||
-- | Definition of the types used in the analysis.
|
||||
module GhcBuildPhase
|
||||
( Phase(..)
|
||||
, parsePhases
|
||||
) where
|
||||
|
||||
import Data.Aeson
|
||||
import Data.Csv
|
||||
import Data.Functor
|
||||
import Data.Maybe
|
||||
import qualified Data.Text as T
|
||||
import GHC.Generics
|
||||
import TextShow
|
||||
import TextShow.Generic
|
||||
|
||||
-- | Build phase that is reported in the ghc timings output.
--
-- One record corresponds to one @name [Module]: alloc=INT time=DOUBLE@
-- line of a @.dump-timings@ file (see 'parsePhases').
data Phase = Phase
  { phaseName :: T.Text     -- ^ Phase name as printed by GHC (text before the bracket).
  , phaseModule :: T.Text   -- ^ Module name taken from between the brackets.
  , phaseAlloc :: Int       -- ^ Allocations of the phase (@alloc=@ field; presumably bytes — TODO confirm).
  , phaseTime :: Double     -- ^ Time spent in the phase (@time=@ field; presumably milliseconds — TODO confirm).
  }
  deriving stock (Generic)
  deriving anyclass (ToJSON, FromJSON)
  deriving anyclass (ToNamedRecord, DefaultOrdered)
  deriving TextShow via (FromGeneric Phase)
|
||||
|
||||
-- | Parse a @.dump-timings@ file and get the list of phases.
--
-- This is an ad-hoc parsing procedure that doesn't do anything clever like
-- parser combinators, regular expressions and stuff.
--
-- Assumes structure:
-- @name [Module]: alloc=INT time=DOUBLE@
--
-- Doesn't report errors: lines that do not match the expected structure
-- (blank lines, any extra GHC output) are silently skipped instead of
-- crashing via the previous partial 'read'\/'fromJust'\/'error' path.
parsePhases :: T.Text -> [Phase]
parsePhases input = mapMaybe parseStep (T.lines input) where
  -- Parse one line; Nothing on any structural mismatch.
  parseStep line = do
    let (phaseName, rest0) = T.span (/='[') line
    -- Require the opening bracket (T.drop 1 in the old code assumed it).
    rest1 <- T.stripPrefix "[" rest0
    let (phaseModule, rest1') = T.span (/=']') rest1
    -- Drop "]:" — same two characters the original 'T.drop 2' skipped.
    rest2 <- T.stripPrefix "]:" rest1'
    [allocs, time] <- pure (T.words rest2)  -- MonadFail Maybe: mismatch -> Nothing
    phaseAlloc <- readTotal =<< T.stripPrefix "alloc=" allocs
    phaseTime <- readTotal =<< T.stripPrefix "time=" time
    pure Phase{..}
  -- Total replacement for the partial 'read', built on Prelude's 'reads'.
  readTotal :: Read a => T.Text -> Maybe a
  readTotal t = case reads (T.unpack t) of
    [(v, "")] -> Just v
    _ -> Nothing
|
36
src/GhcFile.hs
Normal file
36
src/GhcFile.hs
Normal file
@ -0,0 +1,36 @@
|
||||
{-# LANGUAGE StrictData #-}
|
||||
module GhcFile
|
||||
( GhcFile(..)
|
||||
, rebuildFilePath
|
||||
, rebuildPlainPath
|
||||
) where
|
||||
|
||||
import Data.Aeson
|
||||
import Data.List
|
||||
import GHC.Generics (Generic)
|
||||
import System.FilePath
|
||||
|
||||
-- | Representation of the file in the filesystem structure.
--
-- This file follows pattern used in cabal build and may differ for
-- other build systems. I don't care about those, but patches are welcome.
--
-- The components mirror the cabal v2 layout
-- @build/<hostOs>/<ghcVersion>/<packageName>/build/<modulePath...>@
-- (see 'rebuildFilePath').
data GhcFile = GhcFile
  { hostOs :: String -- ^ Host OS / architecture directory component (e.g. @x86_64-linux@ — TODO confirm).
  , ghcVersion :: String -- ^ Compiler directory component (e.g. @ghc-8.8.3@ — TODO confirm).
  , packageName :: String -- ^ Package (with version) directory component.
  , modulePath :: [String] -- ^ Remaining path components down to the dump file.
  }
  deriving (Show, Generic)
  deriving anyclass (ToJSON, FromJSON)
|
||||
|
||||
-- | Build the on-disk path to a dump file from a base prefix and a 'GhcFile'.
--
-- Inverse of the path parsing done by the caller: re-inserts the fixed
-- @build@ components around the parsed ones.
--
-- It looks terrible, seems a wrong abstraction is here.
rebuildFilePath :: FilePath -> GhcFile -> FilePath
rebuildFilePath base GhcFile{..} =
  base </> joinPath (["build", hostOs, ghcVersion, packageName, "build"] ++ modulePath)
|
||||
|
||||
-- | Convert 'GhcFile' into a plain (flat) filename used in our report
-- storage: all path components joined with @--@ separators.
rebuildPlainPath :: GhcFile -> FilePath
rebuildPlainPath GhcFile{..} =
  intercalate "--" (hostOs : ghcVersion : packageName : modulePath)
|
Loading…
Reference in New Issue
Block a user