Callgrind: Subposition compression and weight correction (#423)

This fixes a number of bugs with callgrind import. Dealing with this file format is a big pain because the documentation on https://www.valgrind.org/docs/manual/cl-format.html doesn't contain enough examples to disambiguate some of the behaviour, and because there's a fundamental impedance mismatch between call-trees and call-graphs.

In any case, after this PR, the behavior of callgrind file import is much better.
The file provided in #414 now imports correctly and, as far as I can tell, displays the same weights as what I see in KCacheGrind.

Some of the key changes:
- Implementing subposition compression. This was just a TODO in the code that was never implemented
- Fixing a misinterpretation of how `fe` and `fi` were intended to be used. Previously, I was using it to change the filename of a symbol, meaning that an `fi` or an `fe` line in the middle of a block describing costs for an `fn` would split a node in the call-graph into multiple nodes causing all manners of problems
- Fixing a bug where `cfn` was persisting beyond a single call, also resulting in call graph nodes being split when they shouldn't be

Fixes #414
This commit is contained in:
Jamie Wong 2023-06-04 04:06:22 -07:00 committed by GitHub
parent 8da9088ec1
commit f23f65b3af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 337 additions and 62 deletions

View File

@ -0,0 +1,30 @@
# callgrind format
events: Instructions
fl=alpha.c
fn=alpha
1 10
cfl=beta.c
cfn=beta
calls=1 1
1 10
cfn=gamma
calls=1 1
1 10
cfn=delta
calls=1 1
1 20
fn=delta
1 10
cfn=gamma
calls=1 1
1 10
fn=gamma
1 10
cfl=
fl=beta.c
fn=beta
1 10

View File

@ -18,7 +18,7 @@ calls=3 51
16 400 3000
fn=(2)
51 100 4000
51 100 15000
cfi=(2)
cfn=(3)
calls=2 20
@ -26,4 +26,4 @@ calls=2 20
fl=(2)
fn=(3)
20 700 6000
20 700 8000

View File

@ -0,0 +1,24 @@
# callgrind format
events: Instructions
fl=file1.c
fn=main
16 20
cfn=func1
calls=1 50
* 400
cfi=file2.c
cfn=func2
calls=3 20
* *
fn=func1
+35 -300
cfi=file2.c
cfn=func2
calls=2 20
* 300
fl=file2.c
fn=func2
-31 +400

View File

@ -41,6 +41,61 @@ Object {
}
`;
exports[`importFromCallgrind cfn reset 1`] = `
Object {
"frames": Array [
Frame {
"col": undefined,
"file": "alpha.c",
"key": "alpha.c:alpha",
"line": undefined,
"name": "alpha",
"selfWeight": 10,
"totalWeight": 50,
},
Frame {
"col": undefined,
"file": "beta.c",
"key": "beta.c:beta",
"line": undefined,
"name": "beta",
"selfWeight": 10,
"totalWeight": 10,
},
Frame {
"col": undefined,
"file": "alpha.c",
"key": "alpha.c:gamma",
"line": undefined,
"name": "gamma",
"selfWeight": 20,
"totalWeight": 20,
},
Frame {
"col": undefined,
"file": "alpha.c",
"key": "alpha.c:delta",
"line": undefined,
"name": "delta",
"selfWeight": 10,
"totalWeight": 20,
},
],
"name": "callgrind.cfn-reset.log -- Instructions",
"stacks": Array [
"alpha;beta 10",
"alpha;gamma 10",
"alpha;delta;gamma 10",
"alpha;delta 10",
"alpha 10",
],
}
`;
exports[`importFromCallgrind cfn reset: indexToView 1`] = `0`;
exports[`importFromCallgrind cfn reset: profileGroup.name 1`] = `"callgrind.cfn-reset.log"`;
exports[`importFromCallgrind multiple event types 1`] = `
Object {
"frames": Array [
@ -100,7 +155,7 @@ Object {
"key": "file1.c:func1",
"line": undefined,
"name": "func1",
"selfWeight": 8888.888888888889,
"selfWeight": 15000,
"totalWeight": 20000,
},
Frame {
@ -109,14 +164,14 @@ Object {
"key": "file2.c:func2",
"line": undefined,
"name": "func2",
"selfWeight": 14111.111111111111,
"totalWeight": 14111.111111111111,
"selfWeight": 8000,
"totalWeight": 8000,
},
],
"name": "callgrind.multiple-event-types.log -- Memory",
"stacks": Array [
"main;func1;func2 10.85 KB",
"main;func1 8.68 KB",
"main;func1;func2 4.88 KB",
"main;func1 14.65 KB",
"main;func2 2.93 KB",
"main 14.65 KB",
],
@ -172,6 +227,51 @@ exports[`importFromCallgrind name compression: indexToView 1`] = `0`;
exports[`importFromCallgrind name compression: profileGroup.name 1`] = `"callgrind.name-compression.log"`;
exports[`importFromCallgrind subposition compression 1`] = `
Object {
"frames": Array [
Frame {
"col": undefined,
"file": "file1.c",
"key": "file1.c:main",
"line": undefined,
"name": "main",
"selfWeight": 20,
"totalWeight": 820,
},
Frame {
"col": undefined,
"file": "file1.c",
"key": "file1.c:func1",
"line": undefined,
"name": "func1",
"selfWeight": 100,
"totalWeight": 400,
},
Frame {
"col": undefined,
"file": "file2.c",
"key": "file2.c:func2",
"line": undefined,
"name": "func2",
"selfWeight": 700,
"totalWeight": 700,
},
],
"name": "callgrind.subposition-compression.log -- Instructions",
"stacks": Array [
"main;func1;func2 300",
"main;func1 100",
"main;func2 400",
"main 20",
],
}
`;
exports[`importFromCallgrind subposition compression: indexToView 1`] = `0`;
exports[`importFromCallgrind subposition compression: profileGroup.name 1`] = `"callgrind.subposition-compression.log"`;
exports[`importFromCallgrind: indexToView 1`] = `0`;
exports[`importFromCallgrind: profileGroup.name 1`] = `"callgrind.example.log"`;

View File

@ -11,3 +11,11 @@ test('importFromCallgrind name compression', async () => {
test('importFromCallgrind multiple event types', async () => {
await checkProfileSnapshot('./sample/profiles/callgrind/callgrind.multiple-event-types.log')
})
test('importFromCallgrind subposition compression', async () => {
await checkProfileSnapshot('./sample/profiles/callgrind/callgrind.subposition-compression.log')
})
test('importFromCallgrind cfn reset', async () => {
await checkProfileSnapshot('./sample/profiles/callgrind/callgrind.cfn-reset.log')
})

View File

@ -129,26 +129,6 @@ class CallGraph {
}
toProfile(): Profile {
// To convert a call graph into a profile, we first need to identify what
// the "root weights" are. "root weights" are the total weight of each frame
// while at the bottom of the call-stack. The majority of functions will have
// zero weight while at the bottom of the call-stack, since most functions
// are never at the bottom of the call-stack.
const rootWeights = new Map<Frame, number>()
for (let [frame, totalWeight] of this.totalWeights) {
rootWeights.set(frame, totalWeight)
}
for (let [_, childMap] of this.childrenTotalWeights) {
for (let [child, weight] of childMap) {
rootWeights.set(child, getOrElse(rootWeights, child, () => weight) - weight)
}
}
let totalProfileWeight = 0
for (let [_, rootWeight] of rootWeights) {
totalProfileWeight += rootWeight
}
const profile = new CallTreeProfileBuilder()
let unitMultiplier = 1
@ -170,7 +150,12 @@ class CallGraph {
const currentStack = new Set<Frame>()
const visit = (frame: Frame, callTreeWeight: number) => {
let maxWeight = 0
for (let [_, totalWeight] of this.totalWeights) {
maxWeight = Math.max(maxWeight, totalWeight)
}
const visit = (frame: Frame, subtreeTotalWeight: number) => {
if (currentStack.has(frame)) {
// Call-graphs are allowed to have cycles. Call-trees are not. In case
// we run into a cycle, we'll just avoid recursing into the same subtree
@ -198,7 +183,7 @@ class CallGraph {
// See the comment at the top of the file for an example where this
// assumption can yield especially misleading results.
if (callTreeWeight < 1e-4 * totalProfileWeight) {
if (subtreeTotalWeight < 1e-4 * maxWeight) {
// This assumption about even distribution can cause us to generate a
// call tree with dramatically more nodes than the call graph.
//
@ -220,45 +205,113 @@ class CallGraph {
// 1000*250=250,000 nodes in the resulting call graph.
//
// To mitigate this explosion of the # of nodes, we ignore subtrees
// whose weights are less than 0.01% of the total weight of the profile.
// whose weights are less than 0.01% of the heaviest node in the call
// graph.
return
}
// totalWeightForFrame is the total weight for the given frame in the
// entire call graph.
const callGraphWeightForFrame = getOrElse(this.totalWeights, frame, () => 0)
if (callGraphWeightForFrame === 0) {
const totalWeightForFrameInCallgraph = getOrElse(this.totalWeights, frame, () => 0)
if (totalWeightForFrameInCallgraph === 0) {
return
}
// This is the portion of the total time the given child spends within the
// given parent that we'll attribute to this specific path in the call
// tree.
const ratio = callTreeWeight / callGraphWeightForFrame
let selfWeightForNodeInCallTree = subtreeTotalWeight
let selfWeightForFrame = callGraphWeightForFrame
profile.enterFrame(frame, totalCumulative * unitMultiplier)
profile.enterFrame(frame, Math.round(totalCumulative * unitMultiplier))
currentStack.add(frame)
for (let [child, callGraphEdgeWeight] of this.childrenTotalWeights.get(frame) || []) {
selfWeightForFrame -= callGraphEdgeWeight
const childCallTreeWeight = callGraphEdgeWeight * ratio
for (let [child, totalWeightAsChild] of this.childrenTotalWeights.get(frame) || []) {
// To determine the weight of the child in the call tree, we look at the
// weight of the child in the call graph relative to its parent.
const childCallTreeWeight =
subtreeTotalWeight * (totalWeightAsChild / totalWeightForFrameInCallgraph)
let prevTotalCumulative = totalCumulative
visit(child, childCallTreeWeight)
// Even though we tried to add a child with total weight equal to
// childCallTreeWeight, we might have failed for a variety of data
// consistency reasons, or due to cycles.
//
// We want to avoid losing weight in the call tree by subtracting from
// the self weight on the assumption it was added to the subtree, so we
// only subtree from the self weight the amount that was *actually* used
// by the subtree, rather than the amount we *intended* for it to use.
const actualChildCallTreeWeight = totalCumulative - prevTotalCumulative
selfWeightForNodeInCallTree -= actualChildCallTreeWeight
}
currentStack.delete(frame)
totalCumulative += selfWeightForFrame * ratio
profile.leaveFrame(frame, totalCumulative * unitMultiplier)
totalCumulative += selfWeightForNodeInCallTree
profile.leaveFrame(frame, Math.round(totalCumulative * unitMultiplier))
}
for (let [rootFrame, rootWeight] of rootWeights) {
if (rootWeight <= 0) {
continue
// It's surprisingly hard to figure out which nodes in the call graph
// constitute the root nodes of call trees.
//
// Here are a few intuitive options, and reasons why they're not always
// correct or good.
//
// ## 1. Find nodes in the call graph that have no callers
//
// This is probably right 99% of the time in practice, but since the
// callgrind is totally general, it's totally valid to have a file
// representing a profile for the following code:
//
// function a() {
// b()
// }
// function b() {
// }
// a()
// b()
//
// In this case, even though b has a caller, some of the real calltree for
// an execution trace of the program will have b on the top of the stack.
//
// ## 2. Find nodes in the call graph that still have weight if you
// subtract all of the weight caused by callers.
//
// The callgraph format, in theory, provides inclusive times for every
// function call. That means if you have a function `alpha` with a total
// weight of 20, and its only in-edge in the call-graph has weight of 10,
// that should indicate that `alpha` exists both as the root-node of a
// calltree, and as a node in some other call-tree.
//
// In theory, you should be able to figure out the weight of it as a root
// node by subtracting the weights of all the in-edges. In practice, real
// callgrind files are inconsistent in how they do accounting for in-edges
// where you end up in weird situations where the weight of in-edges
// *exceeds* the weight of nodes (where the weight of a node is its
// self-weight plus the weight of all its out-edges).
//
// ## 3. Find the heaviest node in the call graph, build its call-tree, and
// decrease the weights of other nodes in the call graph while you
// build the call tree. After you've done this, repeat with the new
// heaviest.
//
// I think this version is probably fully correct, but the performance is
// awful. The naive-version is O(n^2) because you have to re-determine which
// node is the heaviest after each time you finish building a call-tree. You
// can't just sort, because the relative ordering also changes with the
// construction of each call tree.
//
// There's probably a clever solution here which puts all of the nodes into
// a min-heap and then deletes and re-inserts nodes as their weights change,
// but reasoning about the performance of that is a big pain in the butt.
//
// Despite not always being correct, I'm opting for option (1).
const rootNodes = new Set<Frame>(this.frameSet)
for (let [_, childMap] of this.childrenTotalWeights) {
for (let [child, _] of childMap) {
rootNodes.delete(child)
}
// If we've reached here, it means that the given root frame has some
// weight while at the top of the call-stack.
visit(rootFrame, rootWeight)
}
for (let rootNode of rootNodes) {
visit(rootNode, this.totalWeights.get(rootNode)!)
}
return profile.build()
@ -344,7 +397,7 @@ class CallgrindParser {
}
private calleeFrameInfo(): FrameInfo {
const file = this.calleeFilename || '(unknown)'
const file = this.calleeFilename || this.filename || '(unknown)'
const name = this.calleeFunctionName || '(unknown)'
const key = `${file}:${name}`
return {key, name, file}
@ -384,10 +437,20 @@ class CallgrindParser {
switch (key) {
case 'fe':
case 'fi':
case 'fi': {
// fe/fi are used to indicate the source-file of a function definition
// changed mid-definition. This is for inlined code, but doesn't
// indicate that we've actually switched to referring to a different
// function, so we mostly ignore it.
//
// We still need to do the parseNameWithCompression call in case a name
// is defined and then referenced later on for name compression.
this.parseNameWithCompression(value, this.savedFileNames)
break
}
case 'fl': {
this.filename = this.parseNameWithCompression(value, this.savedFileNames)
this.calleeFilename = this.filename
break
}
@ -398,6 +461,8 @@ class CallgrindParser {
case 'cfi':
case 'cfl': {
// NOTE: unlike the fe/fi distinction described above, cfi and cfl are
// interchangeable.
this.calleeFilename = this.parseNameWithCompression(value, this.savedFileNames)
break
}
@ -413,6 +478,22 @@ class CallgrindParser {
// since it'll just be copying the exact same frame over-and-over again,
// but that might be better than ignoring it.
this.parseCostLine(this.lines[this.lineNum++], 'child')
// This isn't specified anywhere in the spec, but empirically the and
// "cfn" scope should only persist for a single "call".
//
// This seems to be what KCacheGrind does too:
//
// https://github.com/KDE/kcachegrind/blob/ea4314db2785cb8f279fe884ee7f82445642b692/libcore/cachegrindloader.cpp#L1259
this.calleeFilename = null
this.calleeFunctionName = null
break
}
case 'cob':
case 'ob': {
// We ignore these for now. They're valid lines, but we don't capture or
// display information about them.
break
}
@ -458,21 +539,52 @@ class CallgrindParser {
return name
}
private prevCostLineNumbers: number[] = []
private parseCostLine(line: string, costType: 'self' | 'child'): boolean {
// TODO(jlfwong): Handle "Subposition compression"
// TODO(jlfwong): Allow hexadecimal encoding
const parts = line.split(/\s+/)
const nums: number[] = []
for (let part of parts) {
// As far as I can tell from the specification, the callgrind format does
// not accept floating point numbers.
const asNum = parseInt(part)
if (isNaN(asNum)) {
for (let i = 0; i < parts.length; i++) {
const part = parts[i]
if (part.length === 0) {
return false
}
nums.push(asNum)
if (part === '*' || part[0] === '-' || part[1] === '+') {
// This handles "Subposition compression"
// See: https://valgrind.org/docs/manual/cl-format.html#cl-format.overview.compression2
if (this.prevCostLineNumbers.length <= i) {
throw new Error(
`Line ${this.lineNum} has a subposition on column ${i} but ` +
`previous cost line has only ${this.prevCostLineNumbers.length} ` +
`columns. Line contents: ${line}`,
)
}
const prevCostForSubposition = this.prevCostLineNumbers[i]
if (part === '*') {
nums.push(prevCostForSubposition)
} else {
// This handles both the '-' and '+' cases
const offset = parseInt(part)
if (isNaN(offset)) {
throw new Error(
`Line ${this.lineNum} has a subposition on column ${i} but ` +
`the offset is not a number. Line contents: ${line}`,
)
}
nums.push(prevCostForSubposition + offset)
}
} else {
const asNum = parseInt(part)
if (isNaN(asNum)) {
return false
}
nums.push(asNum)
}
}
if (nums.length == 0) {
@ -506,6 +618,7 @@ class CallgrindParser {
}
}
this.prevCostLineNumbers = nums
return true
}
}