mirror of
https://github.com/jlfwong/speedscope.git
synced 2024-11-22 12:53:23 +03:00
Callgrind: Subposition compression and weight correction (#423)
This fixes a number of bugs with callgrind import. Dealing with this file format is a big pain because the documentation on https://www.valgrind.org/docs/manual/cl-format.html doesn't contain enough examples to disambiguate some of the behaviour, and because there's a fundamental impedance mismatch between call-trees and call-graphs. In any case, after this PR, the behavior of callgrind file import is much better. The file provided in #414 now imports correctly and, as far as I can tell, displays the same weights as what I see in KCacheGrind. Some of the key changes: - Implementing subposition compression. This was just a TODO in the code that was never implemented - Fixing a misinterpretation of how `fe` and `fi` were intended to be used. Previously, I was using it to change the filename of a symbol, meaning that an `fi` or an `fe` line in the middle of a block describing costs for an `fn` would split a node in the call-graph into multiple nodes causing all manners of problems - Fixing a bug where `cfn` was persisting beyond a single call, also resulting in call graph nodes being split when they shouldn't be Fixes #414
This commit is contained in:
parent
8da9088ec1
commit
f23f65b3af
30
sample/profiles/callgrind/callgrind.cfn-reset.log
Normal file
30
sample/profiles/callgrind/callgrind.cfn-reset.log
Normal file
@ -0,0 +1,30 @@
|
||||
# callgrind format
|
||||
events: Instructions
|
||||
|
||||
fl=alpha.c
|
||||
fn=alpha
|
||||
1 10
|
||||
cfl=beta.c
|
||||
cfn=beta
|
||||
calls=1 1
|
||||
1 10
|
||||
cfn=gamma
|
||||
calls=1 1
|
||||
1 10
|
||||
cfn=delta
|
||||
calls=1 1
|
||||
1 20
|
||||
|
||||
fn=delta
|
||||
1 10
|
||||
cfn=gamma
|
||||
calls=1 1
|
||||
1 10
|
||||
|
||||
fn=gamma
|
||||
1 10
|
||||
cfl=
|
||||
|
||||
fl=beta.c
|
||||
fn=beta
|
||||
1 10
|
@ -18,7 +18,7 @@ calls=3 51
|
||||
16 400 3000
|
||||
|
||||
fn=(2)
|
||||
51 100 4000
|
||||
51 100 15000
|
||||
cfi=(2)
|
||||
cfn=(3)
|
||||
calls=2 20
|
||||
@ -26,4 +26,4 @@ calls=2 20
|
||||
|
||||
fl=(2)
|
||||
fn=(3)
|
||||
20 700 6000
|
||||
20 700 8000
|
@ -0,0 +1,24 @@
|
||||
# callgrind format
|
||||
events: Instructions
|
||||
|
||||
fl=file1.c
|
||||
fn=main
|
||||
16 20
|
||||
cfn=func1
|
||||
calls=1 50
|
||||
* 400
|
||||
cfi=file2.c
|
||||
cfn=func2
|
||||
calls=3 20
|
||||
* *
|
||||
|
||||
fn=func1
|
||||
+35 -300
|
||||
cfi=file2.c
|
||||
cfn=func2
|
||||
calls=2 20
|
||||
* 300
|
||||
|
||||
fl=file2.c
|
||||
fn=func2
|
||||
-31 +400
|
@ -41,6 +41,61 @@ Object {
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`importFromCallgrind cfn reset 1`] = `
|
||||
Object {
|
||||
"frames": Array [
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "alpha.c",
|
||||
"key": "alpha.c:alpha",
|
||||
"line": undefined,
|
||||
"name": "alpha",
|
||||
"selfWeight": 10,
|
||||
"totalWeight": 50,
|
||||
},
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "beta.c",
|
||||
"key": "beta.c:beta",
|
||||
"line": undefined,
|
||||
"name": "beta",
|
||||
"selfWeight": 10,
|
||||
"totalWeight": 10,
|
||||
},
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "alpha.c",
|
||||
"key": "alpha.c:gamma",
|
||||
"line": undefined,
|
||||
"name": "gamma",
|
||||
"selfWeight": 20,
|
||||
"totalWeight": 20,
|
||||
},
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "alpha.c",
|
||||
"key": "alpha.c:delta",
|
||||
"line": undefined,
|
||||
"name": "delta",
|
||||
"selfWeight": 10,
|
||||
"totalWeight": 20,
|
||||
},
|
||||
],
|
||||
"name": "callgrind.cfn-reset.log -- Instructions",
|
||||
"stacks": Array [
|
||||
"alpha;beta 10",
|
||||
"alpha;gamma 10",
|
||||
"alpha;delta;gamma 10",
|
||||
"alpha;delta 10",
|
||||
"alpha 10",
|
||||
],
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`importFromCallgrind cfn reset: indexToView 1`] = `0`;
|
||||
|
||||
exports[`importFromCallgrind cfn reset: profileGroup.name 1`] = `"callgrind.cfn-reset.log"`;
|
||||
|
||||
exports[`importFromCallgrind multiple event types 1`] = `
|
||||
Object {
|
||||
"frames": Array [
|
||||
@ -100,7 +155,7 @@ Object {
|
||||
"key": "file1.c:func1",
|
||||
"line": undefined,
|
||||
"name": "func1",
|
||||
"selfWeight": 8888.888888888889,
|
||||
"selfWeight": 15000,
|
||||
"totalWeight": 20000,
|
||||
},
|
||||
Frame {
|
||||
@ -109,14 +164,14 @@ Object {
|
||||
"key": "file2.c:func2",
|
||||
"line": undefined,
|
||||
"name": "func2",
|
||||
"selfWeight": 14111.111111111111,
|
||||
"totalWeight": 14111.111111111111,
|
||||
"selfWeight": 8000,
|
||||
"totalWeight": 8000,
|
||||
},
|
||||
],
|
||||
"name": "callgrind.multiple-event-types.log -- Memory",
|
||||
"stacks": Array [
|
||||
"main;func1;func2 10.85 KB",
|
||||
"main;func1 8.68 KB",
|
||||
"main;func1;func2 4.88 KB",
|
||||
"main;func1 14.65 KB",
|
||||
"main;func2 2.93 KB",
|
||||
"main 14.65 KB",
|
||||
],
|
||||
@ -172,6 +227,51 @@ exports[`importFromCallgrind name compression: indexToView 1`] = `0`;
|
||||
|
||||
exports[`importFromCallgrind name compression: profileGroup.name 1`] = `"callgrind.name-compression.log"`;
|
||||
|
||||
exports[`importFromCallgrind subposition compression 1`] = `
|
||||
Object {
|
||||
"frames": Array [
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "file1.c",
|
||||
"key": "file1.c:main",
|
||||
"line": undefined,
|
||||
"name": "main",
|
||||
"selfWeight": 20,
|
||||
"totalWeight": 820,
|
||||
},
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "file1.c",
|
||||
"key": "file1.c:func1",
|
||||
"line": undefined,
|
||||
"name": "func1",
|
||||
"selfWeight": 100,
|
||||
"totalWeight": 400,
|
||||
},
|
||||
Frame {
|
||||
"col": undefined,
|
||||
"file": "file2.c",
|
||||
"key": "file2.c:func2",
|
||||
"line": undefined,
|
||||
"name": "func2",
|
||||
"selfWeight": 700,
|
||||
"totalWeight": 700,
|
||||
},
|
||||
],
|
||||
"name": "callgrind.subposition-compression.log -- Instructions",
|
||||
"stacks": Array [
|
||||
"main;func1;func2 300",
|
||||
"main;func1 100",
|
||||
"main;func2 400",
|
||||
"main 20",
|
||||
],
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`importFromCallgrind subposition compression: indexToView 1`] = `0`;
|
||||
|
||||
exports[`importFromCallgrind subposition compression: profileGroup.name 1`] = `"callgrind.subposition-compression.log"`;
|
||||
|
||||
exports[`importFromCallgrind: indexToView 1`] = `0`;
|
||||
|
||||
exports[`importFromCallgrind: profileGroup.name 1`] = `"callgrind.example.log"`;
|
||||
|
@ -11,3 +11,11 @@ test('importFromCallgrind name compression', async () => {
|
||||
test('importFromCallgrind multiple event types', async () => {
|
||||
await checkProfileSnapshot('./sample/profiles/callgrind/callgrind.multiple-event-types.log')
|
||||
})
|
||||
|
||||
test('importFromCallgrind subposition compression', async () => {
|
||||
await checkProfileSnapshot('./sample/profiles/callgrind/callgrind.subposition-compression.log')
|
||||
})
|
||||
|
||||
test('importFromCallgrind cfn reset', async () => {
|
||||
await checkProfileSnapshot('./sample/profiles/callgrind/callgrind.cfn-reset.log')
|
||||
})
|
||||
|
@ -129,26 +129,6 @@ class CallGraph {
|
||||
}
|
||||
|
||||
toProfile(): Profile {
|
||||
// To convert a call graph into a profile, we first need to identify what
|
||||
// the "root weights" are. "root weights" are the total weight of each frame
|
||||
// while at the bottom of the call-stack. The majority of functions will have
|
||||
// zero weight while at the bottom of the call-stack, since most functions
|
||||
// are never at the bottom of the call-stack.
|
||||
const rootWeights = new Map<Frame, number>()
|
||||
for (let [frame, totalWeight] of this.totalWeights) {
|
||||
rootWeights.set(frame, totalWeight)
|
||||
}
|
||||
for (let [_, childMap] of this.childrenTotalWeights) {
|
||||
for (let [child, weight] of childMap) {
|
||||
rootWeights.set(child, getOrElse(rootWeights, child, () => weight) - weight)
|
||||
}
|
||||
}
|
||||
|
||||
let totalProfileWeight = 0
|
||||
for (let [_, rootWeight] of rootWeights) {
|
||||
totalProfileWeight += rootWeight
|
||||
}
|
||||
|
||||
const profile = new CallTreeProfileBuilder()
|
||||
|
||||
let unitMultiplier = 1
|
||||
@ -170,7 +150,12 @@ class CallGraph {
|
||||
|
||||
const currentStack = new Set<Frame>()
|
||||
|
||||
const visit = (frame: Frame, callTreeWeight: number) => {
|
||||
let maxWeight = 0
|
||||
for (let [_, totalWeight] of this.totalWeights) {
|
||||
maxWeight = Math.max(maxWeight, totalWeight)
|
||||
}
|
||||
|
||||
const visit = (frame: Frame, subtreeTotalWeight: number) => {
|
||||
if (currentStack.has(frame)) {
|
||||
// Call-graphs are allowed to have cycles. Call-trees are not. In case
|
||||
// we run into a cycle, we'll just avoid recursing into the same subtree
|
||||
@ -198,7 +183,7 @@ class CallGraph {
|
||||
// See the comment at the top of the file for an example where this
|
||||
// assumption can yield especially misleading results.
|
||||
|
||||
if (callTreeWeight < 1e-4 * totalProfileWeight) {
|
||||
if (subtreeTotalWeight < 1e-4 * maxWeight) {
|
||||
// This assumption about even distribution can cause us to generate a
|
||||
// call tree with dramatically more nodes than the call graph.
|
||||
//
|
||||
@ -220,45 +205,113 @@ class CallGraph {
|
||||
// 1000*250=250,000 nodes in the resulting call graph.
|
||||
//
|
||||
// To mitigate this explosion of the # of nodes, we ignore subtrees
|
||||
// whose weights are less than 0.01% of the total weight of the profile.
|
||||
// whose weights are less than 0.01% of the heaviest node in the call
|
||||
// graph.
|
||||
return
|
||||
}
|
||||
|
||||
// totalWeightForFrame is the total weight for the given frame in the
|
||||
// entire call graph.
|
||||
const callGraphWeightForFrame = getOrElse(this.totalWeights, frame, () => 0)
|
||||
if (callGraphWeightForFrame === 0) {
|
||||
const totalWeightForFrameInCallgraph = getOrElse(this.totalWeights, frame, () => 0)
|
||||
if (totalWeightForFrameInCallgraph === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
// This is the portion of the total time the given child spends within the
|
||||
// given parent that we'll attribute to this specific path in the call
|
||||
// tree.
|
||||
const ratio = callTreeWeight / callGraphWeightForFrame
|
||||
let selfWeightForNodeInCallTree = subtreeTotalWeight
|
||||
|
||||
let selfWeightForFrame = callGraphWeightForFrame
|
||||
|
||||
profile.enterFrame(frame, totalCumulative * unitMultiplier)
|
||||
profile.enterFrame(frame, Math.round(totalCumulative * unitMultiplier))
|
||||
|
||||
currentStack.add(frame)
|
||||
for (let [child, callGraphEdgeWeight] of this.childrenTotalWeights.get(frame) || []) {
|
||||
selfWeightForFrame -= callGraphEdgeWeight
|
||||
const childCallTreeWeight = callGraphEdgeWeight * ratio
|
||||
for (let [child, totalWeightAsChild] of this.childrenTotalWeights.get(frame) || []) {
|
||||
// To determine the weight of the child in the call tree, we look at the
|
||||
// weight of the child in the call graph relative to its parent.
|
||||
const childCallTreeWeight =
|
||||
subtreeTotalWeight * (totalWeightAsChild / totalWeightForFrameInCallgraph)
|
||||
|
||||
let prevTotalCumulative = totalCumulative
|
||||
visit(child, childCallTreeWeight)
|
||||
|
||||
// Even though we tried to add a child with total weight equal to
|
||||
// childCallTreeWeight, we might have failed for a variety of data
|
||||
// consistency reasons, or due to cycles.
|
||||
//
|
||||
// We want to avoid losing weight in the call tree by subtracting from
|
||||
// the self weight on the assumption it was added to the subtree, so we
|
||||
// only subtree from the self weight the amount that was *actually* used
|
||||
// by the subtree, rather than the amount we *intended* for it to use.
|
||||
const actualChildCallTreeWeight = totalCumulative - prevTotalCumulative
|
||||
selfWeightForNodeInCallTree -= actualChildCallTreeWeight
|
||||
}
|
||||
currentStack.delete(frame)
|
||||
|
||||
totalCumulative += selfWeightForFrame * ratio
|
||||
profile.leaveFrame(frame, totalCumulative * unitMultiplier)
|
||||
totalCumulative += selfWeightForNodeInCallTree
|
||||
profile.leaveFrame(frame, Math.round(totalCumulative * unitMultiplier))
|
||||
}
|
||||
|
||||
for (let [rootFrame, rootWeight] of rootWeights) {
|
||||
if (rootWeight <= 0) {
|
||||
continue
|
||||
// It's surprisingly hard to figure out which nodes in the call graph
|
||||
// constitute the root nodes of call trees.
|
||||
//
|
||||
// Here are a few intuitive options, and reasons why they're not always
|
||||
// correct or good.
|
||||
//
|
||||
// ## 1. Find nodes in the call graph that have no callers
|
||||
//
|
||||
// This is probably right 99% of the time in practice, but since the
|
||||
// callgrind is totally general, it's totally valid to have a file
|
||||
// representing a profile for the following code:
|
||||
//
|
||||
// function a() {
|
||||
// b()
|
||||
// }
|
||||
// function b() {
|
||||
// }
|
||||
// a()
|
||||
// b()
|
||||
//
|
||||
// In this case, even though b has a caller, some of the real calltree for
|
||||
// an execution trace of the program will have b on the top of the stack.
|
||||
//
|
||||
// ## 2. Find nodes in the call graph that still have weight if you
|
||||
// subtract all of the weight caused by callers.
|
||||
//
|
||||
// The callgraph format, in theory, provides inclusive times for every
|
||||
// function call. That means if you have a function `alpha` with a total
|
||||
// weight of 20, and its only in-edge in the call-graph has weight of 10,
|
||||
// that should indicate that `alpha` exists both as the root-node of a
|
||||
// calltree, and as a node in some other call-tree.
|
||||
//
|
||||
// In theory, you should be able to figure out the weight of it as a root
|
||||
// node by subtracting the weights of all the in-edges. In practice, real
|
||||
// callgrind files are inconsistent in how they do accounting for in-edges
|
||||
// where you end up in weird situations where the weight of in-edges
|
||||
// *exceeds* the weight of nodes (where the weight of a node is its
|
||||
// self-weight plus the weight of all its out-edges).
|
||||
//
|
||||
// ## 3. Find the heaviest node in the call graph, build its call-tree, and
|
||||
// decrease the weights of other nodes in the call graph while you
|
||||
// build the call tree. After you've done this, repeat with the new
|
||||
// heaviest.
|
||||
//
|
||||
// I think this version is probably fully correct, but the performance is
|
||||
// awful. The naive-version is O(n^2) because you have to re-determine which
|
||||
// node is the heaviest after each time you finish building a call-tree. You
|
||||
// can't just sort, because the relative ordering also changes with the
|
||||
// construction of each call tree.
|
||||
//
|
||||
// There's probably a clever solution here which puts all of the nodes into
|
||||
// a min-heap and then deletes and re-inserts nodes as their weights change,
|
||||
// but reasoning about the performance of that is a big pain in the butt.
|
||||
//
|
||||
// Despite not always being correct, I'm opting for option (1).
|
||||
|
||||
const rootNodes = new Set<Frame>(this.frameSet)
|
||||
|
||||
for (let [_, childMap] of this.childrenTotalWeights) {
|
||||
for (let [child, _] of childMap) {
|
||||
rootNodes.delete(child)
|
||||
}
|
||||
// If we've reached here, it means that the given root frame has some
|
||||
// weight while at the top of the call-stack.
|
||||
visit(rootFrame, rootWeight)
|
||||
}
|
||||
|
||||
for (let rootNode of rootNodes) {
|
||||
visit(rootNode, this.totalWeights.get(rootNode)!)
|
||||
}
|
||||
|
||||
return profile.build()
|
||||
@ -344,7 +397,7 @@ class CallgrindParser {
|
||||
}
|
||||
|
||||
private calleeFrameInfo(): FrameInfo {
|
||||
const file = this.calleeFilename || '(unknown)'
|
||||
const file = this.calleeFilename || this.filename || '(unknown)'
|
||||
const name = this.calleeFunctionName || '(unknown)'
|
||||
const key = `${file}:${name}`
|
||||
return {key, name, file}
|
||||
@ -384,10 +437,20 @@ class CallgrindParser {
|
||||
|
||||
switch (key) {
|
||||
case 'fe':
|
||||
case 'fi':
|
||||
case 'fi': {
|
||||
// fe/fi are used to indicate the source-file of a function definition
|
||||
// changed mid-definition. This is for inlined code, but doesn't
|
||||
// indicate that we've actually switched to referring to a different
|
||||
// function, so we mostly ignore it.
|
||||
//
|
||||
// We still need to do the parseNameWithCompression call in case a name
|
||||
// is defined and then referenced later on for name compression.
|
||||
this.parseNameWithCompression(value, this.savedFileNames)
|
||||
break
|
||||
}
|
||||
|
||||
case 'fl': {
|
||||
this.filename = this.parseNameWithCompression(value, this.savedFileNames)
|
||||
this.calleeFilename = this.filename
|
||||
break
|
||||
}
|
||||
|
||||
@ -398,6 +461,8 @@ class CallgrindParser {
|
||||
|
||||
case 'cfi':
|
||||
case 'cfl': {
|
||||
// NOTE: unlike the fe/fi distinction described above, cfi and cfl are
|
||||
// interchangeable.
|
||||
this.calleeFilename = this.parseNameWithCompression(value, this.savedFileNames)
|
||||
break
|
||||
}
|
||||
@ -413,6 +478,22 @@ class CallgrindParser {
|
||||
// since it'll just be copying the exact same frame over-and-over again,
|
||||
// but that might be better than ignoring it.
|
||||
this.parseCostLine(this.lines[this.lineNum++], 'child')
|
||||
|
||||
// This isn't specified anywhere in the spec, but empirically the and
|
||||
// "cfn" scope should only persist for a single "call".
|
||||
//
|
||||
// This seems to be what KCacheGrind does too:
|
||||
//
|
||||
// https://github.com/KDE/kcachegrind/blob/ea4314db2785cb8f279fe884ee7f82445642b692/libcore/cachegrindloader.cpp#L1259
|
||||
this.calleeFilename = null
|
||||
this.calleeFunctionName = null
|
||||
break
|
||||
}
|
||||
|
||||
case 'cob':
|
||||
case 'ob': {
|
||||
// We ignore these for now. They're valid lines, but we don't capture or
|
||||
// display information about them.
|
||||
break
|
||||
}
|
||||
|
||||
@ -458,21 +539,52 @@ class CallgrindParser {
|
||||
return name
|
||||
}
|
||||
|
||||
private prevCostLineNumbers: number[] = []
|
||||
|
||||
private parseCostLine(line: string, costType: 'self' | 'child'): boolean {
|
||||
// TODO(jlfwong): Handle "Subposition compression"
|
||||
// TODO(jlfwong): Allow hexadecimal encoding
|
||||
|
||||
const parts = line.split(/\s+/)
|
||||
const nums: number[] = []
|
||||
for (let part of parts) {
|
||||
// As far as I can tell from the specification, the callgrind format does
|
||||
// not accept floating point numbers.
|
||||
const asNum = parseInt(part)
|
||||
if (isNaN(asNum)) {
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
const part = parts[i]
|
||||
|
||||
if (part.length === 0) {
|
||||
return false
|
||||
}
|
||||
|
||||
nums.push(asNum)
|
||||
if (part === '*' || part[0] === '-' || part[1] === '+') {
|
||||
// This handles "Subposition compression"
|
||||
// See: https://valgrind.org/docs/manual/cl-format.html#cl-format.overview.compression2
|
||||
if (this.prevCostLineNumbers.length <= i) {
|
||||
throw new Error(
|
||||
`Line ${this.lineNum} has a subposition on column ${i} but ` +
|
||||
`previous cost line has only ${this.prevCostLineNumbers.length} ` +
|
||||
`columns. Line contents: ${line}`,
|
||||
)
|
||||
}
|
||||
const prevCostForSubposition = this.prevCostLineNumbers[i]
|
||||
if (part === '*') {
|
||||
nums.push(prevCostForSubposition)
|
||||
} else {
|
||||
// This handles both the '-' and '+' cases
|
||||
const offset = parseInt(part)
|
||||
if (isNaN(offset)) {
|
||||
throw new Error(
|
||||
`Line ${this.lineNum} has a subposition on column ${i} but ` +
|
||||
`the offset is not a number. Line contents: ${line}`,
|
||||
)
|
||||
}
|
||||
nums.push(prevCostForSubposition + offset)
|
||||
}
|
||||
} else {
|
||||
const asNum = parseInt(part)
|
||||
if (isNaN(asNum)) {
|
||||
return false
|
||||
}
|
||||
nums.push(asNum)
|
||||
}
|
||||
}
|
||||
|
||||
if (nums.length == 0) {
|
||||
@ -506,6 +618,7 @@ class CallgrindParser {
|
||||
}
|
||||
}
|
||||
|
||||
this.prevCostLineNumbers = nums
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user