Skip to content

Commit 6a69930

Browse files
Rule 7: Replace arbitrary spill write-count threshold with time-based analysis
Spill severity is now based on the percentage of statement elapsed time that the spilling operator accounts for (Warning >= 10%, Critical >= 50%). Statement time comes from QueryTimeStats.ElapsedTimeMs. Operator-own time is calculated by subtracting the dominant (largest) child's elapsed time in row mode, or by using the operator's elapsed time directly in batch mode. When a child is a Parallelism exchange operator, its own elapsed time is ignored and the largest elapsed time among its children is subtracted instead, due to known timing bugs in exchange operators. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 85f320f commit 6a69930

2 files changed

Lines changed: 92 additions & 6 deletions

File tree

Dashboard/Services/PlanAnalyzer.cs

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,26 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt)
272272
});
273273
}
274274

275-
// Rule 7: Spill detection — promote severity for large spills
275+
// Rule 7: Spill detection — calculate operator time and set severity
276+
// based on what percentage of statement elapsed time the spill accounts for
276277
foreach (var w in node.Warnings.ToList())
277278
{
278-
if (w.SpillDetails != null && w.SpillDetails.WritesToTempDb > 1000)
279-
w.Severity = PlanWarningSeverity.Critical;
279+
if (w.SpillDetails != null && node.ActualElapsedMs > 0)
280+
{
281+
var operatorMs = GetOperatorOwnElapsedMs(node);
282+
var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0;
283+
284+
if (stmtMs > 0)
285+
{
286+
var pct = (double)operatorMs / stmtMs;
287+
w.Message += $" Operator time: {operatorMs:N0}ms ({pct:P0} of statement).";
288+
289+
if (pct >= 0.5)
290+
w.Severity = PlanWarningSeverity.Critical;
291+
else if (pct >= 0.1)
292+
w.Severity = PlanWarningSeverity.Warning;
293+
}
294+
}
280295
}
281296

282297
// Rule 8: Parallel thread skew (actual plans with per-thread stats)
@@ -626,6 +641,34 @@ private static void FindMemoryConsumers(PlanNode node, List<string> consumers)
626641
FindMemoryConsumers(child, consumers);
627642
}
628643

644+
/// <summary>
/// Estimates how much elapsed time is attributable to <paramref name="node"/> itself.
/// A node whose execution mode is "Batch" reports its elapsed time unchanged.
/// Otherwise the largest child elapsed time is subtracted from the node's elapsed
/// time, on the basis that row-mode timings are cumulative and include children.
/// A "Parallelism" child that has children of its own is looked through: the largest
/// elapsed time among its children is used instead of its own value, because
/// exchange operator timings are treated as unreliable.
/// </summary>
/// <param name="node">The plan operator to measure.</param>
/// <returns>The operator's own elapsed milliseconds, never less than zero.</returns>
private static long GetOperatorOwnElapsedMs(PlanNode node)
{
    // Batch-mode timing is used as-is; there is nothing to subtract.
    if (node.ActualExecutionMode == "Batch")
    {
        return node.ActualElapsedMs;
    }

    // Row mode: find the dominant child's elapsed time.
    long dominantChildMs = 0L;
    foreach (var input in node.Children)
    {
        // Look through an exchange operator to its inputs when it has any.
        var isExchangeWithInputs = input.PhysicalOp == "Parallelism" && input.Children.Count > 0;

        long effectiveMs = isExchangeWithInputs
            ? input.Children.Max(grandchild => grandchild.ActualElapsedMs)
            : input.ActualElapsedMs;

        dominantChildMs = Math.Max(dominantChildMs, effectiveMs);
    }

    // Clamp at zero in case the dominant child reports more time than the node.
    long ownMs = node.ActualElapsedMs - dominantChildMs;
    return ownMs > 0 ? ownMs : 0;
}
671+
629672
private static string Truncate(string value, int maxLength)
630673
{
631674
return value.Length <= maxLength ? value : value[..maxLength] + "...";

Lite/Services/PlanAnalyzer.cs

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,26 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt)
272272
});
273273
}
274274

275-
// Rule 7: Spill detection — promote severity for large spills
275+
// Rule 7: Spill detection — calculate operator time and set severity
276+
// based on what percentage of statement elapsed time the spill accounts for
276277
foreach (var w in node.Warnings.ToList())
277278
{
278-
if (w.SpillDetails != null && w.SpillDetails.WritesToTempDb > 1000)
279-
w.Severity = PlanWarningSeverity.Critical;
279+
if (w.SpillDetails != null && node.ActualElapsedMs > 0)
280+
{
281+
var operatorMs = GetOperatorOwnElapsedMs(node);
282+
var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0;
283+
284+
if (stmtMs > 0)
285+
{
286+
var pct = (double)operatorMs / stmtMs;
287+
w.Message += $" Operator time: {operatorMs:N0}ms ({pct:P0} of statement).";
288+
289+
if (pct >= 0.5)
290+
w.Severity = PlanWarningSeverity.Critical;
291+
else if (pct >= 0.1)
292+
w.Severity = PlanWarningSeverity.Warning;
293+
}
294+
}
280295
}
281296

282297
// Rule 8: Parallel thread skew (actual plans with per-thread stats)
@@ -626,6 +641,34 @@ private static void FindMemoryConsumers(PlanNode node, List<string> consumers)
626641
FindMemoryConsumers(child, consumers);
627642
}
628643

644+
/// <summary>
/// Estimates how much elapsed time is attributable to <paramref name="node"/> itself.
/// A node whose execution mode is "Batch" reports its elapsed time unchanged.
/// Otherwise the largest child elapsed time is subtracted from the node's elapsed
/// time, on the basis that row-mode timings are cumulative and include children.
/// A "Parallelism" child that has children of its own is looked through: the largest
/// elapsed time among its children is used instead of its own value, because
/// exchange operator timings are treated as unreliable.
/// </summary>
/// <param name="node">The plan operator to measure.</param>
/// <returns>The operator's own elapsed milliseconds, never less than zero.</returns>
private static long GetOperatorOwnElapsedMs(PlanNode node)
{
    // Batch-mode timing is used as-is; there is nothing to subtract.
    if (node.ActualExecutionMode == "Batch")
    {
        return node.ActualElapsedMs;
    }

    // Row mode: find the dominant child's elapsed time.
    long dominantChildMs = 0L;
    foreach (var input in node.Children)
    {
        // Look through an exchange operator to its inputs when it has any.
        var isExchangeWithInputs = input.PhysicalOp == "Parallelism" && input.Children.Count > 0;

        long effectiveMs = isExchangeWithInputs
            ? input.Children.Max(grandchild => grandchild.ActualElapsedMs)
            : input.ActualElapsedMs;

        dominantChildMs = Math.Max(dominantChildMs, effectiveMs);
    }

    // Clamp at zero in case the dominant child reports more time than the node.
    long ownMs = node.ActualElapsedMs - dominantChildMs;
    return ownMs > 0 ? ownMs : 0;
}
671+
629672
private static string Truncate(string value, int maxLength)
630673
{
631674
return value.Length <= maxLength ? value : value[..maxLength] + "...";

0 commit comments

Comments
 (0)