Skip to content

Commit 57ccbb0

Browse files
joostboon and claude committed
fix: show per-dimension alert descriptions when multiple dimensions fail
When a dimension_anomalies test (or a volume_anomalies test with dimensions) detects failures across multiple dimension values, the alert description previously showed only the last row's description. It now shows each failing dimension's details individually (up to 5), or, when more than 5 dimensions fail, a count summary with a sample of dimension values.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent f820e11 commit 57ccbb0

2 files changed

Lines changed: 94 additions & 7 deletions

File tree

integration_tests/tests/test_dimension_anomalies.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,76 @@ def test_anomaly_in_detection_period(
315315
)
316316

317317
assert test_result["status"] == expected_status
318+
319+
320+
def test_dimension_anomalies_alert_description_few_failures(
    test_id: str, dbt_project: DbtProject
):
    """Verify that a handful of failing dimension values are each named in the alert.

    Seeds three "superhero" values with one row per training date and ten rows
    on the test date, runs the dimension anomalies test, and expects:

    * the test status to be "fail";
    * every seeded hero name to appear in ``test_results_description``;
    * the "dimension values are anomalous" summary wording to be absent,
      since that wording is reserved for the >5 failures case.
    """
    # NOTE(review): generate_dates is assumed to return the base date first,
    # followed by the earlier training dates -- confirm against its definition.
    base_date = datetime.utcnow().date() - timedelta(1)
    test_date, *training_dates = generate_dates(base_date=base_date)

    heroes = ["Batman", "Superman", "Spiderman"]

    # Spike: ten rows per hero on the test date.
    test_day = test_date.strftime(DATE_FORMAT)
    data: List[Dict[str, Any]] = []
    for hero in heroes:
        for _ in range(10):
            data.append({TIMESTAMP_COLUMN: test_day, "superhero": hero})

    # Baseline: a single row per hero on each training date.
    for cur_date in training_dates:
        training_day = cur_date.strftime(DATE_FORMAT)
        for hero in heroes:
            data.append({TIMESTAMP_COLUMN: training_day, "superhero": hero})

    test_args = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "dimensions": ["superhero"],
        "sensitivity": 2,
    }
    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
    assert test_result["status"] == "fail"

    description = test_result["test_results_description"]
    # Per-dimension details must be present for every failing value.
    for hero in heroes:
        assert hero in description, f"Expected '{hero}' in description: {description}"
    # The count-summary wording must not be used for a small number of failures.
    assert "dimension values are anomalous" not in description
355+
356+
357+
def test_dimension_anomalies_alert_description_many_failures(
    test_id: str, dbt_project: DbtProject
):
    """Verify that more than five failing dimension values yield a count summary.

    Seeds six "superhero" values with one row per training date and ten rows
    on the test date, runs the dimension anomalies test, and expects the test
    status to be "fail" and ``test_results_description`` to contain the
    "dimension values are anomalous" summary wording.
    """
    # NOTE(review): generate_dates is assumed to return the base date first,
    # followed by the earlier training dates -- confirm against its definition.
    base_date = datetime.utcnow().date() - timedelta(1)
    test_date, *training_dates = generate_dates(base_date=base_date)

    # Six values: one more than the five-value limit for per-dimension details.
    heroes = ["Batman", "Superman", "Spiderman", "IronMan", "Thor", "Hulk"]

    # Spike: ten rows per hero on the test date.
    test_day = test_date.strftime(DATE_FORMAT)
    data: List[Dict[str, Any]] = []
    for hero in heroes:
        for _ in range(10):
            data.append({TIMESTAMP_COLUMN: test_day, "superhero": hero})

    # Baseline: a single row per hero on each training date.
    for cur_date in training_dates:
        training_day = cur_date.strftime(DATE_FORMAT)
        for hero in heroes:
            data.append({TIMESTAMP_COLUMN: training_day, "superhero": hero})

    test_args = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "dimensions": ["superhero"],
        "sensitivity": 2,
    }
    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
    assert test_result["status"] == "fail"

    description = test_result["test_results_description"]
    # With many failures the description collapses into a count summary.
    assert (
        "dimension values are anomalous" in description
    ), f"Expected summary message in description: {description}"

macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,23 +74,37 @@
7474
and upper(column_name) = upper({{ elementary.const_as_string(column_name) }})
7575
{%- endif %}
7676
{%- endset -%}
77-
{% set test_results_description %}
78-
{% if rows_with_score %}
79-
{{ elementary.insensitive_get_dict_value(rows_with_score[-1], 'anomaly_description') }}
80-
{% else %}
81-
Not enough data to calculate anomaly score.
82-
{% endif %}
83-
{% endset %}
8477
{% set failures = namespace(data=0) %}
8578
{% set filtered_anomaly_scores_rows = [] %}
79+
{% set anomalous_rows = [] %}
8680
{% for row in anomaly_scores_rows %}
8781
{% if row.anomaly_score is not none %}
8882
{% do filtered_anomaly_scores_rows.append(row) %}
8983
{% if row.is_anomalous %}
9084
{% set failures.data = failures.data + 1 %}
85+
{% do anomalous_rows.append(row) %}
9186
{% endif %}
9287
{% endif %}
9388
{% endfor %}
89+
{%- set max_dimension_alerts = 5 -%}
90+
{% set test_results_description %}
91+
{%- if rows_with_score -%}
92+
{%- set sample_row = rows_with_score[-1] -%}
93+
{%- set row_dimension = elementary.insensitive_get_dict_value(sample_row, "dimension") -%}
94+
{%- if row_dimension is not none and anomalous_rows | length > 0 -%}
95+
{%- if anomalous_rows | length > max_dimension_alerts -%}
96+
{%- set remaining = (anomalous_rows | length) - max_dimension_alerts -%}
97+
{{ anomalous_rows | length }} dimension values are anomalous. Showing first {{ max_dimension_alerts }}: {% for row in anomalous_rows[:max_dimension_alerts] %}{{ elementary.insensitive_get_dict_value(row, "dimension_value") }}{% if not loop.last %}, {% endif %}{% endfor %}, and {{ remaining }} more.
98+
{%- else -%}
99+
{% for row in anomalous_rows %}{{ elementary.insensitive_get_dict_value(row, "anomaly_description") }}{% if not loop.last %} | {% endif %}{% endfor %}
100+
{%- endif -%}
101+
{%- else -%}
102+
{{ elementary.insensitive_get_dict_value(rows_with_score[-1], "anomaly_description") }}
103+
{%- endif -%}
104+
{%- else -%}
105+
Not enough data to calculate anomaly score.
106+
{%- endif -%}
107+
{% endset %}
94108
{% set test_result_dict = {
95109
"id": elementary.insensitive_get_dict_value(latest_row, "id"),
96110
"data_issue_id": elementary.insensitive_get_dict_value(

0 commit comments

Comments
 (0)