fix: show per-dimension alert descriptions when multiple dimensions fail

joostboon · claude · joostboon · commit 57ccbb04ab51 · 2026-03-13T09:07:59.000+02:00
When a dimension_anomalies (or volume_anomalies with dimensions) test
detects failures across multiple dimension values, the alert description
previously only showed the last row's description. Now it shows each
failing dimension's details individually (up to 5), or a count summary
with a sample of dimension values when more than 5 dimensions fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
@@ -315,3 +315,76 @@ def test_anomaly_in_detection_period(
     )
 
     assert test_result["status"] == expected_status
+
+
+def test_dimension_anomalies_alert_description_few_failures(
+    test_id: str, dbt_project: DbtProject
+):
+    """When ≤5 dimension values fail, description shows each one's anomaly details."""
+    utc_today = datetime.utcnow().date()
+    test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1))
+
+    # 3 dimension values: 10 rows each on test_date vs. 1 row each per training date
+    anomalous_dimensions = ["Batman", "Superman", "Spiderman"]
+
+    data: List[Dict[str, Any]] = [
+        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero}
+        for hero in anomalous_dimensions
+        for _ in range(10)
+    ]
+    data += [
+        {TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT), "superhero": hero}
+        for cur_date in training_dates
+        for hero in anomalous_dimensions
+    ]
+
+    test_args = {
+        "timestamp_column": TIMESTAMP_COLUMN,
+        "dimensions": ["superhero"],
+        "sensitivity": 2,
+    }
+    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
+    assert test_result["status"] == "fail"
+
+    description = test_result["test_results_description"]
+    # Every failing dimension value must be named in the description
+    for hero in anomalous_dimensions:
+        assert hero in description, f"Expected '{hero}' in description: {description}"
+    # Only 3 values fail (limit is 5), so the count-summary phrase must be absent
+    assert "dimension values are anomalous" not in description
+
+
+def test_dimension_anomalies_alert_description_many_failures(
+    test_id: str, dbt_project: DbtProject
+):
+    """When >5 dimension values fail, description shows a count summary."""
+    utc_today = datetime.utcnow().date()
+    test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1))
+
+    # 6 dimension values spike on test_date, one more than the macro's limit of 5
+    anomalous_dimensions = ["Batman", "Superman", "Spiderman", "IronMan", "Thor", "Hulk"]
+
+    data: List[Dict[str, Any]] = [
+        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero}
+        for hero in anomalous_dimensions
+        for _ in range(10)
+    ]
+    data += [
+        {TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT), "superhero": hero}
+        for cur_date in training_dates
+        for hero in anomalous_dimensions
+    ]
+
+    test_args = {
+        "timestamp_column": TIMESTAMP_COLUMN,
+        "dimensions": ["superhero"],
+        "sensitivity": 2,
+    }
+    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
+    assert test_result["status"] == "fail"
+
+    description = test_result["test_results_description"]
+    # Above the limit the description must contain the count-summary phrase
+    assert "dimension values are anomalous" in description, (
+        f"Expected summary message in description: {description}"
+    )
diff --git a/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql b/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql
@@ -74,23 +74,37 @@
           and upper(column_name) = upper({{ elementary.const_as_string(column_name) }})
         {%- endif %}
     {%- endset -%}
-    {% set test_results_description %}
-      {% if rows_with_score %}
-          {{ elementary.insensitive_get_dict_value(rows_with_score[-1], 'anomaly_description') }}
-      {% else %}
-          Not enough data to calculate anomaly score.
-      {% endif %}
-    {% endset %}
     {% set failures = namespace(data=0) %}
     {% set filtered_anomaly_scores_rows = [] %}
+    {% set anomalous_rows = [] %}
     {% for row in anomaly_scores_rows %}
         {% if row.anomaly_score is not none %}
             {% do filtered_anomaly_scores_rows.append(row) %}
             {% if row.is_anomalous %}
                 {% set failures.data = failures.data + 1 %}
+                {% do anomalous_rows.append(row) %}
             {% endif %}
         {% endif %}
     {% endfor %}
+    {%- set max_dimension_alerts = 5 -%}
+    {% set test_results_description %}
+      {%- if rows_with_score -%}
+        {%- set sample_row = rows_with_score[-1] -%}
+        {%- set row_dimension = elementary.insensitive_get_dict_value(sample_row, "dimension") -%}
+        {%- if row_dimension is not none and anomalous_rows | length > 0 -%}
+          {%- if anomalous_rows | length > max_dimension_alerts -%}
+            {%- set remaining = (anomalous_rows | length) - max_dimension_alerts -%}
+            {{ anomalous_rows | length }} dimension values are anomalous. Showing first {{ max_dimension_alerts }}: {% for row in anomalous_rows[:max_dimension_alerts] %}{{ elementary.insensitive_get_dict_value(row, "dimension_value") }}{% if not loop.last %}, {% endif %}{% endfor %}, and {{ remaining }} more.
+          {%- else -%}
+            {% for row in anomalous_rows %}{{ elementary.insensitive_get_dict_value(row, "anomaly_description") }}{% if not loop.last %} | {% endif %}{% endfor %}
+          {%- endif -%}
+        {%- else -%}
+          {{ elementary.insensitive_get_dict_value(rows_with_score[-1], "anomaly_description") }}
+        {%- endif -%}
+      {%- else -%}
+        Not enough data to calculate anomaly score.
+      {%- endif -%}
+    {% endset %}
     {% set test_result_dict = {
         "id": elementary.insensitive_get_dict_value(latest_row, "id"),
         "data_issue_id": elementary.insensitive_get_dict_value(