Skip to content

Commit 9f13d9e

Browse files
Merge pull request #154 from erikdarlingdata/feature/fix-permission-denied
Handle permission denied errors gracefully in collector health (fixes #150)
2 parents 9c079d7 + aa93801 commit 9f13d9e

2 files changed

Lines changed: 19 additions & 6 deletions

File tree

Lite/Services/LocalDataService.CollectionHealth.cs

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,9 @@ public async Task<List<CollectorHealthRow>> GetCollectionHealthAsync(int serverI
3131
AVG(duration_ms) AS avg_duration_ms,
3232
MAX(CASE WHEN status = 'SUCCESS' THEN collection_time END) AS last_success_time,
3333
MAX(collection_time) AS last_run_time,
34-
MAX(CASE WHEN status = 'ERROR' THEN error_message END) AS last_error,
35-
MAX(CASE WHEN status = 'ERROR' THEN collection_time END) AS last_error_time
34+
MAX(CASE WHEN status IN ('ERROR', 'PERMISSIONS') THEN error_message END) AS last_error,
35+
MAX(CASE WHEN status IN ('ERROR', 'PERMISSIONS') THEN collection_time END) AS last_error_time,
36+
SUM(CASE WHEN status = 'PERMISSIONS' THEN 1 ELSE 0 END) AS permission_denied_count
3637
FROM collection_log
3738
WHERE server_id = $1
3839
AND collection_time >= $2
@@ -56,7 +57,8 @@ GROUP BY collector_name
5657
LastSuccessTime = reader.IsDBNull(5) ? null : reader.GetDateTime(5),
5758
LastRunTime = reader.IsDBNull(6) ? null : reader.GetDateTime(6),
5859
LastError = reader.IsDBNull(7) ? null : reader.GetString(7),
59-
LastErrorTime = reader.IsDBNull(8) ? null : reader.GetDateTime(8)
60+
LastErrorTime = reader.IsDBNull(8) ? null : reader.GetDateTime(8),
61+
PermissionDeniedCount = reader.IsDBNull(9) ? 0 : ToInt64(reader.GetValue(9))
6062
});
6163
}
6264

@@ -147,6 +149,7 @@ public class CollectorHealthRow
147149
public DateTime? LastRunTime { get; set; }
148150
public string? LastError { get; set; }
149151
public DateTime? LastErrorTime { get; set; }
152+
public long PermissionDeniedCount { get; set; }
150153

151154
public double FailureRatePercent => TotalRuns > 0 ? (double)ErrorCount / TotalRuns * 100 : 0;
152155
public double HoursSinceLastSuccess => LastSuccessTime.HasValue
@@ -158,6 +161,7 @@ public string HealthStatus
158161
get
159162
{
160163
if (TotalRuns == 0) return "NEVER_RUN";
164+
if (PermissionDeniedCount > 0 && ErrorCount == 0 && SuccessCount == 0) return "NO_PERMISSIONS";
161165
if (HoursSinceLastSuccess > 24) return "FAILING";
162166
if (HoursSinceLastSuccess > 4) return "STALE";
163167
if (FailureRatePercent > 20) return "WARNING";

Lite/Services/RemoteCollectorService.cs

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ public CollectorHealthSummary GetHealthSummary(int? serverId = null)
153153
/// <summary>
154154
/// Records a collector execution result for health tracking.
155155
/// </summary>
156-
private void RecordCollectorResult(int serverId, string collectorName, bool success, string? errorMessage = null)
156+
private void RecordCollectorResult(int serverId, string collectorName, string status, string? errorMessage = null)
157157
{
158158
lock (_healthLock)
159159
{
@@ -164,12 +164,20 @@ private void RecordCollectorResult(int serverId, string collectorName, bool succ
164164
_collectorHealth[key] = entry;
165165
}
166166

167-
if (success)
167+
if (status == "SUCCESS")
168168
{
169169
entry.LastSuccessTime = DateTime.UtcNow;
170170
entry.ConsecutiveErrors = 0;
171171
entry.TotalSuccesses++;
172172
}
173+
else if (status == "PERMISSIONS")
174+
{
175+
/* Permission errors are not transient — don't count as failures
176+
(which would show FAILING) but don't count as success either.
177+
Record the error message so the user can see what's wrong. */
178+
entry.LastErrorTime = DateTime.UtcNow;
179+
entry.LastErrorMessage = errorMessage;
180+
}
173181
else
174182
{
175183
entry.LastErrorTime = DateTime.UtcNow;
@@ -337,6 +345,7 @@ public async Task RunCollectorAsync(ServerConnection server, string collectorNam
337345
}
338346
else if (ex.Number == 229 || ex.Number == 297 || ex.Number == 300)
339347
{
348+
status = "PERMISSIONS";
340349
_logger?.LogWarning("Collector '{Collector}' permission denied for server '{Server}': {Message}",
341350
collectorName, server.DisplayName, ex.Message);
342351
}
@@ -369,7 +378,7 @@ public async Task RunCollectorAsync(ServerConnection server, string collectorNam
369378
}
370379

371380
// Track collector health
372-
RecordCollectorResult(GetServerId(server), collectorName, status == "SUCCESS", errorMessage);
381+
RecordCollectorResult(GetServerId(server), collectorName, status, errorMessage);
373382

374383
// Log the collection attempt
375384
await LogCollectionAsync(GetServerId(server), server.DisplayName, collectorName, startTime, status, errorMessage, rowsCollected, _lastSqlMs, _lastDuckDbMs);

0 commit comments

Comments
 (0)