Skip to content

Commit 03033ae

Browse files
Fix DuckDB IO errors from concurrent file access (#164)
Three fixes for "DuckDBOpen failed: Cannot open file" errors introduced by PR #159 (checkpoint) and PR #162 (compaction):

1. Timer initialization: changing DateTime.MinValue → DateTime.UtcNow prevents compaction/archival from firing on the very first collection cycle.
2. Inline checkpoint: moved CHECKPOINT to the end of RunDueCollectorsAsync, using the existing connection pool instead of opening a separate DuckDB instance that conflicts via OS file locks.
3. Atomic file swap: replaced the two-step File.Move in CompactAsync with File.Replace (single OS operation, no window where the database file is missing), plus retry logic for locked files and WAL cleanup.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e567b76 commit 03033ae

3 files changed

Lines changed: 47 additions & 12 deletions

File tree

Lite/Database/DuckDbInitializer.cs

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -613,14 +613,35 @@ public async Task<bool> CompactAsync()
613613
}
614614
}
615615

616-
/* Swap files: old → backup, compact → primary */
617-
if (File.Exists(backupPath)) File.Delete(backupPath);
618-
File.Move(_databasePath, backupPath);
619-
616+
/* Delete WAL files before swap — the old WAL belongs to the pre-compaction
617+
database and would confuse the fresh compacted file on next open */
620618
var walPath = _databasePath + ".wal";
621619
if (File.Exists(walPath)) File.Delete(walPath);
622620

623-
File.Move(tempPath, _databasePath);
621+
var tempWalPath = tempPath + ".wal";
622+
if (File.Exists(tempWalPath)) File.Delete(tempWalPath);
623+
624+
/* Atomically replace the database file with the compacted version.
625+
File.Replace swaps in a single OS operation, eliminating any window
626+
where _databasePath doesn't exist (unlike two separate File.Move calls).
627+
Retry briefly if a UI connection still has the file open. */
628+
if (File.Exists(backupPath)) File.Delete(backupPath);
629+
630+
const int maxSwapAttempts = 3;
631+
for (int attempt = 1; attempt <= maxSwapAttempts; attempt++)
632+
{
633+
try
634+
{
635+
File.Replace(tempPath, _databasePath, backupPath);
636+
break;
637+
}
638+
catch (IOException) when (attempt < maxSwapAttempts)
639+
{
640+
_logger?.LogDebug("Compaction file swap attempt {Attempt}/{Max} failed (file in use), retrying in 500ms",
641+
attempt, maxSwapAttempts);
642+
await Task.Delay(500);
643+
}
644+
}
624645

625646
/* Recreate indexes and views on the fresh database */
626647
using (var connection = CreateConnection())

Lite/Services/CollectionBackgroundService.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@ public class CollectionBackgroundService : BackgroundService
2929
private readonly ILogger<CollectionBackgroundService>? _logger;
3030

3131
private static readonly TimeSpan CollectionInterval = TimeSpan.FromMinutes(1);
32-
private DateTime _lastArchiveTime = DateTime.MinValue;
33-
private DateTime _lastRetentionTime = DateTime.MinValue;
34-
private DateTime _lastCompactionTime = DateTime.MinValue;
32+
/* Start at UtcNow so maintenance tasks don't all fire on the very first cycle.
33+
Archival runs after 1 hour, retention + compaction after 24 hours of uptime. */
34+
private DateTime _lastArchiveTime = DateTime.UtcNow;
35+
private DateTime _lastRetentionTime = DateTime.UtcNow;
36+
private DateTime _lastCompactionTime = DateTime.UtcNow;
3537

3638
/* Archive every hour, retention + compaction once per day */
3739
private static readonly TimeSpan ArchiveInterval = TimeSpan.FromHours(1);
@@ -93,10 +95,6 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
9395
IsCollecting = true;
9496
await _collectorService.RunDueCollectorsAsync(stoppingToken);
9597
LastCollectionTime = DateTime.UtcNow;
96-
97-
/* Flush WAL during idle time instead of letting auto-checkpoint
98-
stall collectors mid-write with 2-3s stop-the-world pauses */
99-
await _collectorService.CheckpointAsync();
10098
}
10199
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
102100
{

Lite/Services/RemoteCollectorService.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,22 @@ public async Task RunDueCollectorsAsync(CancellationToken cancellationToken = de
237237
}, cancellationToken));
238238

239239
await Task.WhenAll(serverTasks);
240+
241+
/* Run CHECKPOINT here after all collector connections are closed.
242+
This avoids opening a separate DuckDB instance that could conflict
243+
with concurrent UI connections via OS file locks. */
244+
try
245+
{
246+
using var conn = _duckDb.CreateConnection();
247+
await conn.OpenAsync(cancellationToken);
248+
using var cmd = conn.CreateCommand();
249+
cmd.CommandText = "CHECKPOINT";
250+
await cmd.ExecuteNonQueryAsync(cancellationToken);
251+
}
252+
catch (Exception ex)
253+
{
254+
_logger?.LogDebug(ex, "Post-collection checkpoint failed (non-critical)");
255+
}
240256
}
241257

242258
/// <summary>

0 commit comments

Comments
 (0)