Skip to content

Commit 281452f

Browse files
CSCSoftware and claude committed
fix: Deduplicate parent projects in global index
Parent projects that contain sub-projects are now automatically removed from the global registry. Reduces noise in cross-project queries by eliminating duplicate matches. Existing duplicates are cleaned up on next global_init run (215 → 167 projects in test index). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b749290 commit 281452f

4 files changed

Lines changed: 70 additions & 6 deletions

File tree

.claude/CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ aidex_global_refresh() # Stats u
222222
- Session-Cache (5-Min TTL) für schnelle wiederholte Queries
223223
- Bulk-Index: ≤500 Code-Dateien automatisch, >500 werden dem User gezeigt
224224
- Progress-UI: SSE-basiert auf Port 3334 mit Browser-Auto-Open
225+
- Auto-Deduplizierung: Parent-Projekte mit Sub-Projekten werden übersprungen
225226

226227
### Auto-Cleanup (v1.3.1)
227228
`aidex_init` entfernt automatisch Dateien die jetzt excluded sind (z.B. build/).

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ All notable changes to AiDex will be documented in this file.
2323
- Server-Sent Events (SSE) for real-time updates
2424
- Shows per-project status (indexing/done/error), progress bar, scrolling log
2525
- Dark theme, auto-closes after completion
26+
- **Project deduplication**: Parent projects that contain sub-projects are automatically removed
27+
- e.g., `AudioGrabber/` is skipped when `AudioGrabber/AudioGrabber/` and `AudioGrabber/AudioGrabber2/` exist
28+
- Existing duplicates in global DB are cleaned up on next `global_init` run
29+
- Reduced test index from 215 to 167 projects (48 parent-duplicates removed)
2630
- **Extended excludes**: Better handling of embedded runtimes and external code
2731
- `init.ts`: Added `**/site-packages/**`, `**/Lib/**`, `**/fdk-aac/**` to DEFAULT_EXCLUDE
2832
- `global-init.ts`: Added Python venvs, embedded Python runtimes (Python310-313), `.cargo`, `packages`, `fdk-aac` to DEFAULT_EXCLUDED_DIRS

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ aidex_global_signatures({ name: "Player", kind: "class" }) # Find cl
350350
- Results are cached in memory (5-minute TTL) for fast repeated queries
351351
- Projects are batched (8 at a time) to respect SQLite's attachment limit
352352
- Each project keeps its own `.aidex/index.db` as the single source of truth
353+
- **Auto-deduplication**: Parent projects that contain sub-projects are automatically skipped (e.g., `MyApp/` is removed when `MyApp/Frontend/` and `MyApp/Backend/` exist as separate indexed projects)
353354

354355
### Management
355356

src/commands/global/global-init.ts

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ export async function globalInit(params: GlobalInitParams): Promise<GlobalInitRe
139139
const globalDb = openGlobalDatabase();
140140

141141
try {
142+
// --------------------------------------------------------
143+
// Deduplicate: Remove parent projects that contain sub-projects
144+
// e.g., AudioGrabber/ contains AudioGrabber/AudioGrabber/ and AudioGrabber/AudioGrabber2/
145+
// → keep only the sub-projects, skip the parent
146+
// --------------------------------------------------------
147+
const deduplicatedProjects = deduplicateProjects(scanResult.projects);
148+
142149
// Get existing projects for comparison
143150
const existingProjects = new Map(
144151
globalDb.getProjects().map(p => [p.path.replace(/\\/g, '/'), p])
@@ -147,8 +154,8 @@ export async function globalInit(params: GlobalInitParams): Promise<GlobalInitRe
147154
let newCount = 0;
148155
let updatedCount = 0;
149156

150-
// Register each found project
151-
for (const project of scanResult.projects) {
157+
// Register each found project (deduplicated)
158+
for (const project of deduplicatedProjects) {
152159
const normalizedPath = project.path.replace(/\\/g, '/');
153160
const stats = readProjectStats(project.path);
154161
if (!stats) continue;
@@ -185,9 +192,27 @@ export async function globalInit(params: GlobalInitParams): Promise<GlobalInitRe
185192
}
186193
}
187194

188-
// Collect paths of indexed projects for exclusion
195+
// Remove parent-duplicate projects already in the global DB
196+
// (from previous runs before deduplication was added)
197+
const allRegistered = globalDb.getProjects();
198+
const allPaths = allRegistered.map(p => ({
199+
id: p.id,
200+
path: p.path.replace(/\\/g, '/').replace(/\/+$/, '') + '/',
201+
}));
202+
for (const project of allPaths) {
203+
const isParent = allPaths.some(other =>
204+
other.id !== project.id &&
205+
other.path.startsWith(project.path)
206+
);
207+
if (isParent) {
208+
globalDb.unregisterProject(project.path.replace(/\/+$/, ''));
209+
removedCount++;
210+
}
211+
}
212+
213+
// Collect paths of indexed projects for exclusion (use deduplicated list)
189214
const indexedPaths = new Set(
190-
scanResult.projects.map(p => p.path.replace(/\\/g, '/'))
215+
deduplicatedProjects.map(p => p.path.replace(/\\/g, '/'))
191216
);
192217

193218
// Find projects without .aidex/ index
@@ -290,8 +315,8 @@ export async function globalInit(params: GlobalInitParams): Promise<GlobalInitRe
290315
success: true,
291316
searchPath,
292317
registered: params.indexUnindexed
293-
? scanResult.projects.length + (indexedResults?.filter(r => r.success).length ?? 0)
294-
: scanResult.projects.length,
318+
? deduplicatedProjects.length + (indexedResults?.filter(r => r.success).length ?? 0)
319+
: deduplicatedProjects.length,
295320
newProjects: newCount,
296321
updatedProjects: updatedCount,
297322
removedProjects: removedCount,
@@ -477,3 +502,36 @@ function findUnindexedProjects(searchPath: string, maxDepth: number, indexedPath
477502
walk(searchPath, 0);
478503
return projects;
479504
}
505+
506+
/**
 * Deduplicate projects: if project A is a parent directory of project B,
 * keep only B (the more specific sub-project).
 *
 * Example: given paths [AudioGrabber/, AudioGrabber/AudioGrabber/, AudioGrabber/AudioGrabber2/]
 *   → remove AudioGrabber/ because it contains sub-projects.
 *   → keep AudioGrabber/AudioGrabber/ and AudioGrabber/AudioGrabber2/.
 *
 * Paths are compared after converting backslashes to forward slashes and
 * forcing exactly one trailing slash. The trailing slash keeps `a/foo` from
 * being treated as a parent of `a/foobar`.
 *
 * Entries whose normalized paths are identical are duplicates of each other,
 * not parent and child: only the first such entry (in input order) is kept.
 *
 * @param projects - Projects to filter. The array and its elements are not modified.
 * @returns The surviving original project objects (no helper fields added),
 *          ordered by normalized path length, longest first.
 */
function deduplicateProjects<T extends { path: string; name: string }>(projects: T[]): T[] {
  // Pair each project with its comparison key instead of spreading the key
  // into a copy, so the caller gets its own objects back untouched.
  const entries = projects.map(project => ({
    project,
    normPath: project.path.replace(/\\/g, '/').replace(/\/+$/, '') + '/',
  }));

  // Longest path first. Array.prototype.sort is stable, so entries with equal
  // length keep their input order.
  entries.sort((a, b) => b.normPath.length - a.normPath.length);

  const seenPaths = new Set<string>();
  const result: T[] = [];

  for (const entry of entries) {
    // Same directory listed twice (e.g. with and without a trailing slash):
    // keep the first occurrence only.
    if (seenPaths.has(entry.normPath)) continue;
    seenPaths.add(entry.normPath);

    // A real parent is a strict prefix of another path. Requiring a longer
    // path prevents identical paths from eliminating each other.
    const isParent = entries.some(other =>
      other.normPath.length > entry.normPath.length &&
      other.normPath.startsWith(entry.normPath)
    );

    if (!isParent) {
      result.push(entry.project);
    }
  }

  return result;
}

0 commit comments

Comments
 (0)