Skip to content

Commit 61218c7

Browse files
Fix: [AEA-0000] - kb data source naming (#377)
## Summary - Routine Change --------- Co-authored-by: Beenyaa <bencegadanyi1@hotmail.com>
1 parent c719cf2 commit 61218c7

1 file changed

Lines changed: 22 additions & 11 deletions

File tree

packages/cdk/resources/VectorKnowledgeBaseResources.ts

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import {Construct} from "constructs"
2+
import * as crypto from "crypto"
23
import {Role} from "aws-cdk-lib/aws-iam"
34
import {Bucket} from "aws-cdk-lib/aws-s3"
45
import {CfnKnowledgeBase, CfnDataSource} from "aws-cdk-lib/aws-bedrock"
@@ -154,9 +155,28 @@ export class VectorKnowledgeBaseResources extends Construct {
154155

155156
// Create S3 data source for knowledge base documents
156157
// prefix pointed to processed/ to only ingest converted markdown documents
158+
159+
const chunkingConfiguration = {
160+
...ChunkingStrategy.HIERARCHICAL_TITAN.configuration,
161+
hierarchicalChunkingConfiguration: {
162+
overlapTokens: 60,
163+
levelConfigurations: [
164+
{maxTokens: 1000}, // Parent chunk configuration,
165+
{maxTokens: 300} // Child chunk configuration
166+
]
167+
}
168+
}
169+
170+
const hash = crypto.createHash("md5")
171+
.update(JSON.stringify(chunkingConfiguration))
172+
.digest("hex")
173+
.substring(0, 6)
174+
175+
// TODO: migrate to L2 constructs to avoid duplicating code that's already available in the
176+
// @cdklabs/generative-ai-cdk-constructs library
157177
const dataSource = new CfnDataSource(this, "S3DataSource", {
158178
knowledgeBaseId: knowledgeBase.attrKnowledgeBaseId,
159-
name: `${props.stackName}-s3-datasource`,
179+
name: `${props.stackName}-s3-datasource-${hash}`,
160180
dataSourceConfiguration: {
161181
type: "S3",
162182
s3Configuration: {
@@ -165,16 +185,7 @@ export class VectorKnowledgeBaseResources extends Construct {
165185
}
166186
},
167187
vectorIngestionConfiguration: {
168-
chunkingConfiguration: {
169-
...ChunkingStrategy.HIERARCHICAL_TITAN.configuration,
170-
hierarchicalChunkingConfiguration: {
171-
overlapTokens: 60,
172-
levelConfigurations: [
173-
{maxTokens: 1000}, // Parent chunk configuration,
174-
{maxTokens: 300} // Child chunk configuration
175-
]
176-
}
177-
}
188+
chunkingConfiguration: chunkingConfiguration
178189
}
179190
})
180191

0 commit comments

Comments
 (0)