11import { Construct } from "constructs"
2+ import * as crypto from "crypto"
23import { Role } from "aws-cdk-lib/aws-iam"
34import { Bucket } from "aws-cdk-lib/aws-s3"
45import { CfnKnowledgeBase , CfnDataSource } from "aws-cdk-lib/aws-bedrock"
@@ -154,9 +155,28 @@ export class VectorKnowledgeBaseResources extends Construct {
154155
155156 // Create S3 data source for knowledge base documents
156157 // The prefix points to processed/ so that only converted markdown documents are ingested
158+
159+ const chunkingConfiguration = {
160+ ...ChunkingStrategy . HIERARCHICAL_TITAN . configuration ,
161+ hierarchicalChunkingConfiguration : {
162+ overlapTokens : 60 ,
163+ levelConfigurations : [
164+ { maxTokens : 1000 } , // Parent chunk configuration,
165+ { maxTokens : 300 } // Child chunk configuration
166+ ]
167+ }
168+ }
169+
170+ const hash = crypto . createHash ( "md5" )
171+ . update ( JSON . stringify ( chunkingConfiguration ) )
172+ . digest ( "hex" )
173+ . substring ( 0 , 6 )
174+
175+ // TODO: migrate to L2 constructs to avoid duplicating code that's already available in the
176+ // @cdklabs/generative-ai-cdk-constructs library
157177 const dataSource = new CfnDataSource ( this , "S3DataSource" , {
158178 knowledgeBaseId : knowledgeBase . attrKnowledgeBaseId ,
159- name : `${ props . stackName } -s3-datasource` ,
179+ name : `${ props . stackName } -s3-datasource- ${ hash } ` ,
160180 dataSourceConfiguration : {
161181 type : "S3" ,
162182 s3Configuration : {
@@ -165,16 +185,7 @@ export class VectorKnowledgeBaseResources extends Construct {
165185 }
166186 } ,
167187 vectorIngestionConfiguration : {
168- chunkingConfiguration : {
169- ...ChunkingStrategy . HIERARCHICAL_TITAN . configuration ,
170- hierarchicalChunkingConfiguration : {
171- overlapTokens : 60 ,
172- levelConfigurations : [
173- { maxTokens : 1000 } , // Parent chunk configuration,
174- { maxTokens : 300 } // Child chunk configuration
175- ]
176- }
177- }
188+ chunkingConfiguration : chunkingConfiguration
178189 }
179190 } )
180191
0 commit comments