11import * as fs from 'node:fs/promises'
2+ import * as path from 'path'
23import crypto from 'crypto'
34
45import type { ChunkParams , CodeChunkInsert } from '../../types'
@@ -40,10 +41,9 @@ export class RecursiveChunker {
4041 }
4142 }
4243
43- const chunkSize = Math . floor ( maxChunkSize / 2 )
4444 const chunks : string [ ] = [ ]
45- for ( let i = 0 ; i < content . length ; i += chunkSize ) {
46- chunks . push ( content . slice ( i , i + chunkSize ) )
45+ for ( let i = 0 ; i < content . length ; i += maxChunkSize ) {
46+ chunks . push ( content . slice ( i , i + maxChunkSize ) )
4747 }
4848
4949 return chunks
@@ -58,13 +58,15 @@ export class RecursiveChunker {
5858 for ( let i = 0 ; i < chunks . length ; i ++ ) {
5959 let chunk = chunks [ i ] !
6060
61- if ( countLengthWithoutWhitespace ( chunk ) < coalesce && i < chunks . length - 1 ) {
61+ while ( countLengthWithoutWhitespace ( chunk ) < coalesce && i < chunks . length - 1 ) {
6262 const nextChunk = chunks [ i + 1 ] !
6363 const combinedChunk = chunk + '\n' + nextChunk
6464
6565 if ( combinedChunk . length <= maxChunkSize ) {
6666 chunk = combinedChunk
6767 i ++
68+ } else {
69+ break
6870 }
6971 }
7072
@@ -78,7 +80,7 @@ export class RecursiveChunker {
7880 startLine : currentLineNumber ,
7981 endLine,
8082 nodeType : 'text' ,
81- language : null ,
83+ language : this . getLanguage ( filePath ) ,
8284 hash : crypto . createHash ( 'sha256' ) . update ( chunk ) . digest ( 'hex' ) ,
8385 size : chunk . length ,
8486 } )
@@ -88,7 +90,10 @@ export class RecursiveChunker {
8890
8991 return processedChunks
9092 }
93+
/**
 * Derives a language tag for a chunk from the extension of its source file.
 *
 * @param filePath - Path of the file the chunk content came from.
 * @returns The lower-cased extension without its leading dot (e.g. `ts`),
 *   or `null` when the path has no extension (e.g. `Makefile`, `.bashrc`),
 *   so such files are not tagged with an empty-string language.
 */
private getLanguage(filePath: string): string | null {
  // path.extname returns '' or a string beginning with '.', so slice(1)
  // removes exactly that leading dot and nothing else.
  const extension = path.extname(filePath).slice(1).toLowerCase()
  return extension === '' ? null : extension
}
9197}
9298
// Module-level RecursiveChunker instance, constructed with no arguments and exported by name.
9399export const recursiveChunker = new RecursiveChunker ( )
94-
0 commit comments