@@ -11,6 +11,7 @@ import (
1111 "strconv"
1212 "strings"
1313 "time"
14+ "unicode/utf8"
1415)
1516
1617const (
@@ -495,83 +496,68 @@ func builtinRegexReplaceInternal(args []Value, kwargs map[string]Value, block Va
495496}
496497
497498func regexReplaceAllWithLimit (re * regexp.Regexp , text string , replacement string , method string ) (string , error ) {
498- literalBytesPerMatch , refCountPerMatch := regexReplacementTemplateStats (replacement )
499- matchCount := int64 (0 )
500- totalMatchBytes := int64 (0 )
501- referenceMatchBytes := int64 (0 )
502-
503- // Use regexp's native replacement scanner so anchoring/boundary semantics
504- // match the final replace_all behavior while estimating worst-case output.
505- re .ReplaceAllStringFunc (text , func (match string ) string {
506- matchCount ++
507- matchBytes := int64 (len (match ))
508- totalMatchBytes += matchBytes
509- referenceMatchBytes += matchBytes * int64 (refCountPerMatch )
510- return match
511- })
512-
513- upperBound := int64 (len (text )) - totalMatchBytes + matchCount * int64 (literalBytesPerMatch ) + referenceMatchBytes
514- if upperBound > int64 (maxRegexInputBytes ) {
515- return "" , fmt .Errorf ("%s output exceeds limit %d bytes" , method , maxRegexInputBytes )
499+ out := make ([]byte , 0 , len (text ))
500+ lastAppended := 0
501+ searchStart := 0
502+ for searchStart <= len (text ) {
503+ loc , found := nextRegexReplaceAllSubmatchIndex (re , text , searchStart )
504+ if ! found {
505+ break
506+ }
507+
508+ segmentLen := loc [0 ] - lastAppended
509+ if len (out ) > maxRegexInputBytes - segmentLen {
510+ return "" , fmt .Errorf ("%s output exceeds limit %d bytes" , method , maxRegexInputBytes )
511+ }
512+ out = append (out , text [lastAppended :loc [0 ]]... )
513+ out = re .ExpandString (out , replacement , text , loc )
514+ if len (out ) > maxRegexInputBytes {
515+ return "" , fmt .Errorf ("%s output exceeds limit %d bytes" , method , maxRegexInputBytes )
516+ }
517+ lastAppended = loc [1 ]
518+
519+ if loc [1 ] > searchStart {
520+ searchStart = loc [1 ]
521+ continue
522+ }
523+ if searchStart >= len (text ) {
524+ break
525+ }
526+ _ , size := utf8 .DecodeRuneInString (text [searchStart :])
527+ if size == 0 {
528+ size = 1
529+ }
530+ searchStart += size
516531 }
517532
518- out := re . ReplaceAllString (text , replacement )
519- if len (out ) > maxRegexInputBytes {
533+ tailLen := len (text ) - lastAppended
534+ if len (out ) > maxRegexInputBytes - tailLen {
520535 return "" , fmt .Errorf ("%s output exceeds limit %d bytes" , method , maxRegexInputBytes )
521536 }
522- return out , nil
537+ out = append (out , text [lastAppended :]... )
538+ return string (out ), nil
523539}
524540
// nextRegexReplaceAllSubmatchIndex reports the first match of re in text that
// begins at or after byte offset start. The returned slice has the layout of
// FindStringSubmatchIndex with every offset relative to text (-1 for groups
// that did not participate); the boolean is false when there is no such match.
//
// The search runs on text[start-1:] so the byte before start is visible to the
// matcher, and at most two matches are requested: one that may begin on that
// extra byte (discarded) and the one after it.
//
// NOTE(review): the discarded match still consumes input. When it extends past
// start, a match that begins at start is not reported: `\d\d` on "1234" with
// start 2 finds nothing, because "23" is matched first. Anchors such as ^ and
// \A are also evaluated against the slice, not against text.
func nextRegexReplaceAllSubmatchIndex(re *regexp.Regexp, text string, start int) ([]int, bool) {
	// Back up one byte, when there is one, to give the matcher its context.
	offset := start
	if start > 0 {
		offset = start - 1
	}

	for _, match := range re.FindAllStringSubmatchIndex(text[offset:], 2) {
		if match[0]+offset < start {
			// Begins on the context byte, not inside the requested range.
			continue
		}

		// Translate slice-relative offsets back to offsets into text.
		shifted := make([]int, 0, len(match))
		for _, pos := range match {
			if pos >= 0 {
				shifted = append(shifted, pos+offset)
			} else {
				shifted = append(shifted, -1)
			}
		}
		return shifted, true
	}
	return nil, false
}
0 commit comments