@@ -11,7 +11,6 @@ import (
1111 "strconv"
1212 "strings"
1313 "time"
14- "unicode/utf8"
1514)
1615
1716const (
@@ -496,64 +495,83 @@ func builtinRegexReplaceInternal(args []Value, kwargs map[string]Value, block Va
496495}
497496
498497func regexReplaceAllWithLimit (re * regexp.Regexp , text string , replacement string , method string ) (string , error ) {
499- out := make ([]byte , 0 , len (text ))
500- lastAppended := 0
501- searchStart := 0
502- lastMatchEnd := - 1
503- for searchStart <= len (text ) {
504- loc := re .FindStringSubmatchIndex (text [searchStart :])
505- if loc == nil {
506- break
498+ literalBytesPerMatch , refCountPerMatch := regexReplacementTemplateStats (replacement )
499+ matchCount := int64 (0 )
500+ totalMatchBytes := int64 (0 )
501+ referenceMatchBytes := int64 (0 )
502+
503+ // Use regexp's native replacement scanner so anchoring/boundary semantics
504+ // match the final replace_all behavior while estimating worst-case output.
505+ re .ReplaceAllStringFunc (text , func (match string ) string {
506+ matchCount ++
507+ matchBytes := int64 (len (match ))
508+ totalMatchBytes += matchBytes
509+ referenceMatchBytes += matchBytes * int64 (refCountPerMatch )
510+ return match
511+ })
512+
513+ upperBound := int64 (len (text )) - totalMatchBytes + matchCount * int64 (literalBytesPerMatch ) + referenceMatchBytes
514+ if upperBound > int64 (maxRegexInputBytes ) {
515+ return "" , fmt .Errorf ("%s output exceeds limit %d bytes" , method , maxRegexInputBytes )
516+ }
517+
518+ out := re .ReplaceAllString (text , replacement )
519+ if len (out ) > maxRegexInputBytes {
520+ return "" , fmt .Errorf ("%s output exceeds limit %d bytes" , method , maxRegexInputBytes )
521+ }
522+ return out , nil
523+ }
524+
// regexReplacementTemplateStats scans a regexp replacement template and
// reports, for a single expansion of that template:
//
//   - literalBytes: the number of bytes emitted as raw text, and
//   - referenceCount: the number of capture-group references ($name, $1,
//     ${name}) whose expansion length depends on the match.
//
// The scan mirrors how Go's regexp package expands templates: "$$" yields one
// literal '$'; "$name" takes the longest run of name bytes; "${name}" is a
// reference only when the braces enclose a non-empty run of name bytes that
// is immediately followed by '}'. Anything else after a '$' is malformed, in
// which case the '$' itself is raw text and scanning resumes at the next
// byte. Treating every "${...}" span as a reference would under-count the
// literal output for templates such as "${}" or "${a b}", which regexp emits
// verbatim.
//
// Only ASCII name bytes are recognised here. A reference spelled with
// non-ASCII letters is counted as literal text instead, which can only
// over-estimate the output size, never under-estimate it.
func regexReplacementTemplateStats(replacement string) (literalBytes int, referenceCount int) {
	for i := 0; i < len(replacement); {
		if replacement[i] != '$' {
			literalBytes++
			i++
			continue
		}

		rest := replacement[i+1:]
		switch {
		case rest == "":
			// A trailing '$' has nothing to refer to; it is raw text.
			literalBytes++
			i++
		case rest[0] == '$':
			// "$$" is the escape for a single literal '$'.
			literalBytes++
			i += 2
		case rest[0] == '{':
			nameLen := regexReplacementNameLen(rest[1:])
			if nameLen > 0 && 1+nameLen < len(rest) && rest[1+nameLen] == '}' {
				referenceCount++
				i += 1 + 1 + nameLen + 1 // '$' + '{' + name + '}'
			} else {
				// Empty name, invalid name byte, or missing '}':
				// only the '$' is consumed; the following bytes are
				// rescanned as ordinary template text.
				literalBytes++
				i++
			}
		default:
			nameLen := regexReplacementNameLen(rest)
			if nameLen == 0 {
				// '$' followed by a non-name byte is raw text.
				literalBytes++
				i++
			} else {
				referenceCount++
				i += 1 + nameLen
			}
		}
	}
	return literalBytes, referenceCount
}

// regexReplacementNameLen returns the length in bytes of the longest prefix
// of s made up solely of reference-name bytes.
func regexReplacementNameLen(s string) int {
	n := 0
	for n < len(s) && isRegexReplacementNameByte(s[n]) {
		n++
	}
	return n
}

// isRegexReplacementNameByte reports whether char may appear in a template
// reference name: an ASCII letter, an ASCII digit, or an underscore.
func isRegexReplacementNameByte(char byte) bool {
	return (char >= '0' && char <= '9') ||
		(char >= 'a' && char <= 'z') ||
		(char >= 'A' && char <= 'Z') ||
		char == '_'
}
0 commit comments