Skip to content

Commit 23b03fd

Browse files
committed
Stream regex replace_all matching with output cap
1 parent 0c1953f commit 23b03fd

1 file changed

Lines changed: 47 additions & 12 deletions

File tree

vibes/builtins.go

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strconv"
1212
"strings"
1313
"time"
14+
"unicode/utf8"
1415
)
1516

1617
const (
@@ -495,30 +496,64 @@ func builtinRegexReplaceInternal(args []Value, kwargs map[string]Value, block Va
495496
}
496497

497498
func regexReplaceAllWithLimit(re *regexp.Regexp, text string, replacement string, method string) (string, error) {
498-
matches := re.FindAllStringSubmatchIndex(text, -1)
499-
if len(matches) == 0 {
500-
return text, nil
501-
}
502-
503499
out := make([]byte, 0, len(text))
504-
last := 0
505-
for _, loc := range matches {
506-
segmentLen := loc[0] - last
500+
lastAppended := 0
501+
searchStart := 0
502+
lastMatchEnd := -1
503+
for searchStart <= len(text) {
504+
loc := re.FindStringSubmatchIndex(text[searchStart:])
505+
if loc == nil {
506+
break
507+
}
508+
for i := range loc {
509+
if loc[i] >= 0 {
510+
loc[i] += searchStart
511+
}
512+
}
513+
start := loc[0]
514+
end := loc[1]
515+
if start == end && start == lastMatchEnd {
516+
if searchStart >= len(text) {
517+
break
518+
}
519+
_, size := utf8.DecodeRuneInString(text[searchStart:])
520+
if size == 0 {
521+
size = 1
522+
}
523+
searchStart += size
524+
continue
525+
}
526+
527+
segmentLen := start - lastAppended
507528
if len(out) > maxRegexInputBytes-segmentLen {
508529
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
509530
}
510-
out = append(out, text[last:loc[0]]...)
531+
out = append(out, text[lastAppended:start]...)
511532
out = re.ExpandString(out, replacement, text, loc)
512533
if len(out) > maxRegexInputBytes {
513534
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
514535
}
515-
last = loc[1]
536+
lastAppended = end
537+
lastMatchEnd = end
538+
539+
if end > searchStart {
540+
searchStart = end
541+
continue
542+
}
543+
if searchStart >= len(text) {
544+
break
545+
}
546+
_, size := utf8.DecodeRuneInString(text[searchStart:])
547+
if size == 0 {
548+
size = 1
549+
}
550+
searchStart += size
516551
}
517552

518-
tailLen := len(text) - last
553+
tailLen := len(text) - lastAppended
519554
if len(out) > maxRegexInputBytes-tailLen {
520555
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
521556
}
522-
out = append(out, text[last:]...)
557+
out = append(out, text[lastAppended:]...)
523558
return string(out), nil
524559
}

0 commit comments

Comments
 (0)