Skip to content

Commit 76d8732

Browse files
committed
Preserve regex replace_all anchor semantics
1 parent a3b27c0 commit 76d8732

2 files changed

Lines changed: 71 additions & 49 deletions

File tree

vibes/builtins.go

Lines changed: 67 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"strconv"
1212
"strings"
1313
"time"
14-
"unicode/utf8"
1514
)
1615

1716
const (
@@ -496,64 +495,83 @@ func builtinRegexReplaceInternal(args []Value, kwargs map[string]Value, block Va
496495
}
497496

498497
func regexReplaceAllWithLimit(re *regexp.Regexp, text string, replacement string, method string) (string, error) {
499-
out := make([]byte, 0, len(text))
500-
lastAppended := 0
501-
searchStart := 0
502-
lastMatchEnd := -1
503-
for searchStart <= len(text) {
504-
loc := re.FindStringSubmatchIndex(text[searchStart:])
505-
if loc == nil {
506-
break
498+
literalBytesPerMatch, refCountPerMatch := regexReplacementTemplateStats(replacement)
499+
matchCount := int64(0)
500+
totalMatchBytes := int64(0)
501+
referenceMatchBytes := int64(0)
502+
503+
// Use regexp's native replacement scanner so anchoring/boundary semantics
504+
// match the final replace_all behavior while estimating worst-case output.
505+
re.ReplaceAllStringFunc(text, func(match string) string {
506+
matchCount++
507+
matchBytes := int64(len(match))
508+
totalMatchBytes += matchBytes
509+
referenceMatchBytes += matchBytes * int64(refCountPerMatch)
510+
return match
511+
})
512+
513+
upperBound := int64(len(text)) - totalMatchBytes + matchCount*int64(literalBytesPerMatch) + referenceMatchBytes
514+
if upperBound > int64(maxRegexInputBytes) {
515+
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
516+
}
517+
518+
out := re.ReplaceAllString(text, replacement)
519+
if len(out) > maxRegexInputBytes {
520+
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
521+
}
522+
return out, nil
523+
}
524+
525+
func regexReplacementTemplateStats(replacement string) (literalBytes int, referenceCount int) {
526+
for i := 0; i < len(replacement); {
527+
if replacement[i] != '$' {
528+
literalBytes++
529+
i++
530+
continue
507531
}
508-
for i := range loc {
509-
if loc[i] >= 0 {
510-
loc[i] += searchStart
511-
}
532+
if i+1 >= len(replacement) {
533+
literalBytes++
534+
i++
535+
continue
512536
}
513-
start := loc[0]
514-
end := loc[1]
515-
if start == end && start == lastMatchEnd {
516-
if searchStart >= len(text) {
517-
break
537+
if replacement[i+1] == '$' {
538+
literalBytes++
539+
i += 2
540+
continue
541+
}
542+
if replacement[i+1] == '{' {
543+
j := i + 2
544+
for j < len(replacement) && replacement[j] != '}' {
545+
j++
518546
}
519-
_, size := utf8.DecodeRuneInString(text[searchStart:])
520-
if size == 0 {
521-
size = 1
547+
if j < len(replacement) {
548+
referenceCount++
549+
i = j + 1
550+
continue
522551
}
523-
searchStart += size
552+
literalBytes++
553+
i++
524554
continue
525555
}
526556

527-
segmentLen := start - lastAppended
528-
if len(out) > maxRegexInputBytes-segmentLen {
529-
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
557+
j := i + 1
558+
for j < len(replacement) && isRegexReplacementNameByte(replacement[j]) {
559+
j++
530560
}
531-
out = append(out, text[lastAppended:start]...)
532-
out = re.ExpandString(out, replacement, text, loc)
533-
if len(out) > maxRegexInputBytes {
534-
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
535-
}
536-
lastAppended = end
537-
lastMatchEnd = end
538-
539-
if end > searchStart {
540-
searchStart = end
561+
if j == i+1 {
562+
literalBytes++
563+
i++
541564
continue
542565
}
543-
if searchStart >= len(text) {
544-
break
545-
}
546-
_, size := utf8.DecodeRuneInString(text[searchStart:])
547-
if size == 0 {
548-
size = 1
549-
}
550-
searchStart += size
566+
referenceCount++
567+
i = j
551568
}
569+
return literalBytes, referenceCount
570+
}
552571

553-
tailLen := len(text) - lastAppended
554-
if len(out) > maxRegexInputBytes-tailLen {
555-
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
556-
}
557-
out = append(out, text[lastAppended:]...)
558-
return string(out), nil
572+
// isRegexReplacementNameByte reports whether char is an ASCII digit, an ASCII
// letter, or an underscore.
func isRegexReplacementNameByte(char byte) bool {
	switch {
	case '0' <= char && char <= '9':
		return true
	case 'a' <= char && char <= 'z':
		return true
	case 'A' <= char && char <= 'Z':
		return true
	case char == '_':
		return true
	default:
		return false
	}
}

vibes/runtime_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,7 @@ func TestRegexBuiltins(t *testing.T) {
19541954
match_empty: Regex.match("^", "ID-12"),
19551955
replace_one: Regex.replace("ID-12 ID-34", "ID-[0-9]+", "X"),
19561956
replace_all: Regex.replace_all("ID-12 ID-34", "ID-[0-9]+", "X"),
1957+
replace_all_anchor: Regex.replace_all("abc", "^", "X"),
19571958
replace_capture: Regex.replace("ID-12 ID-34", "ID-([0-9]+)", "X-$1"),
19581959
replace_boundary: Regex.replace("ab", "\\Bb", "X")
19591960
}
@@ -1984,6 +1985,9 @@ func TestRegexBuiltins(t *testing.T) {
19841985
if !out["replace_all"].Equal(NewString("X X")) {
19851986
t.Fatalf("replace_all mismatch: %v", out["replace_all"])
19861987
}
1988+
if !out["replace_all_anchor"].Equal(NewString("Xabc")) {
1989+
t.Fatalf("replace_all_anchor mismatch: %v", out["replace_all_anchor"])
1990+
}
19871991
if !out["replace_capture"].Equal(NewString("X-12 ID-34")) {
19881992
t.Fatalf("replace_capture mismatch: %v", out["replace_capture"])
19891993
}

0 commit comments

Comments
 (0)