Skip to content

Commit 4b7ada8

Browse files
committed
extract regex replacement helpers
1 parent 9e14387 commit 4b7ada8

2 files changed

Lines changed: 173 additions & 167 deletions

File tree

vibes/builtins_json_regex.go

Lines changed: 0 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package vibes
33
import (
44
"fmt"
55
"regexp"
6-
"unicode/utf8"
76
)
87

98
func builtinRegexMatch(exec *Execution, receiver Value, args []Value, kwargs map[string]Value, block Value) (Value, error) {
@@ -38,169 +37,3 @@ func builtinRegexMatch(exec *Execution, receiver Value, args []Value, kwargs map
3837
}
3938
return NewString(text[indices[0]:indices[1]]), nil
4039
}
41-
42-
func builtinRegexReplace(exec *Execution, receiver Value, args []Value, kwargs map[string]Value, block Value) (Value, error) {
43-
return builtinRegexReplaceInternal(args, kwargs, block, false)
44-
}
45-
46-
func builtinRegexReplaceAll(exec *Execution, receiver Value, args []Value, kwargs map[string]Value, block Value) (Value, error) {
47-
return builtinRegexReplaceInternal(args, kwargs, block, true)
48-
}
49-
50-
func builtinRegexReplaceInternal(args []Value, kwargs map[string]Value, block Value, replaceAll bool) (Value, error) {
51-
method := "Regex.replace"
52-
if replaceAll {
53-
method = "Regex.replace_all"
54-
}
55-
56-
if len(args) != 3 {
57-
return NewNil(), fmt.Errorf("%s expects text, pattern, replacement", method)
58-
}
59-
if len(kwargs) > 0 {
60-
return NewNil(), fmt.Errorf("%s does not accept keyword arguments", method)
61-
}
62-
if !block.IsNil() {
63-
return NewNil(), fmt.Errorf("%s does not accept blocks", method)
64-
}
65-
if args[0].Kind() != KindString || args[1].Kind() != KindString || args[2].Kind() != KindString {
66-
return NewNil(), fmt.Errorf("%s expects string text, pattern, replacement", method)
67-
}
68-
69-
text := args[0].String()
70-
pattern := args[1].String()
71-
replacement := args[2].String()
72-
if len(pattern) > maxRegexPatternSize {
73-
return NewNil(), fmt.Errorf("%s pattern exceeds limit %d bytes", method, maxRegexPatternSize)
74-
}
75-
if len(text) > maxRegexInputBytes {
76-
return NewNil(), fmt.Errorf("%s text exceeds limit %d bytes", method, maxRegexInputBytes)
77-
}
78-
if len(replacement) > maxRegexInputBytes {
79-
return NewNil(), fmt.Errorf("%s replacement exceeds limit %d bytes", method, maxRegexInputBytes)
80-
}
81-
82-
re, err := regexp.Compile(pattern)
83-
if err != nil {
84-
return NewNil(), fmt.Errorf("%s invalid regex: %v", method, err)
85-
}
86-
87-
if replaceAll {
88-
replaced, err := regexReplaceAllWithLimit(re, text, replacement, method)
89-
if err != nil {
90-
return NewNil(), err
91-
}
92-
return NewString(replaced), nil
93-
}
94-
95-
loc := re.FindStringSubmatchIndex(text)
96-
if loc == nil {
97-
return NewString(text), nil
98-
}
99-
replaced := string(re.ExpandString(nil, replacement, text, loc))
100-
outputLen := len(text) - (loc[1] - loc[0]) + len(replaced)
101-
if outputLen > maxRegexInputBytes {
102-
return NewNil(), fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
103-
}
104-
return NewString(text[:loc[0]] + replaced + text[loc[1]:]), nil
105-
}
106-
107-
func regexReplaceAllWithLimit(re *regexp.Regexp, text string, replacement string, method string) (string, error) {
108-
out := make([]byte, 0, len(text))
109-
lastAppended := 0
110-
searchStart := 0
111-
lastMatchEnd := -1
112-
for searchStart <= len(text) {
113-
loc, found := nextRegexReplaceAllSubmatchIndex(re, text, searchStart)
114-
if !found {
115-
break
116-
}
117-
if loc[0] == loc[1] && loc[0] == lastMatchEnd {
118-
if loc[0] >= len(text) {
119-
break
120-
}
121-
_, size := utf8.DecodeRuneInString(text[loc[0]:])
122-
if size == 0 {
123-
size = 1
124-
}
125-
searchStart = loc[0] + size
126-
continue
127-
}
128-
129-
segmentLen := loc[0] - lastAppended
130-
if len(out) > maxRegexInputBytes-segmentLen {
131-
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
132-
}
133-
out = append(out, text[lastAppended:loc[0]]...)
134-
out = re.ExpandString(out, replacement, text, loc)
135-
if len(out) > maxRegexInputBytes {
136-
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
137-
}
138-
lastAppended = loc[1]
139-
lastMatchEnd = loc[1]
140-
141-
if loc[1] > loc[0] {
142-
searchStart = loc[1]
143-
continue
144-
}
145-
if loc[1] >= len(text) {
146-
break
147-
}
148-
_, size := utf8.DecodeRuneInString(text[loc[1]:])
149-
if size == 0 {
150-
size = 1
151-
}
152-
searchStart = loc[1] + size
153-
}
154-
155-
tailLen := len(text) - lastAppended
156-
if len(out) > maxRegexInputBytes-tailLen {
157-
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
158-
}
159-
out = append(out, text[lastAppended:]...)
160-
return string(out), nil
161-
}
162-
163-
func nextRegexReplaceAllSubmatchIndex(re *regexp.Regexp, text string, start int) ([]int, bool) {
164-
loc := re.FindStringSubmatchIndex(text[start:])
165-
if loc == nil {
166-
return nil, false
167-
}
168-
direct := offsetRegexSubmatchIndex(loc, start)
169-
if start == 0 || direct[0] > start {
170-
return direct, true
171-
}
172-
173-
windowStart := start - 1
174-
locs := re.FindAllStringSubmatchIndex(text[windowStart:], 2)
175-
if len(locs) == 0 {
176-
return nil, false
177-
}
178-
179-
first := offsetRegexSubmatchIndex(locs[0], windowStart)
180-
if first[0] >= start {
181-
return first, true
182-
}
183-
if first[1] > start {
184-
return direct, true
185-
}
186-
if len(locs) < 2 {
187-
return nil, false
188-
}
189-
second := offsetRegexSubmatchIndex(locs[1], windowStart)
190-
if second[0] >= start {
191-
return second, true
192-
}
193-
return nil, false
194-
}
195-
196-
// offsetRegexSubmatchIndex returns a new slice holding each index of loc
// shifted by offset. Negative entries are stored as -1 rather than shifted.
// loc itself is not modified.
func offsetRegexSubmatchIndex(loc []int, offset int) []int {
	shifted := make([]int, 0, len(loc))
	for _, pos := range loc {
		if pos >= 0 {
			pos += offset
		} else {
			pos = -1
		}
		shifted = append(shifted, pos)
	}
	return shifted
}

vibes/builtins_regex_replace.go

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
package vibes
2+
3+
import (
4+
"fmt"
5+
"regexp"
6+
"unicode/utf8"
7+
)
8+
9+
func builtinRegexReplace(exec *Execution, receiver Value, args []Value, kwargs map[string]Value, block Value) (Value, error) {
10+
return builtinRegexReplaceInternal(args, kwargs, block, false)
11+
}
12+
13+
func builtinRegexReplaceAll(exec *Execution, receiver Value, args []Value, kwargs map[string]Value, block Value) (Value, error) {
14+
return builtinRegexReplaceInternal(args, kwargs, block, true)
15+
}
16+
17+
func builtinRegexReplaceInternal(args []Value, kwargs map[string]Value, block Value, replaceAll bool) (Value, error) {
18+
method := "Regex.replace"
19+
if replaceAll {
20+
method = "Regex.replace_all"
21+
}
22+
23+
if len(args) != 3 {
24+
return NewNil(), fmt.Errorf("%s expects text, pattern, replacement", method)
25+
}
26+
if len(kwargs) > 0 {
27+
return NewNil(), fmt.Errorf("%s does not accept keyword arguments", method)
28+
}
29+
if !block.IsNil() {
30+
return NewNil(), fmt.Errorf("%s does not accept blocks", method)
31+
}
32+
if args[0].Kind() != KindString || args[1].Kind() != KindString || args[2].Kind() != KindString {
33+
return NewNil(), fmt.Errorf("%s expects string text, pattern, replacement", method)
34+
}
35+
36+
text := args[0].String()
37+
pattern := args[1].String()
38+
replacement := args[2].String()
39+
if len(pattern) > maxRegexPatternSize {
40+
return NewNil(), fmt.Errorf("%s pattern exceeds limit %d bytes", method, maxRegexPatternSize)
41+
}
42+
if len(text) > maxRegexInputBytes {
43+
return NewNil(), fmt.Errorf("%s text exceeds limit %d bytes", method, maxRegexInputBytes)
44+
}
45+
if len(replacement) > maxRegexInputBytes {
46+
return NewNil(), fmt.Errorf("%s replacement exceeds limit %d bytes", method, maxRegexInputBytes)
47+
}
48+
49+
re, err := regexp.Compile(pattern)
50+
if err != nil {
51+
return NewNil(), fmt.Errorf("%s invalid regex: %v", method, err)
52+
}
53+
54+
if replaceAll {
55+
replaced, err := regexReplaceAllWithLimit(re, text, replacement, method)
56+
if err != nil {
57+
return NewNil(), err
58+
}
59+
return NewString(replaced), nil
60+
}
61+
62+
loc := re.FindStringSubmatchIndex(text)
63+
if loc == nil {
64+
return NewString(text), nil
65+
}
66+
replaced := string(re.ExpandString(nil, replacement, text, loc))
67+
outputLen := len(text) - (loc[1] - loc[0]) + len(replaced)
68+
if outputLen > maxRegexInputBytes {
69+
return NewNil(), fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
70+
}
71+
return NewString(text[:loc[0]] + replaced + text[loc[1]:]), nil
72+
}
73+
74+
func regexReplaceAllWithLimit(re *regexp.Regexp, text string, replacement string, method string) (string, error) {
75+
out := make([]byte, 0, len(text))
76+
lastAppended := 0
77+
searchStart := 0
78+
lastMatchEnd := -1
79+
for searchStart <= len(text) {
80+
loc, found := nextRegexReplaceAllSubmatchIndex(re, text, searchStart)
81+
if !found {
82+
break
83+
}
84+
if loc[0] == loc[1] && loc[0] == lastMatchEnd {
85+
if loc[0] >= len(text) {
86+
break
87+
}
88+
_, size := utf8.DecodeRuneInString(text[loc[0]:])
89+
if size == 0 {
90+
size = 1
91+
}
92+
searchStart = loc[0] + size
93+
continue
94+
}
95+
96+
segmentLen := loc[0] - lastAppended
97+
if len(out) > maxRegexInputBytes-segmentLen {
98+
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
99+
}
100+
out = append(out, text[lastAppended:loc[0]]...)
101+
out = re.ExpandString(out, replacement, text, loc)
102+
if len(out) > maxRegexInputBytes {
103+
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
104+
}
105+
lastAppended = loc[1]
106+
lastMatchEnd = loc[1]
107+
108+
if loc[1] > loc[0] {
109+
searchStart = loc[1]
110+
continue
111+
}
112+
if loc[1] >= len(text) {
113+
break
114+
}
115+
_, size := utf8.DecodeRuneInString(text[loc[1]:])
116+
if size == 0 {
117+
size = 1
118+
}
119+
searchStart = loc[1] + size
120+
}
121+
122+
tailLen := len(text) - lastAppended
123+
if len(out) > maxRegexInputBytes-tailLen {
124+
return "", fmt.Errorf("%s output exceeds limit %d bytes", method, maxRegexInputBytes)
125+
}
126+
out = append(out, text[lastAppended:]...)
127+
return string(out), nil
128+
}
129+
130+
func nextRegexReplaceAllSubmatchIndex(re *regexp.Regexp, text string, start int) ([]int, bool) {
131+
loc := re.FindStringSubmatchIndex(text[start:])
132+
if loc == nil {
133+
return nil, false
134+
}
135+
direct := offsetRegexSubmatchIndex(loc, start)
136+
if start == 0 || direct[0] > start {
137+
return direct, true
138+
}
139+
140+
windowStart := start - 1
141+
locs := re.FindAllStringSubmatchIndex(text[windowStart:], 2)
142+
if len(locs) == 0 {
143+
return nil, false
144+
}
145+
146+
first := offsetRegexSubmatchIndex(locs[0], windowStart)
147+
if first[0] >= start {
148+
return first, true
149+
}
150+
if first[1] > start {
151+
return direct, true
152+
}
153+
if len(locs) < 2 {
154+
return nil, false
155+
}
156+
second := offsetRegexSubmatchIndex(locs[1], windowStart)
157+
if second[0] >= start {
158+
return second, true
159+
}
160+
return nil, false
161+
}
162+
163+
// offsetRegexSubmatchIndex returns a new slice holding each index of loc
// shifted by offset. Negative entries are stored as -1 rather than shifted.
// loc itself is not modified.
func offsetRegexSubmatchIndex(loc []int, offset int) []int {
	shifted := make([]int, 0, len(loc))
	for _, pos := range loc {
		if pos >= 0 {
			pos += offset
		} else {
			pos = -1
		}
		shifted = append(shifted, pos)
	}
	return shifted
}

0 commit comments

Comments
 (0)