Source file
src/regexp/onepass.go
Documentation: regexp
1
2
3
4
5 package regexp
6
7 import (
8 "regexp/syntax"
9 "sort"
10 "strings"
11 "unicode"
12 )
13
14
15
16
17
18
19
20
21
22 type onePassProg struct {
23 Inst []onePassInst
24 Start int
25 NumCap int
26 }
27
28
29
30 type onePassInst struct {
31 syntax.Inst
32 Next []uint32
33 }
34
35
36
37
38
39
40 func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
41 i := &p.Inst[p.Start]
42 if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
43 return "", i.Op == syntax.InstMatch, uint32(p.Start)
44 }
45 pc = i.Out
46 i = &p.Inst[pc]
47 for i.Op == syntax.InstNop {
48 pc = i.Out
49 i = &p.Inst[pc]
50 }
51
52 if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
53 return "", i.Op == syntax.InstMatch, uint32(p.Start)
54 }
55
56
57 var buf strings.Builder
58 for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
59 buf.WriteRune(i.Rune[0])
60 pc, i = i.Out, &p.Inst[i.Out]
61 }
62 if i.Op == syntax.InstEmptyWidth &&
63 syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
64 p.Inst[i.Out].Op == syntax.InstMatch {
65 complete = true
66 }
67 return buf.String(), complete, pc
68 }
69
70
71
72
73
74 func onePassNext(i *onePassInst, r rune) uint32 {
75 next := i.MatchRunePos(r)
76 if next >= 0 {
77 return i.Next[next]
78 }
79 if i.Op == syntax.InstAltMatch {
80 return i.Out
81 }
82 return 0
83 }
84
85 func iop(i *syntax.Inst) syntax.InstOp {
86 op := i.Op
87 switch op {
88 case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
89 op = syntax.InstRune
90 }
91 return op
92 }
93
94
95 type queueOnePass struct {
96 sparse []uint32
97 dense []uint32
98 size, nextIndex uint32
99 }
100
101 func (q *queueOnePass) empty() bool {
102 return q.nextIndex >= q.size
103 }
104
105 func (q *queueOnePass) next() (n uint32) {
106 n = q.dense[q.nextIndex]
107 q.nextIndex++
108 return
109 }
110
111 func (q *queueOnePass) clear() {
112 q.size = 0
113 q.nextIndex = 0
114 }
115
116 func (q *queueOnePass) contains(u uint32) bool {
117 if u >= uint32(len(q.sparse)) {
118 return false
119 }
120 return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
121 }
122
123 func (q *queueOnePass) insert(u uint32) {
124 if !q.contains(u) {
125 q.insertNew(u)
126 }
127 }
128
129 func (q *queueOnePass) insertNew(u uint32) {
130 if u >= uint32(len(q.sparse)) {
131 return
132 }
133 q.sparse[u] = q.size
134 q.dense[q.size] = u
135 q.size++
136 }
137
138 func newQueue(size int) (q *queueOnePass) {
139 return &queueOnePass{
140 sparse: make([]uint32, size),
141 dense: make([]uint32, size),
142 }
143 }
144
145
146
147
148
149
150 const mergeFailed = uint32(0xffffffff)
151
152 var (
153 noRune = []rune{}
154 noNext = []uint32{mergeFailed}
155 )
156
157 func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
158 leftLen := len(*leftRunes)
159 rightLen := len(*rightRunes)
160 if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
161 panic("mergeRuneSets odd length []rune")
162 }
163 var (
164 lx, rx int
165 )
166 merged := make([]rune, 0)
167 next := make([]uint32, 0)
168 ok := true
169 defer func() {
170 if !ok {
171 merged = nil
172 next = nil
173 }
174 }()
175
176 ix := -1
177 extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
178 if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
179 return false
180 }
181 merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
182 *newLow += 2
183 ix += 2
184 next = append(next, pc)
185 return true
186 }
187
188 for lx < leftLen || rx < rightLen {
189 switch {
190 case rx >= rightLen:
191 ok = extend(&lx, leftRunes, leftPC)
192 case lx >= leftLen:
193 ok = extend(&rx, rightRunes, rightPC)
194 case (*rightRunes)[rx] < (*leftRunes)[lx]:
195 ok = extend(&rx, rightRunes, rightPC)
196 default:
197 ok = extend(&lx, leftRunes, leftPC)
198 }
199 if !ok {
200 return noRune, noNext
201 }
202 }
203 return merged, next
204 }
205
206
207 func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
208 for ix, instOriginal := range original.Inst {
209 switch instOriginal.Op {
210 case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
211 case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
212 prog.Inst[ix].Next = nil
213 case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
214 prog.Inst[ix].Next = nil
215 prog.Inst[ix] = onePassInst{Inst: instOriginal}
216 }
217 }
218 }
219
220
221 func onePassCopy(prog *syntax.Prog) *onePassProg {
222 p := &onePassProg{
223 Start: prog.Start,
224 NumCap: prog.NumCap,
225 Inst: make([]onePassInst, len(prog.Inst)),
226 }
227 for i, inst := range prog.Inst {
228 p.Inst[i] = onePassInst{Inst: inst}
229 }
230
231
232
233
234
235
236 for pc := range p.Inst {
237 switch p.Inst[pc].Op {
238 default:
239 continue
240 case syntax.InstAlt, syntax.InstAltMatch:
241
242 p_A_Other := &p.Inst[pc].Out
243 p_A_Alt := &p.Inst[pc].Arg
244
245 instAlt := p.Inst[*p_A_Alt]
246 if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
247 p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
248 instAlt = p.Inst[*p_A_Alt]
249 if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
250 continue
251 }
252 }
253 instOther := p.Inst[*p_A_Other]
254
255 if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
256
257 continue
258 }
259
260
261 p_B_Alt := &p.Inst[*p_A_Alt].Out
262 p_B_Other := &p.Inst[*p_A_Alt].Arg
263 patch := false
264 if instAlt.Out == uint32(pc) {
265 patch = true
266 } else if instAlt.Arg == uint32(pc) {
267 patch = true
268 p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
269 }
270 if patch {
271 *p_B_Alt = *p_A_Other
272 }
273
274
275
276 if *p_A_Other == *p_B_Alt {
277 *p_A_Alt = *p_B_Other
278 }
279 }
280 }
281 return p
282 }
283
284
285 type runeSlice []rune
286
287 func (p runeSlice) Len() int { return len(p) }
288 func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
289 func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
290
291 var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
292 var anyRune = []rune{0, unicode.MaxRune}
293
294
295
296
297
298
299 func makeOnePass(p *onePassProg) *onePassProg {
300
301 if len(p.Inst) >= 1000 {
302 return nil
303 }
304
305 var (
306 instQueue = newQueue(len(p.Inst))
307 visitQueue = newQueue(len(p.Inst))
308 check func(uint32, []bool) bool
309 onePassRunes = make([][]rune, len(p.Inst))
310 )
311
312
313
314 check = func(pc uint32, m []bool) (ok bool) {
315 ok = true
316 inst := &p.Inst[pc]
317 if visitQueue.contains(pc) {
318 return
319 }
320 visitQueue.insert(pc)
321 switch inst.Op {
322 case syntax.InstAlt, syntax.InstAltMatch:
323 ok = check(inst.Out, m) && check(inst.Arg, m)
324
325 matchOut := m[inst.Out]
326 matchArg := m[inst.Arg]
327 if matchOut && matchArg {
328 ok = false
329 break
330 }
331
332 if matchArg {
333 inst.Out, inst.Arg = inst.Arg, inst.Out
334 matchOut, matchArg = matchArg, matchOut
335 }
336 if matchOut {
337 m[pc] = true
338 inst.Op = syntax.InstAltMatch
339 }
340
341
342 onePassRunes[pc], inst.Next = mergeRuneSets(
343 &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
344 if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
345 ok = false
346 break
347 }
348 case syntax.InstCapture, syntax.InstNop:
349 ok = check(inst.Out, m)
350 m[pc] = m[inst.Out]
351
352 onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
353 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
354 for i := range inst.Next {
355 inst.Next[i] = inst.Out
356 }
357 case syntax.InstEmptyWidth:
358 ok = check(inst.Out, m)
359 m[pc] = m[inst.Out]
360 onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
361 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
362 for i := range inst.Next {
363 inst.Next[i] = inst.Out
364 }
365 case syntax.InstMatch, syntax.InstFail:
366 m[pc] = inst.Op == syntax.InstMatch
367 case syntax.InstRune:
368 m[pc] = false
369 if len(inst.Next) > 0 {
370 break
371 }
372 instQueue.insert(inst.Out)
373 if len(inst.Rune) == 0 {
374 onePassRunes[pc] = []rune{}
375 inst.Next = []uint32{inst.Out}
376 break
377 }
378 runes := make([]rune, 0)
379 if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
380 r0 := inst.Rune[0]
381 runes = append(runes, r0, r0)
382 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
383 runes = append(runes, r1, r1)
384 }
385 sort.Sort(runeSlice(runes))
386 } else {
387 runes = append(runes, inst.Rune...)
388 }
389 onePassRunes[pc] = runes
390 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
391 for i := range inst.Next {
392 inst.Next[i] = inst.Out
393 }
394 inst.Op = syntax.InstRune
395 case syntax.InstRune1:
396 m[pc] = false
397 if len(inst.Next) > 0 {
398 break
399 }
400 instQueue.insert(inst.Out)
401 runes := []rune{}
402
403 if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
404 r0 := inst.Rune[0]
405 runes = append(runes, r0, r0)
406 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
407 runes = append(runes, r1, r1)
408 }
409 sort.Sort(runeSlice(runes))
410 } else {
411 runes = append(runes, inst.Rune[0], inst.Rune[0])
412 }
413 onePassRunes[pc] = runes
414 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
415 for i := range inst.Next {
416 inst.Next[i] = inst.Out
417 }
418 inst.Op = syntax.InstRune
419 case syntax.InstRuneAny:
420 m[pc] = false
421 if len(inst.Next) > 0 {
422 break
423 }
424 instQueue.insert(inst.Out)
425 onePassRunes[pc] = append([]rune{}, anyRune...)
426 inst.Next = []uint32{inst.Out}
427 case syntax.InstRuneAnyNotNL:
428 m[pc] = false
429 if len(inst.Next) > 0 {
430 break
431 }
432 instQueue.insert(inst.Out)
433 onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
434 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
435 for i := range inst.Next {
436 inst.Next[i] = inst.Out
437 }
438 }
439 return
440 }
441
442 instQueue.clear()
443 instQueue.insert(uint32(p.Start))
444 m := make([]bool, len(p.Inst))
445 for !instQueue.empty() {
446 visitQueue.clear()
447 pc := instQueue.next()
448 if !check(pc, m) {
449 p = nil
450 break
451 }
452 }
453 if p != nil {
454 for i := range p.Inst {
455 p.Inst[i].Rune = onePassRunes[i]
456 }
457 }
458 return p
459 }
460
461
462
463
464
465 func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
466 if prog.Start == 0 {
467 return nil
468 }
469
470 if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
471 syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
472 return nil
473 }
474
475 for _, inst := range prog.Inst {
476 opOut := prog.Inst[inst.Out].Op
477 switch inst.Op {
478 default:
479 if opOut == syntax.InstMatch {
480 return nil
481 }
482 case syntax.InstAlt, syntax.InstAltMatch:
483 if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
484 return nil
485 }
486 case syntax.InstEmptyWidth:
487 if opOut == syntax.InstMatch {
488 if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
489 continue
490 }
491 return nil
492 }
493 }
494 }
495
496
497 p = onePassCopy(prog)
498
499
500 p = makeOnePass(p)
501
502 if p != nil {
503 cleanupOnePass(p, prog)
504 }
505 return p
506 }
507
View as plain text