Source file
src/regexp/exec_test.go
Documentation: regexp
1
2
3
4
5 package regexp
6
7 import (
8 "bufio"
9 "compress/bzip2"
10 "fmt"
11 "internal/testenv"
12 "io"
13 "os"
14 "path/filepath"
15 "regexp/syntax"
16 "strconv"
17 "strings"
18 "testing"
19 "unicode/utf8"
20 )
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 func TestRE2Search(t *testing.T) {
67 testRE2(t, "testdata/re2-search.txt")
68 }
69
70 func testRE2(t *testing.T, file string) {
71 f, err := os.Open(file)
72 if err != nil {
73 t.Fatal(err)
74 }
75 defer f.Close()
76 var txt io.Reader
77 if strings.HasSuffix(file, ".bz2") {
78 z := bzip2.NewReader(f)
79 txt = z
80 file = file[:len(file)-len(".bz2")]
81 } else {
82 txt = f
83 }
84 lineno := 0
85 scanner := bufio.NewScanner(txt)
86 var (
87 str []string
88 input []string
89 inStrings bool
90 re *Regexp
91 refull *Regexp
92 nfail int
93 ncase int
94 )
95 for lineno := 1; scanner.Scan(); lineno++ {
96 line := scanner.Text()
97 switch {
98 case line == "":
99 t.Fatalf("%s:%d: unexpected blank line", file, lineno)
100 case line[0] == '#':
101 continue
102 case 'A' <= line[0] && line[0] <= 'Z':
103
104 t.Logf("%s\n", line)
105 continue
106 case line == "strings":
107 str = str[:0]
108 inStrings = true
109 case line == "regexps":
110 inStrings = false
111 case line[0] == '"':
112 q, err := strconv.Unquote(line)
113 if err != nil {
114
115 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
116 }
117 if inStrings {
118 str = append(str, q)
119 continue
120 }
121
122 if len(input) != 0 {
123 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
124 }
125 re, err = tryCompile(q)
126 if err != nil {
127 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
128
129 continue
130 }
131 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
132 if nfail++; nfail >= 100 {
133 t.Fatalf("stopping after %d errors", nfail)
134 }
135 continue
136 }
137 full := `\A(?:` + q + `)\z`
138 refull, err = tryCompile(full)
139 if err != nil {
140
141 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
142 }
143 input = str
144 case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
145
146 ncase++
147 if re == nil {
148
149 continue
150 }
151 if len(input) == 0 {
152 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
153 }
154 var text string
155 text, input = input[0], input[1:]
156 if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
157
158
159
160
161
162 continue
163 }
164 res := strings.Split(line, ";")
165 if len(res) != len(run) {
166 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
167 }
168 for i := range res {
169 have, suffix := run[i](re, refull, text)
170 want := parseResult(t, file, lineno, res[i])
171 if !same(have, want) {
172 t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
173 if nfail++; nfail >= 100 {
174 t.Fatalf("stopping after %d errors", nfail)
175 }
176 continue
177 }
178 b, suffix := match[i](re, refull, text)
179 if b != (want != nil) {
180 t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
181 if nfail++; nfail >= 100 {
182 t.Fatalf("stopping after %d errors", nfail)
183 }
184 continue
185 }
186 }
187
188 default:
189 t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
190 }
191 }
192 if err := scanner.Err(); err != nil {
193 t.Fatalf("%s:%d: %v", file, lineno, err)
194 }
195 if len(input) != 0 {
196 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
197 }
198 t.Logf("%d cases tested", ncase)
199 }
200
201 var run = []func(*Regexp, *Regexp, string) ([]int, string){
202 runFull,
203 runPartial,
204 runFullLongest,
205 runPartialLongest,
206 }
207
208 func runFull(re, refull *Regexp, text string) ([]int, string) {
209 refull.longest = false
210 return refull.FindStringSubmatchIndex(text), "[full]"
211 }
212
213 func runPartial(re, refull *Regexp, text string) ([]int, string) {
214 re.longest = false
215 return re.FindStringSubmatchIndex(text), ""
216 }
217
218 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
219 refull.longest = true
220 return refull.FindStringSubmatchIndex(text), "[full,longest]"
221 }
222
223 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
224 re.longest = true
225 return re.FindStringSubmatchIndex(text), "[longest]"
226 }
227
228 var match = []func(*Regexp, *Regexp, string) (bool, string){
229 matchFull,
230 matchPartial,
231 matchFullLongest,
232 matchPartialLongest,
233 }
234
235 func matchFull(re, refull *Regexp, text string) (bool, string) {
236 refull.longest = false
237 return refull.MatchString(text), "[full]"
238 }
239
240 func matchPartial(re, refull *Regexp, text string) (bool, string) {
241 re.longest = false
242 return re.MatchString(text), ""
243 }
244
245 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
246 refull.longest = true
247 return refull.MatchString(text), "[full,longest]"
248 }
249
250 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
251 re.longest = true
252 return re.MatchString(text), "[longest]"
253 }
254
255 func isSingleBytes(s string) bool {
256 for _, c := range s {
257 if c >= utf8.RuneSelf {
258 return false
259 }
260 }
261 return true
262 }
263
264 func tryCompile(s string) (re *Regexp, err error) {
265
266 defer func() {
267 if r := recover(); r != nil {
268 err = fmt.Errorf("panic: %v", r)
269 }
270 }()
271 return Compile(s)
272 }
273
274 func parseResult(t *testing.T, file string, lineno int, res string) []int {
275
276 if res == "-" {
277 return nil
278 }
279
280 n := 1
281 for j := 0; j < len(res); j++ {
282 if res[j] == ' ' {
283 n++
284 }
285 }
286 out := make([]int, 2*n)
287 i := 0
288 n = 0
289 for j := 0; j <= len(res); j++ {
290 if j == len(res) || res[j] == ' ' {
291
292 pair := res[i:j]
293 if pair == "-" {
294 out[n] = -1
295 out[n+1] = -1
296 } else {
297 k := strings.Index(pair, "-")
298 if k < 0 {
299 t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
300 }
301 lo, err1 := strconv.Atoi(pair[:k])
302 hi, err2 := strconv.Atoi(pair[k+1:])
303 if err1 != nil || err2 != nil || lo > hi {
304 t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
305 }
306 out[n] = lo
307 out[n+1] = hi
308 }
309 n += 2
310 i = j + 1
311 }
312 }
313 return out
314 }
315
316 func same(x, y []int) bool {
317 if len(x) != len(y) {
318 return false
319 }
320 for i, xi := range x {
321 if xi != y[i] {
322 return false
323 }
324 }
325 return true
326 }
327
328
329
330
331 func TestFowler(t *testing.T) {
332 files, err := filepath.Glob("testdata/*.dat")
333 if err != nil {
334 t.Fatal(err)
335 }
336 for _, file := range files {
337 t.Log(file)
338 testFowler(t, file)
339 }
340 }
341
342 var notab = MustCompilePOSIX(`[^\t]+`)
343
344 func testFowler(t *testing.T, file string) {
345 f, err := os.Open(file)
346 if err != nil {
347 t.Error(err)
348 return
349 }
350 defer f.Close()
351 b := bufio.NewReader(f)
352 lineno := 0
353 lastRegexp := ""
354 Reading:
355 for {
356 lineno++
357 line, err := b.ReadString('\n')
358 if err != nil {
359 if err != io.EOF {
360 t.Errorf("%s:%d: %v", file, lineno, err)
361 }
362 break Reading
363 }
364
365
366
367
368
369
370
371
372 if line[0] == '#' || line[0] == '\n' {
373 continue Reading
374 }
375 line = line[:len(line)-1]
376 field := notab.FindAllString(line, -1)
377 for i, f := range field {
378 if f == "NULL" {
379 field[i] = ""
380 }
381 if f == "NIL" {
382 t.Logf("%s:%d: skip: %s", file, lineno, line)
383 continue Reading
384 }
385 }
386 if len(field) == 0 {
387 continue Reading
388 }
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450 flag := field[0]
451 switch flag[0] {
452 case '?', '&', '|', ';', '{', '}':
453
454
455 flag = flag[1:]
456 if flag == "" {
457 continue Reading
458 }
459 case ':':
460 i := strings.Index(flag[1:], ":")
461 if i < 0 {
462 t.Logf("skip: %s", line)
463 continue Reading
464 }
465 flag = flag[1+i+1:]
466 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
467 t.Logf("skip: %s", line)
468 continue Reading
469 }
470
471
472 if len(field) < 4 {
473 t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
474 continue Reading
475 }
476
477
478 if strings.Contains(flag, "$") {
479 f := `"` + field[1] + `"`
480 if field[1], err = strconv.Unquote(f); err != nil {
481 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
482 }
483 f = `"` + field[2] + `"`
484 if field[2], err = strconv.Unquote(f); err != nil {
485 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
486 }
487 }
488
489
490
491
492 if field[1] == "SAME" {
493 field[1] = lastRegexp
494 }
495 lastRegexp = field[1]
496
497
498 text := field[2]
499
500
501 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
502 if !ok {
503 t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
504 continue Reading
505 }
506
507
508
509 Testing:
510
511 for _, c := range flag {
512 pattern := field[1]
513 syn := syntax.POSIX | syntax.ClassNL
514 switch c {
515 default:
516 continue Testing
517 case 'E':
518
519 case 'L':
520
521 pattern = QuoteMeta(pattern)
522 }
523
524 for _, c := range flag {
525 switch c {
526 case 'i':
527 syn |= syntax.FoldCase
528 }
529 }
530
531 re, err := compile(pattern, syn, true)
532 if err != nil {
533 if shouldCompile {
534 t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
535 }
536 continue Testing
537 }
538 if !shouldCompile {
539 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
540 continue Testing
541 }
542 match := re.MatchString(text)
543 if match != shouldMatch {
544 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
545 continue Testing
546 }
547 have := re.FindStringSubmatchIndex(text)
548 if (len(have) > 0) != match {
549 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
550 continue Testing
551 }
552 if len(have) > len(pos) {
553 have = have[:len(pos)]
554 }
555 if !same(have, pos) {
556 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
557 }
558 }
559 }
560 }
561
562 func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577 switch {
578 case s == "":
579
580 ok = true
581 compiled = true
582 matched = true
583 return
584 case s == "NOMATCH":
585
586 ok = true
587 compiled = true
588 matched = false
589 return
590 case 'A' <= s[0] && s[0] <= 'Z':
591
592 ok = true
593 compiled = false
594 return
595 }
596 compiled = true
597
598 var x []int
599 for s != "" {
600 var end byte = ')'
601 if len(x)%2 == 0 {
602 if s[0] != '(' {
603 ok = false
604 return
605 }
606 s = s[1:]
607 end = ','
608 }
609 i := 0
610 for i < len(s) && s[i] != end {
611 i++
612 }
613 if i == 0 || i == len(s) {
614 ok = false
615 return
616 }
617 var v = -1
618 var err error
619 if s[:i] != "?" {
620 v, err = strconv.Atoi(s[:i])
621 if err != nil {
622 ok = false
623 return
624 }
625 }
626 x = append(x, v)
627 s = s[i+1:]
628 }
629 if len(x)%2 != 0 {
630 ok = false
631 return
632 }
633 ok = true
634 matched = true
635 pos = x
636 return
637 }
638
639 var text []byte
640
641 func makeText(n int) []byte {
642 if len(text) >= n {
643 return text[:n]
644 }
645 text = make([]byte, n)
646 x := ^uint32(0)
647 for i := range text {
648 x += x
649 x ^= 1
650 if int32(x) < 0 {
651 x ^= 0x88888eef
652 }
653 if x%31 == 0 {
654 text[i] = '\n'
655 } else {
656 text[i] = byte(x%(0x7E+1-0x20) + 0x20)
657 }
658 }
659 return text
660 }
661
662 func BenchmarkMatch(b *testing.B) {
663 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
664
665 for _, data := range benchData {
666 r := MustCompile(data.re)
667 for _, size := range benchSizes {
668 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
669 continue
670 }
671 t := makeText(size.n)
672 b.Run(data.name+"/"+size.name, func(b *testing.B) {
673 b.SetBytes(int64(size.n))
674 for i := 0; i < b.N; i++ {
675 if r.Match(t) {
676 b.Fatal("match!")
677 }
678 }
679 })
680 }
681 }
682 }
683
684 func BenchmarkMatch_onepass_regex(b *testing.B) {
685 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
686 r := MustCompile(`(?s)\A.*\z`)
687 if r.onepass == nil {
688 b.Fatalf("want onepass regex, but %q is not onepass", r)
689 }
690 for _, size := range benchSizes {
691 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
692 continue
693 }
694 t := makeText(size.n)
695 b.Run(size.name, func(b *testing.B) {
696 b.SetBytes(int64(size.n))
697 b.ReportAllocs()
698 for i := 0; i < b.N; i++ {
699 if !r.Match(t) {
700 b.Fatal("not match!")
701 }
702 }
703 })
704 }
705 }
706
707 var benchData = []struct{ name, re string }{
708 {"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
709 {"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
710 {"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
711 {"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
712 {"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
713 {"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
714 }
715
716 var benchSizes = []struct {
717 name string
718 n int
719 }{
720 {"16", 16},
721 {"32", 32},
722 {"1K", 1 << 10},
723 {"32K", 32 << 10},
724 {"1M", 1 << 20},
725 {"32M", 32 << 20},
726 }
727
728 func TestLongest(t *testing.T) {
729 re, err := Compile(`a(|b)`)
730 if err != nil {
731 t.Fatal(err)
732 }
733 if g, w := re.FindString("ab"), "a"; g != w {
734 t.Errorf("first match was %q, want %q", g, w)
735 }
736 re.Longest()
737 if g, w := re.FindString("ab"), "ab"; g != w {
738 t.Errorf("longest match was %q, want %q", g, w)
739 }
740 }
741
742
743
744 func TestProgramTooLongForBacktrack(t *testing.T) {
745 longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
746 if !longRegex.MatchString("two") {
747 t.Errorf("longRegex.MatchString(\"two\") was false, want true")
748 }
749 if longRegex.MatchString("xxx") {
750 t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
751 }
752 }
753
View as plain text