...

Source file src/regexp/exec_test.go

Documentation: regexp

		 1  // Copyright 2010 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package regexp
		 6  
		 7  import (
		 8  	"bufio"
		 9  	"compress/bzip2"
		10  	"fmt"
		11  	"internal/testenv"
		12  	"io"
		13  	"os"
		14  	"path/filepath"
		15  	"regexp/syntax"
		16  	"strconv"
		17  	"strings"
		18  	"testing"
		19  	"unicode/utf8"
		20  )
		21  
		22  // TestRE2 tests this package's regexp API against test cases
		23  // considered during RE2's exhaustive tests, which run all possible
		24  // regexps over a given set of atoms and operators, up to a given
		25  // complexity, over all possible strings over a given alphabet,
		26  // up to a given size. Rather than try to link with RE2, we read a
		27  // log file containing the test cases and the expected matches.
		28  // The log file, re2-exhaustive.txt, is generated by running 'make log'
		29  // in the open source RE2 distribution https://github.com/google/re2/.
		30  //
		31  // The test file format is a sequence of stanzas like:
		32  //
		33  //	strings
		34  //	"abc"
		35  //	"123x"
		36  //	regexps
		37  //	"[a-z]+"
		38  //	0-3;0-3
		39  //	-;-
		40  //	"([0-9])([0-9])([0-9])"
		41  //	-;-
		42  //	-;0-3 0-1 1-2 2-3
		43  //
		44  // The stanza begins by defining a set of strings, quoted
		45  // using Go double-quote syntax, one per line. Then the
		46  // regexps section gives a sequence of regexps to run on
		47  // the strings. In the block that follows a regexp, each line
		48  // gives the semicolon-separated match results of running
		49  // the regexp on the corresponding string.
		50  // Each match result is either a single -, meaning no match, or a
		51  // space-separated sequence of pairs giving the match and
		52  // submatch indices. An unmatched subexpression formats
		53  // its pair as a single - (not illustrated above).	For now
		54  // each regexp run produces two match results, one for a
		55  // ``full match'' that restricts the regexp to matching the entire
		56  // string or nothing, and one for a ``partial match'' that gives
		57  // the leftmost first match found in the string.
		58  //
		59  // Lines beginning with # are comments. Lines beginning with
		60  // a capital letter are test names printed during RE2's test suite
		61  // and are echoed into t but otherwise ignored.
		62  //
		63  // At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
		64  // so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
		65  //
		66  func TestRE2Search(t *testing.T) {
		67  	testRE2(t, "testdata/re2-search.txt")
		68  }
		69  
		70  func testRE2(t *testing.T, file string) {
		71  	f, err := os.Open(file)
		72  	if err != nil {
		73  		t.Fatal(err)
		74  	}
		75  	defer f.Close()
		76  	var txt io.Reader
		77  	if strings.HasSuffix(file, ".bz2") {
		78  		z := bzip2.NewReader(f)
		79  		txt = z
		80  		file = file[:len(file)-len(".bz2")] // for error messages
		81  	} else {
		82  		txt = f
		83  	}
		84  	lineno := 0
		85  	scanner := bufio.NewScanner(txt)
		86  	var (
		87  		str			 []string
		88  		input		 []string
		89  		inStrings bool
		90  		re				*Regexp
		91  		refull		*Regexp
		92  		nfail		 int
		93  		ncase		 int
		94  	)
		95  	for lineno := 1; scanner.Scan(); lineno++ {
		96  		line := scanner.Text()
		97  		switch {
		98  		case line == "":
		99  			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
	 100  		case line[0] == '#':
	 101  			continue
	 102  		case 'A' <= line[0] && line[0] <= 'Z':
	 103  			// Test name.
	 104  			t.Logf("%s\n", line)
	 105  			continue
	 106  		case line == "strings":
	 107  			str = str[:0]
	 108  			inStrings = true
	 109  		case line == "regexps":
	 110  			inStrings = false
	 111  		case line[0] == '"':
	 112  			q, err := strconv.Unquote(line)
	 113  			if err != nil {
	 114  				// Fatal because we'll get out of sync.
	 115  				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
	 116  			}
	 117  			if inStrings {
	 118  				str = append(str, q)
	 119  				continue
	 120  			}
	 121  			// Is a regexp.
	 122  			if len(input) != 0 {
	 123  				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
	 124  			}
	 125  			re, err = tryCompile(q)
	 126  			if err != nil {
	 127  				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
	 128  					// We don't and likely never will support \C; keep going.
	 129  					continue
	 130  				}
	 131  				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
	 132  				if nfail++; nfail >= 100 {
	 133  					t.Fatalf("stopping after %d errors", nfail)
	 134  				}
	 135  				continue
	 136  			}
	 137  			full := `\A(?:` + q + `)\z`
	 138  			refull, err = tryCompile(full)
	 139  			if err != nil {
	 140  				// Fatal because q worked, so this should always work.
	 141  				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
	 142  			}
	 143  			input = str
	 144  		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
	 145  			// A sequence of match results.
	 146  			ncase++
	 147  			if re == nil {
	 148  				// Failed to compile: skip results.
	 149  				continue
	 150  			}
	 151  			if len(input) == 0 {
	 152  				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
	 153  			}
	 154  			var text string
	 155  			text, input = input[0], input[1:]
	 156  			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
	 157  				// RE2's \B considers every byte position,
	 158  				// so it sees 'not word boundary' in the
	 159  				// middle of UTF-8 sequences. This package
	 160  				// only considers the positions between runes,
	 161  				// so it disagrees. Skip those cases.
	 162  				continue
	 163  			}
	 164  			res := strings.Split(line, ";")
	 165  			if len(res) != len(run) {
	 166  				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
	 167  			}
	 168  			for i := range res {
	 169  				have, suffix := run[i](re, refull, text)
	 170  				want := parseResult(t, file, lineno, res[i])
	 171  				if !same(have, want) {
	 172  					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
	 173  					if nfail++; nfail >= 100 {
	 174  						t.Fatalf("stopping after %d errors", nfail)
	 175  					}
	 176  					continue
	 177  				}
	 178  				b, suffix := match[i](re, refull, text)
	 179  				if b != (want != nil) {
	 180  					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
	 181  					if nfail++; nfail >= 100 {
	 182  						t.Fatalf("stopping after %d errors", nfail)
	 183  					}
	 184  					continue
	 185  				}
	 186  			}
	 187  
	 188  		default:
	 189  			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
	 190  		}
	 191  	}
	 192  	if err := scanner.Err(); err != nil {
	 193  		t.Fatalf("%s:%d: %v", file, lineno, err)
	 194  	}
	 195  	if len(input) != 0 {
	 196  		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
	 197  	}
	 198  	t.Logf("%d cases tested", ncase)
	 199  }
	 200  
	 201  var run = []func(*Regexp, *Regexp, string) ([]int, string){
	 202  	runFull,
	 203  	runPartial,
	 204  	runFullLongest,
	 205  	runPartialLongest,
	 206  }
	 207  
	 208  func runFull(re, refull *Regexp, text string) ([]int, string) {
	 209  	refull.longest = false
	 210  	return refull.FindStringSubmatchIndex(text), "[full]"
	 211  }
	 212  
	 213  func runPartial(re, refull *Regexp, text string) ([]int, string) {
	 214  	re.longest = false
	 215  	return re.FindStringSubmatchIndex(text), ""
	 216  }
	 217  
	 218  func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
	 219  	refull.longest = true
	 220  	return refull.FindStringSubmatchIndex(text), "[full,longest]"
	 221  }
	 222  
	 223  func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
	 224  	re.longest = true
	 225  	return re.FindStringSubmatchIndex(text), "[longest]"
	 226  }
	 227  
	 228  var match = []func(*Regexp, *Regexp, string) (bool, string){
	 229  	matchFull,
	 230  	matchPartial,
	 231  	matchFullLongest,
	 232  	matchPartialLongest,
	 233  }
	 234  
	 235  func matchFull(re, refull *Regexp, text string) (bool, string) {
	 236  	refull.longest = false
	 237  	return refull.MatchString(text), "[full]"
	 238  }
	 239  
	 240  func matchPartial(re, refull *Regexp, text string) (bool, string) {
	 241  	re.longest = false
	 242  	return re.MatchString(text), ""
	 243  }
	 244  
	 245  func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
	 246  	refull.longest = true
	 247  	return refull.MatchString(text), "[full,longest]"
	 248  }
	 249  
	 250  func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
	 251  	re.longest = true
	 252  	return re.MatchString(text), "[longest]"
	 253  }
	 254  
	 255  func isSingleBytes(s string) bool {
	 256  	for _, c := range s {
	 257  		if c >= utf8.RuneSelf {
	 258  			return false
	 259  		}
	 260  	}
	 261  	return true
	 262  }
	 263  
	 264  func tryCompile(s string) (re *Regexp, err error) {
	 265  	// Protect against panic during Compile.
	 266  	defer func() {
	 267  		if r := recover(); r != nil {
	 268  			err = fmt.Errorf("panic: %v", r)
	 269  		}
	 270  	}()
	 271  	return Compile(s)
	 272  }
	 273  
	 274  func parseResult(t *testing.T, file string, lineno int, res string) []int {
	 275  	// A single - indicates no match.
	 276  	if res == "-" {
	 277  		return nil
	 278  	}
	 279  	// Otherwise, a space-separated list of pairs.
	 280  	n := 1
	 281  	for j := 0; j < len(res); j++ {
	 282  		if res[j] == ' ' {
	 283  			n++
	 284  		}
	 285  	}
	 286  	out := make([]int, 2*n)
	 287  	i := 0
	 288  	n = 0
	 289  	for j := 0; j <= len(res); j++ {
	 290  		if j == len(res) || res[j] == ' ' {
	 291  			// Process a single pair.	- means no submatch.
	 292  			pair := res[i:j]
	 293  			if pair == "-" {
	 294  				out[n] = -1
	 295  				out[n+1] = -1
	 296  			} else {
	 297  				k := strings.Index(pair, "-")
	 298  				if k < 0 {
	 299  					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
	 300  				}
	 301  				lo, err1 := strconv.Atoi(pair[:k])
	 302  				hi, err2 := strconv.Atoi(pair[k+1:])
	 303  				if err1 != nil || err2 != nil || lo > hi {
	 304  					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
	 305  				}
	 306  				out[n] = lo
	 307  				out[n+1] = hi
	 308  			}
	 309  			n += 2
	 310  			i = j + 1
	 311  		}
	 312  	}
	 313  	return out
	 314  }
	 315  
	 316  func same(x, y []int) bool {
	 317  	if len(x) != len(y) {
	 318  		return false
	 319  	}
	 320  	for i, xi := range x {
	 321  		if xi != y[i] {
	 322  			return false
	 323  		}
	 324  	}
	 325  	return true
	 326  }
	 327  
	 328  // TestFowler runs this package's regexp API against the
	 329  // POSIX regular expression tests collected by Glenn Fowler
	 330  // at http://www2.research.att.com/~astopen/testregex/testregex.html.
	 331  func TestFowler(t *testing.T) {
	 332  	files, err := filepath.Glob("testdata/*.dat")
	 333  	if err != nil {
	 334  		t.Fatal(err)
	 335  	}
	 336  	for _, file := range files {
	 337  		t.Log(file)
	 338  		testFowler(t, file)
	 339  	}
	 340  }
	 341  
	 342  var notab = MustCompilePOSIX(`[^\t]+`)
	 343  
	 344  func testFowler(t *testing.T, file string) {
	 345  	f, err := os.Open(file)
	 346  	if err != nil {
	 347  		t.Error(err)
	 348  		return
	 349  	}
	 350  	defer f.Close()
	 351  	b := bufio.NewReader(f)
	 352  	lineno := 0
	 353  	lastRegexp := ""
	 354  Reading:
	 355  	for {
	 356  		lineno++
	 357  		line, err := b.ReadString('\n')
	 358  		if err != nil {
	 359  			if err != io.EOF {
	 360  				t.Errorf("%s:%d: %v", file, lineno, err)
	 361  			}
	 362  			break Reading
	 363  		}
	 364  
	 365  		// http://www2.research.att.com/~astopen/man/man1/testregex.html
	 366  		//
	 367  		// INPUT FORMAT
	 368  		//	 Input lines may be blank, a comment beginning with #, or a test
	 369  		//	 specification. A specification is five fields separated by one
	 370  		//	 or more tabs. NULL denotes the empty string and NIL denotes the
	 371  		//	 0 pointer.
	 372  		if line[0] == '#' || line[0] == '\n' {
	 373  			continue Reading
	 374  		}
	 375  		line = line[:len(line)-1]
	 376  		field := notab.FindAllString(line, -1)
	 377  		for i, f := range field {
	 378  			if f == "NULL" {
	 379  				field[i] = ""
	 380  			}
	 381  			if f == "NIL" {
	 382  				t.Logf("%s:%d: skip: %s", file, lineno, line)
	 383  				continue Reading
	 384  			}
	 385  		}
	 386  		if len(field) == 0 {
	 387  			continue Reading
	 388  		}
	 389  
	 390  		//	 Field 1: the regex(3) flags to apply, one character per REG_feature
	 391  		//	 flag. The test is skipped if REG_feature is not supported by the
	 392  		//	 implementation. If the first character is not [BEASKLP] then the
	 393  		//	 specification is a global control line. One or more of [BEASKLP] may be
	 394  		//	 specified; the test will be repeated for each mode.
	 395  		//
	 396  		//		 B 	basic			BRE	(grep, ed, sed)
	 397  		//		 E 	REG_EXTENDED		ERE	(egrep)
	 398  		//		 A	REG_AUGMENTED		ARE	(egrep with negation)
	 399  		//		 S	REG_SHELL		SRE	(sh glob)
	 400  		//		 K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
	 401  		//		 L	REG_LITERAL		LRE	(fgrep)
	 402  		//
	 403  		//		 a	REG_LEFT|REG_RIGHT	implicit ^...$
	 404  		//		 b	REG_NOTBOL		lhs does not match ^
	 405  		//		 c	REG_COMMENT		ignore space and #...\n
	 406  		//		 d	REG_SHELL_DOT		explicit leading . match
	 407  		//		 e	REG_NOTEOL		rhs does not match $
	 408  		//		 f	REG_MULTIPLE		multiple \n separated patterns
	 409  		//		 g	FNM_LEADING_DIR		testfnmatch only -- match until /
	 410  		//		 h	REG_MULTIREF		multiple digit backref
	 411  		//		 i	REG_ICASE		ignore case
	 412  		//		 j	REG_SPAN		. matches \n
	 413  		//		 k	REG_ESCAPE		\ to escape [...] delimiter
	 414  		//		 l	REG_LEFT		implicit ^...
	 415  		//		 m	REG_MINIMAL		minimal match
	 416  		//		 n	REG_NEWLINE		explicit \n match
	 417  		//		 o	REG_ENCLOSED		(|&) magic inside [@|&](...)
	 418  		//		 p	REG_SHELL_PATH		explicit / match
	 419  		//		 q	REG_DELIMITED		delimited pattern
	 420  		//		 r	REG_RIGHT		implicit ...$
	 421  		//		 s	REG_SHELL_ESCAPED	\ not special
	 422  		//		 t	REG_MUSTDELIM		all delimiters must be specified
	 423  		//		 u	standard unspecified behavior -- errors not counted
	 424  		//		 v	REG_CLASS_ESCAPE	\ special inside [...]
	 425  		//		 w	REG_NOSUB		no subexpression match array
	 426  		//		 x	REG_LENIENT		let some errors slide
	 427  		//		 y	REG_LEFT		regexec() implicit ^...
	 428  		//		 z	REG_NULL		NULL subexpressions ok
	 429  		//		 $													expand C \c escapes in fields 2 and 3
	 430  		//		 /													field 2 is a regsubcomp() expression
	 431  		//		 =													field 3 is a regdecomp() expression
	 432  		//
	 433  		//	 Field 1 control lines:
	 434  		//
	 435  		//		 C		set LC_COLLATE and LC_CTYPE to locale in field 2
	 436  		//
	 437  		//		 ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
	 438  		//		 &test ...	output field 5 if current and previous passed
	 439  		//		 |test ...	output field 5 if current passed and previous failed
	 440  		//		 ; ...	output field 2 if previous failed
	 441  		//		 {test ...	skip if failed until }
	 442  		//		 }		end of skip
	 443  		//
	 444  		//		 : comment		comment copied as output NOTE
	 445  		//		 :comment:test	:comment: ignored
	 446  		//		 N[OTE] comment	comment copied as output NOTE
	 447  		//		 T[EST] comment	comment
	 448  		//
	 449  		//		 number		use number for nmatch (20 by default)
	 450  		flag := field[0]
	 451  		switch flag[0] {
	 452  		case '?', '&', '|', ';', '{', '}':
	 453  			// Ignore all the control operators.
	 454  			// Just run everything.
	 455  			flag = flag[1:]
	 456  			if flag == "" {
	 457  				continue Reading
	 458  			}
	 459  		case ':':
	 460  			i := strings.Index(flag[1:], ":")
	 461  			if i < 0 {
	 462  				t.Logf("skip: %s", line)
	 463  				continue Reading
	 464  			}
	 465  			flag = flag[1+i+1:]
	 466  		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
	 467  			t.Logf("skip: %s", line)
	 468  			continue Reading
	 469  		}
	 470  
	 471  		// Can check field count now that we've handled the myriad comment formats.
	 472  		if len(field) < 4 {
	 473  			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
	 474  			continue Reading
	 475  		}
	 476  
	 477  		// Expand C escapes (a.k.a. Go escapes).
	 478  		if strings.Contains(flag, "$") {
	 479  			f := `"` + field[1] + `"`
	 480  			if field[1], err = strconv.Unquote(f); err != nil {
	 481  				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
	 482  			}
	 483  			f = `"` + field[2] + `"`
	 484  			if field[2], err = strconv.Unquote(f); err != nil {
	 485  				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
	 486  			}
	 487  		}
	 488  
	 489  		//	 Field 2: the regular expression pattern; SAME uses the pattern from
	 490  		//		 the previous specification.
	 491  		//
	 492  		if field[1] == "SAME" {
	 493  			field[1] = lastRegexp
	 494  		}
	 495  		lastRegexp = field[1]
	 496  
	 497  		//	 Field 3: the string to match.
	 498  		text := field[2]
	 499  
	 500  		//	 Field 4: the test outcome...
	 501  		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
	 502  		if !ok {
	 503  			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
	 504  			continue Reading
	 505  		}
	 506  
	 507  		//	 Field 5: optional comment appended to the report.
	 508  
	 509  	Testing:
	 510  		// Run test once for each specified capital letter mode that we support.
	 511  		for _, c := range flag {
	 512  			pattern := field[1]
	 513  			syn := syntax.POSIX | syntax.ClassNL
	 514  			switch c {
	 515  			default:
	 516  				continue Testing
	 517  			case 'E':
	 518  				// extended regexp (what we support)
	 519  			case 'L':
	 520  				// literal
	 521  				pattern = QuoteMeta(pattern)
	 522  			}
	 523  
	 524  			for _, c := range flag {
	 525  				switch c {
	 526  				case 'i':
	 527  					syn |= syntax.FoldCase
	 528  				}
	 529  			}
	 530  
	 531  			re, err := compile(pattern, syn, true)
	 532  			if err != nil {
	 533  				if shouldCompile {
	 534  					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
	 535  				}
	 536  				continue Testing
	 537  			}
	 538  			if !shouldCompile {
	 539  				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
	 540  				continue Testing
	 541  			}
	 542  			match := re.MatchString(text)
	 543  			if match != shouldMatch {
	 544  				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
	 545  				continue Testing
	 546  			}
	 547  			have := re.FindStringSubmatchIndex(text)
	 548  			if (len(have) > 0) != match {
	 549  				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
	 550  				continue Testing
	 551  			}
	 552  			if len(have) > len(pos) {
	 553  				have = have[:len(pos)]
	 554  			}
	 555  			if !same(have, pos) {
	 556  				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
	 557  			}
	 558  		}
	 559  	}
	 560  }
	 561  
	 562  func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	 563  	//	 Field 4: the test outcome. This is either one of the posix error
	 564  	//		 codes (with REG_ omitted) or the match array, a list of (m,n)
	 565  	//		 entries with m and n being first and last+1 positions in the
	 566  	//		 field 3 string, or NULL if REG_NOSUB is in effect and success
	 567  	//		 is expected. BADPAT is acceptable in place of any regcomp(3)
	 568  	//		 error code. The match[] array is initialized to (-2,-2) before
	 569  	//		 each test. All array elements from 0 to nmatch-1 must be specified
	 570  	//		 in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
	 571  	//		 Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
	 572  	//		 matched (?{...}) expression, where x is the text enclosed by {...},
	 573  	//		 o is the expression ordinal counting from 1, and n is the length of
	 574  	//		 the unmatched portion of the subject string. If x starts with a
	 575  	//		 number then that is the return value of re_execf(), otherwise 0 is
	 576  	//		 returned.
	 577  	switch {
	 578  	case s == "":
	 579  		// Match with no position information.
	 580  		ok = true
	 581  		compiled = true
	 582  		matched = true
	 583  		return
	 584  	case s == "NOMATCH":
	 585  		// Match failure.
	 586  		ok = true
	 587  		compiled = true
	 588  		matched = false
	 589  		return
	 590  	case 'A' <= s[0] && s[0] <= 'Z':
	 591  		// All the other error codes are compile errors.
	 592  		ok = true
	 593  		compiled = false
	 594  		return
	 595  	}
	 596  	compiled = true
	 597  
	 598  	var x []int
	 599  	for s != "" {
	 600  		var end byte = ')'
	 601  		if len(x)%2 == 0 {
	 602  			if s[0] != '(' {
	 603  				ok = false
	 604  				return
	 605  			}
	 606  			s = s[1:]
	 607  			end = ','
	 608  		}
	 609  		i := 0
	 610  		for i < len(s) && s[i] != end {
	 611  			i++
	 612  		}
	 613  		if i == 0 || i == len(s) {
	 614  			ok = false
	 615  			return
	 616  		}
	 617  		var v = -1
	 618  		var err error
	 619  		if s[:i] != "?" {
	 620  			v, err = strconv.Atoi(s[:i])
	 621  			if err != nil {
	 622  				ok = false
	 623  				return
	 624  			}
	 625  		}
	 626  		x = append(x, v)
	 627  		s = s[i+1:]
	 628  	}
	 629  	if len(x)%2 != 0 {
	 630  		ok = false
	 631  		return
	 632  	}
	 633  	ok = true
	 634  	matched = true
	 635  	pos = x
	 636  	return
	 637  }
	 638  
	 639  var text []byte
	 640  
	 641  func makeText(n int) []byte {
	 642  	if len(text) >= n {
	 643  		return text[:n]
	 644  	}
	 645  	text = make([]byte, n)
	 646  	x := ^uint32(0)
	 647  	for i := range text {
	 648  		x += x
	 649  		x ^= 1
	 650  		if int32(x) < 0 {
	 651  			x ^= 0x88888eef
	 652  		}
	 653  		if x%31 == 0 {
	 654  			text[i] = '\n'
	 655  		} else {
	 656  			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
	 657  		}
	 658  	}
	 659  	return text
	 660  }
	 661  
	 662  func BenchmarkMatch(b *testing.B) {
	 663  	isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
	 664  
	 665  	for _, data := range benchData {
	 666  		r := MustCompile(data.re)
	 667  		for _, size := range benchSizes {
	 668  			if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
	 669  				continue
	 670  			}
	 671  			t := makeText(size.n)
	 672  			b.Run(data.name+"/"+size.name, func(b *testing.B) {
	 673  				b.SetBytes(int64(size.n))
	 674  				for i := 0; i < b.N; i++ {
	 675  					if r.Match(t) {
	 676  						b.Fatal("match!")
	 677  					}
	 678  				}
	 679  			})
	 680  		}
	 681  	}
	 682  }
	 683  
	 684  func BenchmarkMatch_onepass_regex(b *testing.B) {
	 685  	isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
	 686  	r := MustCompile(`(?s)\A.*\z`)
	 687  	if r.onepass == nil {
	 688  		b.Fatalf("want onepass regex, but %q is not onepass", r)
	 689  	}
	 690  	for _, size := range benchSizes {
	 691  		if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
	 692  			continue
	 693  		}
	 694  		t := makeText(size.n)
	 695  		b.Run(size.name, func(b *testing.B) {
	 696  			b.SetBytes(int64(size.n))
	 697  			b.ReportAllocs()
	 698  			for i := 0; i < b.N; i++ {
	 699  				if !r.Match(t) {
	 700  					b.Fatal("not match!")
	 701  				}
	 702  			}
	 703  		})
	 704  	}
	 705  }
	 706  
	 707  var benchData = []struct{ name, re string }{
	 708  	{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	 709  	{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
	 710  	{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
	 711  	{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	 712  	{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	 713  	{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
	 714  }
	 715  
	 716  var benchSizes = []struct {
	 717  	name string
	 718  	n		int
	 719  }{
	 720  	{"16", 16},
	 721  	{"32", 32},
	 722  	{"1K", 1 << 10},
	 723  	{"32K", 32 << 10},
	 724  	{"1M", 1 << 20},
	 725  	{"32M", 32 << 20},
	 726  }
	 727  
	 728  func TestLongest(t *testing.T) {
	 729  	re, err := Compile(`a(|b)`)
	 730  	if err != nil {
	 731  		t.Fatal(err)
	 732  	}
	 733  	if g, w := re.FindString("ab"), "a"; g != w {
	 734  		t.Errorf("first match was %q, want %q", g, w)
	 735  	}
	 736  	re.Longest()
	 737  	if g, w := re.FindString("ab"), "ab"; g != w {
	 738  		t.Errorf("longest match was %q, want %q", g, w)
	 739  	}
	 740  }
	 741  
	 742  // TestProgramTooLongForBacktrack tests that a regex which is too long
	 743  // for the backtracker still executes properly.
	 744  func TestProgramTooLongForBacktrack(t *testing.T) {
	 745  	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
	 746  	if !longRegex.MatchString("two") {
	 747  		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
	 748  	}
	 749  	if longRegex.MatchString("xxx") {
	 750  		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
	 751  	}
	 752  }
	 753  

View as plain text