
Source file src/regexp/find_test.go

Documentation: regexp

		 1  // Copyright 2010 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 5  package regexp
		 7  import (
		 8  	"fmt"
		 9  	"strings"
		10  	"testing"
		11  )
		13  // For each pattern/text pair, what is the expected output of each function?
		14  // We can derive the textual results from the indexed results, the non-submatch
		15  // results from the submatched results, the single results from the 'all' results,
		16  // and the byte results from the string results. Therefore the table includes
		17  // only the FindAllStringSubmatchIndex result.
		18  type FindTest struct {
		19  	pat		 string
		20  	text		string
		21  	matches [][]int
		22  }
		24  func (t FindTest) String() string {
		25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
		26  }
		// findTests is the table iterated by every Find* test in this file. Each
		// entry gives a pattern, an input text, and the expected matches as built
		// by build; a nil matches field means that no match is expected.
		28  var findTests = []FindTest{
		29  	{``, ``, build(1, 0, 0)},
		30  	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
		31  	{`a+`, "baaab", build(1, 1, 4)},
		32  	{"abcd..", "abcdef", build(1, 0, 6)},
		33  	{`a`, "a", build(1, 0, 1)},
		34  	{`x`, "y", nil},
		35  	{`b`, "abc", build(1, 1, 2)},
		36  	{`.`, "a", build(1, 0, 1)},
		37  	{`.*`, "abcdef", build(1, 0, 6)},
		38  	{`^`, "abcde", build(1, 0, 0)},
		39  	{`$`, "abcde", build(1, 5, 5)},
		40  	{`^abcd$`, "abcd", build(1, 0, 4)},
		41  	{`^bcd'`, "abcdef", nil},
		42  	{`^abcd$`, "abcde", nil},
		43  	{`a+`, "baaab", build(1, 1, 4)},
		44  	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
		45  	{`[a-z]+`, "abcd", build(1, 0, 4)},
		46  	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
		47  	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
		48  	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
		49  	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
		50  	{`日本語+`, "日本語", build(1, 0, 9)},
		51  	{`日本語+`, "日本語語語語", build(1, 0, 18)},
		52  	{`()`, "", build(1, 0, 0, 0, 0)},
		53  	{`(a)`, "a", build(1, 0, 1, 0, 1)},
		54  	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
		55  	{`(.*)`, "", build(1, 0, 0, 0, 0)},
		56  	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
		57  	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
		58  	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
		59  	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
		60  	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
		61  	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
		62  	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
		64  	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
		65  	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
		66  	{`[.]`, ".", build(1, 0, 1)},
		67  	{`/$`, "/abc/", build(1, 4, 5)},
		68  	{`/$`, "/abc", nil},
		70  	// multiple matches (the first argument to build is the number of matches)
		71  	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
		72  	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
		73  	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
		74  	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
		75  	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
		77  	// fixed bugs
		78  	{`ab$`, "cab", build(1, 1, 3)},
		79  	{`axxb$`, "axxcb", nil},
		80  	{`data`, "daXY data", build(1, 5, 9)},
		81  	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
		82  	{`zx+`, "zzx", build(1, 1, 3)},
		83  	{`ab$`, "abcab", build(1, 3, 5)},
		84  	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
		85  	{`(?:.|(?:.a))`, "", nil},
		86  	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
		87  	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
		88  	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
		89  	{`(?-s)(?:(?:^).)`, "\n", nil},
		90  	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
		91  	{`(?:(?:^).)`, "\n", nil},
		92  	{`\b`, "x", build(2, 0, 0, 1, 1)},
		93  	{`\b`, "xx", build(2, 0, 0, 2, 2)},
		94  	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
		95  	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
		96  	{`\B`, "x", nil},
		97  	{`\B`, "xx", build(1, 1, 1)},
		98  	{`\B`, "x y", nil},
		99  	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
	 100  	{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},
	 102  	// RE2 tests
	 103  	{`[^\S\s]`, "abcd", nil},
	 104  	{`[^\S[:space:]]`, "abcd", nil},
	 105  	{`[^\D\d]`, "abcd", nil},
	 106  	{`[^\D[:digit:]]`, "abcd", nil},
	 107  	{`(?i)\W`, "x", nil},
	 108  	{`(?i)\W`, "k", nil},
	 109  	{`(?i)\W`, "s", nil},
	 111  	// can backslash-escape any punctuation, both outside and inside a
	 	// character class
	 112  	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
	 113  		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	 114  	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
	 115  		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	 116  	{"\\`", "`", build(1, 0, 1)},
	 117  	{"[\\`]+", "`", build(1, 0, 1)},
	 119  	// long set of matches (longer than startSize)
	 	// NOTE(review): startSize is not defined in this file; confirm its value
	 	// is below the 36 matches expected here.
	 120  	{
	 121  		".",
	 122  		"qwertyuiopasdfghjklzxcvbnm1234567890",
	 123  		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
	 124  			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
	 125  			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
	 126  			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
	 127  	},
	 128  }
	 130  // build is a helper to construct a [][]int by extracting n sequences from x.
	 131  // This represents n matches with len(x)/n submatches each.
	 132  func build(n int, x ...int) [][]int {
	 133  	ret := make([][]int, n)
	 134  	runLength := len(x) / n
	 135  	j := 0
	 136  	for i := range ret {
	 137  		ret[i] = make([]int, runLength)
	 138  		copy(ret[i], x[j:])
	 139  		j += runLength
	 140  		if j > len(x) {
	 141  			panic("invalid build entry")
	 142  		}
	 143  	}
	 144  	return ret
	 145  }
	 147  // First the simple cases.
	 149  func TestFind(t *testing.T) {
	 150  	for _, test := range findTests {
	 151  		re := MustCompile(test.pat)
	 152  		if re.String() != test.pat {
	 153  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
	 154  		}
	 155  		result := re.Find([]byte(test.text))
	 156  		switch {
	 157  		case len(test.matches) == 0 && len(result) == 0:
	 158  			// ok
	 159  		case test.matches == nil && result != nil:
	 160  			t.Errorf("expected no match; got one: %s", test)
	 161  		case test.matches != nil && result == nil:
	 162  			t.Errorf("expected match; got none: %s", test)
	 163  		case test.matches != nil && result != nil:
	 164  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
	 165  			if len(result) != cap(result) {
	 166  				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
	 167  			}
	 168  			if expect != string(result) {
	 169  				t.Errorf("expected %q got %q: %s", expect, result, test)
	 170  			}
	 171  		}
	 172  	}
	 173  }
	 175  func TestFindString(t *testing.T) {
	 176  	for _, test := range findTests {
	 177  		result := MustCompile(test.pat).FindString(test.text)
	 178  		switch {
	 179  		case len(test.matches) == 0 && len(result) == 0:
	 180  			// ok
	 181  		case test.matches == nil && result != "":
	 182  			t.Errorf("expected no match; got one: %s", test)
	 183  		case test.matches != nil && result == "":
	 184  			// Tricky because an empty result has two meanings: no match or empty match.
	 185  			if test.matches[0][0] != test.matches[0][1] {
	 186  				t.Errorf("expected match; got none: %s", test)
	 187  			}
	 188  		case test.matches != nil && result != "":
	 189  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
	 190  			if expect != result {
	 191  				t.Errorf("expected %q got %q: %s", expect, result, test)
	 192  			}
	 193  		}
	 194  	}
	 195  }
	 197  func testFindIndex(test *FindTest, result []int, t *testing.T) {
	 198  	switch {
	 199  	case len(test.matches) == 0 && len(result) == 0:
	 200  		// ok
	 201  	case test.matches == nil && result != nil:
	 202  		t.Errorf("expected no match; got one: %s", test)
	 203  	case test.matches != nil && result == nil:
	 204  		t.Errorf("expected match; got none: %s", test)
	 205  	case test.matches != nil && result != nil:
	 206  		expect := test.matches[0]
	 207  		if expect[0] != result[0] || expect[1] != result[1] {
	 208  			t.Errorf("expected %v got %v: %s", expect, result, test)
	 209  		}
	 210  	}
	 211  }
	 213  func TestFindIndex(t *testing.T) {
	 214  	for _, test := range findTests {
	 215  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
	 216  	}
	 217  }
	 219  func TestFindStringIndex(t *testing.T) {
	 220  	for _, test := range findTests {
	 221  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
	 222  	}
	 223  }
	 225  func TestFindReaderIndex(t *testing.T) {
	 226  	for _, test := range findTests {
	 227  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
	 228  	}
	 229  }
	 231  // Now come the simple All cases.
	 233  func TestFindAll(t *testing.T) {
	 234  	for _, test := range findTests {
	 235  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
	 236  		switch {
	 237  		case test.matches == nil && result == nil:
	 238  			// ok
	 239  		case test.matches == nil && result != nil:
	 240  			t.Errorf("expected no match; got one: %s", test)
	 241  		case test.matches != nil && result == nil:
	 242  			t.Fatalf("expected match; got none: %s", test)
	 243  		case test.matches != nil && result != nil:
	 244  			if len(test.matches) != len(result) {
	 245  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
	 246  				continue
	 247  			}
	 248  			for k, e := range test.matches {
	 249  				got := result[k]
	 250  				if len(got) != cap(got) {
	 251  					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
	 252  				}
	 253  				expect := test.text[e[0]:e[1]]
	 254  				if expect != string(got) {
	 255  					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
	 256  				}
	 257  			}
	 258  		}
	 259  	}
	 260  }
	 262  func TestFindAllString(t *testing.T) {
	 263  	for _, test := range findTests {
	 264  		result := MustCompile(test.pat).FindAllString(test.text, -1)
	 265  		switch {
	 266  		case test.matches == nil && result == nil:
	 267  			// ok
	 268  		case test.matches == nil && result != nil:
	 269  			t.Errorf("expected no match; got one: %s", test)
	 270  		case test.matches != nil && result == nil:
	 271  			t.Errorf("expected match; got none: %s", test)
	 272  		case test.matches != nil && result != nil:
	 273  			if len(test.matches) != len(result) {
	 274  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
	 275  				continue
	 276  			}
	 277  			for k, e := range test.matches {
	 278  				expect := test.text[e[0]:e[1]]
	 279  				if expect != result[k] {
	 280  					t.Errorf("expected %q got %q: %s", expect, result, test)
	 281  				}
	 282  			}
	 283  		}
	 284  	}
	 285  }
	 287  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
	 288  	switch {
	 289  	case test.matches == nil && result == nil:
	 290  		// ok
	 291  	case test.matches == nil && result != nil:
	 292  		t.Errorf("expected no match; got one: %s", test)
	 293  	case test.matches != nil && result == nil:
	 294  		t.Errorf("expected match; got none: %s", test)
	 295  	case test.matches != nil && result != nil:
	 296  		if len(test.matches) != len(result) {
	 297  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
	 298  			return
	 299  		}
	 300  		for k, e := range test.matches {
	 301  			if e[0] != result[k][0] || e[1] != result[k][1] {
	 302  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
	 303  			}
	 304  		}
	 305  	}
	 306  }
	 308  func TestFindAllIndex(t *testing.T) {
	 309  	for _, test := range findTests {
	 310  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
	 311  	}
	 312  }
	 314  func TestFindAllStringIndex(t *testing.T) {
	 315  	for _, test := range findTests {
	 316  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
	 317  	}
	 318  }
	 320  // Now come the Submatch cases.
	 322  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
	 323  	if len(submatches) != len(result)*2 {
	 324  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
	 325  		return
	 326  	}
	 327  	for k := 0; k < len(submatches); k += 2 {
	 328  		if submatches[k] == -1 {
	 329  			if result[k/2] != nil {
	 330  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
	 331  			}
	 332  			continue
	 333  		}
	 334  		got := result[k/2]
	 335  		if len(got) != cap(got) {
	 336  			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
	 337  			return
	 338  		}
	 339  		expect := test.text[submatches[k]:submatches[k+1]]
	 340  		if expect != string(got) {
	 341  			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
	 342  			return
	 343  		}
	 344  	}
	 345  }
	 347  func TestFindSubmatch(t *testing.T) {
	 348  	for _, test := range findTests {
	 349  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
	 350  		switch {
	 351  		case test.matches == nil && result == nil:
	 352  			// ok
	 353  		case test.matches == nil && result != nil:
	 354  			t.Errorf("expected no match; got one: %s", test)
	 355  		case test.matches != nil && result == nil:
	 356  			t.Errorf("expected match; got none: %s", test)
	 357  		case test.matches != nil && result != nil:
	 358  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
	 359  		}
	 360  	}
	 361  }
	 363  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
	 364  	if len(submatches) != len(result)*2 {
	 365  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
	 366  		return
	 367  	}
	 368  	for k := 0; k < len(submatches); k += 2 {
	 369  		if submatches[k] == -1 {
	 370  			if result[k/2] != "" {
	 371  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
	 372  			}
	 373  			continue
	 374  		}
	 375  		expect := test.text[submatches[k]:submatches[k+1]]
	 376  		if expect != result[k/2] {
	 377  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
	 378  			return
	 379  		}
	 380  	}
	 381  }
	 383  func TestFindStringSubmatch(t *testing.T) {
	 384  	for _, test := range findTests {
	 385  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
	 386  		switch {
	 387  		case test.matches == nil && result == nil:
	 388  			// ok
	 389  		case test.matches == nil && result != nil:
	 390  			t.Errorf("expected no match; got one: %s", test)
	 391  		case test.matches != nil && result == nil:
	 392  			t.Errorf("expected match; got none: %s", test)
	 393  		case test.matches != nil && result != nil:
	 394  			testSubmatchString(&test, 0, test.matches[0], result, t)
	 395  		}
	 396  	}
	 397  }
	 399  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
	 400  	if len(expect) != len(result) {
	 401  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
	 402  		return
	 403  	}
	 404  	for k, e := range expect {
	 405  		if e != result[k] {
	 406  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
	 407  		}
	 408  	}
	 409  }
	 411  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
	 412  	switch {
	 413  	case test.matches == nil && result == nil:
	 414  		// ok
	 415  	case test.matches == nil && result != nil:
	 416  		t.Errorf("expected no match; got one: %s", test)
	 417  	case test.matches != nil && result == nil:
	 418  		t.Errorf("expected match; got none: %s", test)
	 419  	case test.matches != nil && result != nil:
	 420  		testSubmatchIndices(test, 0, test.matches[0], result, t)
	 421  	}
	 422  }
	 424  func TestFindSubmatchIndex(t *testing.T) {
	 425  	for _, test := range findTests {
	 426  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
	 427  	}
	 428  }
	 430  func TestFindStringSubmatchIndex(t *testing.T) {
	 431  	for _, test := range findTests {
	 432  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
	 433  	}
	 434  }
	 436  func TestFindReaderSubmatchIndex(t *testing.T) {
	 437  	for _, test := range findTests {
	 438  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
	 439  	}
	 440  }
	 442  // Now come the monster AllSubmatch cases.
	 444  func TestFindAllSubmatch(t *testing.T) {
	 445  	for _, test := range findTests {
	 446  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
	 447  		switch {
	 448  		case test.matches == nil && result == nil:
	 449  			// ok
	 450  		case test.matches == nil && result != nil:
	 451  			t.Errorf("expected no match; got one: %s", test)
	 452  		case test.matches != nil && result == nil:
	 453  			t.Errorf("expected match; got none: %s", test)
	 454  		case len(test.matches) != len(result):
	 455  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
	 456  		case test.matches != nil && result != nil:
	 457  			for k, match := range test.matches {
	 458  				testSubmatchBytes(&test, k, match, result[k], t)
	 459  			}
	 460  		}
	 461  	}
	 462  }
	 464  func TestFindAllStringSubmatch(t *testing.T) {
	 465  	for _, test := range findTests {
	 466  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
	 467  		switch {
	 468  		case test.matches == nil && result == nil:
	 469  			// ok
	 470  		case test.matches == nil && result != nil:
	 471  			t.Errorf("expected no match; got one: %s", test)
	 472  		case test.matches != nil && result == nil:
	 473  			t.Errorf("expected match; got none: %s", test)
	 474  		case len(test.matches) != len(result):
	 475  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
	 476  		case test.matches != nil && result != nil:
	 477  			for k, match := range test.matches {
	 478  				testSubmatchString(&test, k, match, result[k], t)
	 479  			}
	 480  		}
	 481  	}
	 482  }
	 484  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
	 485  	switch {
	 486  	case test.matches == nil && result == nil:
	 487  		// ok
	 488  	case test.matches == nil && result != nil:
	 489  		t.Errorf("expected no match; got one: %s", test)
	 490  	case test.matches != nil && result == nil:
	 491  		t.Errorf("expected match; got none: %s", test)
	 492  	case len(test.matches) != len(result):
	 493  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
	 494  	case test.matches != nil && result != nil:
	 495  		for k, match := range test.matches {
	 496  			testSubmatchIndices(test, k, match, result[k], t)
	 497  		}
	 498  	}
	 499  }
	 501  func TestFindAllSubmatchIndex(t *testing.T) {
	 502  	for _, test := range findTests {
	 503  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
	 504  	}
	 505  }
	 507  func TestFindAllStringSubmatchIndex(t *testing.T) {
	 508  	for _, test := range findTests {
	 509  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
	 510  	}
	 511  }

View as plain text