...

Source file src/strings/replace_test.go

Documentation: strings

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package strings_test
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"fmt"
		10  	. "strings"
		11  	"testing"
		12  )
		13  
		14  var htmlEscaper = NewReplacer(
		15  	"&", "&",
		16  	"<", "&lt;",
		17  	">", "&gt;",
		18  	`"`, "&quot;",
		19  	"'", "&apos;",
		20  )
		21  
		22  var htmlUnescaper = NewReplacer(
		23  	"&amp;", "&",
		24  	"&lt;", "<",
		25  	"&gt;", ">",
		26  	"&quot;", `"`,
		27  	"&apos;", "'",
		28  )
		29  
		30  // The http package's old HTML escaping function.
		31  func oldHTMLEscape(s string) string {
		32  	s = Replace(s, "&", "&amp;", -1)
		33  	s = Replace(s, "<", "&lt;", -1)
		34  	s = Replace(s, ">", "&gt;", -1)
		35  	s = Replace(s, `"`, "&quot;", -1)
		36  	s = Replace(s, "'", "&apos;", -1)
		37  	return s
		38  }
		39  
		40  var capitalLetters = NewReplacer("a", "A", "b", "B")
		41  
		42  // TestReplacer tests the replacer implementations.
		43  func TestReplacer(t *testing.T) {
		44  	type testCase struct {
		45  		r			 *Replacer
		46  		in, out string
		47  	}
		48  	var testCases []testCase
		49  
		50  	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
		51  	str := func(b byte) string {
		52  		return string([]byte{b})
		53  	}
		54  	var s []string
		55  
		56  	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
		57  	s = nil
		58  	for i := 0; i < 256; i++ {
		59  		s = append(s, str(byte(i)), str(byte(i+1)))
		60  	}
		61  	inc := NewReplacer(s...)
		62  
		63  	// Test cases with 1-byte old strings, 1-byte new strings.
		64  	testCases = append(testCases,
		65  		testCase{capitalLetters, "brad", "BrAd"},
		66  		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
		67  		testCase{capitalLetters, "", ""},
		68  
		69  		testCase{inc, "brad", "csbe"},
		70  		testCase{inc, "\x00\xff", "\x01\x00"},
		71  		testCase{inc, "", ""},
		72  
		73  		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
		74  	)
		75  
		76  	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
		77  	s = nil
		78  	for i := 0; i < 256; i++ {
		79  		n := i + 1 - 'a'
		80  		if n < 1 {
		81  			n = 1
		82  		}
		83  		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
		84  	}
		85  	repeat := NewReplacer(s...)
		86  
		87  	// Test cases with 1-byte old strings, variable length new strings.
		88  	testCases = append(testCases,
		89  		testCase{htmlEscaper, "No changes", "No changes"},
		90  		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
		91  		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
		92  		testCase{htmlEscaper, "", ""},
		93  
		94  		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
		95  		testCase{repeat, "abba", "abbbba"},
		96  		testCase{repeat, "", ""},
		97  
		98  		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
		99  	)
	 100  
	 101  	// The remaining test cases have variable length old strings.
	 102  
	 103  	testCases = append(testCases,
	 104  		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
	 105  		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
	 106  		testCase{htmlUnescaper, "", ""},
	 107  
	 108  		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
	 109  
	 110  		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
	 111  
	 112  		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
	 113  	)
	 114  
	 115  	// gen1 has multiple old strings of variable length. There is no
	 116  	// overall non-empty common prefix, but some pairwise common prefixes.
	 117  	gen1 := NewReplacer(
	 118  		"aaa", "3[aaa]",
	 119  		"aa", "2[aa]",
	 120  		"a", "1[a]",
	 121  		"i", "i",
	 122  		"longerst", "most long",
	 123  		"longer", "medium",
	 124  		"long", "short",
	 125  		"xx", "xx",
	 126  		"x", "X",
	 127  		"X", "Y",
	 128  		"Y", "Z",
	 129  	)
	 130  	testCases = append(testCases,
	 131  		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
	 132  		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
	 133  		testCase{gen1, "xxxxx", "xxxxX"},
	 134  		testCase{gen1, "XiX", "YiY"},
	 135  		testCase{gen1, "", ""},
	 136  	)
	 137  
	 138  	// gen2 has multiple old strings with no pairwise common prefix.
	 139  	gen2 := NewReplacer(
	 140  		"roses", "red",
	 141  		"violets", "blue",
	 142  		"sugar", "sweet",
	 143  	)
	 144  	testCases = append(testCases,
	 145  		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
	 146  		testCase{gen2, "", ""},
	 147  	)
	 148  
	 149  	// gen3 has multiple old strings with an overall common prefix.
	 150  	gen3 := NewReplacer(
	 151  		"abracadabra", "poof",
	 152  		"abracadabrakazam", "splat",
	 153  		"abraham", "lincoln",
	 154  		"abrasion", "scrape",
	 155  		"abraham", "isaac",
	 156  	)
	 157  	testCases = append(testCases,
	 158  		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
	 159  		testCase{gen3, "abrasion abracad", "scrape abracad"},
	 160  		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
	 161  		testCase{gen3, "", ""},
	 162  	)
	 163  
	 164  	// foo{1,2,3,4} have multiple old strings with an overall common prefix
	 165  	// and 1- or 2- byte extensions from the common prefix.
	 166  	foo1 := NewReplacer(
	 167  		"foo1", "A",
	 168  		"foo2", "B",
	 169  		"foo3", "C",
	 170  	)
	 171  	foo2 := NewReplacer(
	 172  		"foo1", "A",
	 173  		"foo2", "B",
	 174  		"foo31", "C",
	 175  		"foo32", "D",
	 176  	)
	 177  	foo3 := NewReplacer(
	 178  		"foo11", "A",
	 179  		"foo12", "B",
	 180  		"foo31", "C",
	 181  		"foo32", "D",
	 182  	)
	 183  	foo4 := NewReplacer(
	 184  		"foo12", "B",
	 185  		"foo32", "D",
	 186  	)
	 187  	testCases = append(testCases,
	 188  		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
	 189  		testCase{foo1, "", ""},
	 190  
	 191  		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
	 192  		testCase{foo2, "", ""},
	 193  
	 194  		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
	 195  		testCase{foo3, "", ""},
	 196  
	 197  		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
	 198  		testCase{foo4, "", ""},
	 199  	)
	 200  
	 201  	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
	 202  	allBytes := make([]byte, 256)
	 203  	for i := range allBytes {
	 204  		allBytes[i] = byte(i)
	 205  	}
	 206  	allString := string(allBytes)
	 207  	genAll := NewReplacer(
	 208  		allString, "[all]",
	 209  		"\xff", "[ff]",
	 210  		"\x00", "[00]",
	 211  	)
	 212  	testCases = append(testCases,
	 213  		testCase{genAll, allString, "[all]"},
	 214  		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
	 215  		testCase{genAll, "", ""},
	 216  	)
	 217  
	 218  	// Test cases with empty old strings.
	 219  
	 220  	blankToX1 := NewReplacer("", "X")
	 221  	blankToX2 := NewReplacer("", "X", "", "")
	 222  	blankHighPriority := NewReplacer("", "X", "o", "O")
	 223  	blankLowPriority := NewReplacer("o", "O", "", "X")
	 224  	blankNoOp1 := NewReplacer("", "")
	 225  	blankNoOp2 := NewReplacer("", "", "", "A")
	 226  	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
	 227  	testCases = append(testCases,
	 228  		testCase{blankToX1, "foo", "XfXoXoX"},
	 229  		testCase{blankToX1, "", "X"},
	 230  
	 231  		testCase{blankToX2, "foo", "XfXoXoX"},
	 232  		testCase{blankToX2, "", "X"},
	 233  
	 234  		testCase{blankHighPriority, "oo", "XOXOX"},
	 235  		testCase{blankHighPriority, "ii", "XiXiX"},
	 236  		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
	 237  		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
	 238  		testCase{blankHighPriority, "", "X"},
	 239  
	 240  		testCase{blankLowPriority, "oo", "OOX"},
	 241  		testCase{blankLowPriority, "ii", "XiXiX"},
	 242  		testCase{blankLowPriority, "oiio", "OXiXiOX"},
	 243  		testCase{blankLowPriority, "iooi", "XiOOXiX"},
	 244  		testCase{blankLowPriority, "", "X"},
	 245  
	 246  		testCase{blankNoOp1, "foo", "foo"},
	 247  		testCase{blankNoOp1, "", ""},
	 248  
	 249  		testCase{blankNoOp2, "foo", "foo"},
	 250  		testCase{blankNoOp2, "", ""},
	 251  
	 252  		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
	 253  		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
	 254  		testCase{blankFoo, "", "X"},
	 255  	)
	 256  
	 257  	// single string replacer
	 258  
	 259  	abcMatcher := NewReplacer("abc", "[match]")
	 260  
	 261  	testCases = append(testCases,
	 262  		testCase{abcMatcher, "", ""},
	 263  		testCase{abcMatcher, "ab", "ab"},
	 264  		testCase{abcMatcher, "abc", "[match]"},
	 265  		testCase{abcMatcher, "abcd", "[match]d"},
	 266  		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
	 267  	)
	 268  
	 269  	// Issue 6659 cases (more single string replacer)
	 270  
	 271  	noHello := NewReplacer("Hello", "")
	 272  	testCases = append(testCases,
	 273  		testCase{noHello, "Hello", ""},
	 274  		testCase{noHello, "Hellox", "x"},
	 275  		testCase{noHello, "xHello", "x"},
	 276  		testCase{noHello, "xHellox", "xx"},
	 277  	)
	 278  
	 279  	// No-arg test cases.
	 280  
	 281  	nop := NewReplacer()
	 282  	testCases = append(testCases,
	 283  		testCase{nop, "abc", "abc"},
	 284  		testCase{nop, "", ""},
	 285  	)
	 286  
	 287  	// Run the test cases.
	 288  
	 289  	for i, tc := range testCases {
	 290  		if s := tc.r.Replace(tc.in); s != tc.out {
	 291  			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
	 292  		}
	 293  		var buf bytes.Buffer
	 294  		n, err := tc.r.WriteString(&buf, tc.in)
	 295  		if err != nil {
	 296  			t.Errorf("%d. WriteString: %v", i, err)
	 297  			continue
	 298  		}
	 299  		got := buf.String()
	 300  		if got != tc.out {
	 301  			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
	 302  			continue
	 303  		}
	 304  		if n != len(tc.out) {
	 305  			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
	 306  				i, tc.in, n, len(tc.out), tc.out)
	 307  		}
	 308  	}
	 309  }
	 310  
	 311  var algorithmTestCases = []struct {
	 312  	r		*Replacer
	 313  	want string
	 314  }{
	 315  	{capitalLetters, "*strings.byteReplacer"},
	 316  	{htmlEscaper, "*strings.byteStringReplacer"},
	 317  	{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
	 318  	{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
	 319  	{NewReplacer("", "X"), "*strings.genericReplacer"},
	 320  	{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
	 321  }
	 322  
	 323  // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
	 324  func TestPickAlgorithm(t *testing.T) {
	 325  	for i, tc := range algorithmTestCases {
	 326  		got := fmt.Sprintf("%T", tc.r.Replacer())
	 327  		if got != tc.want {
	 328  			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
	 329  		}
	 330  	}
	 331  }
	 332  
	 333  type errWriter struct{}
	 334  
	 335  func (errWriter) Write(p []byte) (n int, err error) {
	 336  	return 0, fmt.Errorf("unwritable")
	 337  }
	 338  
	 339  // TestWriteStringError tests that WriteString returns an error
	 340  // received from the underlying io.Writer.
	 341  func TestWriteStringError(t *testing.T) {
	 342  	for i, tc := range algorithmTestCases {
	 343  		n, err := tc.r.WriteString(errWriter{}, "abc")
	 344  		if n != 0 || err == nil || err.Error() != "unwritable" {
	 345  			t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
	 346  		}
	 347  	}
	 348  }
	 349  
	 350  // TestGenericTrieBuilding verifies the structure of the generated trie. There
	 351  // is one node per line, and the key ending with the current line is in the
	 352  // trie if it ends with a "+".
	 353  func TestGenericTrieBuilding(t *testing.T) {
	 354  	testCases := []struct{ in, out string }{
	 355  		{"abc;abdef;abdefgh;xx;xy;z", `-
	 356  			a-
	 357  			.b-
	 358  			..c+
	 359  			..d-
	 360  			...ef+
	 361  			.....gh+
	 362  			x-
	 363  			.x+
	 364  			.y+
	 365  			z+
	 366  			`},
	 367  		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
	 368  			a-
	 369  			.bra-
	 370  			....c-
	 371  			.....adabra+
	 372  			...........kazam+
	 373  			....h-
	 374  			.....am+
	 375  			....s-
	 376  			.....ion+
	 377  			`},
	 378  		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
	 379  			X+
	 380  			Y+
	 381  			a+
	 382  			.a+
	 383  			..a+
	 384  			i+
	 385  			l-
	 386  			.ong+
	 387  			....er+
	 388  			......st+
	 389  			x+
	 390  			.x+
	 391  			`},
	 392  		{"foo;;foo;foo1", `+
	 393  			f-
	 394  			.oo+
	 395  			...1+
	 396  			`},
	 397  	}
	 398  
	 399  	for _, tc := range testCases {
	 400  		keys := Split(tc.in, ";")
	 401  		args := make([]string, len(keys)*2)
	 402  		for i, key := range keys {
	 403  			args[i*2] = key
	 404  		}
	 405  
	 406  		got := NewReplacer(args...).PrintTrie()
	 407  		// Remove tabs from tc.out
	 408  		wantbuf := make([]byte, 0, len(tc.out))
	 409  		for i := 0; i < len(tc.out); i++ {
	 410  			if tc.out[i] != '\t' {
	 411  				wantbuf = append(wantbuf, tc.out[i])
	 412  			}
	 413  		}
	 414  		want := string(wantbuf)
	 415  
	 416  		if got != want {
	 417  			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
	 418  		}
	 419  	}
	 420  }
	 421  
	 422  func BenchmarkGenericNoMatch(b *testing.B) {
	 423  	str := Repeat("A", 100) + Repeat("B", 100)
	 424  	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
	 425  	for i := 0; i < b.N; i++ {
	 426  		generic.Replace(str)
	 427  	}
	 428  }
	 429  
	 430  func BenchmarkGenericMatch1(b *testing.B) {
	 431  	str := Repeat("a", 100) + Repeat("b", 100)
	 432  	generic := NewReplacer("a", "A", "b", "B", "12", "123")
	 433  	for i := 0; i < b.N; i++ {
	 434  		generic.Replace(str)
	 435  	}
	 436  }
	 437  
	 438  func BenchmarkGenericMatch2(b *testing.B) {
	 439  	str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
	 440  	for i := 0; i < b.N; i++ {
	 441  		htmlUnescaper.Replace(str)
	 442  	}
	 443  }
	 444  
	 445  func benchmarkSingleString(b *testing.B, pattern, text string) {
	 446  	r := NewReplacer(pattern, "[match]")
	 447  	b.SetBytes(int64(len(text)))
	 448  	b.ResetTimer()
	 449  	for i := 0; i < b.N; i++ {
	 450  		r.Replace(text)
	 451  	}
	 452  }
	 453  
	 454  func BenchmarkSingleMaxSkipping(b *testing.B) {
	 455  	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
	 456  }
	 457  
	 458  func BenchmarkSingleLongSuffixFail(b *testing.B) {
	 459  	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
	 460  }
	 461  
	 462  func BenchmarkSingleMatch(b *testing.B) {
	 463  	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
	 464  }
	 465  
	 466  func BenchmarkByteByteNoMatch(b *testing.B) {
	 467  	str := Repeat("A", 100) + Repeat("B", 100)
	 468  	for i := 0; i < b.N; i++ {
	 469  		capitalLetters.Replace(str)
	 470  	}
	 471  }
	 472  
	 473  func BenchmarkByteByteMatch(b *testing.B) {
	 474  	str := Repeat("a", 100) + Repeat("b", 100)
	 475  	for i := 0; i < b.N; i++ {
	 476  		capitalLetters.Replace(str)
	 477  	}
	 478  }
	 479  
	 480  func BenchmarkByteStringMatch(b *testing.B) {
	 481  	str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
	 482  	for i := 0; i < b.N; i++ {
	 483  		htmlEscaper.Replace(str)
	 484  	}
	 485  }
	 486  
	 487  func BenchmarkHTMLEscapeNew(b *testing.B) {
	 488  	str := "I <3 to escape HTML & other text too."
	 489  	for i := 0; i < b.N; i++ {
	 490  		htmlEscaper.Replace(str)
	 491  	}
	 492  }
	 493  
	 494  func BenchmarkHTMLEscapeOld(b *testing.B) {
	 495  	str := "I <3 to escape HTML & other text too."
	 496  	for i := 0; i < b.N; i++ {
	 497  		oldHTMLEscape(str)
	 498  	}
	 499  }
	 500  
	 501  func BenchmarkByteStringReplacerWriteString(b *testing.B) {
	 502  	str := Repeat("I <3 to escape HTML & other text too.", 100)
	 503  	buf := new(bytes.Buffer)
	 504  	for i := 0; i < b.N; i++ {
	 505  		htmlEscaper.WriteString(buf, str)
	 506  		buf.Reset()
	 507  	}
	 508  }
	 509  
	 510  func BenchmarkByteReplacerWriteString(b *testing.B) {
	 511  	str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
	 512  	buf := new(bytes.Buffer)
	 513  	for i := 0; i < b.N; i++ {
	 514  		capitalLetters.WriteString(buf, str)
	 515  		buf.Reset()
	 516  	}
	 517  }
	 518  
	 519  // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
	 520  func BenchmarkByteByteReplaces(b *testing.B) {
	 521  	str := Repeat("a", 100) + Repeat("b", 100)
	 522  	for i := 0; i < b.N; i++ {
	 523  		Replace(Replace(str, "a", "A", -1), "b", "B", -1)
	 524  	}
	 525  }
	 526  
	 527  // BenchmarkByteByteMap compares byteByteImpl against Map.
	 528  func BenchmarkByteByteMap(b *testing.B) {
	 529  	str := Repeat("a", 100) + Repeat("b", 100)
	 530  	fn := func(r rune) rune {
	 531  		switch r {
	 532  		case 'a':
	 533  			return 'A'
	 534  		case 'b':
	 535  			return 'B'
	 536  		}
	 537  		return r
	 538  	}
	 539  	for i := 0; i < b.N; i++ {
	 540  		Map(fn, str)
	 541  	}
	 542  }
	 543  
	 544  var mapdata = []struct{ name, data string }{
	 545  	{"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
	 546  	{"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
	 547  }
	 548  
	 549  func BenchmarkMap(b *testing.B) {
	 550  	mapidentity := func(r rune) rune {
	 551  		return r
	 552  	}
	 553  
	 554  	b.Run("identity", func(b *testing.B) {
	 555  		for _, md := range mapdata {
	 556  			b.Run(md.name, func(b *testing.B) {
	 557  				for i := 0; i < b.N; i++ {
	 558  					Map(mapidentity, md.data)
	 559  				}
	 560  			})
	 561  		}
	 562  	})
	 563  
	 564  	mapchange := func(r rune) rune {
	 565  		if 'a' <= r && r <= 'z' {
	 566  			return r + 'A' - 'a'
	 567  		}
	 568  		if 'α' <= r && r <= 'ω' {
	 569  			return r + 'Α' - 'α'
	 570  		}
	 571  		return r
	 572  	}
	 573  
	 574  	b.Run("change", func(b *testing.B) {
	 575  		for _, md := range mapdata {
	 576  			b.Run(md.name, func(b *testing.B) {
	 577  				for i := 0; i < b.N; i++ {
	 578  					Map(mapchange, md.data)
	 579  				}
	 580  			})
	 581  		}
	 582  	})
	 583  }
	 584  

View as plain text