...

Source file src/strings/replace.go

Documentation: strings

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package strings
		 6  
		 7  import (
		 8  	"io"
		 9  	"sync"
		10  )
		11  
		12  // Replacer replaces a list of strings with replacements.
		13  // It is safe for concurrent use by multiple goroutines.
		14  type Replacer struct {
		15  	once	 sync.Once // guards buildOnce method
		16  	r			replacer
		17  	oldnew []string
		18  }
		19  
		20  // replacer is the interface that a replacement algorithm needs to implement.
		21  type replacer interface {
		22  	Replace(s string) string
		23  	WriteString(w io.Writer, s string) (n int, err error)
		24  }
		25  
		26  // NewReplacer returns a new Replacer from a list of old, new string
		27  // pairs. Replacements are performed in the order they appear in the
		28  // target string, without overlapping matches. The old string
		29  // comparisons are done in argument order.
		30  //
		31  // NewReplacer panics if given an odd number of arguments.
		32  func NewReplacer(oldnew ...string) *Replacer {
		33  	if len(oldnew)%2 == 1 {
		34  		panic("strings.NewReplacer: odd argument count")
		35  	}
		36  	return &Replacer{oldnew: append([]string(nil), oldnew...)}
		37  }
		38  
		39  func (r *Replacer) buildOnce() {
		40  	r.r = r.build()
		41  	r.oldnew = nil
		42  }
		43  
		44  func (b *Replacer) build() replacer {
		45  	oldnew := b.oldnew
		46  	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
		47  		return makeSingleStringReplacer(oldnew[0], oldnew[1])
		48  	}
		49  
		50  	allNewBytes := true
		51  	for i := 0; i < len(oldnew); i += 2 {
		52  		if len(oldnew[i]) != 1 {
		53  			return makeGenericReplacer(oldnew)
		54  		}
		55  		if len(oldnew[i+1]) != 1 {
		56  			allNewBytes = false
		57  		}
		58  	}
		59  
		60  	if allNewBytes {
		61  		r := byteReplacer{}
		62  		for i := range r {
		63  			r[i] = byte(i)
		64  		}
		65  		// The first occurrence of old->new map takes precedence
		66  		// over the others with the same old string.
		67  		for i := len(oldnew) - 2; i >= 0; i -= 2 {
		68  			o := oldnew[i][0]
		69  			n := oldnew[i+1][0]
		70  			r[o] = n
		71  		}
		72  		return &r
		73  	}
		74  
		75  	r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
		76  	// The first occurrence of old->new map takes precedence
		77  	// over the others with the same old string.
		78  	for i := len(oldnew) - 2; i >= 0; i -= 2 {
		79  		o := oldnew[i][0]
		80  		n := oldnew[i+1]
		81  		// To avoid counting repetitions multiple times.
		82  		if r.replacements[o] == nil {
		83  			// We need to use string([]byte{o}) instead of string(o),
		84  			// to avoid utf8 encoding of o.
		85  			// E. g. byte(150) produces string of length 2.
		86  			r.toReplace = append(r.toReplace, string([]byte{o}))
		87  		}
		88  		r.replacements[o] = []byte(n)
		89  
		90  	}
		91  	return &r
		92  }
		93  
		94  // Replace returns a copy of s with all replacements performed.
		95  func (r *Replacer) Replace(s string) string {
		96  	r.once.Do(r.buildOnce)
		97  	return r.r.Replace(s)
		98  }
		99  
	 100  // WriteString writes s to w with all replacements performed.
	 101  func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
	 102  	r.once.Do(r.buildOnce)
	 103  	return r.r.WriteString(w, s)
	 104  }
	 105  
	 106  // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
	 107  // and values may be empty. For example, the trie containing keys "ax", "ay",
	 108  // "bcbc", "x" and "xy" could have eight nodes:
	 109  //
	 110  //	n0	-
	 111  //	n1	a-
	 112  //	n2	.x+
	 113  //	n3	.y+
	 114  //	n4	b-
	 115  //	n5	.cbc+
	 116  //	n6	x+
	 117  //	n7	.y+
	 118  //
	 119  // n0 is the root node, and its children are n1, n4 and n6; n1's children are
	 120  // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
	 121  // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
	 122  // (marked with a trailing "+") are complete keys.
	 123  type trieNode struct {
	 124  	// value is the value of the trie node's key/value pair. It is empty if
	 125  	// this node is not a complete key.
	 126  	value string
	 127  	// priority is the priority (higher is more important) of the trie node's
	 128  	// key/value pair; keys are not necessarily matched shortest- or longest-
	 129  	// first. Priority is positive if this node is a complete key, and zero
	 130  	// otherwise. In the example above, positive/zero priorities are marked
	 131  	// with a trailing "+" or "-".
	 132  	priority int
	 133  
	 134  	// A trie node may have zero, one or more child nodes:
	 135  	//	* if the remaining fields are zero, there are no children.
	 136  	//	* if prefix and next are non-zero, there is one child in next.
	 137  	//	* if table is non-zero, it defines all the children.
	 138  	//
	 139  	// Prefixes are preferred over tables when there is one child, but the
	 140  	// root node always uses a table for lookup efficiency.
	 141  
	 142  	// prefix is the difference in keys between this trie node and the next.
	 143  	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	 144  	// Node n5 has no children and so has zero prefix, next and table fields.
	 145  	prefix string
	 146  	next	 *trieNode
	 147  
	 148  	// table is a lookup table indexed by the next byte in the key, after
	 149  	// remapping that byte through genericReplacer.mapping to create a dense
	 150  	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	 151  	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	 152  	// genericReplacer.tableSize will be 5. Node n0's table will be
	 153  	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	 154  	// 'a', 'b' and 'x'.
	 155  	table []*trieNode
	 156  }
	 157  
	 158  func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
	 159  	if key == "" {
	 160  		if t.priority == 0 {
	 161  			t.value = val
	 162  			t.priority = priority
	 163  		}
	 164  		return
	 165  	}
	 166  
	 167  	if t.prefix != "" {
	 168  		// Need to split the prefix among multiple nodes.
	 169  		var n int // length of the longest common prefix
	 170  		for ; n < len(t.prefix) && n < len(key); n++ {
	 171  			if t.prefix[n] != key[n] {
	 172  				break
	 173  			}
	 174  		}
	 175  		if n == len(t.prefix) {
	 176  			t.next.add(key[n:], val, priority, r)
	 177  		} else if n == 0 {
	 178  			// First byte differs, start a new lookup table here. Looking up
	 179  			// what is currently t.prefix[0] will lead to prefixNode, and
	 180  			// looking up key[0] will lead to keyNode.
	 181  			var prefixNode *trieNode
	 182  			if len(t.prefix) == 1 {
	 183  				prefixNode = t.next
	 184  			} else {
	 185  				prefixNode = &trieNode{
	 186  					prefix: t.prefix[1:],
	 187  					next:	 t.next,
	 188  				}
	 189  			}
	 190  			keyNode := new(trieNode)
	 191  			t.table = make([]*trieNode, r.tableSize)
	 192  			t.table[r.mapping[t.prefix[0]]] = prefixNode
	 193  			t.table[r.mapping[key[0]]] = keyNode
	 194  			t.prefix = ""
	 195  			t.next = nil
	 196  			keyNode.add(key[1:], val, priority, r)
	 197  		} else {
	 198  			// Insert new node after the common section of the prefix.
	 199  			next := &trieNode{
	 200  				prefix: t.prefix[n:],
	 201  				next:	 t.next,
	 202  			}
	 203  			t.prefix = t.prefix[:n]
	 204  			t.next = next
	 205  			next.add(key[n:], val, priority, r)
	 206  		}
	 207  	} else if t.table != nil {
	 208  		// Insert into existing table.
	 209  		m := r.mapping[key[0]]
	 210  		if t.table[m] == nil {
	 211  			t.table[m] = new(trieNode)
	 212  		}
	 213  		t.table[m].add(key[1:], val, priority, r)
	 214  	} else {
	 215  		t.prefix = key
	 216  		t.next = new(trieNode)
	 217  		t.next.add("", val, priority, r)
	 218  	}
	 219  }
	 220  
	 221  func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
	 222  	// Iterate down the trie to the end, and grab the value and keylen with
	 223  	// the highest priority.
	 224  	bestPriority := 0
	 225  	node := &r.root
	 226  	n := 0
	 227  	for node != nil {
	 228  		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
	 229  			bestPriority = node.priority
	 230  			val = node.value
	 231  			keylen = n
	 232  			found = true
	 233  		}
	 234  
	 235  		if s == "" {
	 236  			break
	 237  		}
	 238  		if node.table != nil {
	 239  			index := r.mapping[s[0]]
	 240  			if int(index) == r.tableSize {
	 241  				break
	 242  			}
	 243  			node = node.table[index]
	 244  			s = s[1:]
	 245  			n++
	 246  		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
	 247  			n += len(node.prefix)
	 248  			s = s[len(node.prefix):]
	 249  			node = node.next
	 250  		} else {
	 251  			break
	 252  		}
	 253  	}
	 254  	return
	 255  }
	 256  
	 257  // genericReplacer is the fully generic algorithm.
	 258  // It's used as a fallback when nothing faster can be used.
	 259  type genericReplacer struct {
	 260  	root trieNode
	 261  	// tableSize is the size of a trie node's lookup table. It is the number
	 262  	// of unique key bytes.
	 263  	tableSize int
	 264  	// mapping maps from key bytes to a dense index for trieNode.table.
	 265  	mapping [256]byte
	 266  }
	 267  
	 268  func makeGenericReplacer(oldnew []string) *genericReplacer {
	 269  	r := new(genericReplacer)
	 270  	// Find each byte used, then assign them each an index.
	 271  	for i := 0; i < len(oldnew); i += 2 {
	 272  		key := oldnew[i]
	 273  		for j := 0; j < len(key); j++ {
	 274  			r.mapping[key[j]] = 1
	 275  		}
	 276  	}
	 277  
	 278  	for _, b := range r.mapping {
	 279  		r.tableSize += int(b)
	 280  	}
	 281  
	 282  	var index byte
	 283  	for i, b := range r.mapping {
	 284  		if b == 0 {
	 285  			r.mapping[i] = byte(r.tableSize)
	 286  		} else {
	 287  			r.mapping[i] = index
	 288  			index++
	 289  		}
	 290  	}
	 291  	// Ensure root node uses a lookup table (for performance).
	 292  	r.root.table = make([]*trieNode, r.tableSize)
	 293  
	 294  	for i := 0; i < len(oldnew); i += 2 {
	 295  		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
	 296  	}
	 297  	return r
	 298  }
	 299  
	 300  type appendSliceWriter []byte
	 301  
	 302  // Write writes to the buffer to satisfy io.Writer.
	 303  func (w *appendSliceWriter) Write(p []byte) (int, error) {
	 304  	*w = append(*w, p...)
	 305  	return len(p), nil
	 306  }
	 307  
	 308  // WriteString writes to the buffer without string->[]byte->string allocations.
	 309  func (w *appendSliceWriter) WriteString(s string) (int, error) {
	 310  	*w = append(*w, s...)
	 311  	return len(s), nil
	 312  }
	 313  
	 314  type stringWriter struct {
	 315  	w io.Writer
	 316  }
	 317  
	 318  func (w stringWriter) WriteString(s string) (int, error) {
	 319  	return w.w.Write([]byte(s))
	 320  }
	 321  
	 322  func getStringWriter(w io.Writer) io.StringWriter {
	 323  	sw, ok := w.(io.StringWriter)
	 324  	if !ok {
	 325  		sw = stringWriter{w}
	 326  	}
	 327  	return sw
	 328  }
	 329  
	 330  func (r *genericReplacer) Replace(s string) string {
	 331  	buf := make(appendSliceWriter, 0, len(s))
	 332  	r.WriteString(&buf, s)
	 333  	return string(buf)
	 334  }
	 335  
	 336  func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	 337  	sw := getStringWriter(w)
	 338  	var last, wn int
	 339  	var prevMatchEmpty bool
	 340  	for i := 0; i <= len(s); {
	 341  		// Fast path: s[i] is not a prefix of any pattern.
	 342  		if i != len(s) && r.root.priority == 0 {
	 343  			index := int(r.mapping[s[i]])
	 344  			if index == r.tableSize || r.root.table[index] == nil {
	 345  				i++
	 346  				continue
	 347  			}
	 348  		}
	 349  
	 350  		// Ignore the empty match iff the previous loop found the empty match.
	 351  		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
	 352  		prevMatchEmpty = match && keylen == 0
	 353  		if match {
	 354  			wn, err = sw.WriteString(s[last:i])
	 355  			n += wn
	 356  			if err != nil {
	 357  				return
	 358  			}
	 359  			wn, err = sw.WriteString(val)
	 360  			n += wn
	 361  			if err != nil {
	 362  				return
	 363  			}
	 364  			i += keylen
	 365  			last = i
	 366  			continue
	 367  		}
	 368  		i++
	 369  	}
	 370  	if last != len(s) {
	 371  		wn, err = sw.WriteString(s[last:])
	 372  		n += wn
	 373  	}
	 374  	return
	 375  }
	 376  
	 377  // singleStringReplacer is the implementation that's used when there is only
	 378  // one string to replace (and that string has more than one byte).
	 379  type singleStringReplacer struct {
	 380  	finder *stringFinder
	 381  	// value is the new string that replaces that pattern when it's found.
	 382  	value string
	 383  }
	 384  
	 385  func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
	 386  	return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
	 387  }
	 388  
	 389  func (r *singleStringReplacer) Replace(s string) string {
	 390  	var buf []byte
	 391  	i, matched := 0, false
	 392  	for {
	 393  		match := r.finder.next(s[i:])
	 394  		if match == -1 {
	 395  			break
	 396  		}
	 397  		matched = true
	 398  		buf = append(buf, s[i:i+match]...)
	 399  		buf = append(buf, r.value...)
	 400  		i += match + len(r.finder.pattern)
	 401  	}
	 402  	if !matched {
	 403  		return s
	 404  	}
	 405  	buf = append(buf, s[i:]...)
	 406  	return string(buf)
	 407  }
	 408  
	 409  func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	 410  	sw := getStringWriter(w)
	 411  	var i, wn int
	 412  	for {
	 413  		match := r.finder.next(s[i:])
	 414  		if match == -1 {
	 415  			break
	 416  		}
	 417  		wn, err = sw.WriteString(s[i : i+match])
	 418  		n += wn
	 419  		if err != nil {
	 420  			return
	 421  		}
	 422  		wn, err = sw.WriteString(r.value)
	 423  		n += wn
	 424  		if err != nil {
	 425  			return
	 426  		}
	 427  		i += match + len(r.finder.pattern)
	 428  	}
	 429  	wn, err = sw.WriteString(s[i:])
	 430  	n += wn
	 431  	return
	 432  }
	 433  
	 434  // byteReplacer is the implementation that's used when all the "old"
	 435  // and "new" values are single ASCII bytes.
	 436  // The array contains replacement bytes indexed by old byte.
	 437  type byteReplacer [256]byte
	 438  
	 439  func (r *byteReplacer) Replace(s string) string {
	 440  	var buf []byte // lazily allocated
	 441  	for i := 0; i < len(s); i++ {
	 442  		b := s[i]
	 443  		if r[b] != b {
	 444  			if buf == nil {
	 445  				buf = []byte(s)
	 446  			}
	 447  			buf[i] = r[b]
	 448  		}
	 449  	}
	 450  	if buf == nil {
	 451  		return s
	 452  	}
	 453  	return string(buf)
	 454  }
	 455  
	 456  func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	 457  	// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
	 458  	bufsize := 32 << 10
	 459  	if len(s) < bufsize {
	 460  		bufsize = len(s)
	 461  	}
	 462  	buf := make([]byte, bufsize)
	 463  
	 464  	for len(s) > 0 {
	 465  		ncopy := copy(buf, s)
	 466  		s = s[ncopy:]
	 467  		for i, b := range buf[:ncopy] {
	 468  			buf[i] = r[b]
	 469  		}
	 470  		wn, err := w.Write(buf[:ncopy])
	 471  		n += wn
	 472  		if err != nil {
	 473  			return n, err
	 474  		}
	 475  	}
	 476  	return n, nil
	 477  }
	 478  
	 479  // byteStringReplacer is the implementation that's used when all the
	 480  // "old" values are single ASCII bytes but the "new" values vary in size.
	 481  type byteStringReplacer struct {
	 482  	// replacements contains replacement byte slices indexed by old byte.
	 483  	// A nil []byte means that the old byte should not be replaced.
	 484  	replacements [256][]byte
	 485  	// toReplace keeps a list of bytes to replace. Depending on length of toReplace
	 486  	// and length of target string it may be faster to use Count, or a plain loop.
	 487  	// We store single byte as a string, because Count takes a string.
	 488  	toReplace []string
	 489  }
	 490  
	 491  // countCutOff controls the ratio of a string length to a number of replacements
	 492  // at which (*byteStringReplacer).Replace switches algorithms.
	 493  // For strings with higher ration of length to replacements than that value,
	 494  // we call Count, for each replacement from toReplace.
	 495  // For strings, with a lower ratio we use simple loop, because of Count overhead.
	 496  // countCutOff is an empirically determined overhead multiplier.
	 497  // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
	 498  const countCutOff = 8
	 499  
	 500  func (r *byteStringReplacer) Replace(s string) string {
	 501  	newSize := len(s)
	 502  	anyChanges := false
	 503  	// Is it faster to use Count?
	 504  	if len(r.toReplace)*countCutOff <= len(s) {
	 505  		for _, x := range r.toReplace {
	 506  			if c := Count(s, x); c != 0 {
	 507  				// The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
	 508  				newSize += c * (len(r.replacements[x[0]]) - 1)
	 509  				anyChanges = true
	 510  			}
	 511  
	 512  		}
	 513  	} else {
	 514  		for i := 0; i < len(s); i++ {
	 515  			b := s[i]
	 516  			if r.replacements[b] != nil {
	 517  				// See above for explanation of -1
	 518  				newSize += len(r.replacements[b]) - 1
	 519  				anyChanges = true
	 520  			}
	 521  		}
	 522  	}
	 523  	if !anyChanges {
	 524  		return s
	 525  	}
	 526  	buf := make([]byte, newSize)
	 527  	j := 0
	 528  	for i := 0; i < len(s); i++ {
	 529  		b := s[i]
	 530  		if r.replacements[b] != nil {
	 531  			j += copy(buf[j:], r.replacements[b])
	 532  		} else {
	 533  			buf[j] = b
	 534  			j++
	 535  		}
	 536  	}
	 537  	return string(buf)
	 538  }
	 539  
	 540  func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	 541  	sw := getStringWriter(w)
	 542  	last := 0
	 543  	for i := 0; i < len(s); i++ {
	 544  		b := s[i]
	 545  		if r.replacements[b] == nil {
	 546  			continue
	 547  		}
	 548  		if last != i {
	 549  			nw, err := sw.WriteString(s[last:i])
	 550  			n += nw
	 551  			if err != nil {
	 552  				return n, err
	 553  			}
	 554  		}
	 555  		last = i + 1
	 556  		nw, err := w.Write(r.replacements[b])
	 557  		n += nw
	 558  		if err != nil {
	 559  			return n, err
	 560  		}
	 561  	}
	 562  	if last != len(s) {
	 563  		var nw int
	 564  		nw, err = sw.WriteString(s[last:])
	 565  		n += nw
	 566  	}
	 567  	return
	 568  }
	 569  

View as plain text