Source file
src/strings/strings.go
Documentation: strings
1
2
3
4
5
6
7
8 package strings
9
10 import (
11 "internal/bytealg"
12 "unicode"
13 "unicode/utf8"
14 )
15
16
17
18
// explode splits s into a slice of UTF-8 strings, one string per Unicode
// character, up to a maximum of n strings (the last one holds whatever is
// left of s). A negative n, or an n larger than the number of runes in s,
// means "one string per rune". Invalid UTF-8 sequences are emitted as the
// replacement character U+FFFD.
func explode(s string, n int) []string {
	if total := utf8.RuneCountInString(s); n < 0 || n > total {
		n = total
	}
	parts := make([]string, n)
	if n == 0 {
		return parts
	}
	tail := n - 1
	for idx := range parts[:tail] {
		r, width := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError {
			// Normalize broken input to the canonical replacement rune.
			parts[idx] = string(utf8.RuneError)
		} else {
			parts[idx] = s[:width]
		}
		s = s[width:]
	}
	// The final slot receives the unsplit remainder.
	parts[tail] = s
	return parts
}
38
39
40
41 func Count(s, substr string) int {
42
43 if len(substr) == 0 {
44 return utf8.RuneCountInString(s) + 1
45 }
46 if len(substr) == 1 {
47 return bytealg.CountString(s, substr[0])
48 }
49 n := 0
50 for {
51 i := Index(s, substr)
52 if i == -1 {
53 return n
54 }
55 n++
56 s = s[i+len(substr):]
57 }
58 }
59
60
61 func Contains(s, substr string) bool {
62 return Index(s, substr) >= 0
63 }
64
65
66 func ContainsAny(s, chars string) bool {
67 return IndexAny(s, chars) >= 0
68 }
69
70
71 func ContainsRune(s string, r rune) bool {
72 return IndexRune(s, r) >= 0
73 }
74
75
76 func LastIndex(s, substr string) int {
77 n := len(substr)
78 switch {
79 case n == 0:
80 return len(s)
81 case n == 1:
82 return LastIndexByte(s, substr[0])
83 case n == len(s):
84 if substr == s {
85 return 0
86 }
87 return -1
88 case n > len(s):
89 return -1
90 }
91
92 hashss, pow := bytealg.HashStrRev(substr)
93 last := len(s) - n
94 var h uint32
95 for i := len(s) - 1; i >= last; i-- {
96 h = h*bytealg.PrimeRK + uint32(s[i])
97 }
98 if h == hashss && s[last:] == substr {
99 return last
100 }
101 for i := last - 1; i >= 0; i-- {
102 h *= bytealg.PrimeRK
103 h += uint32(s[i])
104 h -= pow * uint32(s[i+n])
105 if h == hashss && s[i:i+n] == substr {
106 return i
107 }
108 }
109 return -1
110 }
111
112
113 func IndexByte(s string, c byte) int {
114 return bytealg.IndexByteString(s, c)
115 }
116
117
118
119
120
121 func IndexRune(s string, r rune) int {
122 switch {
123 case 0 <= r && r < utf8.RuneSelf:
124 return IndexByte(s, byte(r))
125 case r == utf8.RuneError:
126 for i, r := range s {
127 if r == utf8.RuneError {
128 return i
129 }
130 }
131 return -1
132 case !utf8.ValidRune(r):
133 return -1
134 default:
135 return Index(s, string(r))
136 }
137 }
138
139
140
141 func IndexAny(s, chars string) int {
142 if chars == "" {
143
144 return -1
145 }
146 if len(chars) == 1 {
147
148 r := rune(chars[0])
149 if r >= utf8.RuneSelf {
150 r = utf8.RuneError
151 }
152 return IndexRune(s, r)
153 }
154 if len(s) > 8 {
155 if as, isASCII := makeASCIISet(chars); isASCII {
156 for i := 0; i < len(s); i++ {
157 if as.contains(s[i]) {
158 return i
159 }
160 }
161 return -1
162 }
163 }
164 for i, c := range s {
165 if IndexRune(chars, c) >= 0 {
166 return i
167 }
168 }
169 return -1
170 }
171
172
173
174
175 func LastIndexAny(s, chars string) int {
176 if chars == "" {
177
178 return -1
179 }
180 if len(s) == 1 {
181 rc := rune(s[0])
182 if rc >= utf8.RuneSelf {
183 rc = utf8.RuneError
184 }
185 if IndexRune(chars, rc) >= 0 {
186 return 0
187 }
188 return -1
189 }
190 if len(s) > 8 {
191 if as, isASCII := makeASCIISet(chars); isASCII {
192 for i := len(s) - 1; i >= 0; i-- {
193 if as.contains(s[i]) {
194 return i
195 }
196 }
197 return -1
198 }
199 }
200 if len(chars) == 1 {
201 rc := rune(chars[0])
202 if rc >= utf8.RuneSelf {
203 rc = utf8.RuneError
204 }
205 for i := len(s); i > 0; {
206 r, size := utf8.DecodeLastRuneInString(s[:i])
207 i -= size
208 if rc == r {
209 return i
210 }
211 }
212 return -1
213 }
214 for i := len(s); i > 0; {
215 r, size := utf8.DecodeLastRuneInString(s[:i])
216 i -= size
217 if IndexRune(chars, r) >= 0 {
218 return i
219 }
220 }
221 return -1
222 }
223
224
// LastIndexByte returns the index of the last instance of c in s, or -1 if c
// is not present in s.
func LastIndexByte(s string, c byte) int {
	pos := len(s)
	for pos > 0 {
		pos--
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
233
234
235
236 func genSplit(s, sep string, sepSave, n int) []string {
237 if n == 0 {
238 return nil
239 }
240 if sep == "" {
241 return explode(s, n)
242 }
243 if n < 0 {
244 n = Count(s, sep) + 1
245 }
246
247 a := make([]string, n)
248 n--
249 i := 0
250 for i < n {
251 m := Index(s, sep)
252 if m < 0 {
253 break
254 }
255 a[i] = s[:m+sepSave]
256 s = s[m+len(sep):]
257 i++
258 }
259 a[i] = s
260 return a[:i+1]
261 }
262
263
264
265
266
267
268
269
270
271
272
273 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
274
275
276
277
278
279
280
281
282
283
284
285 func SplitAfterN(s, sep string, n int) []string {
286 return genSplit(s, sep, len(sep), n)
287 }
288
289
290
291
292
293
294
295
296
297
298
299 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
300
301
302
303
304
305
306
307
308
309
310
311 func SplitAfter(s, sep string) []string {
312 return genSplit(s, sep, len(sep), -1)
313 }
314
// asciiSpace is a lookup table indexed by byte value: an entry is 1 for the
// six ASCII white space characters and 0 for every other byte.
var asciiSpace = [256]uint8{
	'\t': 1,
	'\n': 1,
	'\v': 1,
	'\f': 1,
	'\r': 1,
	' ':  1,
}
316
317
318
319
320 func Fields(s string) []string {
321
322
323 n := 0
324 wasSpace := 1
325
326 setBits := uint8(0)
327 for i := 0; i < len(s); i++ {
328 r := s[i]
329 setBits |= r
330 isSpace := int(asciiSpace[r])
331 n += wasSpace & ^isSpace
332 wasSpace = isSpace
333 }
334
335 if setBits >= utf8.RuneSelf {
336
337 return FieldsFunc(s, unicode.IsSpace)
338 }
339
340 a := make([]string, n)
341 na := 0
342 fieldStart := 0
343 i := 0
344
345 for i < len(s) && asciiSpace[s[i]] != 0 {
346 i++
347 }
348 fieldStart = i
349 for i < len(s) {
350 if asciiSpace[s[i]] == 0 {
351 i++
352 continue
353 }
354 a[na] = s[fieldStart:i]
355 na++
356 i++
357
358 for i < len(s) && asciiSpace[s[i]] != 0 {
359 i++
360 }
361 fieldStart = i
362 }
363 if fieldStart < len(s) {
364 a[na] = s[fieldStart:]
365 }
366 return a
367 }
368
369
370
371
372
373
374
// FieldsFunc splits the string s at each run of Unicode code points c
// satisfying f(c) and returns a slice of the substrings between those runs.
// If every code point in s satisfies f(c), or s is empty, an empty slice is
// returned. f is called exactly once per code point, in order.
func FieldsFunc(s string, f func(rune) bool) []string {
	// bounds marks one field as the half-open byte range [lo, hi) of s.
	type bounds struct {
		lo int
		hi int
	}
	found := make([]bounds, 0, 32)

	// lo is the start of the field currently being scanned, or none while
	// the scan is sitting inside a run of separators.
	const none = -1
	lo := none
	for pos, r := range s {
		isSep := f(r)
		switch {
		case isSep && lo != none:
			// A separator closes the open field.
			found = append(found, bounds{lo, pos})
			lo = none
		case !isSep && lo == none:
			// First non-separator after a gap opens a new field.
			lo = pos
		}
	}

	// A field still open at the end runs to the end of the string.
	if lo != none {
		found = append(found, bounds{lo, len(s)})
	}

	// Materialize the substrings from the recorded ranges.
	out := make([]string, len(found))
	for i, b := range found {
		out[i] = s[b.lo:b.hi]
	}
	return out
}
418
419
420
421 func Join(elems []string, sep string) string {
422 switch len(elems) {
423 case 0:
424 return ""
425 case 1:
426 return elems[0]
427 }
428 n := len(sep) * (len(elems) - 1)
429 for i := 0; i < len(elems); i++ {
430 n += len(elems[i])
431 }
432
433 var b Builder
434 b.Grow(n)
435 b.WriteString(elems[0])
436 for _, s := range elems[1:] {
437 b.WriteString(sep)
438 b.WriteString(s)
439 }
440 return b.String()
441 }
442
443
// HasPrefix tests whether the string s begins with prefix.
func HasPrefix(s, prefix string) bool {
	if len(prefix) > len(s) {
		return false
	}
	return s[:len(prefix)] == prefix
}
447
448
// HasSuffix tests whether the string s ends with suffix.
func HasSuffix(s, suffix string) bool {
	if len(suffix) > len(s) {
		return false
	}
	tail := s[len(s)-len(suffix):]
	return tail == suffix
}
452
453
454
455
456 func Map(mapping func(rune) rune, s string) string {
457
458
459
460
461
462
463 var b Builder
464
465 for i, c := range s {
466 r := mapping(c)
467 if r == c && c != utf8.RuneError {
468 continue
469 }
470
471 var width int
472 if c == utf8.RuneError {
473 c, width = utf8.DecodeRuneInString(s[i:])
474 if width != 1 && r == c {
475 continue
476 }
477 } else {
478 width = utf8.RuneLen(c)
479 }
480
481 b.Grow(len(s) + utf8.UTFMax)
482 b.WriteString(s[:i])
483 if r >= 0 {
484 b.WriteRune(r)
485 }
486
487 s = s[i+width:]
488 break
489 }
490
491
492 if b.Cap() == 0 {
493 return s
494 }
495
496 for _, c := range s {
497 r := mapping(c)
498
499 if r >= 0 {
500
501
502
503 if r < utf8.RuneSelf {
504 b.WriteByte(byte(r))
505 } else {
506
507 b.WriteRune(r)
508 }
509 }
510 }
511
512 return b.String()
513 }
514
515
516
517
518
519 func Repeat(s string, count int) string {
520 if count == 0 {
521 return ""
522 }
523
524
525
526
527
528 if count < 0 {
529 panic("strings: negative Repeat count")
530 } else if len(s)*count/count != len(s) {
531 panic("strings: Repeat count causes overflow")
532 }
533
534 n := len(s) * count
535 var b Builder
536 b.Grow(n)
537 b.WriteString(s)
538 for b.Len() < n {
539 if b.Len() <= n/2 {
540 b.WriteString(b.String())
541 } else {
542 b.WriteString(b.String()[:n-b.Len()])
543 break
544 }
545 }
546 return b.String()
547 }
548
549
550 func ToUpper(s string) string {
551 isASCII, hasLower := true, false
552 for i := 0; i < len(s); i++ {
553 c := s[i]
554 if c >= utf8.RuneSelf {
555 isASCII = false
556 break
557 }
558 hasLower = hasLower || ('a' <= c && c <= 'z')
559 }
560
561 if isASCII {
562 if !hasLower {
563 return s
564 }
565 var b Builder
566 b.Grow(len(s))
567 for i := 0; i < len(s); i++ {
568 c := s[i]
569 if 'a' <= c && c <= 'z' {
570 c -= 'a' - 'A'
571 }
572 b.WriteByte(c)
573 }
574 return b.String()
575 }
576 return Map(unicode.ToUpper, s)
577 }
578
579
580 func ToLower(s string) string {
581 isASCII, hasUpper := true, false
582 for i := 0; i < len(s); i++ {
583 c := s[i]
584 if c >= utf8.RuneSelf {
585 isASCII = false
586 break
587 }
588 hasUpper = hasUpper || ('A' <= c && c <= 'Z')
589 }
590
591 if isASCII {
592 if !hasUpper {
593 return s
594 }
595 var b Builder
596 b.Grow(len(s))
597 for i := 0; i < len(s); i++ {
598 c := s[i]
599 if 'A' <= c && c <= 'Z' {
600 c += 'a' - 'A'
601 }
602 b.WriteByte(c)
603 }
604 return b.String()
605 }
606 return Map(unicode.ToLower, s)
607 }
608
609
610
611 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
612
613
614
615 func ToUpperSpecial(c unicode.SpecialCase, s string) string {
616 return Map(c.ToUpper, s)
617 }
618
619
620
621 func ToLowerSpecial(c unicode.SpecialCase, s string) string {
622 return Map(c.ToLower, s)
623 }
624
625
626
627 func ToTitleSpecial(c unicode.SpecialCase, s string) string {
628 return Map(c.ToTitle, s)
629 }
630
631
632
633 func ToValidUTF8(s, replacement string) string {
634 var b Builder
635
636 for i, c := range s {
637 if c != utf8.RuneError {
638 continue
639 }
640
641 _, wid := utf8.DecodeRuneInString(s[i:])
642 if wid == 1 {
643 b.Grow(len(s) + len(replacement))
644 b.WriteString(s[:i])
645 s = s[i:]
646 break
647 }
648 }
649
650
651 if b.Cap() == 0 {
652 return s
653 }
654
655 invalid := false
656 for i := 0; i < len(s); {
657 c := s[i]
658 if c < utf8.RuneSelf {
659 i++
660 invalid = false
661 b.WriteByte(c)
662 continue
663 }
664 _, wid := utf8.DecodeRuneInString(s[i:])
665 if wid == 1 {
666 i++
667 if !invalid {
668 invalid = true
669 b.WriteString(replacement)
670 }
671 continue
672 }
673 invalid = false
674 b.WriteString(s[i : i+wid])
675 i += wid
676 }
677
678 return b.String()
679 }
680
681
682
// isSeparator reports whether the rune could mark a word boundary.
// In the ASCII range everything except letters, digits and underscore is a
// separator. Outside ASCII, letters and digits are not separators and any
// other rune is a separator only if it is a space.
func isSeparator(r rune) bool {
	if r > 0x7F {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r) && unicode.IsSpace(r)
	}
	wordChar := ('0' <= r && r <= '9') ||
		('a' <= r && r <= 'z') ||
		('A' <= r && r <= 'Z') ||
		r == '_'
	return !wordChar
}
705
706
707
708
709
710 func Title(s string) string {
711
712
713
714 prev := ' '
715 return Map(
716 func(r rune) rune {
717 if isSeparator(prev) {
718 prev = r
719 return unicode.ToTitle(r)
720 }
721 prev = r
722 return r
723 },
724 s)
725 }
726
727
728
729 func TrimLeftFunc(s string, f func(rune) bool) string {
730 i := indexFunc(s, f, false)
731 if i == -1 {
732 return ""
733 }
734 return s[i:]
735 }
736
737
738
739 func TrimRightFunc(s string, f func(rune) bool) string {
740 i := lastIndexFunc(s, f, false)
741 if i >= 0 && s[i] >= utf8.RuneSelf {
742 _, wid := utf8.DecodeRuneInString(s[i:])
743 i += wid
744 } else {
745 i++
746 }
747 return s[0:i]
748 }
749
750
751
752 func TrimFunc(s string, f func(rune) bool) string {
753 return TrimRightFunc(TrimLeftFunc(s, f), f)
754 }
755
756
757
758 func IndexFunc(s string, f func(rune) bool) int {
759 return indexFunc(s, f, true)
760 }
761
762
763
764 func LastIndexFunc(s string, f func(rune) bool) int {
765 return lastIndexFunc(s, f, true)
766 }
767
768
769
770
// indexFunc is the same as IndexFunc except that if truth==false, the sense
// of the predicate function is inverted. It returns the byte index of the
// first rune r for which f(r) == truth, or -1 if there is none.
func indexFunc(s string, f func(rune) bool, truth bool) int {
	found := -1
	for pos, r := range s {
		if f(r) == truth {
			found = pos
			break
		}
	}
	return found
}
779
780
781
782
// lastIndexFunc is the same as LastIndexFunc except that if truth==false, the
// sense of the predicate function is inverted. It returns the byte index of
// the last rune r for which f(r) == truth, or -1 if there is none.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// rest is the not-yet-examined prefix of s; it shrinks from the right.
	rest := s
	for len(rest) > 0 {
		r, width := utf8.DecodeLastRuneInString(rest)
		rest = rest[:len(rest)-width]
		if f(r) == truth {
			// len(rest) is now exactly the start offset of r.
			return len(rest)
		}
	}
	return -1
}
793
794
795
796
797
798
799
// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The lower 128 bits (the first four words)
// map onto the ASCII range; the upper words are never set, so looking up a
// non-ASCII byte always reports false.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII. On the first non-ASCII byte it stops and
// returns the partially filled set together with ok == false.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		ch := chars[i]
		if ch >= utf8.RuneSelf {
			return as, false
		}
		// Word index is ch/32, bit index within the word is ch%32.
		as[ch/32] |= uint32(1) << uint(ch%32)
	}
	return as, true
}

// contains reports whether c is inside the set.
func (as *asciiSet) contains(c byte) bool {
	word := as[c/32]
	mask := uint32(1) << uint(c%32)
	return word&mask != 0
}
819
820 func makeCutsetFunc(cutset string) func(rune) bool {
821 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
822 return func(r rune) bool {
823 return r == rune(cutset[0])
824 }
825 }
826 if as, isASCII := makeASCIISet(cutset); isASCII {
827 return func(r rune) bool {
828 return r < utf8.RuneSelf && as.contains(byte(r))
829 }
830 }
831 return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
832 }
833
834
835
836 func Trim(s, cutset string) string {
837 if s == "" || cutset == "" {
838 return s
839 }
840 return TrimFunc(s, makeCutsetFunc(cutset))
841 }
842
843
844
845
846
847 func TrimLeft(s, cutset string) string {
848 if s == "" || cutset == "" {
849 return s
850 }
851 return TrimLeftFunc(s, makeCutsetFunc(cutset))
852 }
853
854
855
856
857
858 func TrimRight(s, cutset string) string {
859 if s == "" || cutset == "" {
860 return s
861 }
862 return TrimRightFunc(s, makeCutsetFunc(cutset))
863 }
864
865
866
867 func TrimSpace(s string) string {
868
869 start := 0
870 for ; start < len(s); start++ {
871 c := s[start]
872 if c >= utf8.RuneSelf {
873
874
875 return TrimFunc(s[start:], unicode.IsSpace)
876 }
877 if asciiSpace[c] == 0 {
878 break
879 }
880 }
881
882
883 stop := len(s)
884 for ; stop > start; stop-- {
885 c := s[stop-1]
886 if c >= utf8.RuneSelf {
887 return TrimFunc(s[start:stop], unicode.IsSpace)
888 }
889 if asciiSpace[c] == 0 {
890 break
891 }
892 }
893
894
895
896
897 return s[start:stop]
898 }
899
900
901
902 func TrimPrefix(s, prefix string) string {
903 if HasPrefix(s, prefix) {
904 return s[len(prefix):]
905 }
906 return s
907 }
908
909
910
911 func TrimSuffix(s, suffix string) string {
912 if HasSuffix(s, suffix) {
913 return s[:len(s)-len(suffix)]
914 }
915 return s
916 }
917
918
919
920
921
922
923
924 func Replace(s, old, new string, n int) string {
925 if old == new || n == 0 {
926 return s
927 }
928
929
930 if m := Count(s, old); m == 0 {
931 return s
932 } else if n < 0 || m < n {
933 n = m
934 }
935
936
937 var b Builder
938 b.Grow(len(s) + n*(len(new)-len(old)))
939 start := 0
940 for i := 0; i < n; i++ {
941 j := start
942 if len(old) == 0 {
943 if i > 0 {
944 _, wid := utf8.DecodeRuneInString(s[start:])
945 j += wid
946 }
947 } else {
948 j += Index(s[start:], old)
949 }
950 b.WriteString(s[start:j])
951 b.WriteString(new)
952 start = j + len(old)
953 }
954 b.WriteString(s[start:])
955 return b.String()
956 }
957
958
959
960
961
962
963 func ReplaceAll(s, old, new string) string {
964 return Replace(s, old, new, -1)
965 }
966
967
968
969
// EqualFold reports whether s and t, interpreted as UTF-8 strings, are equal
// under Unicode case-folding, which is a more general form of
// case-insensitivity.
func EqualFold(s, t string) bool {
	for len(s) > 0 && len(t) > 0 {
		// Pop the first rune off each string, with a fast path for ASCII.
		lo, width := rune(s[0]), 1
		if lo >= utf8.RuneSelf {
			lo, width = utf8.DecodeRuneInString(s)
		}
		s = s[width:]

		hi, width := rune(t[0]), 1
		if hi >= utf8.RuneSelf {
			hi, width = utf8.DecodeRuneInString(t)
		}
		t = t[width:]

		// Identical runes need no folding.
		if lo == hi {
			continue
		}

		// Order the pair so that lo < hi; this halves the cases below.
		if hi < lo {
			lo, hi = hi, lo
		}

		if hi < utf8.RuneSelf {
			// Both ASCII: they match only if lo is an upper-case letter and
			// hi is its lower-case form.
			if 'A' <= lo && lo <= 'Z' && hi == lo+'a'-'A' {
				continue
			}
			return false
		}

		// General case: walk lo's fold orbit (SimpleFold yields the next
		// larger equivalent rune, wrapping around) until reaching or
		// passing hi.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// One string is exhausted; they are equal only if both are.
	return s == t
}
1022
1023
1024 func Index(s, substr string) int {
1025 n := len(substr)
1026 switch {
1027 case n == 0:
1028 return 0
1029 case n == 1:
1030 return IndexByte(s, substr[0])
1031 case n == len(s):
1032 if substr == s {
1033 return 0
1034 }
1035 return -1
1036 case n > len(s):
1037 return -1
1038 case n <= bytealg.MaxLen:
1039
1040 if len(s) <= bytealg.MaxBruteForce {
1041 return bytealg.IndexString(s, substr)
1042 }
1043 c0 := substr[0]
1044 c1 := substr[1]
1045 i := 0
1046 t := len(s) - n + 1
1047 fails := 0
1048 for i < t {
1049 if s[i] != c0 {
1050
1051
1052 o := IndexByte(s[i+1:t], c0)
1053 if o < 0 {
1054 return -1
1055 }
1056 i += o + 1
1057 }
1058 if s[i+1] == c1 && s[i:i+n] == substr {
1059 return i
1060 }
1061 fails++
1062 i++
1063
1064 if fails > bytealg.Cutover(i) {
1065 r := bytealg.IndexString(s[i:], substr)
1066 if r >= 0 {
1067 return r + i
1068 }
1069 return -1
1070 }
1071 }
1072 return -1
1073 }
1074 c0 := substr[0]
1075 c1 := substr[1]
1076 i := 0
1077 t := len(s) - n + 1
1078 fails := 0
1079 for i < t {
1080 if s[i] != c0 {
1081 o := IndexByte(s[i+1:t], c0)
1082 if o < 0 {
1083 return -1
1084 }
1085 i += o + 1
1086 }
1087 if s[i+1] == c1 && s[i:i+n] == substr {
1088 return i
1089 }
1090 i++
1091 fails++
1092 if fails >= 4+i>>4 && i < t {
1093
1094 j := bytealg.IndexRabinKarp(s[i:], substr)
1095 if j < 0 {
1096 return -1
1097 }
1098 return i + j
1099 }
1100 }
1101 return -1
1102 }
1103
View as plain text