1
2
3
4
5 package utf8_test
6
7 import (
8 "bytes"
9 "testing"
10 "unicode"
11 . "unicode/utf8"
12 )
13
14
// init cross-checks the constants that package utf8 redeclares against the
// corresponding definitions in package unicode. It panics with a fixed
// message on the first disagreement, so a mismatch surfaces before any test
// in this file starts running.
func init() {
	checks := []struct {
		ok  bool
		msg string
	}{
		{MaxRune == unicode.MaxRune, "utf8.MaxRune is wrong"},
		{RuneError == unicode.ReplacementChar, "utf8.RuneError is wrong"},
	}
	for _, c := range checks {
		if !c.ok {
			panic(c.msg)
		}
	}
}
23
24
// TestConstants reports, as ordinary test failures, any difference between
// the constants redeclared by utf8 and their definitions in package unicode.
func TestConstants(t *testing.T) {
	if got, want := rune(MaxRune), rune(unicode.MaxRune); got != want {
		t.Errorf("utf8.MaxRune is wrong: %x should be %x", got, want)
	}
	if got, want := rune(RuneError), rune(unicode.ReplacementChar); got != want {
		t.Errorf("utf8.RuneError is wrong: %x should be %x", got, want)
	}
}
33
// Utf8Map pairs a rune with the byte sequence the tests in this file treat
// as its encoding.
type Utf8Map struct {
	// r is the code point.
	r rune
	// str holds the encoded bytes for r, stored as a string.
	str string
}
38
39 var utf8map = []Utf8Map{
40 {0x0000, "\x00"},
41 {0x0001, "\x01"},
42 {0x007e, "\x7e"},
43 {0x007f, "\x7f"},
44 {0x0080, "\xc2\x80"},
45 {0x0081, "\xc2\x81"},
46 {0x00bf, "\xc2\xbf"},
47 {0x00c0, "\xc3\x80"},
48 {0x00c1, "\xc3\x81"},
49 {0x00c8, "\xc3\x88"},
50 {0x00d0, "\xc3\x90"},
51 {0x00e0, "\xc3\xa0"},
52 {0x00f0, "\xc3\xb0"},
53 {0x00f8, "\xc3\xb8"},
54 {0x00ff, "\xc3\xbf"},
55 {0x0100, "\xc4\x80"},
56 {0x07ff, "\xdf\xbf"},
57 {0x0400, "\xd0\x80"},
58 {0x0800, "\xe0\xa0\x80"},
59 {0x0801, "\xe0\xa0\x81"},
60 {0x1000, "\xe1\x80\x80"},
61 {0xd000, "\xed\x80\x80"},
62 {0xd7ff, "\xed\x9f\xbf"},
63 {0xe000, "\xee\x80\x80"},
64 {0xfffe, "\xef\xbf\xbe"},
65 {0xffff, "\xef\xbf\xbf"},
66 {0x10000, "\xf0\x90\x80\x80"},
67 {0x10001, "\xf0\x90\x80\x81"},
68 {0x40000, "\xf1\x80\x80\x80"},
69 {0x10fffe, "\xf4\x8f\xbf\xbe"},
70 {0x10ffff, "\xf4\x8f\xbf\xbf"},
71 {0xFFFD, "\xef\xbf\xbd"},
72 }
73
74 var surrogateMap = []Utf8Map{
75 {0xd800, "\xed\xa0\x80"},
76 {0xdfff, "\xed\xbf\xbf"},
77 }
78
// testStrings are the sample inputs shared by TestSequencing and
// TestRuntimeConversion.
var testStrings = []string{
	// Empty input.
	"",
	// ASCII only.
	"abcd",
	// Three-byte runes only.
	"☺☻☹",
	// A mix of one-, two- and three-byte runes.
	"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
	// The mixed string above, three times over.
	"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
	// Four 0x80 bytes with no leading byte.
	"\x80\x80\x80\x80",
}
87
88 func TestFullRune(t *testing.T) {
89 for _, m := range utf8map {
90 b := []byte(m.str)
91 if !FullRune(b) {
92 t.Errorf("FullRune(%q) (%U) = false, want true", b, m.r)
93 }
94 s := m.str
95 if !FullRuneInString(s) {
96 t.Errorf("FullRuneInString(%q) (%U) = false, want true", s, m.r)
97 }
98 b1 := b[0 : len(b)-1]
99 if FullRune(b1) {
100 t.Errorf("FullRune(%q) = true, want false", b1)
101 }
102 s1 := string(b1)
103 if FullRuneInString(s1) {
104 t.Errorf("FullRune(%q) = true, want false", s1)
105 }
106 }
107 for _, s := range []string{"\xc0", "\xc1"} {
108 b := []byte(s)
109 if !FullRune(b) {
110 t.Errorf("FullRune(%q) = false, want true", s)
111 }
112 if !FullRuneInString(s) {
113 t.Errorf("FullRuneInString(%q) = false, want true", s)
114 }
115 }
116 }
117
118 func TestEncodeRune(t *testing.T) {
119 for _, m := range utf8map {
120 b := []byte(m.str)
121 var buf [10]byte
122 n := EncodeRune(buf[0:], m.r)
123 b1 := buf[0:n]
124 if !bytes.Equal(b, b1) {
125 t.Errorf("EncodeRune(%#04x) = %q want %q", m.r, b1, b)
126 }
127 }
128 }
129
130 func TestDecodeRune(t *testing.T) {
131 for _, m := range utf8map {
132 b := []byte(m.str)
133 r, size := DecodeRune(b)
134 if r != m.r || size != len(b) {
135 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
136 }
137 s := m.str
138 r, size = DecodeRuneInString(s)
139 if r != m.r || size != len(b) {
140 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
141 }
142
143
144 r, size = DecodeRune(b[0:cap(b)])
145 if r != m.r || size != len(b) {
146 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
147 }
148 s = m.str + "\x00"
149 r, size = DecodeRuneInString(s)
150 if r != m.r || size != len(b) {
151 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
152 }
153
154
155 wantsize := 1
156 if wantsize >= len(b) {
157 wantsize = 0
158 }
159 r, size = DecodeRune(b[0 : len(b)-1])
160 if r != RuneError || size != wantsize {
161 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[0:len(b)-1], r, size, RuneError, wantsize)
162 }
163 s = m.str[0 : len(m.str)-1]
164 r, size = DecodeRuneInString(s)
165 if r != RuneError || size != wantsize {
166 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, wantsize)
167 }
168
169
170 if len(b) == 1 {
171 b[0] = 0x80
172 } else {
173 b[len(b)-1] = 0x7F
174 }
175 r, size = DecodeRune(b)
176 if r != RuneError || size != 1 {
177 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, RuneError, 1)
178 }
179 s = string(b)
180 r, size = DecodeRuneInString(s)
181 if r != RuneError || size != 1 {
182 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, 1)
183 }
184
185 }
186 }
187
188 func TestDecodeSurrogateRune(t *testing.T) {
189 for _, m := range surrogateMap {
190 b := []byte(m.str)
191 r, size := DecodeRune(b)
192 if r != RuneError || size != 1 {
193 t.Errorf("DecodeRune(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
194 }
195 s := m.str
196 r, size = DecodeRuneInString(s)
197 if r != RuneError || size != 1 {
198 t.Errorf("DecodeRuneInString(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
199 }
200 }
201 }
202
203
204
205 func TestSequencing(t *testing.T) {
206 for _, ts := range testStrings {
207 for _, m := range utf8map {
208 for _, s := range []string{ts + m.str, m.str + ts, ts + m.str + ts} {
209 testSequence(t, s)
210 }
211 }
212 }
213 }
214
// runtimeRuneCount returns the number of runes in s as counted by the
// language's own string-to-[]rune conversion rather than by this package.
//
// NOTE(review): the conversion and len are deliberately kept as a single
// expression; TestRuntimeConversion checks len([]rune(s)) separately from a
// stored []rune, presumably so this exact form is exercised. Confirm before
// splitting it.
func runtimeRuneCount(s string) (n int) {
	n = len([]rune(s))
	return n
}
218
219
220
221
222
223 func TestRuntimeConversion(t *testing.T) {
224 for _, ts := range testStrings {
225 count := RuneCountInString(ts)
226 if n := runtimeRuneCount(ts); n != count {
227 t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, n, count)
228 break
229 }
230
231 runes := []rune(ts)
232 if n := len(runes); n != count {
233 t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, n, count)
234 break
235 }
236 i := 0
237 for _, r := range ts {
238 if r != runes[i] {
239 t.Errorf("%q[%d]: expected %c (%U); got %c (%U)", ts, i, runes[i], runes[i], r, r)
240 }
241 i++
242 }
243 }
244 }
245
// invalidSequenceTests lists four-byte inputs whose first rune
// TestDecodeInvalidSequence expects to decode as RuneError. Entries are
// grouped by their leading byte.
var invalidSequenceTests = []string{
	// Encoded surrogates followed by a stray continuation byte.
	"\xed\xa0\x80\x80", // first surrogate, 0xd800
	"\xed\xbf\xbf\x80", // last surrogate, 0xdfff

	// Leading byte 0x91 is itself a continuation byte.
	"\x91\x80\x80\x80",

	// Leads 0xC2 and 0xDF: second byte outside 0x80-0xBF.
	"\xC2\x7F\x80\x80",
	"\xC2\xC0\x80\x80",
	"\xDF\x7F\x80\x80",
	"\xDF\xC0\x80\x80",

	// Lead 0xE0: second byte below 0xA0, or a bad second/third byte.
	"\xE0\x9F\xBF\x80",
	"\xE0\xA0\x7F\x80",
	"\xE0\xBF\xC0\x80",
	"\xE0\xC0\x80\x80",

	// Lead 0xE1: second or third byte outside 0x80-0xBF.
	"\xE1\x7F\xBF\x80",
	"\xE1\x80\x7F\x80",
	"\xE1\xBF\xC0\x80",
	"\xE1\xC0\x80\x80",

	// Lead 0xED: second byte above 0x9F, or a bad second/third byte.
	"\xED\x7F\xBF\x80",
	"\xED\x80\x7F\x80",
	"\xED\x9F\xC0\x80",
	"\xED\xA0\x80\x80",

	// Lead 0xF0: second byte below 0x90, or a bad later byte.
	"\xF0\x8F\xBF\xBF",
	"\xF0\x90\x7F\xBF",
	"\xF0\x90\x80\x7F",
	"\xF0\xBF\xBF\xC0",
	"\xF0\xBF\xC0\x80",
	"\xF0\xC0\x80\x80",

	// Lead 0xF1: any following byte outside 0x80-0xBF.
	"\xF1\x7F\xBF\xBF",
	"\xF1\x80\x7F\xBF",
	"\xF1\x80\x80\x7F",
	"\xF1\xBF\xBF\xC0",
	"\xF1\xBF\xC0\x80",
	"\xF1\xC0\x80\x80",

	// Lead 0xF4: second byte above 0x8F, or a bad later byte.
	"\xF4\x7F\xBF\xBF",
	"\xF4\x80\x7F\xBF",
	"\xF4\x80\x80\x7F",
	"\xF4\x8F\xBF\xC0",
	"\xF4\x8F\xC0\x80",
	"\xF4\x90\x80\x80",
}
301
// runtimeDecodeRune returns the first rune that a range loop over s yields,
// or -1 when s is empty. It deliberately uses the language's range loop
// rather than this package's decoders.
func runtimeDecodeRune(s string) rune {
	first := rune(-1)
	for _, r := range s {
		first = r
		break
	}
	return first
}
308
309 func TestDecodeInvalidSequence(t *testing.T) {
310 for _, s := range invalidSequenceTests {
311 r1, _ := DecodeRune([]byte(s))
312 if want := RuneError; r1 != want {
313 t.Errorf("DecodeRune(%#x) = %#04x, want %#04x", s, r1, want)
314 return
315 }
316 r2, _ := DecodeRuneInString(s)
317 if want := RuneError; r2 != want {
318 t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s, r2, want)
319 return
320 }
321 if r1 != r2 {
322 t.Errorf("DecodeRune(%#x) = %#04x mismatch with DecodeRuneInString(%q) = %#04x", s, r1, s, r2)
323 return
324 }
325 r3 := runtimeDecodeRune(s)
326 if r2 != r3 {
327 t.Errorf("DecodeRuneInString(%q) = %#04x mismatch with runtime.decoderune(%q) = %#04x", s, r2, s, r3)
328 return
329 }
330 }
331 }
332
// testSequence walks s forwards and then backwards, checking that the
// decoders agree with a range loop over s.
//
// The forward pass records the byte offset and value of every rune the range
// loop yields, and requires DecodeRune and DecodeRuneInString to return that
// rune with matching widths. The backward pass repeatedly decodes the last
// rune of a shrinking prefix and requires DecodeLastRune and
// DecodeLastRuneInString to retrace the recorded runes and offsets in reverse
// order. It returns after reporting the first mismatch.
func testSequence(t *testing.T, s string) {
	type step struct {
		offset int
		r      rune
	}
	steps := make([]step, 0, len(s))
	raw := []byte(s)

	// Forward pass.
	next := 0
	for i, r := range s {
		if next != i {
			t.Errorf("Sequence(%q) mismatched index %d, want %d", s, next, i)
			return
		}
		steps = append(steps, step{offset: i, r: r})

		fromBytes, widthBytes := DecodeRune(raw[i:])
		if fromBytes != r {
			t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[i:], fromBytes, r)
			return
		}
		fromString, widthString := DecodeRuneInString(s[i:])
		if fromString != r {
			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[i:], fromString, r)
			return
		}
		if widthBytes != widthString {
			t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[i:], widthBytes, widthString)
			return
		}
		next += widthBytes
	}

	// Backward pass, from the last recorded rune towards the first.
	end := len(s)
	for k := len(steps) - 1; end > 0; k-- {
		fromBytes, widthBytes := DecodeLastRune(raw[:end])
		fromString, widthString := DecodeLastRuneInString(s[:end])
		if widthBytes != widthString {
			t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, end, widthBytes, widthString)
			return
		}
		want := steps[k]
		if fromBytes != want.r {
			t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, end, fromBytes, want.r)
			return
		}
		if fromString != want.r {
			t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, end, fromString, want.r)
			return
		}
		end -= widthBytes
		if end != want.offset {
			t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, end, want.offset)
			return
		}
	}
	if end != 0 {
		t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, end)
	}
}
392
393
// TestNegativeRune checks that encoding the rune -1 produces exactly the
// same bytes as encoding RuneError.
func TestNegativeRune(t *testing.T) {
	encode := func(r rune) []byte {
		buf := make([]byte, UTFMax)
		return buf[:EncodeRune(buf, r)]
	}
	want := encode(RuneError)
	got := encode(-1)
	if !bytes.Equal(got, want) {
		t.Errorf("incorrect encoding [% x] for -1; expected [% x]", got, want)
	}
}
403
// RuneCountTest is one case for TestRuneCount.
type RuneCountTest struct {
	// in is the input text; it may contain invalid bytes.
	in string
	// out is the rune count expected for in.
	out int
}
408
409 var runecounttests = []RuneCountTest{
410 {"abcd", 4},
411 {"☺☻☹", 3},
412 {"1,2,3,4", 7},
413 {"\xe2\x00", 2},
414 {"\xe2\x80", 2},
415 {"a\xe2\x80", 3},
416 }
417
418 func TestRuneCount(t *testing.T) {
419 for _, tt := range runecounttests {
420 if out := RuneCountInString(tt.in); out != tt.out {
421 t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out)
422 }
423 if out := RuneCount([]byte(tt.in)); out != tt.out {
424 t.Errorf("RuneCount(%q) = %d, want %d", tt.in, out, tt.out)
425 }
426 }
427 }
428
// RuneLenTest is one case for TestRuneLen.
type RuneLenTest struct {
	// r is the rune passed to RuneLen.
	r rune
	// size is the expected result; the table uses -1 for runes that are
	// expected to be rejected.
	size int
}
433
434 var runelentests = []RuneLenTest{
435 {0, 1},
436 {'e', 1},
437 {'é', 2},
438 {'☺', 3},
439 {RuneError, 3},
440 {MaxRune, 4},
441 {0xD800, -1},
442 {0xDFFF, -1},
443 {MaxRune + 1, -1},
444 {-1, -1},
445 }
446
447 func TestRuneLen(t *testing.T) {
448 for _, tt := range runelentests {
449 if size := RuneLen(tt.r); size != tt.size {
450 t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, size, tt.size)
451 }
452 }
453 }
454
// ValidTest is one case for TestValid.
type ValidTest struct {
	// in is the input text.
	in string
	// out is the result expected from Valid and ValidString for in.
	out bool
}
459
460 var validTests = []ValidTest{
461 {"", true},
462 {"a", true},
463 {"abc", true},
464 {"Ж", true},
465 {"ЖЖ", true},
466 {"брэд-ЛГТМ", true},
467 {"☺☻☹", true},
468 {"aa\xe2", false},
469 {string([]byte{66, 250}), false},
470 {string([]byte{66, 250, 67}), false},
471 {"a\uFFFDb", true},
472 {string("\xF4\x8F\xBF\xBF"), true},
473 {string("\xF4\x90\x80\x80"), false},
474 {string("\xF7\xBF\xBF\xBF"), false},
475 {string("\xFB\xBF\xBF\xBF\xBF"), false},
476 {string("\xc0\x80"), false},
477 {string("\xed\xa0\x80"), false},
478 {string("\xed\xbf\xbf"), false},
479 }
480
481 func TestValid(t *testing.T) {
482 for _, tt := range validTests {
483 if Valid([]byte(tt.in)) != tt.out {
484 t.Errorf("Valid(%q) = %v; want %v", tt.in, !tt.out, tt.out)
485 }
486 if ValidString(tt.in) != tt.out {
487 t.Errorf("ValidString(%q) = %v; want %v", tt.in, !tt.out, tt.out)
488 }
489 }
490 }
491
// ValidRuneTest is one case for TestValidRune.
type ValidRuneTest struct {
	// r is the rune passed to ValidRune.
	r rune
	// ok is the expected result.
	ok bool
}
496
497 var validrunetests = []ValidRuneTest{
498 {0, true},
499 {'e', true},
500 {'é', true},
501 {'☺', true},
502 {RuneError, true},
503 {MaxRune, true},
504 {0xD7FF, true},
505 {0xD800, false},
506 {0xDFFF, false},
507 {0xE000, true},
508 {MaxRune + 1, false},
509 {-1, false},
510 }
511
512 func TestValidRune(t *testing.T) {
513 for _, tt := range validrunetests {
514 if ok := ValidRune(tt.r); ok != tt.ok {
515 t.Errorf("ValidRune(%#U) = %t, want %t", tt.r, ok, tt.ok)
516 }
517 }
518 }
519
// BenchmarkRuneCountTenASCIIChars measures RuneCount on a ten-byte ASCII
// slice.
func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
	const text = "0123456789"
	input := []byte(text)
	for n := 0; n < b.N; n++ {
		RuneCount(input)
	}
}
526
// BenchmarkRuneCountTenJapaneseChars measures RuneCount on a slice holding
// ten three-byte runes.
func BenchmarkRuneCountTenJapaneseChars(b *testing.B) {
	const text = "日本語日本語日本語日"
	input := []byte(text)
	for n := 0; n < b.N; n++ {
		RuneCount(input)
	}
}
533
// BenchmarkRuneCountInStringTenASCIIChars measures RuneCountInString on a
// ten-byte ASCII string.
func BenchmarkRuneCountInStringTenASCIIChars(b *testing.B) {
	const input = "0123456789"
	for n := 0; n < b.N; n++ {
		RuneCountInString(input)
	}
}
539
// BenchmarkRuneCountInStringTenJapaneseChars measures RuneCountInString on a
// string of ten three-byte runes.
func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
	const input = "日本語日本語日本語日"
	for n := 0; n < b.N; n++ {
		RuneCountInString(input)
	}
}
545
// BenchmarkValidTenASCIIChars measures Valid on a ten-byte ASCII slice.
func BenchmarkValidTenASCIIChars(b *testing.B) {
	const text = "0123456789"
	input := []byte(text)
	for n := 0; n < b.N; n++ {
		Valid(input)
	}
}
552
// BenchmarkValidTenJapaneseChars measures Valid on a slice holding ten
// three-byte runes.
func BenchmarkValidTenJapaneseChars(b *testing.B) {
	const text = "日本語日本語日本語日"
	input := []byte(text)
	for n := 0; n < b.N; n++ {
		Valid(input)
	}
}
559
// BenchmarkValidStringTenASCIIChars measures ValidString on a ten-byte ASCII
// string.
func BenchmarkValidStringTenASCIIChars(b *testing.B) {
	const input = "0123456789"
	for n := 0; n < b.N; n++ {
		ValidString(input)
	}
}
565
// BenchmarkValidStringTenJapaneseChars measures ValidString on a string of
// ten three-byte runes.
func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
	const input = "日本語日本語日本語日"
	for n := 0; n < b.N; n++ {
		ValidString(input)
	}
}
571
// BenchmarkEncodeASCIIRune measures EncodeRune for a one-byte rune, writing
// into a reused UTFMax-sized buffer.
func BenchmarkEncodeASCIIRune(b *testing.B) {
	const r = 'a'
	dst := make([]byte, UTFMax)
	for n := 0; n < b.N; n++ {
		EncodeRune(dst, r)
	}
}
578
// BenchmarkEncodeJapaneseRune measures EncodeRune for a three-byte rune,
// writing into a reused UTFMax-sized buffer.
func BenchmarkEncodeJapaneseRune(b *testing.B) {
	const r = '本'
	dst := make([]byte, UTFMax)
	for n := 0; n < b.N; n++ {
		EncodeRune(dst, r)
	}
}
585
// BenchmarkDecodeASCIIRune measures DecodeRune on a single ASCII byte.
func BenchmarkDecodeASCIIRune(b *testing.B) {
	src := []byte{'a'}
	for n := 0; n < b.N; n++ {
		DecodeRune(src)
	}
}
592
// BenchmarkDecodeJapaneseRune measures DecodeRune on a single three-byte
// rune.
func BenchmarkDecodeJapaneseRune(b *testing.B) {
	const text = "本"
	src := []byte(text)
	for n := 0; n < b.N; n++ {
		DecodeRune(src)
	}
}
599
600
601
// boolSink receives the result of the call timed in BenchmarkFullRune, so
// that the result is stored to a package-level variable instead of being
// discarded.
var boolSink bool
603
604 func BenchmarkFullRune(b *testing.B) {
605 benchmarks := []struct {
606 name string
607 data []byte
608 }{
609 {"ASCII", []byte("a")},
610 {"Incomplete", []byte("\xf0\x90\x80")},
611 {"Japanese", []byte("本")},
612 }
613 for _, bm := range benchmarks {
614 b.Run(bm.name, func(b *testing.B) {
615 for i := 0; i < b.N; i++ {
616 boolSink = FullRune(bm.data)
617 }
618 })
619 }
620 }
621
View as plain text