...

Source file src/unicode/utf16/utf16_test.go

Documentation: unicode/utf16

		 1  // Copyright 2010 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package utf16_test
		 6  
		 7  import (
		 8  	"reflect"
		 9  	"testing"
		10  	"unicode"
		11  	. "unicode/utf16"
		12  )
		13  
		14  // Validate the constants redefined from unicode.
		15  func TestConstants(t *testing.T) {
		16  	if MaxRune != unicode.MaxRune {
		17  		t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
		18  	}
		19  	if ReplacementChar != unicode.ReplacementChar {
		20  		t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar)
		21  	}
		22  }
		23  
		24  type encodeTest struct {
		25  	in	[]rune
		26  	out []uint16
		27  }
		28  
		29  var encodeTests = []encodeTest{
		30  	{[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
		31  	{[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
		32  		[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
		33  	{[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
		34  		[]uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
		35  }
		36  
		37  func TestEncode(t *testing.T) {
		38  	for _, tt := range encodeTests {
		39  		out := Encode(tt.in)
		40  		if !reflect.DeepEqual(out, tt.out) {
		41  			t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out)
		42  		}
		43  	}
		44  }
		45  
		46  func TestEncodeRune(t *testing.T) {
		47  	for i, tt := range encodeTests {
		48  		j := 0
		49  		for _, r := range tt.in {
		50  			r1, r2 := EncodeRune(r)
		51  			if r < 0x10000 || r > unicode.MaxRune {
		52  				if j >= len(tt.out) {
		53  					t.Errorf("#%d: ran out of tt.out", i)
		54  					break
		55  				}
		56  				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
		57  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
		58  				}
		59  				j++
		60  			} else {
		61  				if j+1 >= len(tt.out) {
		62  					t.Errorf("#%d: ran out of tt.out", i)
		63  					break
		64  				}
		65  				if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) {
		66  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
		67  				}
		68  				j += 2
		69  				dec := DecodeRune(r1, r2)
		70  				if dec != r {
		71  					t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
		72  				}
		73  			}
		74  		}
		75  		if j != len(tt.out) {
		76  			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
		77  		}
		78  	}
		79  }
		80  
		81  type decodeTest struct {
		82  	in	[]uint16
		83  	out []rune
		84  }
		85  
		86  var decodeTests = []decodeTest{
		87  	{[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}},
		88  	{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
		89  		[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
		90  	{[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}},
		91  	{[]uint16{0xdfff}, []rune{0xfffd}},
		92  }
		93  
		94  func TestDecode(t *testing.T) {
		95  	for _, tt := range decodeTests {
		96  		out := Decode(tt.in)
		97  		if !reflect.DeepEqual(out, tt.out) {
		98  			t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out)
		99  		}
	 100  	}
	 101  }
	 102  
	 103  var decodeRuneTests = []struct {
	 104  	r1, r2 rune
	 105  	want	 rune
	 106  }{
	 107  	{0xd800, 0xdc00, 0x10000},
	 108  	{0xd800, 0xdc01, 0x10001},
	 109  	{0xd808, 0xdf45, 0x12345},
	 110  	{0xdbff, 0xdfff, 0x10ffff},
	 111  	{0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted
	 112  }
	 113  
	 114  func TestDecodeRune(t *testing.T) {
	 115  	for i, tt := range decodeRuneTests {
	 116  		got := DecodeRune(tt.r1, tt.r2)
	 117  		if got != tt.want {
	 118  			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
	 119  		}
	 120  	}
	 121  }
	 122  
	 123  var surrogateTests = []struct {
	 124  	r		rune
	 125  	want bool
	 126  }{
	 127  	// from https://en.wikipedia.org/wiki/UTF-16
	 128  	{'\u007A', false},		 // LATIN SMALL LETTER Z
	 129  	{'\u6C34', false},		 // CJK UNIFIED IDEOGRAPH-6C34 (water)
	 130  	{'\uFEFF', false},		 // Byte Order Mark
	 131  	{'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
	 132  	{'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF
	 133  	{'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point)
	 134  
	 135  	{rune(0xd7ff), false}, // surr1-1
	 136  	{rune(0xd800), true},	// surr1
	 137  	{rune(0xdc00), true},	// surr2
	 138  	{rune(0xe000), false}, // surr3
	 139  	{rune(0xdfff), true},	// surr3-1
	 140  }
	 141  
	 142  func TestIsSurrogate(t *testing.T) {
	 143  	for i, tt := range surrogateTests {
	 144  		got := IsSurrogate(tt.r)
	 145  		if got != tt.want {
	 146  			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
	 147  		}
	 148  	}
	 149  }
	 150  
	 151  func BenchmarkDecodeValidASCII(b *testing.B) {
	 152  	// "hello world"
	 153  	data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100}
	 154  	for i := 0; i < b.N; i++ {
	 155  		Decode(data)
	 156  	}
	 157  }
	 158  
	 159  func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
	 160  	// "日本語日本語日本語"
	 161  	data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486}
	 162  	for i := 0; i < b.N; i++ {
	 163  		Decode(data)
	 164  	}
	 165  }
	 166  
	 167  func BenchmarkDecodeRune(b *testing.B) {
	 168  	rs := make([]rune, 10)
	 169  	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS
	 170  	for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
	 171  		rs[2*i], rs[2*i+1] = EncodeRune(u)
	 172  	}
	 173  
	 174  	b.ResetTimer()
	 175  	for i := 0; i < b.N; i++ {
	 176  		for j := 0; j < 5; j++ {
	 177  			DecodeRune(rs[2*j], rs[2*j+1])
	 178  		}
	 179  	}
	 180  }
	 181  
	 182  func BenchmarkEncodeValidASCII(b *testing.B) {
	 183  	data := []rune{'h', 'e', 'l', 'l', 'o'}
	 184  	for i := 0; i < b.N; i++ {
	 185  		Encode(data)
	 186  	}
	 187  }
	 188  
	 189  func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
	 190  	data := []rune{'日', '本', '語'}
	 191  	for i := 0; i < b.N; i++ {
	 192  		Encode(data)
	 193  	}
	 194  }
	 195  
	 196  func BenchmarkEncodeRune(b *testing.B) {
	 197  	for i := 0; i < b.N; i++ {
	 198  		for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
	 199  			EncodeRune(u)
	 200  		}
	 201  	}
	 202  }
	 203  

View as plain text