...
Source file
src/unicode/letter.go
Documentation: unicode
1
2
3
4
5
6
7 package unicode
8
// Code-point limits and sentinel values shared by the lookup and
// case-mapping helpers in this file.
const (
	// MaxRune is the largest code point value (U+10FFFF).
	MaxRune = '\U0010FFFF'

	// ReplacementChar is U+FFFD; to returns it when asked for an
	// out-of-range case selector.
	ReplacementChar = '\uFFFD'

	// MaxASCII is the largest ASCII value (U+007F).
	MaxASCII = '\u007F'

	// MaxLatin1 is the largest Latin-1 value (U+00FF).
	MaxLatin1 = '\u00FF'
)
15
16
17
18
19
20
21 type RangeTable struct {
22 R16 []Range16
23 R32 []Range32
24 LatinOffset int
25 }
26
27
28
// Range16 is a range of 16-bit code points. A value r with Lo <= r <= Hi
// belongs to the range when Stride is 1 or (r-Lo) is a multiple of Stride.
type Range16 struct {
	Lo, Hi uint16 // inclusive bounds of the range
	Stride uint16 // step between members, counted from Lo
}
34
35
36
37
// Range32 is a range of 32-bit code points. A value r with Lo <= r <= Hi
// belongs to the range when Stride is 1 or (r-Lo) is a multiple of Stride.
type Range32 struct {
	Lo, Hi uint32 // inclusive bounds of the range
	Stride uint32 // step between members, counted from Lo
}
43
44
45
46
47
48
49
50
51
52
53
54 type CaseRange struct {
55 Lo uint32
56 Hi uint32
57 Delta d
58 }
59
60
61
62 type SpecialCase []CaseRange
63
64
65
66
67
// Case selectors. They index the Delta array of a CaseRange and are the
// values accepted as the _case argument of To.
const (
	// UpperCase selects the upper-case mapping.
	UpperCase = iota
	// LowerCase selects the lower-case mapping.
	LowerCase
	// TitleCase selects the title-case mapping.
	TitleCase
	// MaxCase is the number of case selectors; it is not a valid selector.
	MaxCase
)
74
75 type d [MaxCase]rune
76
77
78
79
80 const (
81 UpperLower = MaxRune + 1
82 )
83
84
85
// linearMax is the largest table size for which is16 and is32 scan the
// ranges one by one; longer tables are binary-searched.
const linearMax = 18
87
88
89 func is16(ranges []Range16, r uint16) bool {
90 if len(ranges) <= linearMax || r <= MaxLatin1 {
91 for i := range ranges {
92 range_ := &ranges[i]
93 if r < range_.Lo {
94 return false
95 }
96 if r <= range_.Hi {
97 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
98 }
99 }
100 return false
101 }
102
103
104 lo := 0
105 hi := len(ranges)
106 for lo < hi {
107 m := lo + (hi-lo)/2
108 range_ := &ranges[m]
109 if range_.Lo <= r && r <= range_.Hi {
110 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
111 }
112 if r < range_.Lo {
113 hi = m
114 } else {
115 lo = m + 1
116 }
117 }
118 return false
119 }
120
121
122 func is32(ranges []Range32, r uint32) bool {
123 if len(ranges) <= linearMax {
124 for i := range ranges {
125 range_ := &ranges[i]
126 if r < range_.Lo {
127 return false
128 }
129 if r <= range_.Hi {
130 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
131 }
132 }
133 return false
134 }
135
136
137 lo := 0
138 hi := len(ranges)
139 for lo < hi {
140 m := lo + (hi-lo)/2
141 range_ := ranges[m]
142 if range_.Lo <= r && r <= range_.Hi {
143 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
144 }
145 if r < range_.Lo {
146 hi = m
147 } else {
148 lo = m + 1
149 }
150 }
151 return false
152 }
153
154
155 func Is(rangeTab *RangeTable, r rune) bool {
156 r16 := rangeTab.R16
157
158 if len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
159 return is16(r16, uint16(r))
160 }
161 r32 := rangeTab.R32
162 if len(r32) > 0 && r >= rune(r32[0].Lo) {
163 return is32(r32, uint32(r))
164 }
165 return false
166 }
167
168 func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
169 r16 := rangeTab.R16
170
171 if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
172 return is16(r16[off:], uint16(r))
173 }
174 r32 := rangeTab.R32
175 if len(r32) > 0 && r >= rune(r32[0].Lo) {
176 return is32(r32, uint32(r))
177 }
178 return false
179 }
180
181
182 func IsUpper(r rune) bool {
183
184 if uint32(r) <= MaxLatin1 {
185 return properties[uint8(r)]&pLmask == pLu
186 }
187 return isExcludingLatin(Upper, r)
188 }
189
190
191 func IsLower(r rune) bool {
192
193 if uint32(r) <= MaxLatin1 {
194 return properties[uint8(r)]&pLmask == pLl
195 }
196 return isExcludingLatin(Lower, r)
197 }
198
199
200 func IsTitle(r rune) bool {
201 if r <= MaxLatin1 {
202 return false
203 }
204 return isExcludingLatin(Title, r)
205 }
206
207
208
209 func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
210 if _case < 0 || MaxCase <= _case {
211 return ReplacementChar, false
212 }
213
214 lo := 0
215 hi := len(caseRange)
216 for lo < hi {
217 m := lo + (hi-lo)/2
218 cr := caseRange[m]
219 if rune(cr.Lo) <= r && r <= rune(cr.Hi) {
220 delta := cr.Delta[_case]
221 if delta > MaxRune {
222
223
224
225
226
227
228
229
230
231
232 return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)), true
233 }
234 return r + delta, true
235 }
236 if r < rune(cr.Lo) {
237 hi = m
238 } else {
239 lo = m + 1
240 }
241 }
242 return r, false
243 }
244
245
246 func To(_case int, r rune) rune {
247 r, _ = to(_case, r, CaseRanges)
248 return r
249 }
250
251
252 func ToUpper(r rune) rune {
253 if r <= MaxASCII {
254 if 'a' <= r && r <= 'z' {
255 r -= 'a' - 'A'
256 }
257 return r
258 }
259 return To(UpperCase, r)
260 }
261
262
263 func ToLower(r rune) rune {
264 if r <= MaxASCII {
265 if 'A' <= r && r <= 'Z' {
266 r += 'a' - 'A'
267 }
268 return r
269 }
270 return To(LowerCase, r)
271 }
272
273
274 func ToTitle(r rune) rune {
275 if r <= MaxASCII {
276 if 'a' <= r && r <= 'z' {
277 r -= 'a' - 'A'
278 }
279 return r
280 }
281 return To(TitleCase, r)
282 }
283
284
285 func (special SpecialCase) ToUpper(r rune) rune {
286 r1, hadMapping := to(UpperCase, r, []CaseRange(special))
287 if r1 == r && !hadMapping {
288 r1 = ToUpper(r)
289 }
290 return r1
291 }
292
293
294 func (special SpecialCase) ToTitle(r rune) rune {
295 r1, hadMapping := to(TitleCase, r, []CaseRange(special))
296 if r1 == r && !hadMapping {
297 r1 = ToTitle(r)
298 }
299 return r1
300 }
301
302
303 func (special SpecialCase) ToLower(r rune) rune {
304 r1, hadMapping := to(LowerCase, r, []CaseRange(special))
305 if r1 == r && !hadMapping {
306 r1 = ToLower(r)
307 }
308 return r1
309 }
310
311
312
313
314
// foldPair is one entry of the caseOrbit table consulted by SimpleFold:
// a rune equal to From folds to To. Both values are limited to 16 bits.
type foldPair struct {
	From, To uint16
}
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338 func SimpleFold(r rune) rune {
339 if r < 0 || r > MaxRune {
340 return r
341 }
342
343 if int(r) < len(asciiFold) {
344 return rune(asciiFold[r])
345 }
346
347
348 lo := 0
349 hi := len(caseOrbit)
350 for lo < hi {
351 m := lo + (hi-lo)/2
352 if rune(caseOrbit[m].From) < r {
353 lo = m + 1
354 } else {
355 hi = m
356 }
357 }
358 if lo < len(caseOrbit) && rune(caseOrbit[lo].From) == r {
359 return rune(caseOrbit[lo].To)
360 }
361
362
363
364
365 if l := ToLower(r); l != r {
366 return l
367 }
368 return ToUpper(r)
369 }
370
View as plain text