Source file
src/strconv/quote.go
Documentation: strconv
1
2
3
4
5
6
7 package strconv
8
9 import (
10 "unicode/utf8"
11 )
12
// lowerhex maps a 4-bit value to its lowercase hexadecimal digit; it is
// indexed with one nibble at a time when \x, \u and \U escapes are written.
const lowerhex = "0123456789abcdef"

// upperhex is the uppercase counterpart of lowerhex.
const upperhex = "0123456789ABCDEF"
17
18
19 func contains(s string, c byte) bool {
20 return index(s, c) != -1
21 }
22
23 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
24 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
25 }
26
27 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
28 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
29 }
30
31 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
32
33
34 if cap(buf)-len(buf) < len(s) {
35 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
36 copy(nBuf, buf)
37 buf = nBuf
38 }
39 buf = append(buf, quote)
40 for width := 0; len(s) > 0; s = s[width:] {
41 r := rune(s[0])
42 width = 1
43 if r >= utf8.RuneSelf {
44 r, width = utf8.DecodeRuneInString(s)
45 }
46 if width == 1 && r == utf8.RuneError {
47 buf = append(buf, `\x`...)
48 buf = append(buf, lowerhex[s[0]>>4])
49 buf = append(buf, lowerhex[s[0]&0xF])
50 continue
51 }
52 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
53 }
54 buf = append(buf, quote)
55 return buf
56 }
57
58 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
59 buf = append(buf, quote)
60 if !utf8.ValidRune(r) {
61 r = utf8.RuneError
62 }
63 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
64 buf = append(buf, quote)
65 return buf
66 }
67
68 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
69 var runeTmp [utf8.UTFMax]byte
70 if r == rune(quote) || r == '\\' {
71 buf = append(buf, '\\')
72 buf = append(buf, byte(r))
73 return buf
74 }
75 if ASCIIonly {
76 if r < utf8.RuneSelf && IsPrint(r) {
77 buf = append(buf, byte(r))
78 return buf
79 }
80 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
81 n := utf8.EncodeRune(runeTmp[:], r)
82 buf = append(buf, runeTmp[:n]...)
83 return buf
84 }
85 switch r {
86 case '\a':
87 buf = append(buf, `\a`...)
88 case '\b':
89 buf = append(buf, `\b`...)
90 case '\f':
91 buf = append(buf, `\f`...)
92 case '\n':
93 buf = append(buf, `\n`...)
94 case '\r':
95 buf = append(buf, `\r`...)
96 case '\t':
97 buf = append(buf, `\t`...)
98 case '\v':
99 buf = append(buf, `\v`...)
100 default:
101 switch {
102 case r < ' ':
103 buf = append(buf, `\x`...)
104 buf = append(buf, lowerhex[byte(r)>>4])
105 buf = append(buf, lowerhex[byte(r)&0xF])
106 case r > utf8.MaxRune:
107 r = 0xFFFD
108 fallthrough
109 case r < 0x10000:
110 buf = append(buf, `\u`...)
111 for s := 12; s >= 0; s -= 4 {
112 buf = append(buf, lowerhex[r>>uint(s)&0xF])
113 }
114 default:
115 buf = append(buf, `\U`...)
116 for s := 28; s >= 0; s -= 4 {
117 buf = append(buf, lowerhex[r>>uint(s)&0xF])
118 }
119 }
120 }
121 return buf
122 }
123
124
125
126
127
128 func Quote(s string) string {
129 return quoteWith(s, '"', false, false)
130 }
131
132
133
134 func AppendQuote(dst []byte, s string) []byte {
135 return appendQuotedWith(dst, s, '"', false, false)
136 }
137
138
139
140
141 func QuoteToASCII(s string) string {
142 return quoteWith(s, '"', true, false)
143 }
144
145
146
147 func AppendQuoteToASCII(dst []byte, s string) []byte {
148 return appendQuotedWith(dst, s, '"', true, false)
149 }
150
151
152
153
154
155 func QuoteToGraphic(s string) string {
156 return quoteWith(s, '"', false, true)
157 }
158
159
160
161 func AppendQuoteToGraphic(dst []byte, s string) []byte {
162 return appendQuotedWith(dst, s, '"', false, true)
163 }
164
165
166
167
168 func QuoteRune(r rune) string {
169 return quoteRuneWith(r, '\'', false, false)
170 }
171
172
173
174 func AppendQuoteRune(dst []byte, r rune) []byte {
175 return appendQuotedRuneWith(dst, r, '\'', false, false)
176 }
177
178
179
180
181
182 func QuoteRuneToASCII(r rune) string {
183 return quoteRuneWith(r, '\'', true, false)
184 }
185
186
187
188 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
189 return appendQuotedRuneWith(dst, r, '\'', true, false)
190 }
191
192
193
194
195
196 func QuoteRuneToGraphic(r rune) string {
197 return quoteRuneWith(r, '\'', false, true)
198 }
199
200
201
202 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
203 return appendQuotedRuneWith(dst, r, '\'', false, true)
204 }
205
206
207
208
// CanBackquote reports whether s passes the checks below, which decide if it
// can be written unchanged between backquotes.
//
// It returns false if s contains a byte that is not valid UTF-8, a U+FEFF
// rune, a backquote, U+007F, or a control character below space other than
// tab. Every other multi-byte rune is accepted.
func CanBackquote(s string) bool {
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		pos += size

		switch {
		case size > 1:
			// Multi-byte runes are fine except for the byte order mark.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A one-byte RuneError means the byte was not valid UTF-8.
			return false
		case r == '`', r == '\u007F':
			return false
		case r < ' ' && r != '\t':
			return false
		}
	}
	return true
}
228
// unhex converts the hexadecimal digit b (0-9, a-f or A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	default:
		return 0, false
	}
}
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
257
258 if len(s) == 0 {
259 err = ErrSyntax
260 return
261 }
262 switch c := s[0]; {
263 case c == quote && (quote == '\'' || quote == '"'):
264 err = ErrSyntax
265 return
266 case c >= utf8.RuneSelf:
267 r, size := utf8.DecodeRuneInString(s)
268 return r, true, s[size:], nil
269 case c != '\\':
270 return rune(s[0]), false, s[1:], nil
271 }
272
273
274 if len(s) <= 1 {
275 err = ErrSyntax
276 return
277 }
278 c := s[1]
279 s = s[2:]
280
281 switch c {
282 case 'a':
283 value = '\a'
284 case 'b':
285 value = '\b'
286 case 'f':
287 value = '\f'
288 case 'n':
289 value = '\n'
290 case 'r':
291 value = '\r'
292 case 't':
293 value = '\t'
294 case 'v':
295 value = '\v'
296 case 'x', 'u', 'U':
297 n := 0
298 switch c {
299 case 'x':
300 n = 2
301 case 'u':
302 n = 4
303 case 'U':
304 n = 8
305 }
306 var v rune
307 if len(s) < n {
308 err = ErrSyntax
309 return
310 }
311 for j := 0; j < n; j++ {
312 x, ok := unhex(s[j])
313 if !ok {
314 err = ErrSyntax
315 return
316 }
317 v = v<<4 | x
318 }
319 s = s[n:]
320 if c == 'x' {
321
322 value = v
323 break
324 }
325 if v > utf8.MaxRune {
326 err = ErrSyntax
327 return
328 }
329 value = v
330 multibyte = true
331 case '0', '1', '2', '3', '4', '5', '6', '7':
332 v := rune(c) - '0'
333 if len(s) < 2 {
334 err = ErrSyntax
335 return
336 }
337 for j := 0; j < 2; j++ {
338 x := rune(s[j]) - '0'
339 if x < 0 || x > 7 {
340 err = ErrSyntax
341 return
342 }
343 v = (v << 3) | x
344 }
345 s = s[2:]
346 if v > 255 {
347 err = ErrSyntax
348 return
349 }
350 value = v
351 case '\\':
352 value = '\\'
353 case '\'', '"':
354 if c != quote {
355 err = ErrSyntax
356 return
357 }
358 value = rune(c)
359 default:
360 err = ErrSyntax
361 return
362 }
363 tail = s
364 return
365 }
366
367
368
369 func QuotedPrefix(s string) (string, error) {
370 out, _, err := unquote(s, false)
371 return out, err
372 }
373
374
375
376
377
378
379 func Unquote(s string) (string, error) {
380 out, rem, err := unquote(s, true)
381 if len(rem) > 0 {
382 return "", ErrSyntax
383 }
384 return out, err
385 }
386
387
388
389
390
391 func unquote(in string, unescape bool) (out, rem string, err error) {
392
393 if len(in) < 2 {
394 return "", in, ErrSyntax
395 }
396 quote := in[0]
397 end := index(in[1:], quote)
398 if end < 0 {
399 return "", in, ErrSyntax
400 }
401 end += 2
402
403 switch quote {
404 case '`':
405 switch {
406 case !unescape:
407 out = in[:end]
408 case !contains(in[:end], '\r'):
409 out = in[len("`") : end-len("`")]
410 default:
411
412
413 buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
414 for i := len("`"); i < end-len("`"); i++ {
415 if in[i] != '\r' {
416 buf = append(buf, in[i])
417 }
418 }
419 out = string(buf)
420 }
421
422
423
424
425
426 return out, in[end:], nil
427 case '"', '\'':
428
429 if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
430 var valid bool
431 switch quote {
432 case '"':
433 valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
434 case '\'':
435 r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
436 valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
437 }
438 if valid {
439 out = in[:end]
440 if unescape {
441 out = out[1 : end-1]
442 }
443 return out, in[end:], nil
444 }
445 }
446
447
448 var buf []byte
449 in0 := in
450 in = in[1:]
451 if unescape {
452 buf = make([]byte, 0, 3*end/2)
453 }
454 for len(in) > 0 && in[0] != quote {
455
456
457 r, multibyte, rem, err := UnquoteChar(in, quote)
458 if in[0] == '\n' || err != nil {
459 return "", in0, ErrSyntax
460 }
461 in = rem
462
463
464 if unescape {
465 if r < utf8.RuneSelf || !multibyte {
466 buf = append(buf, byte(r))
467 } else {
468 var arr [utf8.UTFMax]byte
469 n := utf8.EncodeRune(arr[:], r)
470 buf = append(buf, arr[:n]...)
471 }
472 }
473
474
475 if quote == '\'' {
476 break
477 }
478 }
479
480
481 if !(len(in) > 0 && in[0] == quote) {
482 return "", in0, ErrSyntax
483 }
484 in = in[1:]
485
486 if unescape {
487 return string(buf), in, nil
488 }
489 return in0[:len(in0)-len(in)], in, nil
490 default:
491 return "", in, ErrSyntax
492 }
493 }
494
495
496
// bsearch16 returns the smallest index i such that a[i] >= x, or len(a) if
// there is no such element. a must be sorted in ascending order.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if x <= a[mid] {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	return lo
}
509
510
511
// bsearch32 returns the smallest index i such that a[i] >= x, or len(a) if
// there is no such element. a must be sorted in ascending order.
func bsearch32(a []uint32, x uint32) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if x <= a[mid] {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	return lo
}
524
525
526
527
528
529
530
531
532
533
534 func IsPrint(r rune) bool {
535
536 if r <= 0xFF {
537 if 0x20 <= r && r <= 0x7E {
538
539 return true
540 }
541 if 0xA1 <= r && r <= 0xFF {
542
543 return r != 0xAD
544 }
545 return false
546 }
547
548
549
550
551
552
553
554 if 0 <= r && r < 1<<16 {
555 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
556 i := bsearch16(isPrint, rr)
557 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
558 return false
559 }
560 j := bsearch16(isNotPrint, rr)
561 return j >= len(isNotPrint) || isNotPrint[j] != rr
562 }
563
564 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
565 i := bsearch32(isPrint, rr)
566 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
567 return false
568 }
569 if r >= 0x20000 {
570 return true
571 }
572 r -= 0x10000
573 j := bsearch16(isNotPrint, uint16(r))
574 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
575 }
576
577
578
579
580 func IsGraphic(r rune) bool {
581 if IsPrint(r) {
582 return true
583 }
584 return isInGraphicList(r)
585 }
586
587
588
589
590 func isInGraphicList(r rune) bool {
591
592 if r > 0xFFFF {
593 return false
594 }
595 rr := uint16(r)
596 i := bsearch16(isGraphic, rr)
597 return i < len(isGraphic) && rr == isGraphic[i]
598 }
599
View as plain text