1
2
3
4
5 package syntax
6
7 import (
8 "sort"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15
// An Error describes a failure to parse a regular expression
// and carries the text of the offending expression.
type Error struct {
	Code ErrorCode // which kind of parse failure occurred
	Expr string    // the expression (or fragment of it) that triggered the failure
}
20
21 func (e *Error) Error() string {
22 return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
23 }
24
25
// An ErrorCode identifies a kind of regular expression parse failure.
// Its underlying string is the human-readable description.
type ErrorCode string
27
const (
	// Unexpected error. parse reports it when the parser panics with
	// this value (see checkHeight).
	ErrInternalError ErrorCode = "regexp/syntax: internal error"

	// Parse errors. Each value is the message text shown to the user.
	ErrInvalidCharClass      ErrorCode = "invalid character class"
	ErrInvalidCharRange      ErrorCode = "invalid character class range"
	ErrInvalidEscape         ErrorCode = "invalid escape sequence"
	ErrInvalidNamedCapture   ErrorCode = "invalid named capture"
	ErrInvalidPerlOp         ErrorCode = "invalid or unsupported Perl syntax"
	ErrInvalidRepeatOp       ErrorCode = "invalid nested repetition operator"
	ErrInvalidRepeatSize     ErrorCode = "invalid repeat count"
	ErrInvalidUTF8           ErrorCode = "invalid UTF-8"
	ErrMissingBracket        ErrorCode = "missing closing ]"
	ErrMissingParen          ErrorCode = "missing closing )"
	ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
	ErrTrailingBackslash     ErrorCode = "trailing backslash at end of expression"
	ErrUnexpectedParen       ErrorCode = "unexpected )"
)
47
48 func (e ErrorCode) String() string {
49 return string(e)
50 }
51
52
// Flags is a bit set that controls the behavior of the parser and is
// also recorded on each parsed node.
type Flags uint16
54
const (
	FoldCase      Flags = 1 << iota // case-insensitive matching
	Literal                         // treat the whole pattern as a literal string
	ClassNL                         // let negated classes such as [^a-z] match newline
	DotNL                           // let . match newline
	OneLine                         // ^ and $ match only at beginning and end of text
	NonGreedy                       // repetition operators are non-greedy; a trailing ? toggles this per operator
	PerlX                           // allow Perl extensions ((?flags), \A \b \B \z \Q...\E, \d-style classes)
	UnicodeGroups                   // allow \p and \P Unicode class escapes
	WasDollar                       // set on an OpEndText node that was written as $
	Simple                          // NOTE(review): not referenced in this part of the file; meaning not visible here

	MatchNL = ClassNL | DotNL // both newline-matching bits

	Perl        = ClassNL | OneLine | PerlX | UnicodeGroups // flag set for Perl-like syntax
	POSIX Flags = 0                                         // no extensions enabled
)
72
73
// Pseudo-operators that exist only on the parser stack. They are numbered
// from opPseudo upward, and the parser treats any stack entry whose
// Op >= opPseudo as a marker rather than an operand.
const (
	opLeftParen   = opPseudo + iota // marks an open parenthesis
	opVerticalBar                   // marks a pending alternation
)
78
79
80
81
82
83
84
85
86
87
88
89
90
// maxHeight is the maximum height of a regexp parse tree. It also serves
// as the number of allocated nodes below which checkHeight does no work.
const maxHeight = 1000
92
// parser holds the state of a single parse.
type parser struct {
	flags       Flags           // current parse mode flags
	stack       []*Regexp       // stack of parsed expressions and pseudo-operators
	free        *Regexp         // head of the free list of recycled nodes, linked through Sub0[0]
	numCap      int             // number of capturing groups seen so far
	wholeRegexp string          // the full pattern, used in error messages
	tmpClass    []rune          // scratch buffer reused while building character classes
	numRegexp   int             // number of Regexp nodes freshly allocated by newRegexp
	height      map[*Regexp]int // cached node heights; nil until checkHeight first needs it
}
103
104 func (p *parser) newRegexp(op Op) *Regexp {
105 re := p.free
106 if re != nil {
107 p.free = re.Sub0[0]
108 *re = Regexp{}
109 } else {
110 re = new(Regexp)
111 p.numRegexp++
112 }
113 re.Op = op
114 return re
115 }
116
117 func (p *parser) reuse(re *Regexp) {
118 if p.height != nil {
119 delete(p.height, re)
120 }
121 re.Sub0[0] = p.free
122 p.free = re
123 }
124
125 func (p *parser) checkHeight(re *Regexp) {
126 if p.numRegexp < maxHeight {
127 return
128 }
129 if p.height == nil {
130 p.height = make(map[*Regexp]int)
131 for _, re := range p.stack {
132 p.checkHeight(re)
133 }
134 }
135 if p.calcHeight(re, true) > maxHeight {
136 panic(ErrInternalError)
137 }
138 }
139
140 func (p *parser) calcHeight(re *Regexp, force bool) int {
141 if !force {
142 if h, ok := p.height[re]; ok {
143 return h
144 }
145 }
146 h := 1
147 for _, sub := range re.Sub {
148 hsub := p.calcHeight(sub, false)
149 if h < 1+hsub {
150 h = 1 + hsub
151 }
152 }
153 p.height[re] = h
154 return h
155 }
156
157
158
159
160 func (p *parser) push(re *Regexp) *Regexp {
161 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
162
163 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
164 return nil
165 }
166 re.Op = OpLiteral
167 re.Rune = re.Rune[:1]
168 re.Flags = p.flags &^ FoldCase
169 } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
170 re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
171 unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
172 unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
173 re.Op == OpCharClass && len(re.Rune) == 2 &&
174 re.Rune[0]+1 == re.Rune[1] &&
175 unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
176 unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
177
178 if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
179 return nil
180 }
181
182
183 re.Op = OpLiteral
184 re.Rune = re.Rune[:1]
185 re.Flags = p.flags | FoldCase
186 } else {
187
188 p.maybeConcat(-1, 0)
189 }
190
191 p.stack = append(p.stack, re)
192 p.checkHeight(re)
193 return re
194 }
195
196
197
198
199
200
201
202
203
204
205 func (p *parser) maybeConcat(r rune, flags Flags) bool {
206 n := len(p.stack)
207 if n < 2 {
208 return false
209 }
210
211 re1 := p.stack[n-1]
212 re2 := p.stack[n-2]
213 if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
214 return false
215 }
216
217
218 re2.Rune = append(re2.Rune, re1.Rune...)
219
220
221 if r >= 0 {
222 re1.Rune = re1.Rune0[:1]
223 re1.Rune[0] = r
224 re1.Flags = flags
225 return true
226 }
227
228 p.stack = p.stack[:n-1]
229 p.reuse(re1)
230 return false
231 }
232
233
234 func (p *parser) literal(r rune) {
235 re := p.newRegexp(OpLiteral)
236 re.Flags = p.flags
237 if p.flags&FoldCase != 0 {
238 r = minFoldRune(r)
239 }
240 re.Rune0[0] = r
241 re.Rune = re.Rune0[:1]
242 p.push(re)
243 }
244
245
246 func minFoldRune(r rune) rune {
247 if r < minFold || r > maxFold {
248 return r
249 }
250 min := r
251 r0 := r
252 for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
253 if min > r {
254 min = r
255 }
256 }
257 return min
258 }
259
260
261
262 func (p *parser) op(op Op) *Regexp {
263 re := p.newRegexp(op)
264 re.Flags = p.flags
265 return p.push(re)
266 }
267
268
269
270
271
272 func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
273 flags := p.flags
274 if p.flags&PerlX != 0 {
275 if len(after) > 0 && after[0] == '?' {
276 after = after[1:]
277 flags ^= NonGreedy
278 }
279 if lastRepeat != "" {
280
281
282
283 return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
284 }
285 }
286 n := len(p.stack)
287 if n == 0 {
288 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
289 }
290 sub := p.stack[n-1]
291 if sub.Op >= opPseudo {
292 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
293 }
294
295 re := p.newRegexp(op)
296 re.Min = min
297 re.Max = max
298 re.Flags = flags
299 re.Sub = re.Sub0[:1]
300 re.Sub[0] = sub
301 p.stack[n-1] = re
302 p.checkHeight(re)
303
304 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
305 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
306 }
307
308 return after, nil
309 }
310
311
312
313
314
315
316
317
318
319
320 func repeatIsValid(re *Regexp, n int) bool {
321 if re.Op == OpRepeat {
322 m := re.Max
323 if m == 0 {
324 return true
325 }
326 if m < 0 {
327 m = re.Min
328 }
329 if m > n {
330 return false
331 }
332 if m > 0 {
333 n /= m
334 }
335 }
336 for _, sub := range re.Sub {
337 if !repeatIsValid(sub, n) {
338 return false
339 }
340 }
341 return true
342 }
343
344
345 func (p *parser) concat() *Regexp {
346 p.maybeConcat(-1, 0)
347
348
349 i := len(p.stack)
350 for i > 0 && p.stack[i-1].Op < opPseudo {
351 i--
352 }
353 subs := p.stack[i:]
354 p.stack = p.stack[:i]
355
356
357 if len(subs) == 0 {
358 return p.push(p.newRegexp(OpEmptyMatch))
359 }
360
361 return p.push(p.collapse(subs, OpConcat))
362 }
363
364
365 func (p *parser) alternate() *Regexp {
366
367
368 i := len(p.stack)
369 for i > 0 && p.stack[i-1].Op < opPseudo {
370 i--
371 }
372 subs := p.stack[i:]
373 p.stack = p.stack[:i]
374
375
376
377 if len(subs) > 0 {
378 cleanAlt(subs[len(subs)-1])
379 }
380
381
382
383 if len(subs) == 0 {
384 return p.push(p.newRegexp(OpNoMatch))
385 }
386
387 return p.push(p.collapse(subs, OpAlternate))
388 }
389
390
391 func cleanAlt(re *Regexp) {
392 switch re.Op {
393 case OpCharClass:
394 re.Rune = cleanClass(&re.Rune)
395 if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
396 re.Rune = nil
397 re.Op = OpAnyChar
398 return
399 }
400 if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
401 re.Rune = nil
402 re.Op = OpAnyCharNotNL
403 return
404 }
405 if cap(re.Rune)-len(re.Rune) > 100 {
406
407
408 re.Rune = append(re.Rune0[:0], re.Rune...)
409 }
410 }
411 }
412
413
414
415
416
417 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
418 if len(subs) == 1 {
419 return subs[0]
420 }
421 re := p.newRegexp(op)
422 re.Sub = re.Sub0[:0]
423 for _, sub := range subs {
424 if sub.Op == op {
425 re.Sub = append(re.Sub, sub.Sub...)
426 p.reuse(sub)
427 } else {
428 re.Sub = append(re.Sub, sub)
429 }
430 }
431 if op == OpAlternate {
432 re.Sub = p.factor(re.Sub)
433 if len(re.Sub) == 1 {
434 old := re
435 re = re.Sub[0]
436 p.reuse(old)
437 }
438 }
439 return re
440 }
441
442
443
444
445
446
447
448
449
450
451
452
// factor simplifies the operand list of an alternation and returns the
// new list, which reuses the storage of sub. It works in four rounds:
//
//  1. Runs of adjacent operands sharing a leading literal string (with the
//     same FoldCase setting) are rewritten as prefix(suffix1|suffix2|...).
//  2. Runs of adjacent operands sharing an equal leading regexp are
//     factored the same way, but only when that leading piece is a
//     character class or a fixed-count repeat of one.
//  3. Runs of adjacent single-character operands (see isCharClass) are
//     merged into one character class.
//  4. Adjacent OpEmptyMatch operands are collapsed into one.
//
// Each round only merges neighbours, so the order of the alternatives is
// never changed. Rounds 1 and 2 recurse through collapse.
func (p *parser) factor(sub []*Regexp) []*Regexp {
	if len(sub) < 2 {
		return sub
	}

	// Round 1: Factor out common literal prefixes.
	var str []rune
	var strflags Flags
	start := 0
	out := sub[:0]
	for i := 0; i <= len(sub); i++ {
		// Invariant: sub[start:i] all begin with str (under strflags).
		// The extra iteration with i == len(sub) flushes the final run.
		var istr []rune
		var iflags Flags
		if i < len(sub) {
			istr, iflags = p.leadingString(sub[i])
			if iflags == strflags {
				same := 0
				for same < len(str) && same < len(istr) && str[same] == istr[same] {
					same++
				}
				if same > 0 {
					// sub[i] shares a non-empty prefix with the run:
					// shrink the common prefix and keep extending the run.
					str = str[:same]
					continue
				}
			}
		}

		// sub[i] does not extend the run (or we are past the end).
		// Emit sub[start:i], factored if it holds more than one element,
		// then begin a new run at i.
		if i == start {
			// Empty run: nothing to emit.
		} else if i == start+1 {
			// Single element: emit unchanged.
			out = append(out, sub[start])
		} else {
			// Build prefix(suffix1|suffix2|...).
			prefix := p.newRegexp(OpLiteral)
			prefix.Flags = strflags
			prefix.Rune = append(prefix.Rune[:0], str...)

			for j := start; j < i; j++ {
				sub[j] = p.removeLeadingString(sub[j], len(str))
			}
			suffix := p.collapse(sub[start:i], OpAlternate) // recurses into factor

			re := p.newRegexp(OpConcat)
			re.Sub = append(re.Sub[:0], prefix, suffix)
			out = append(out, re)
		}

		// Start the next run at i.
		start = i
		str = istr
		strflags = iflags
	}
	sub = out

	// Round 2: Factor out a common leading regexp, restricted to leading
	// pieces that are a character class or a fixed-count repeat of one
	// (see the condition below).
	start = 0
	out = sub[:0]
	var first *Regexp
	for i := 0; i <= len(sub); i++ {
		// Invariant: sub[start:i] all begin with a regexp equal to first.
		var ifirst *Regexp
		if i < len(sub) {
			ifirst = p.leadingRegexp(sub[i])
			if first != nil && first.Equal(ifirst) &&
				// first must be a character class or a repeat with Min == Max of one.
				(isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
				continue
			}
		}

		// sub[i] does not extend the run (or we are past the end).
		// Emit sub[start:i], factored if it holds more than one element.
		if i == start {
			// Empty run: nothing to emit.
		} else if i == start+1 {
			// Single element: emit unchanged.
			out = append(out, sub[start])
		} else {
			// Build prefix(suffix1|suffix2|...).
			prefix := first
			for j := start; j < i; j++ {
				reuse := j != start // the prefix node itself comes from sub[start], so keep that one
				sub[j] = p.removeLeadingRegexp(sub[j], reuse)
			}
			suffix := p.collapse(sub[start:i], OpAlternate) // recurses into factor

			re := p.newRegexp(OpConcat)
			re.Sub = append(re.Sub[:0], prefix, suffix)
			out = append(out, re)
		}

		// Start the next run at i.
		start = i
		first = ifirst
	}
	sub = out

	// Round 3: Merge runs of single-character operands into one class.
	start = 0
	out = sub[:0]
	for i := 0; i <= len(sub); i++ {
		// Invariant: sub[start:i] are all literal runes or character
		// classes (isCharClass is true for each).
		if i < len(sub) && isCharClass(sub[i]) {
			continue
		}

		// sub[i] is not a char class (or we are past the end):
		// emit the merged run sub[start:i].
		if i == start {
			// Empty run: nothing to emit.
		} else if i == start+1 {
			out = append(out, sub[start])
		} else {
			// Pick the element with the largest Op (ties broken by longer
			// Rune slice) as the merge destination and move it to the
			// front of the run.
			max := start
			for j := start + 1; j < i; j++ {
				if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
					max = j
				}
			}
			sub[start], sub[max] = sub[max], sub[start]

			for j := start + 1; j < i; j++ {
				mergeCharClass(sub[start], sub[j])
				p.reuse(sub[j])
			}
			cleanAlt(sub[start])
			out = append(out, sub[start])
		}

		// Emit the non-class element itself and start the next run after it.
		if i < len(sub) {
			out = append(out, sub[i])
		}
		start = i + 1
	}
	sub = out

	// Round 4: Collapse runs of adjacent empty matches into a single one
	// by skipping every empty match that is followed by another.
	start = 0
	out = sub[:0]
	for i := range sub {
		if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
			continue
		}
		out = append(out, sub[i])
	}
	sub = out

	return sub
}
637
638
639
640 func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
641 if re.Op == OpConcat && len(re.Sub) > 0 {
642 re = re.Sub[0]
643 }
644 if re.Op != OpLiteral {
645 return nil, 0
646 }
647 return re.Rune, re.Flags & FoldCase
648 }
649
650
651
652 func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
653 if re.Op == OpConcat && len(re.Sub) > 0 {
654
655
656 sub := re.Sub[0]
657 sub = p.removeLeadingString(sub, n)
658 re.Sub[0] = sub
659 if sub.Op == OpEmptyMatch {
660 p.reuse(sub)
661 switch len(re.Sub) {
662 case 0, 1:
663
664 re.Op = OpEmptyMatch
665 re.Sub = nil
666 case 2:
667 old := re
668 re = re.Sub[1]
669 p.reuse(old)
670 default:
671 copy(re.Sub, re.Sub[1:])
672 re.Sub = re.Sub[:len(re.Sub)-1]
673 }
674 }
675 return re
676 }
677
678 if re.Op == OpLiteral {
679 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
680 if len(re.Rune) == 0 {
681 re.Op = OpEmptyMatch
682 }
683 }
684 return re
685 }
686
687
688
689 func (p *parser) leadingRegexp(re *Regexp) *Regexp {
690 if re.Op == OpEmptyMatch {
691 return nil
692 }
693 if re.Op == OpConcat && len(re.Sub) > 0 {
694 sub := re.Sub[0]
695 if sub.Op == OpEmptyMatch {
696 return nil
697 }
698 return sub
699 }
700 return re
701 }
702
703
704
705
706 func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
707 if re.Op == OpConcat && len(re.Sub) > 0 {
708 if reuse {
709 p.reuse(re.Sub[0])
710 }
711 re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
712 switch len(re.Sub) {
713 case 0:
714 re.Op = OpEmptyMatch
715 re.Sub = nil
716 case 1:
717 old := re
718 re = re.Sub[0]
719 p.reuse(old)
720 }
721 return re
722 }
723 if reuse {
724 p.reuse(re)
725 }
726 return p.newRegexp(OpEmptyMatch)
727 }
728
729 func literalRegexp(s string, flags Flags) *Regexp {
730 re := &Regexp{Op: OpLiteral}
731 re.Flags = flags
732 re.Rune = re.Rune0[:0]
733 for _, c := range s {
734 if len(re.Rune) >= cap(re.Rune) {
735
736 re.Rune = []rune(s)
737 break
738 }
739 re.Rune = append(re.Rune, c)
740 }
741 return re
742 }
743
744
745
746
747
748
749 func Parse(s string, flags Flags) (*Regexp, error) {
750 return parse(s, flags)
751 }
752
// parse is the implementation of Parse. It walks the pattern one token
// at a time, pushing operands and pseudo-operators onto the parser stack,
// and finally reduces the stack to a single expression.
//
// A panic with ErrInternalError raised during parsing (see checkHeight)
// is recovered and returned as an *Error; any other panic is re-raised.
func parse(s string, flags Flags) (_ *Regexp, err error) {
	defer func() {
		switch r := recover(); r {
		default:
			panic(r)
		case nil:
			// No panic: leave err as set by the function body.
		case ErrInternalError:
			err = &Error{Code: ErrInternalError, Expr: s}
		}
	}()

	if flags&Literal != 0 {
		// Literal mode: the pattern is a plain string, only UTF-8 is checked.
		if err := checkUTF8(s); err != nil {
			return nil, err
		}
		return literalRegexp(s, flags), nil
	}

	// Otherwise, run the real parser.
	var (
		p          parser
		c          rune
		op         Op
		lastRepeat string
	)
	p.flags = flags
	p.wholeRegexp = s
	t := s
	for t != "" {
		// repeat is set to the operator text when this iteration parses a
		// repetition operator; it becomes lastRepeat for the next one.
		repeat := ""
	BigSwitch:
		switch t[0] {
		default:
			if c, t, err = nextRune(t); err != nil {
				return nil, err
			}
			p.literal(c)

		case '(':
			if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
				// (?...: flag changes, non-capturing and named groups.
				if t, err = p.parsePerlFlags(t); err != nil {
					return nil, err
				}
				break
			}
			p.numCap++
			p.op(opLeftParen).Cap = p.numCap
			t = t[1:]
		case '|':
			if err = p.parseVerticalBar(); err != nil {
				return nil, err
			}
			t = t[1:]
		case ')':
			if err = p.parseRightParen(); err != nil {
				return nil, err
			}
			t = t[1:]
		case '^':
			if p.flags&OneLine != 0 {
				p.op(OpBeginText)
			} else {
				p.op(OpBeginLine)
			}
			t = t[1:]
		case '$':
			if p.flags&OneLine != 0 {
				// Remember that this end-of-text was spelled as $.
				p.op(OpEndText).Flags |= WasDollar
			} else {
				p.op(OpEndLine)
			}
			t = t[1:]
		case '.':
			if p.flags&DotNL != 0 {
				p.op(OpAnyChar)
			} else {
				p.op(OpAnyCharNotNL)
			}
			t = t[1:]
		case '[':
			if t, err = p.parseClass(t); err != nil {
				return nil, err
			}
		case '*', '+', '?':
			before := t
			switch t[0] {
			case '*':
				op = OpStar
			case '+':
				op = OpPlus
			case '?':
				op = OpQuest
			}
			after := t[1:]
			if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
				return nil, err
			}
			repeat = before
			t = after
		case '{':
			op = OpRepeat
			before := t
			min, max, after, ok := p.parseRepeat(t)
			if !ok {
				// Not a well-formed {n}, {n,} or {n,m}: treat { as a literal.
				p.literal('{')
				t = t[1:]
				break
			}
			if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
				// Counts out of range, or an upper bound below the lower bound.
				return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
			}
			if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
				return nil, err
			}
			repeat = before
			t = after
		case '\\':
			if p.flags&PerlX != 0 && len(t) >= 2 {
				switch t[1] {
				case 'A':
					p.op(OpBeginText)
					t = t[2:]
					break BigSwitch
				case 'b':
					p.op(OpWordBoundary)
					t = t[2:]
					break BigSwitch
				case 'B':
					p.op(OpNoWordBoundary)
					t = t[2:]
					break BigSwitch
				case 'C':
					// \C is rejected as an invalid escape.
					return nil, &Error{ErrInvalidEscape, t[:2]}
				case 'Q':
					// \Q ... \E: everything in between is literal text.
					// Without a closing \E the literal runs to the end.
					var lit string
					if i := strings.Index(t, `\E`); i < 0 {
						lit = t[2:]
						t = ""
					} else {
						lit = t[2:i]
						t = t[i+2:]
					}
					for lit != "" {
						c, rest, err := nextRune(lit)
						if err != nil {
							return nil, err
						}
						p.literal(c)
						lit = rest
					}
					break BigSwitch
				case 'z':
					p.op(OpEndText)
					t = t[2:]
					break BigSwitch
				}
			}

			// Tentatively allocate a class node for the \p / \d style escapes.
			re := p.newRegexp(OpCharClass)
			re.Flags = p.flags

			// Unicode class escape: \p or \P followed by a group name.
			if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
				r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
				if err != nil {
					return nil, err
				}
				if r != nil {
					re.Rune = r
					t = rest
					p.push(re)
					break BigSwitch
				}
			}

			// Perl class escape (looked up in perlGroup).
			if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
				re.Rune = r
				t = rest
				p.push(re)
				break BigSwitch
			}
			// Not a class escape after all: recycle the node.
			p.reuse(re)

			// Ordinary single-character escape.
			if c, t, err = p.parseEscape(t); err != nil {
				return nil, err
			}
			p.literal(c)
		}
		lastRepeat = repeat
	}

	// End of pattern: reduce what is left exactly as for a closing paren.
	p.concat()
	if p.swapVerticalBar() {
		// Pop the vertical bar marker left on top of the stack.
		p.stack = p.stack[:len(p.stack)-1]
	}
	p.alternate()

	// Anything other than a single expression means an unclosed group.
	n := len(p.stack)
	if n != 1 {
		return nil, &Error{ErrMissingParen, s}
	}
	return p.stack[0], nil
}
966
967
968
969
970 func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
971 if s == "" || s[0] != '{' {
972 return
973 }
974 s = s[1:]
975 var ok1 bool
976 if min, s, ok1 = p.parseInt(s); !ok1 {
977 return
978 }
979 if s == "" {
980 return
981 }
982 if s[0] != ',' {
983 max = min
984 } else {
985 s = s[1:]
986 if s == "" {
987 return
988 }
989 if s[0] == '}' {
990 max = -1
991 } else if max, s, ok1 = p.parseInt(s); !ok1 {
992 return
993 } else if max < 0 {
994
995 min = -1
996 }
997 }
998 if s == "" || s[0] != '}' {
999 return
1000 }
1001 rest = s[1:]
1002 ok = true
1003 return
1004 }
1005
1006
1007
1008
// parsePerlFlags parses a Perl-style group beginning with "(?" at the
// start of s and returns the text that follows the construct. It handles
// three forms:
//
//   - (?P<name>  opens a named capturing group;
//   - (?flags)   changes the current flags (i, m, s, U, with an optional
//     '-' introducing flags to clear);
//   - (?flags:   changes the flags and opens a non-capturing group.
//
// The caller has already checked that s starts with "(?".
func (p *parser) parsePerlFlags(s string) (rest string, err error) {
	t := s

	// Named capture: (?P<name>expr). The name must satisfy
	// isValidCaptureName; anything else is ErrInvalidNamedCapture.
	if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
		// Find the end of the name.
		end := strings.IndexRune(t, '>')
		if end < 0 {
			if err = checkUTF8(t); err != nil {
				return "", err
			}
			return "", &Error{ErrInvalidNamedCapture, s}
		}

		// t[end] == '>'
		capture := t[:end+1] // "(?P<name>"
		name := t[4:end]     // "name"
		if err = checkUTF8(name); err != nil {
			return "", err
		}
		if !isValidCaptureName(name) {
			return "", &Error{ErrInvalidNamedCapture, capture}
		}

		// Like an ordinary capture group, but with a name attached.
		p.numCap++
		re := p.op(opLeftParen)
		re.Cap = p.numCap
		re.Name = name
		return t[end+1:], nil
	}

	// Flag change and/or non-capturing group.
	var c rune
	t = t[2:] // skip "(?"
	flags := p.flags
	sign := +1
	sawFlag := false
Loop:
	for t != "" {
		if c, t, err = nextRune(t); err != nil {
			return "", err
		}
		switch c {
		default:
			break Loop

		// Flags. While sign < 0 the flag word is held inverted, so the
		// same statements below clear a bit where they would otherwise
		// set it, and vice versa.
		case 'i':
			flags |= FoldCase
			sawFlag = true
		case 'm':
			flags &^= OneLine
			sawFlag = true
		case 's':
			flags |= DotNL
			sawFlag = true
		case 'U':
			flags |= NonGreedy
			sawFlag = true

		// Switch to negation; a second '-' is an error.
		case '-':
			if sign < 0 {
				break Loop
			}
			sign = -1
			// Invert the flag word so the cases above act in reverse.
			// It is inverted back before being stored.
			flags = ^flags
			sawFlag = false

		// End of flags, optionally opening a group.
		case ':', ')':
			if sign < 0 {
				// A '-' must be followed by at least one flag.
				if !sawFlag {
					break Loop
				}
				flags = ^flags
			}
			if c == ':' {
				// Open a new non-capturing group (Cap stays 0).
				p.op(opLeftParen)
			}
			p.flags = flags
			return t, nil
		}
	}

	return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
}
1113
1114
1115
1116
1117
1118
1119 func isValidCaptureName(name string) bool {
1120 if name == "" {
1121 return false
1122 }
1123 for _, c := range name {
1124 if c != '_' && !isalnum(c) {
1125 return false
1126 }
1127 }
1128 return true
1129 }
1130
1131
1132 func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
1133 if s == "" || s[0] < '0' || '9' < s[0] {
1134 return
1135 }
1136
1137 if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
1138 return
1139 }
1140 t := s
1141 for s != "" && '0' <= s[0] && s[0] <= '9' {
1142 s = s[1:]
1143 }
1144 rest = s
1145 ok = true
1146
1147 t = t[:len(t)-len(s)]
1148 for i := 0; i < len(t); i++ {
1149
1150 if n >= 1e8 {
1151 n = -1
1152 break
1153 }
1154 n = n*10 + int(t[i]) - '0'
1155 }
1156 return
1157 }
1158
1159
1160
1161 func isCharClass(re *Regexp) bool {
1162 return re.Op == OpLiteral && len(re.Rune) == 1 ||
1163 re.Op == OpCharClass ||
1164 re.Op == OpAnyCharNotNL ||
1165 re.Op == OpAnyChar
1166 }
1167
1168
1169 func matchRune(re *Regexp, r rune) bool {
1170 switch re.Op {
1171 case OpLiteral:
1172 return len(re.Rune) == 1 && re.Rune[0] == r
1173 case OpCharClass:
1174 for i := 0; i < len(re.Rune); i += 2 {
1175 if re.Rune[i] <= r && r <= re.Rune[i+1] {
1176 return true
1177 }
1178 }
1179 return false
1180 case OpAnyCharNotNL:
1181 return r != '\n'
1182 case OpAnyChar:
1183 return true
1184 }
1185 return false
1186 }
1187
1188
1189 func (p *parser) parseVerticalBar() error {
1190 p.concat()
1191
1192
1193
1194
1195
1196 if !p.swapVerticalBar() {
1197 p.op(opVerticalBar)
1198 }
1199
1200 return nil
1201 }
1202
1203
1204
1205
1206 func mergeCharClass(dst, src *Regexp) {
1207 switch dst.Op {
1208 case OpAnyChar:
1209
1210 case OpAnyCharNotNL:
1211
1212 if matchRune(src, '\n') {
1213 dst.Op = OpAnyChar
1214 }
1215 case OpCharClass:
1216
1217 if src.Op == OpLiteral {
1218 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1219 } else {
1220 dst.Rune = appendClass(dst.Rune, src.Rune)
1221 }
1222 case OpLiteral:
1223
1224 if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
1225 break
1226 }
1227 dst.Op = OpCharClass
1228 dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
1229 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1230 }
1231 }
1232
1233
1234
1235
1236 func (p *parser) swapVerticalBar() bool {
1237
1238
1239 n := len(p.stack)
1240 if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
1241 re1 := p.stack[n-1]
1242 re3 := p.stack[n-3]
1243
1244 if re1.Op > re3.Op {
1245 re1, re3 = re3, re1
1246 p.stack[n-3] = re3
1247 }
1248 mergeCharClass(re3, re1)
1249 p.reuse(re1)
1250 p.stack = p.stack[:n-1]
1251 return true
1252 }
1253
1254 if n >= 2 {
1255 re1 := p.stack[n-1]
1256 re2 := p.stack[n-2]
1257 if re2.Op == opVerticalBar {
1258 if n >= 3 {
1259
1260
1261 cleanAlt(p.stack[n-3])
1262 }
1263 p.stack[n-2] = re1
1264 p.stack[n-1] = re2
1265 return true
1266 }
1267 }
1268 return false
1269 }
1270
1271
1272 func (p *parser) parseRightParen() error {
1273 p.concat()
1274 if p.swapVerticalBar() {
1275
1276 p.stack = p.stack[:len(p.stack)-1]
1277 }
1278 p.alternate()
1279
1280 n := len(p.stack)
1281 if n < 2 {
1282 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1283 }
1284 re1 := p.stack[n-1]
1285 re2 := p.stack[n-2]
1286 p.stack = p.stack[:n-2]
1287 if re2.Op != opLeftParen {
1288 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1289 }
1290
1291 p.flags = re2.Flags
1292 if re2.Cap == 0 {
1293
1294 p.push(re1)
1295 } else {
1296 re2.Op = OpCapture
1297 re2.Sub = re2.Sub0[:1]
1298 re2.Sub[0] = re1
1299 p.push(re2)
1300 }
1301 return nil
1302 }
1303
1304
1305
// parseEscape parses a single-character escape sequence at the start of
// s, which must begin with a backslash, and returns the rune it denotes
// together with the remaining text. Unrecognized or malformed sequences
// yield ErrInvalidEscape carrying the text consumed so far; a lone
// trailing backslash yields ErrTrailingBackslash.
func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
	t := s[1:] // skip the backslash
	if t == "" {
		return 0, "", &Error{ErrTrailingBackslash, ""}
	}
	c, t, err := nextRune(t)
	if err != nil {
		return 0, "", err
	}

Switch:
	switch c {
	default:
		if c < utf8.RuneSelf && !isalnum(c) {
			// An escaped ASCII character that isalnum rejects stands for
			// itself. Escaped alphanumerics not handled below fall out of
			// the switch and are reported as invalid.
			return c, t, nil
		}

	// Octal escapes.
	case '1', '2', '3', '4', '5', '6', '7':
		// A nonzero digit that is not followed by another octal digit
		// is rejected.
		if t == "" || t[0] < '0' || t[0] > '7' {
			break
		}
		fallthrough
	case '0':
		// Consume up to two more octal digits.
		r = c - '0'
		for i := 1; i < 3; i++ {
			if t == "" || t[0] < '0' || t[0] > '7' {
				break
			}
			r = r*8 + rune(t[0]) - '0'
			t = t[1:]
		}
		return r, t, nil

	// Hexadecimal escapes.
	case 'x':
		if t == "" {
			break
		}
		if c, t, err = nextRune(t); err != nil {
			return 0, "", err
		}
		if c == '{' {
			// \x{...}: any number of hex digits inside braces, as long
			// as the value does not exceed unicode.MaxRune and at least
			// one digit is present.
			nhex := 0
			r = 0
			for {
				if t == "" {
					break Switch
				}
				if c, t, err = nextRune(t); err != nil {
					return 0, "", err
				}
				if c == '}' {
					break
				}
				v := unhex(c)
				if v < 0 {
					break Switch
				}
				r = r*16 + v
				if r > unicode.MaxRune {
					break Switch
				}
				nhex++
			}
			if nhex == 0 {
				break Switch
			}
			return r, t, nil
		}

		// \xHH: exactly two hex digits.
		x := unhex(c)
		if c, t, err = nextRune(t); err != nil {
			return 0, "", err
		}
		y := unhex(c)
		if x < 0 || y < 0 {
			break
		}
		return x*16 + y, t, nil

	// C-style control character escapes. err is nil here because the
	// nextRune call above succeeded.
	case 'a':
		return '\a', t, err
	case 'f':
		return '\f', t, err
	case 'n':
		return '\n', t, err
	case 'r':
		return '\r', t, err
	case 't':
		return '\t', t, err
	case 'v':
		return '\v', t, err
	}
	return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
}
1419
1420
1421
1422 func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
1423 if s == "" {
1424 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
1425 }
1426
1427
1428
1429 if s[0] == '\\' {
1430 return p.parseEscape(s)
1431 }
1432
1433 return nextRune(s)
1434 }
1435
// charGroup describes a predefined character group, as stored in the
// perlGroup and posixGroup tables.
type charGroup struct {
	sign  int    // negative for a negated group; zero means "no such group" on table lookup
	class []rune // the group's ranges as a flat list of lo, hi pairs
}
1440
1441
1442
1443
1444 func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
1445 if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
1446 return
1447 }
1448 g := perlGroup[s[0:2]]
1449 if g.sign == 0 {
1450 return
1451 }
1452 return p.appendGroup(r, g), s[2:]
1453 }
1454
1455
1456
1457
1458 func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
1459 if len(s) < 2 || s[0] != '[' || s[1] != ':' {
1460 return
1461 }
1462
1463 i := strings.Index(s[2:], ":]")
1464 if i < 0 {
1465 return
1466 }
1467 i += 2
1468 name, s := s[0:i+2], s[i+2:]
1469 g := posixGroup[name]
1470 if g.sign == 0 {
1471 return nil, "", &Error{ErrInvalidCharRange, name}
1472 }
1473 return p.appendGroup(r, g), s, nil
1474 }
1475
1476 func (p *parser) appendGroup(r []rune, g charGroup) []rune {
1477 if p.flags&FoldCase == 0 {
1478 if g.sign < 0 {
1479 r = appendNegatedClass(r, g.class)
1480 } else {
1481 r = appendClass(r, g.class)
1482 }
1483 } else {
1484 tmp := p.tmpClass[:0]
1485 tmp = appendFoldedClass(tmp, g.class)
1486 p.tmpClass = tmp
1487 tmp = cleanClass(&p.tmpClass)
1488 if g.sign < 0 {
1489 r = appendNegatedClass(r, tmp)
1490 } else {
1491 r = appendClass(r, tmp)
1492 }
1493 }
1494 return r
1495 }
1496
// anyTable is a range table covering every code point from 0 through
// unicode.MaxRune. unicodeTable returns it for the group name "Any".
var anyTable = &unicode.RangeTable{
	R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
	R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
}
1501
1502
1503
1504 func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
1505
1506 if name == "Any" {
1507 return anyTable, anyTable
1508 }
1509 if t := unicode.Categories[name]; t != nil {
1510 return t, unicode.FoldCategory[name]
1511 }
1512 if t := unicode.Scripts[name]; t != nil {
1513 return t, unicode.FoldScript[name]
1514 }
1515 return nil, nil
1516 }
1517
1518
1519
1520
// parseUnicodeClass parses a Unicode class escape at the start of s: \p
// or \P followed either by a single-rune name or by a name in braces. If
// one is present (and UnicodeGroups is set), it appends the class to r
// and returns the new slice and the remaining text. If s does not start
// with such an escape it returns nil, "", nil. An unknown group name or a
// missing closing brace is reported as ErrInvalidCharRange.
func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
	if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
		return
	}

	// \P negates the group.
	sign := +1
	if s[1] == 'P' {
		sign = -1
	}
	t := s[2:]
	c, t, err := nextRune(t)
	if err != nil {
		return
	}
	var seq, name string
	if c != '{' {
		// Single-rune name: the rune right after \p or \P.
		seq = s[:len(s)-len(t)]
		name = seq[2:]
	} else {
		// Name is enclosed in braces.
		end := strings.IndexRune(s, '}')
		if end < 0 {
			if err = checkUTF8(s); err != nil {
				return
			}
			return nil, "", &Error{ErrInvalidCharRange, s}
		}
		seq, t = s[:end+1], s[end+1:]
		name = s[3:end]
		if err = checkUTF8(name); err != nil {
			return
		}
	}

	// A leading ^ in the name flips the sign once more.
	if name != "" && name[0] == '^' {
		sign = -sign
		name = name[1:]
	}

	tab, fold := unicodeTable(name)
	if tab == nil {
		return nil, "", &Error{ErrInvalidCharRange, seq}
	}

	if p.flags&FoldCase == 0 || fold == nil {
		if sign > 0 {
			r = appendTable(r, tab)
		} else {
			r = appendNegatedTable(r, tab)
		}
	} else {
		// Case-insensitive with a fold table: merge the table and its
		// fold table in the scratch buffer, clean the result, and only
		// then append it (negated if required).
		tmp := p.tmpClass[:0]
		tmp = appendTable(tmp, tab)
		tmp = appendTable(tmp, fold)
		p.tmpClass = tmp
		tmp = cleanClass(&p.tmpClass)
		if sign > 0 {
			r = appendClass(r, tmp)
		} else {
			r = appendNegatedClass(r, tmp)
		}
	}
	return r, t, nil
}
1591
1592
1593
// parseClass parses a bracketed character class at the start of s, which
// must begin with '[', pushes the resulting class onto the parse stack,
// and returns the text following the closing ']'.
func (p *parser) parseClass(s string) (rest string, err error) {
	t := s[1:] // skip the opening [
	re := p.newRegexp(OpCharClass)
	re.Flags = p.flags
	re.Rune = re.Rune0[:0]

	sign := +1
	if t != "" && t[0] == '^' {
		sign = -1
		t = t[1:]

		// Without ClassNL a negated class must not match newline.
		// Adding \n to the class now means the negation at the end
		// excludes it.
		if p.flags&ClassNL == 0 {
			re.Rune = append(re.Rune, '\n', '\n')
		}
	}

	class := re.Rune
	first := true // a ] or - as the very first class character is taken literally
	for t == "" || t[0] != ']' || first {
		// Without PerlX, an unescaped - is only accepted as the first
		// character of the class or immediately before the closing ].
		if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
			_, size := utf8.DecodeRuneInString(t[1:])
			return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
		}
		first = false

		// Named class of the form [:name:].
		if len(t) > 2 && t[0] == '[' && t[1] == ':' {
			nclass, nt, err := p.parseNamedClass(t, class)
			if err != nil {
				return "", err
			}
			if nclass != nil {
				class, t = nclass, nt
				continue
			}
		}

		// Unicode class escape (\p / \P).
		nclass, nt, err := p.parseUnicodeClass(t, class)
		if err != nil {
			return "", err
		}
		if nclass != nil {
			class, t = nclass, nt
			continue
		}

		// Perl class escape (looked up in perlGroup).
		if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
			class, t = nclass, nt
			continue
		}

		// Single character or simple lo-hi range.
		rng := t
		var lo, hi rune
		if lo, t, err = p.parseClassChar(t, s); err != nil {
			return "", err
		}
		hi = lo
		// A - directly before the closing ] is a literal, not a range.
		if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
			t = t[1:]
			if hi, t, err = p.parseClassChar(t, s); err != nil {
				return "", err
			}
			if hi < lo {
				rng = rng[:len(rng)-len(t)]
				return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
			}
		}
		if p.flags&FoldCase == 0 {
			class = appendRange(class, lo, hi)
		} else {
			class = appendFoldedRange(class, lo, hi)
		}
	}
	t = t[1:] // skip the closing ]

	// Store the class in re.Rune first so cleanClass can work through
	// &re.Rune rather than the address of the local variable.
	re.Rune = class
	class = cleanClass(&re.Rune)
	if sign < 0 {
		class = negateClass(class)
	}
	re.Rune = class
	p.push(re)
	return t, nil
}
1687
1688
1689
1690 func cleanClass(rp *[]rune) []rune {
1691
1692
1693 sort.Sort(ranges{rp})
1694
1695 r := *rp
1696 if len(r) < 2 {
1697 return r
1698 }
1699
1700
1701 w := 2
1702 for i := 2; i < len(r); i += 2 {
1703 lo, hi := r[i], r[i+1]
1704 if lo <= r[w-1]+1 {
1705
1706 if hi > r[w-1] {
1707 r[w-1] = hi
1708 }
1709 continue
1710 }
1711
1712 r[w] = lo
1713 r[w+1] = hi
1714 w += 2
1715 }
1716
1717 return r[:w]
1718 }
1719
1720
1721 func appendLiteral(r []rune, x rune, flags Flags) []rune {
1722 if flags&FoldCase != 0 {
1723 return appendFoldedRange(r, x, x)
1724 }
1725 return appendRange(r, x, x)
1726 }
1727
1728
// appendRange returns the result of appending the range lo-hi to the
// class r. If the new range overlaps or abuts one of the last two ranges
// already in r, that range is widened in place instead of appending a new
// pair. Looking at two ranges rather than one helps when ranges arrive in
// interleaved order (for example while folding case).
func appendRange(r []rune, lo, hi rune) []rune {
	n := len(r)
	for back := 2; back <= 4 && back <= n; back += 2 {
		pair := r[n-back : n-back+2]
		// Skip this candidate unless [lo, hi] touches or overlaps it.
		if lo > pair[1]+1 || pair[0] > hi+1 {
			continue
		}
		if lo < pair[0] {
			pair[0] = lo
		}
		if hi > pair[1] {
			pair[1] = hi
		}
		return r
	}

	return append(r, lo, hi)
}
1752
const (
	// minFold and maxFold bound the span of runes that appendFoldedRange
	// expands with unicode.SimpleFold; any part of a range lying outside
	// [minFold, maxFold] is appended without folding.
	// NOTE(review): presumably these are the smallest and largest runes that
	// take part in case folding in the Unicode tables — confirm against the
	// package tests when the Unicode version changes.
	minFold = 0x0041
	maxFold = 0x1e943
)
1759
1760
1761
1762 func appendFoldedRange(r []rune, lo, hi rune) []rune {
1763
1764 if lo <= minFold && hi >= maxFold {
1765
1766 return appendRange(r, lo, hi)
1767 }
1768 if hi < minFold || lo > maxFold {
1769
1770 return appendRange(r, lo, hi)
1771 }
1772 if lo < minFold {
1773
1774 r = appendRange(r, lo, minFold-1)
1775 lo = minFold
1776 }
1777 if hi > maxFold {
1778
1779 r = appendRange(r, maxFold+1, hi)
1780 hi = maxFold
1781 }
1782
1783
1784 for c := lo; c <= hi; c++ {
1785 r = appendRange(r, c, c)
1786 f := unicode.SimpleFold(c)
1787 for f != c {
1788 r = appendRange(r, f, f)
1789 f = unicode.SimpleFold(f)
1790 }
1791 }
1792 return r
1793 }
1794
1795
1796
1797 func appendClass(r []rune, x []rune) []rune {
1798 for i := 0; i < len(x); i += 2 {
1799 r = appendRange(r, x[i], x[i+1])
1800 }
1801 return r
1802 }
1803
1804
1805 func appendFoldedClass(r []rune, x []rune) []rune {
1806 for i := 0; i < len(x); i += 2 {
1807 r = appendFoldedRange(r, x[i], x[i+1])
1808 }
1809 return r
1810 }
1811
1812
1813
1814 func appendNegatedClass(r []rune, x []rune) []rune {
1815 nextLo := '\u0000'
1816 for i := 0; i < len(x); i += 2 {
1817 lo, hi := x[i], x[i+1]
1818 if nextLo <= lo-1 {
1819 r = appendRange(r, nextLo, lo-1)
1820 }
1821 nextLo = hi + 1
1822 }
1823 if nextLo <= unicode.MaxRune {
1824 r = appendRange(r, nextLo, unicode.MaxRune)
1825 }
1826 return r
1827 }
1828
1829
1830 func appendTable(r []rune, x *unicode.RangeTable) []rune {
1831 for _, xr := range x.R16 {
1832 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1833 if stride == 1 {
1834 r = appendRange(r, lo, hi)
1835 continue
1836 }
1837 for c := lo; c <= hi; c += stride {
1838 r = appendRange(r, c, c)
1839 }
1840 }
1841 for _, xr := range x.R32 {
1842 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1843 if stride == 1 {
1844 r = appendRange(r, lo, hi)
1845 continue
1846 }
1847 for c := lo; c <= hi; c += stride {
1848 r = appendRange(r, c, c)
1849 }
1850 }
1851 return r
1852 }
1853
1854
1855 func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
1856 nextLo := '\u0000'
1857 for _, xr := range x.R16 {
1858 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1859 if stride == 1 {
1860 if nextLo <= lo-1 {
1861 r = appendRange(r, nextLo, lo-1)
1862 }
1863 nextLo = hi + 1
1864 continue
1865 }
1866 for c := lo; c <= hi; c += stride {
1867 if nextLo <= c-1 {
1868 r = appendRange(r, nextLo, c-1)
1869 }
1870 nextLo = c + 1
1871 }
1872 }
1873 for _, xr := range x.R32 {
1874 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1875 if stride == 1 {
1876 if nextLo <= lo-1 {
1877 r = appendRange(r, nextLo, lo-1)
1878 }
1879 nextLo = hi + 1
1880 continue
1881 }
1882 for c := lo; c <= hi; c += stride {
1883 if nextLo <= c-1 {
1884 r = appendRange(r, nextLo, c-1)
1885 }
1886 nextLo = c + 1
1887 }
1888 }
1889 if nextLo <= unicode.MaxRune {
1890 r = appendRange(r, nextLo, unicode.MaxRune)
1891 }
1892 return r
1893 }
1894
1895
1896
// negateClass replaces the lo-hi pairs in r with the pairs covering the gaps
// between them, plus the tail up to unicode.MaxRune, and returns the result.
// The slice is rewritten in place, so the input contents are overwritten.
// NOTE(review): the gap computation presumes the pairs of r are sorted and
// non-overlapping — confirm that callers pass a cleaned class.
func negateClass(r []rune) []rune {
	var gapLo rune // first rune not yet accounted for by a pair of r
	// out reuses r's storage. It never grows past the pair currently being
	// read, so these appends overwrite only pairs that were already consumed.
	out := r[:0]
	for at := 0; at < len(r); at += 2 {
		lo, hi := r[at], r[at+1]
		if gapLo <= lo-1 {
			out = append(out, gapLo, lo-1)
		}
		gapLo = hi + 1
	}
	if gapLo > unicode.MaxRune {
		return out
	}
	// The final gap can make the negation one pair longer than the input,
	// so this append may have to grow the slice.
	return append(out, gapLo, unicode.MaxRune)
}
1917
1918
1919
1920
1921
// ranges adapts a flat list of lo,hi rune pairs to sort.Interface. Indices
// passed to its methods count pairs, not runes. It holds a pointer to the
// slice, and each method dereferences that pointer on every call.
type ranges struct {
	p *[]rune
}

// Less orders pairs by lo ascending; pairs with the same lo are ordered by
// hi descending, so the widest of them comes first.
func (ra ranges) Less(i, j int) bool {
	pairs := *ra.p
	loI, loJ := pairs[2*i], pairs[2*j]
	if loI != loJ {
		return loI < loJ
	}
	return pairs[2*i+1] > pairs[2*j+1]
}

// Len reports the number of complete pairs in the slice.
func (ra ranges) Len() int {
	pairs := *ra.p
	return len(pairs) / 2
}

// Swap exchanges pair i with pair j.
func (ra ranges) Swap(i, j int) {
	pairs := *ra.p
	a, b := 2*i, 2*j
	pairs[a], pairs[b] = pairs[b], pairs[a]
	pairs[a+1], pairs[b+1] = pairs[b+1], pairs[a+1]
}
1943
1944 func checkUTF8(s string) error {
1945 for s != "" {
1946 rune, size := utf8.DecodeRuneInString(s)
1947 if rune == utf8.RuneError && size == 1 {
1948 return &Error{Code: ErrInvalidUTF8, Expr: s}
1949 }
1950 s = s[size:]
1951 }
1952 return nil
1953 }
1954
1955 func nextRune(s string) (c rune, t string, err error) {
1956 c, size := utf8.DecodeRuneInString(s)
1957 if c == utf8.RuneError && size == 1 {
1958 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
1959 }
1960 return c, s[size:], nil
1961 }
1962
// isalnum reports whether c is an ASCII decimal digit or an ASCII letter.
func isalnum(c rune) bool {
	switch {
	case '0' <= c && c <= '9',
		'A' <= c && c <= 'Z',
		'a' <= c && c <= 'z':
		return true
	}
	return false
}
1966
// unhex returns the numeric value of the hexadecimal digit c, accepting both
// lower- and upper-case letters, or -1 if c is not a hexadecimal digit.
func unhex(c rune) rune {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return 10 + (c - 'a')
	case 'A' <= c && c <= 'F':
		return 10 + (c - 'A')
	default:
		return -1
	}
}
1979
View as plain text