1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
// item represents a token or text string produced by the lexer.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The value of this item: the matched input text, or the message for an error item.
	line int      // The line number at the start of this item.
}
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the type of lex items.
type itemType int
38
// The kinds of item the lexer can emit. The order matters: every constant
// after itemKeyword is treated as a keyword (see the "> itemKeyword"
// comparisons in item.String and lexIdentifier).
const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant ("true" or "false")
	itemChar                         // printable ASCII character; grab bag for anything not matched elsewhere
	itemCharConstant                 // character constant (includes the single quotes)
	itemComment                      // comment text
	itemComplex                      // complex constant such as 1+2i
	itemAssign                       // equals ('=')
	itemDeclare                      // colon-equals (':=')
	itemEOF                          // end of input
	itemField                        // alphanumeric identifier starting with '.'
	itemIdentifier                   // alphanumeric identifier not starting with '.'
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside action
	itemNumber                       // simple number, including imaginary
	itemPipe                         // pipe symbol ('|')
	itemRawString                    // raw quoted string (includes the back quotes)
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside action
	itemSpace                        // run of spaces separating arguments
	itemString                       // quoted string (includes the double quotes)
	itemText                         // plain text outside of actions
	itemVariable                     // variable starting with '$', such as '$' or '$hello'

	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // nil keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)
75
76 var key = map[string]itemType{
77 ".": itemDot,
78 "block": itemBlock,
79 "define": itemDefine,
80 "else": itemElse,
81 "end": itemEnd,
82 "if": itemIf,
83 "range": itemRange,
84 "nil": itemNil,
85 "template": itemTemplate,
86 "with": itemWith,
87 }
88
// eof is the sentinel value next returns once the input is exhausted.
const eof = -1
90
91
92
93
94
95
96
97
98
// Trimming spaces.
// If an action begins with the left delimiter followed by the trim marker
// and a space ("{{- " with the default delimiters), the white space that
// precedes the action is trimmed; if it ends with a space, the trim marker
// and the right delimiter (" -}}"), the white space that follows it is
// trimmed. The marker must be separated from the action body by a space
// character, so the marker sequence is always two bytes long.
const (
	spaceChars    = " \t\r\n"  // the characters removed when trimming; the same set isSpace accepts
	trimMarker    = '-'        // attached to a delimiter to request trimming of the adjacent text
	trimMarkerLen = Pos(1 + 1) // marker plus the space before or after it
)
104
105
// stateFn represents the state of the scanner as a function that returns
// the next state. A nil stateFn stops the state machine (see run).
type stateFn func(*lexer) stateFn
107
108
// lexer holds the state of the scanner.
type lexer struct {
	name        string    // the name of the input; stored by lex but not read by the code in this file
	input       string    // the string being scanned
	leftDelim   string    // start of action
	rightDelim  string    // end of action
	emitComment bool      // emit itemComment tokens instead of discarding comments
	pos         Pos       // current position in the input
	start       Pos       // start position of the item being scanned
	width       Pos       // width of the last rune read from input; used by backup
	items       chan item // channel of scanned items
	parenDepth  int       // nesting depth of ( ) expressions
	line        int       // 1 + number of newlines seen
	startLine   int       // line on which the item being scanned starts
}
123
124
125 func (l *lexer) next() rune {
126 if int(l.pos) >= len(l.input) {
127 l.width = 0
128 return eof
129 }
130 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
131 l.width = Pos(w)
132 l.pos += l.width
133 if r == '\n' {
134 l.line++
135 }
136 return r
137 }
138
139
140 func (l *lexer) peek() rune {
141 r := l.next()
142 l.backup()
143 return r
144 }
145
146
147 func (l *lexer) backup() {
148 l.pos -= l.width
149
150 if l.width == 1 && l.input[l.pos] == '\n' {
151 l.line--
152 }
153 }
154
155
156 func (l *lexer) emit(t itemType) {
157 l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
158 l.start = l.pos
159 l.startLine = l.line
160 }
161
162
163 func (l *lexer) ignore() {
164 l.line += strings.Count(l.input[l.start:l.pos], "\n")
165 l.start = l.pos
166 l.startLine = l.line
167 }
168
169
170 func (l *lexer) accept(valid string) bool {
171 if strings.ContainsRune(valid, l.next()) {
172 return true
173 }
174 l.backup()
175 return false
176 }
177
178
179 func (l *lexer) acceptRun(valid string) {
180 for strings.ContainsRune(valid, l.next()) {
181 }
182 l.backup()
183 }
184
185
186
187 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
188 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
189 return nil
190 }
191
192
193
194 func (l *lexer) nextItem() item {
195 return <-l.items
196 }
197
198
199
200 func (l *lexer) drain() {
201 for range l.items {
202 }
203 }
204
205
206 func lex(name, input, left, right string, emitComment bool) *lexer {
207 if left == "" {
208 left = leftDelim
209 }
210 if right == "" {
211 right = rightDelim
212 }
213 l := &lexer{
214 name: name,
215 input: input,
216 leftDelim: left,
217 rightDelim: right,
218 emitComment: emitComment,
219 items: make(chan item),
220 line: 1,
221 startLine: 1,
222 }
223 go l.run()
224 return l
225 }
226
227
228 func (l *lexer) run() {
229 for state := lexText; state != nil; {
230 state = state(l)
231 }
232 close(l.items)
233 }
234
235
236
// Default action delimiters (used by lex when the caller passes empty
// strings) and the markers that open and close a comment inside an action.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)
243
244
245 func lexText(l *lexer) stateFn {
246 l.width = 0
247 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
248 ldn := Pos(len(l.leftDelim))
249 l.pos += Pos(x)
250 trimLength := Pos(0)
251 if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
252 trimLength = rightTrimLength(l.input[l.start:l.pos])
253 }
254 l.pos -= trimLength
255 if l.pos > l.start {
256 l.line += strings.Count(l.input[l.start:l.pos], "\n")
257 l.emit(itemText)
258 }
259 l.pos += trimLength
260 l.ignore()
261 return lexLeftDelim
262 }
263 l.pos = Pos(len(l.input))
264
265 if l.pos > l.start {
266 l.line += strings.Count(l.input[l.start:l.pos], "\n")
267 l.emit(itemText)
268 }
269 l.emit(itemEOF)
270 return nil
271 }
272
273
274 func rightTrimLength(s string) Pos {
275 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
276 }
277
278
279 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
280 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
281 return true, true
282 }
283 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
284 return true, false
285 }
286 return false, false
287 }
288
289
290 func leftTrimLength(s string) Pos {
291 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
292 }
293
294
295 func lexLeftDelim(l *lexer) stateFn {
296 l.pos += Pos(len(l.leftDelim))
297 trimSpace := hasLeftTrimMarker(l.input[l.pos:])
298 afterMarker := Pos(0)
299 if trimSpace {
300 afterMarker = trimMarkerLen
301 }
302 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
303 l.pos += afterMarker
304 l.ignore()
305 return lexComment
306 }
307 l.emit(itemLeftDelim)
308 l.pos += afterMarker
309 l.ignore()
310 l.parenDepth = 0
311 return lexInsideAction
312 }
313
314
315 func lexComment(l *lexer) stateFn {
316 l.pos += Pos(len(leftComment))
317 i := strings.Index(l.input[l.pos:], rightComment)
318 if i < 0 {
319 return l.errorf("unclosed comment")
320 }
321 l.pos += Pos(i + len(rightComment))
322 delim, trimSpace := l.atRightDelim()
323 if !delim {
324 return l.errorf("comment ends before closing delimiter")
325 }
326 if l.emitComment {
327 l.emit(itemComment)
328 }
329 if trimSpace {
330 l.pos += trimMarkerLen
331 }
332 l.pos += Pos(len(l.rightDelim))
333 if trimSpace {
334 l.pos += leftTrimLength(l.input[l.pos:])
335 }
336 l.ignore()
337 return lexText
338 }
339
340
341 func lexRightDelim(l *lexer) stateFn {
342 trimSpace := hasRightTrimMarker(l.input[l.pos:])
343 if trimSpace {
344 l.pos += trimMarkerLen
345 l.ignore()
346 }
347 l.pos += Pos(len(l.rightDelim))
348 l.emit(itemRightDelim)
349 if trimSpace {
350 l.pos += leftTrimLength(l.input[l.pos:])
351 l.ignore()
352 }
353 return lexText
354 }
355
356
// lexInsideAction scans the elements inside action delimiters. It handles
// one element per call: single-character tokens are emitted here and the
// function returns itself, while longer tokens are delegated to the
// dedicated state functions.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof:
		return l.errorf("unclosed action")
	case isSpace(r):
		l.backup() // Put the space back; lexSpace checks for a trim marker after it.
		return lexSpace
	case r == '=':
		l.emit(itemAssign)
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemDeclare)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// Look ahead by indexing the input directly rather than with
		// next/peek, so that the backup in the number case below still
		// undoes the read of the '.'.
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' followed by a digit (or by end of input) is lexed as a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		// The paren is emitted before the depth check, so an unbalanced
		// ')' produces itemRightParen followed by an error item.
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		l.emit(itemChar)
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}
422
423
424
425
// lexSpace scans a run of space characters and emits it as itemSpace.
// The caller has already seen a space, so the run holds at least one
// character. A space that belongs to a trim-marked right delimiter is left
// for lexRightDelim instead of being emitted.
func lexSpace(l *lexer) stateFn {
	var r rune
	var numSpaces int
	for {
		r = l.peek()
		if !isSpace(r) {
			break
		}
		l.next()
		numSpaces++
	}
	// Be careful about a trim-marked closing delimiter, which has a minus
	// after a space. We know there is a space at pos-1, so check for the
	// marker and the delimiter that might follow it.
	if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
		// Step back before the final space. This works because the last
		// peek read the one-byte marker, so width is 1.
		l.backup()
		if numSpaces == 1 {
			return lexRightDelim // Nothing to emit: the only space belongs to the delimiter.
		}
	}
	l.emit(itemSpace)
	return lexInsideAction
}
448
449
450 func lexIdentifier(l *lexer) stateFn {
451 Loop:
452 for {
453 switch r := l.next(); {
454 case isAlphaNumeric(r):
455
456 default:
457 l.backup()
458 word := l.input[l.start:l.pos]
459 if !l.atTerminator() {
460 return l.errorf("bad character %#U", r)
461 }
462 switch {
463 case key[word] > itemKeyword:
464 l.emit(key[word])
465 case word[0] == '.':
466 l.emit(itemField)
467 case word == "true", word == "false":
468 l.emit(itemBool)
469 default:
470 l.emit(itemIdentifier)
471 }
472 break Loop
473 }
474 }
475 return lexInsideAction
476 }
477
478
479
480 func lexField(l *lexer) stateFn {
481 return lexFieldOrVariable(l, itemField)
482 }
483
484
485
486 func lexVariable(l *lexer) stateFn {
487 if l.atTerminator() {
488 l.emit(itemVariable)
489 return lexInsideAction
490 }
491 return lexFieldOrVariable(l, itemVariable)
492 }
493
494
495
496 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
497 if l.atTerminator() {
498 if typ == itemVariable {
499 l.emit(itemVariable)
500 } else {
501 l.emit(itemDot)
502 }
503 return lexInsideAction
504 }
505 var r rune
506 for {
507 r = l.next()
508 if !isAlphaNumeric(r) {
509 l.backup()
510 break
511 }
512 }
513 if !l.atTerminator() {
514 return l.errorf("bad character %#U", r)
515 }
516 l.emit(typ)
517 return lexInsideAction
518 }
519
520
521
522
523
524 func (l *lexer) atTerminator() bool {
525 r := l.peek()
526 if isSpace(r) {
527 return true
528 }
529 switch r {
530 case eof, '.', ',', '|', ':', ')', '(':
531 return true
532 }
533
534
535
536 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
537 return true
538 }
539 return false
540 }
541
542
543
544 func lexChar(l *lexer) stateFn {
545 Loop:
546 for {
547 switch l.next() {
548 case '\\':
549 if r := l.next(); r != eof && r != '\n' {
550 break
551 }
552 fallthrough
553 case eof, '\n':
554 return l.errorf("unterminated character constant")
555 case '\'':
556 break Loop
557 }
558 }
559 l.emit(itemCharConstant)
560 return lexInsideAction
561 }
562
563
564
565
566
567 func lexNumber(l *lexer) stateFn {
568 if !l.scanNumber() {
569 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
570 }
571 if sign := l.peek(); sign == '+' || sign == '-' {
572
573 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
574 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
575 }
576 l.emit(itemComplex)
577 } else {
578 l.emit(itemNumber)
579 }
580 return lexInsideAction
581 }
582
583 func (l *lexer) scanNumber() bool {
584
585 l.accept("+-")
586
587 digits := "0123456789_"
588 if l.accept("0") {
589
590 if l.accept("xX") {
591 digits = "0123456789abcdefABCDEF_"
592 } else if l.accept("oO") {
593 digits = "01234567_"
594 } else if l.accept("bB") {
595 digits = "01_"
596 }
597 }
598 l.acceptRun(digits)
599 if l.accept(".") {
600 l.acceptRun(digits)
601 }
602 if len(digits) == 10+1 && l.accept("eE") {
603 l.accept("+-")
604 l.acceptRun("0123456789_")
605 }
606 if len(digits) == 16+6+1 && l.accept("pP") {
607 l.accept("+-")
608 l.acceptRun("0123456789_")
609 }
610
611 l.accept("i")
612
613 if isAlphaNumeric(l.peek()) {
614 l.next()
615 return false
616 }
617 return true
618 }
619
620
621 func lexQuote(l *lexer) stateFn {
622 Loop:
623 for {
624 switch l.next() {
625 case '\\':
626 if r := l.next(); r != eof && r != '\n' {
627 break
628 }
629 fallthrough
630 case eof, '\n':
631 return l.errorf("unterminated quoted string")
632 case '"':
633 break Loop
634 }
635 }
636 l.emit(itemString)
637 return lexInsideAction
638 }
639
640
641 func lexRawQuote(l *lexer) stateFn {
642 Loop:
643 for {
644 switch l.next() {
645 case eof:
646 return l.errorf("unterminated raw quoted string")
647 case '`':
648 break Loop
649 }
650 }
651 l.emit(itemRawString)
652 return lexInsideAction
653 }
654
655
// isSpace reports whether r is a space character: space, tab, carriage
// return or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
659
660
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsDigit(r)
	}
}
664
665 func hasLeftTrimMarker(s string) bool {
666 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
667 }
668
669 func hasRightTrimMarker(s string) bool {
670 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
671 }
672
View as plain text