1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15 type item struct {
16 typ itemType
17 pos Pos
18 val string
19 line int
20 }
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the kind of a lexed item.
type itemType int

// The item kinds. Everything declared after itemKeyword is a keyword; code in
// this file relies on that ordering (it tests "typ > itemKeyword").
const (
	itemError        itemType = iota // an error occurred; the item value holds the message
	itemBool                         // the words "true" and "false"
	itemChar                         // a printable ASCII character not otherwise classified
	itemCharConstant                 // a single-quoted character constant
	itemComplex                      // a number followed by a signed imaginary part
	itemAssign                       // '='
	itemDeclare                      // ":="
	itemEOF                          // end of input
	itemField                        // an alphanumeric word starting with '.'
	itemIdentifier                   // an alphanumeric word not starting with '.'
	itemLeftDelim                    // the left action delimiter
	itemLeftParen                    // '('
	itemNumber                       // a number
	itemPipe                         // '|'
	itemRawString                    // a back-quoted string (quotes included)
	itemRightDelim                   // the right action delimiter
	itemRightParen                   // ')'
	itemSpace                        // a run of spaces and tabs inside an action
	itemString                       // a double-quoted string (quotes included)
	itemText                         // plain text outside any action
	itemVariable                     // a word starting with '$'

	itemKeyword  // marker only: separates the keywords below from the rest
	itemBlock    // "block"
	itemDot      // "."
	itemDefine   // "define"
	itemElse     // "else"
	itemEnd      // "end"
	itemIf       // "if"
	itemNil      // "nil"
	itemRange    // "range"
	itemTemplate // "template"
	itemWith     // "with"
)
74
75 var key = map[string]itemType{
76 ".": itemDot,
77 "block": itemBlock,
78 "define": itemDefine,
79 "else": itemElse,
80 "end": itemEnd,
81 "if": itemIf,
82 "range": itemRange,
83 "nil": itemNil,
84 "template": itemTemplate,
85 "with": itemWith,
86 }
87
// eof is the sentinel returned by (*lexer).next when the input is exhausted.
// It is negative, so it can never collide with a decoded rune.
const eof = -1
89
90
91
92
93
94
95
96
97
98 const (
99 spaceChars = " \t\r\n"
100 leftTrimMarker = "- "
101 rightTrimMarker = " -"
102 trimMarkerLen = Pos(len(leftTrimMarker))
103 )
104
105
106 type stateFn func(*lexer) stateFn
107
108
109 type lexer struct {
110 name string
111 input string
112 leftDelim string
113 rightDelim string
114 trimRightDelim string
115 pos Pos
116 start Pos
117 width Pos
118 items chan item
119 parenDepth int
120 line int
121 startLine int
122 }
123
124
125 func (l *lexer) next() rune {
126 if int(l.pos) >= len(l.input) {
127 l.width = 0
128 return eof
129 }
130 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
131 l.width = Pos(w)
132 l.pos += l.width
133 if r == '\n' {
134 l.line++
135 }
136 return r
137 }
138
139
140 func (l *lexer) peek() rune {
141 r := l.next()
142 l.backup()
143 return r
144 }
145
146
147 func (l *lexer) backup() {
148 l.pos -= l.width
149
150 if l.width == 1 && l.input[l.pos] == '\n' {
151 l.line--
152 }
153 }
154
155
156 func (l *lexer) emit(t itemType) {
157 l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
158 l.start = l.pos
159 l.startLine = l.line
160 }
161
162
163 func (l *lexer) ignore() {
164 l.line += strings.Count(l.input[l.start:l.pos], "\n")
165 l.start = l.pos
166 l.startLine = l.line
167 }
168
169
170 func (l *lexer) accept(valid string) bool {
171 if strings.ContainsRune(valid, l.next()) {
172 return true
173 }
174 l.backup()
175 return false
176 }
177
178
179 func (l *lexer) acceptRun(valid string) {
180 for strings.ContainsRune(valid, l.next()) {
181 }
182 l.backup()
183 }
184
185
186
187 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
188 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
189 return nil
190 }
191
192
193
194 func (l *lexer) nextItem() item {
195 return <-l.items
196 }
197
198
199
200 func (l *lexer) drain() {
201 for range l.items {
202 }
203 }
204
205
206 func lex(name, input, left, right string) *lexer {
207 if left == "" {
208 left = leftDelim
209 }
210 if right == "" {
211 right = rightDelim
212 }
213 l := &lexer{
214 name: name,
215 input: input,
216 leftDelim: left,
217 rightDelim: right,
218 trimRightDelim: rightTrimMarker + right,
219 items: make(chan item),
220 line: 1,
221 startLine: 1,
222 }
223 go l.run()
224 return l
225 }
226
227
228 func (l *lexer) run() {
229 for state := lexText; state != nil; {
230 state = state(l)
231 }
232 close(l.items)
233 }
234
235
236
// Default action delimiters (used by lex when none are supplied) and the
// markers that open and close a comment inside an action.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)
243
244
245 func lexText(l *lexer) stateFn {
246 l.width = 0
247 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
248 ldn := Pos(len(l.leftDelim))
249 l.pos += Pos(x)
250 trimLength := Pos(0)
251 if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
252 trimLength = rightTrimLength(l.input[l.start:l.pos])
253 }
254 l.pos -= trimLength
255 if l.pos > l.start {
256 l.line += strings.Count(l.input[l.start:l.pos], "\n")
257 l.emit(itemText)
258 }
259 l.pos += trimLength
260 l.ignore()
261 return lexLeftDelim
262 }
263 l.pos = Pos(len(l.input))
264
265 if l.pos > l.start {
266 l.line += strings.Count(l.input[l.start:l.pos], "\n")
267 l.emit(itemText)
268 }
269 l.emit(itemEOF)
270 return nil
271 }
272
273
274 func rightTrimLength(s string) Pos {
275 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
276 }
277
278
279 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
280 if strings.HasPrefix(l.input[l.pos:], l.trimRightDelim) {
281 return true, true
282 }
283 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
284 return true, false
285 }
286 return false, false
287 }
288
289
290 func leftTrimLength(s string) Pos {
291 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
292 }
293
294
295 func lexLeftDelim(l *lexer) stateFn {
296 l.pos += Pos(len(l.leftDelim))
297 trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
298 afterMarker := Pos(0)
299 if trimSpace {
300 afterMarker = trimMarkerLen
301 }
302 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
303 l.pos += afterMarker
304 l.ignore()
305 return lexComment
306 }
307 l.emit(itemLeftDelim)
308 l.pos += afterMarker
309 l.ignore()
310 l.parenDepth = 0
311 return lexInsideAction
312 }
313
314
315 func lexComment(l *lexer) stateFn {
316 l.pos += Pos(len(leftComment))
317 i := strings.Index(l.input[l.pos:], rightComment)
318 if i < 0 {
319 return l.errorf("unclosed comment")
320 }
321 l.pos += Pos(i + len(rightComment))
322 delim, trimSpace := l.atRightDelim()
323 if !delim {
324 return l.errorf("comment ends before closing delimiter")
325 }
326 if trimSpace {
327 l.pos += trimMarkerLen
328 }
329 l.pos += Pos(len(l.rightDelim))
330 if trimSpace {
331 l.pos += leftTrimLength(l.input[l.pos:])
332 }
333 l.ignore()
334 return lexText
335 }
336
337
338 func lexRightDelim(l *lexer) stateFn {
339 trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
340 if trimSpace {
341 l.pos += trimMarkerLen
342 l.ignore()
343 }
344 l.pos += Pos(len(l.rightDelim))
345 l.emit(itemRightDelim)
346 if trimSpace {
347 l.pos += leftTrimLength(l.input[l.pos:])
348 l.ignore()
349 }
350 return lexText
351 }
352
353
354 func lexInsideAction(l *lexer) stateFn {
355
356
357
358 delim, _ := l.atRightDelim()
359 if delim {
360 if l.parenDepth == 0 {
361 return lexRightDelim
362 }
363 return l.errorf("unclosed left paren")
364 }
365 switch r := l.next(); {
366 case r == eof || isEndOfLine(r):
367 return l.errorf("unclosed action")
368 case isSpace(r):
369 l.backup()
370 return lexSpace
371 case r == '=':
372 l.emit(itemAssign)
373 case r == ':':
374 if l.next() != '=' {
375 return l.errorf("expected :=")
376 }
377 l.emit(itemDeclare)
378 case r == '|':
379 l.emit(itemPipe)
380 case r == '"':
381 return lexQuote
382 case r == '`':
383 return lexRawQuote
384 case r == '$':
385 return lexVariable
386 case r == '\'':
387 return lexChar
388 case r == '.':
389
390 if l.pos < Pos(len(l.input)) {
391 r := l.input[l.pos]
392 if r < '0' || '9' < r {
393 return lexField
394 }
395 }
396 fallthrough
397 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
398 l.backup()
399 return lexNumber
400 case isAlphaNumeric(r):
401 l.backup()
402 return lexIdentifier
403 case r == '(':
404 l.emit(itemLeftParen)
405 l.parenDepth++
406 case r == ')':
407 l.emit(itemRightParen)
408 l.parenDepth--
409 if l.parenDepth < 0 {
410 return l.errorf("unexpected right paren %#U", r)
411 }
412 case r <= unicode.MaxASCII && unicode.IsPrint(r):
413 l.emit(itemChar)
414 default:
415 return l.errorf("unrecognized character in action: %#U", r)
416 }
417 return lexInsideAction
418 }
419
420
421
422
423 func lexSpace(l *lexer) stateFn {
424 var r rune
425 var numSpaces int
426 for {
427 r = l.peek()
428 if !isSpace(r) {
429 break
430 }
431 l.next()
432 numSpaces++
433 }
434
435
436 if strings.HasPrefix(l.input[l.pos-1:], l.trimRightDelim) {
437 l.backup()
438 if numSpaces == 1 {
439 return lexRightDelim
440 }
441 }
442 l.emit(itemSpace)
443 return lexInsideAction
444 }
445
446
447 func lexIdentifier(l *lexer) stateFn {
448 Loop:
449 for {
450 switch r := l.next(); {
451 case isAlphaNumeric(r):
452
453 default:
454 l.backup()
455 word := l.input[l.start:l.pos]
456 if !l.atTerminator() {
457 return l.errorf("bad character %#U", r)
458 }
459 switch {
460 case key[word] > itemKeyword:
461 l.emit(key[word])
462 case word[0] == '.':
463 l.emit(itemField)
464 case word == "true", word == "false":
465 l.emit(itemBool)
466 default:
467 l.emit(itemIdentifier)
468 }
469 break Loop
470 }
471 }
472 return lexInsideAction
473 }
474
475
476
477 func lexField(l *lexer) stateFn {
478 return lexFieldOrVariable(l, itemField)
479 }
480
481
482
483 func lexVariable(l *lexer) stateFn {
484 if l.atTerminator() {
485 l.emit(itemVariable)
486 return lexInsideAction
487 }
488 return lexFieldOrVariable(l, itemVariable)
489 }
490
491
492
493 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
494 if l.atTerminator() {
495 if typ == itemVariable {
496 l.emit(itemVariable)
497 } else {
498 l.emit(itemDot)
499 }
500 return lexInsideAction
501 }
502 var r rune
503 for {
504 r = l.next()
505 if !isAlphaNumeric(r) {
506 l.backup()
507 break
508 }
509 }
510 if !l.atTerminator() {
511 return l.errorf("bad character %#U", r)
512 }
513 l.emit(typ)
514 return lexInsideAction
515 }
516
517
518
519
520
521 func (l *lexer) atTerminator() bool {
522 r := l.peek()
523 if isSpace(r) || isEndOfLine(r) {
524 return true
525 }
526 switch r {
527 case eof, '.', ',', '|', ':', ')', '(':
528 return true
529 }
530
531
532
533 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
534 return true
535 }
536 return false
537 }
538
539
540
541 func lexChar(l *lexer) stateFn {
542 Loop:
543 for {
544 switch l.next() {
545 case '\\':
546 if r := l.next(); r != eof && r != '\n' {
547 break
548 }
549 fallthrough
550 case eof, '\n':
551 return l.errorf("unterminated character constant")
552 case '\'':
553 break Loop
554 }
555 }
556 l.emit(itemCharConstant)
557 return lexInsideAction
558 }
559
560
561
562
563
564 func lexNumber(l *lexer) stateFn {
565 if !l.scanNumber() {
566 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
567 }
568 if sign := l.peek(); sign == '+' || sign == '-' {
569
570 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
571 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
572 }
573 l.emit(itemComplex)
574 } else {
575 l.emit(itemNumber)
576 }
577 return lexInsideAction
578 }
579
580 func (l *lexer) scanNumber() bool {
581
582 l.accept("+-")
583
584 digits := "0123456789_"
585 if l.accept("0") {
586
587 if l.accept("xX") {
588 digits = "0123456789abcdefABCDEF_"
589 } else if l.accept("oO") {
590 digits = "01234567_"
591 } else if l.accept("bB") {
592 digits = "01_"
593 }
594 }
595 l.acceptRun(digits)
596 if l.accept(".") {
597 l.acceptRun(digits)
598 }
599 if len(digits) == 10+1 && l.accept("eE") {
600 l.accept("+-")
601 l.acceptRun("0123456789_")
602 }
603 if len(digits) == 16+6+1 && l.accept("pP") {
604 l.accept("+-")
605 l.acceptRun("0123456789_")
606 }
607
608 l.accept("i")
609
610 if isAlphaNumeric(l.peek()) {
611 l.next()
612 return false
613 }
614 return true
615 }
616
617
618 func lexQuote(l *lexer) stateFn {
619 Loop:
620 for {
621 switch l.next() {
622 case '\\':
623 if r := l.next(); r != eof && r != '\n' {
624 break
625 }
626 fallthrough
627 case eof, '\n':
628 return l.errorf("unterminated quoted string")
629 case '"':
630 break Loop
631 }
632 }
633 l.emit(itemString)
634 return lexInsideAction
635 }
636
637
638 func lexRawQuote(l *lexer) stateFn {
639 Loop:
640 for {
641 switch l.next() {
642 case eof:
643 return l.errorf("unterminated raw quoted string")
644 case '`':
645 break Loop
646 }
647 }
648 l.emit(itemRawString)
649 return lexInsideAction
650 }
651
652
// isSpace reports whether r is a space or a tab. Line terminators are not
// spaces in this sense; see isEndOfLine.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
656
657
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
661
662
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
666
View as plain text