1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
// item is a single token handed back by the lexer.
type item struct {
	typ  itemType // The kind of token.
	pos  Pos      // Byte offset recorded when the token was built (the lexer's start offset for scanned tokens).
	val  string   // The token text; for itemError, the formatted error message.
	line int      // The line number recorded for the start of the token.
}
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the kind of a lexed item.
type itemType int
38
// The item kinds. Everything declared after itemKeyword is a keyword, which
// is what the "> itemKeyword" comparisons elsewhere in this file rely on.
const (
	itemError        itemType = iota // error occurred; value is the text of the error
	itemBool                         // boolean constant ("true" or "false")
	itemChar                         // printable ASCII character not otherwise recognized
	itemCharConstant                 // character constant, including the quotes
	itemComment                      // comment text
	itemComplex                      // complex constant: a number followed by a signed imaginary part
	itemAssign                       // equals ('=')
	itemDeclare                      // colon-equals (':=')
	itemEOF                          // end of input
	itemField                        // alphanumeric identifier starting with '.'
	itemIdentifier                   // alphanumeric identifier not starting with '.'
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside an action
	itemNumber                       // simple number, including imaginary
	itemPipe                         // pipe symbol ('|')
	itemRawString                    // raw quoted string (includes the back quotes)
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside an action
	itemSpace                        // run of spaces inside an action
	itemString                       // quoted string (includes the quotes)
	itemText                         // plain text outside actions
	itemVariable                     // variable starting with '$'

	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemBreak    // break keyword
	itemContinue // continue keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)
77
78 var key = map[string]itemType{
79 ".": itemDot,
80 "block": itemBlock,
81 "break": itemBreak,
82 "continue": itemContinue,
83 "define": itemDefine,
84 "else": itemElse,
85 "end": itemEnd,
86 "if": itemIf,
87 "range": itemRange,
88 "nil": itemNil,
89 "template": itemTemplate,
90 "with": itemWith,
91 }
92
// eof is the sentinel returned by next once the input is exhausted. It is
// negative so it cannot collide with a decoded rune.
const eof = -1
94
95
96
97
98
99
100
101
102
// Constants used when trimming white space around actions. A trim marker is
// a '-' separated from the rest of the action by one space character, on the
// inner side of a delimiter.
const (
	spaceChars    = " \t\r\n"  // The characters stripped by rightTrimLength and leftTrimLength.
	trimMarker    = '-'        // Attached to a left/right delimiter, trims the adjacent text.
	trimMarkerLen = Pos(1 + 1) // Byte length of the marker plus its accompanying space.
)
108
109
// stateFn represents one state of the scanner as a function that returns the
// next state; a nil result ends the run loop in nextItem.
type stateFn func(*lexer) stateFn
111
112
// lexer holds the state of the scanner.
type lexer struct {
	name         string     // Name passed to lex; not otherwise read in this file.
	input        string     // The string being scanned.
	leftDelim    string     // Start-of-action marker.
	rightDelim   string     // End-of-action marker.
	pos          Pos        // Current byte offset in input.
	start        Pos        // Byte offset where the pending item starts.
	atEOF        bool       // Set once next has returned eof; disables backup.
	parenDepth   int        // Nesting depth of ( ) inside the current action.
	line         int        // 1 + number of newlines consumed so far.
	startLine    int        // Line on which the pending item starts.
	item         item       // The item to return from nextItem.
	insideAction bool       // Whether scanning resumes inside an action.
	options      lexOptions // Behavior switches; see lexOptions.
}
128
129
// lexOptions controls optional lexer behavior.
type lexOptions struct {
	emitComment bool // Emit itemComment tokens instead of silently dropping comments.
	breakOK     bool // Treat "break" as a keyword; otherwise it lexes as an identifier.
	continueOK  bool // Treat "continue" as a keyword; otherwise it lexes as an identifier.
}
135
136
137 func (l *lexer) next() rune {
138 if int(l.pos) >= len(l.input) {
139 l.atEOF = true
140 return eof
141 }
142 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
143 l.pos += Pos(w)
144 if r == '\n' {
145 l.line++
146 }
147 return r
148 }
149
150
151 func (l *lexer) peek() rune {
152 r := l.next()
153 l.backup()
154 return r
155 }
156
157
158 func (l *lexer) backup() {
159 if !l.atEOF && l.pos > 0 {
160 r, w := utf8.DecodeLastRuneInString(l.input[:l.pos])
161 l.pos -= Pos(w)
162
163 if r == '\n' {
164 l.line--
165 }
166 }
167 }
168
169
170
171 func (l *lexer) thisItem(t itemType) item {
172 i := item{t, l.start, l.input[l.start:l.pos], l.startLine}
173 l.start = l.pos
174 l.startLine = l.line
175 return i
176 }
177
178
179 func (l *lexer) emit(t itemType) stateFn {
180 return l.emitItem(l.thisItem(t))
181 }
182
183
// emitItem stores i as the item to be returned by nextItem. It returns nil
// so that a state function can write "return l.emitItem(i)" to stop the
// state loop.
func (l *lexer) emitItem(i item) stateFn {
	l.item = i
	return nil
}
188
189
190
191
192 func (l *lexer) ignore() {
193 l.line += strings.Count(l.input[l.start:l.pos], "\n")
194 l.start = l.pos
195 l.startLine = l.line
196 }
197
198
199 func (l *lexer) accept(valid string) bool {
200 if strings.ContainsRune(valid, l.next()) {
201 return true
202 }
203 l.backup()
204 return false
205 }
206
207
208 func (l *lexer) acceptRun(valid string) {
209 for strings.ContainsRune(valid, l.next()) {
210 }
211 l.backup()
212 }
213
214
215
216 func (l *lexer) errorf(format string, args ...any) stateFn {
217 l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
218 l.start = 0
219 l.pos = 0
220 l.input = l.input[:0]
221 return nil
222 }
223
224
225
226 func (l *lexer) nextItem() item {
227 l.item = item{itemEOF, l.pos, "EOF", l.startLine}
228 state := lexText
229 if l.insideAction {
230 state = lexInsideAction
231 }
232 for {
233 state = state(l)
234 if state == nil {
235 return l.item
236 }
237 }
238 }
239
240
241 func lex(name, input, left, right string) *lexer {
242 if left == "" {
243 left = leftDelim
244 }
245 if right == "" {
246 right = rightDelim
247 }
248 l := &lexer{
249 name: name,
250 input: input,
251 leftDelim: left,
252 rightDelim: right,
253 line: 1,
254 startLine: 1,
255 insideAction: false,
256 }
257 return l
258 }
259
260
261
// Default action delimiters and the comment markers recognized inside an
// action.
const (
	leftDelim    = "{{" // Default left delimiter, used by lex when none is supplied.
	rightDelim   = "}}" // Default right delimiter, used by lex when none is supplied.
	leftComment  = "/*" // Opens a comment immediately after the left delimiter.
	rightComment = "*/" // Closes a comment; must be followed by the right delimiter.
)
268
269
270 func lexText(l *lexer) stateFn {
271 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
272 if x > 0 {
273 l.pos += Pos(x)
274
275 trimLength := Pos(0)
276 delimEnd := l.pos + Pos(len(l.leftDelim))
277 if hasLeftTrimMarker(l.input[delimEnd:]) {
278 trimLength = rightTrimLength(l.input[l.start:l.pos])
279 }
280 l.pos -= trimLength
281 l.line += strings.Count(l.input[l.start:l.pos], "\n")
282 i := l.thisItem(itemText)
283 l.pos += trimLength
284 l.ignore()
285 if len(i.val) > 0 {
286 return l.emitItem(i)
287 }
288 }
289 return lexLeftDelim
290 }
291 l.pos = Pos(len(l.input))
292
293 if l.pos > l.start {
294 l.line += strings.Count(l.input[l.start:l.pos], "\n")
295 return l.emit(itemText)
296 }
297 return l.emit(itemEOF)
298 }
299
300
301 func rightTrimLength(s string) Pos {
302 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
303 }
304
305
306 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
307 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
308 return true, true
309 }
310 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
311 return true, false
312 }
313 return false, false
314 }
315
316
317 func leftTrimLength(s string) Pos {
318 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
319 }
320
321
322
323 func lexLeftDelim(l *lexer) stateFn {
324 l.pos += Pos(len(l.leftDelim))
325 trimSpace := hasLeftTrimMarker(l.input[l.pos:])
326 afterMarker := Pos(0)
327 if trimSpace {
328 afterMarker = trimMarkerLen
329 }
330 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
331 l.pos += afterMarker
332 l.ignore()
333 return lexComment
334 }
335 i := l.thisItem(itemLeftDelim)
336 l.insideAction = true
337 l.pos += afterMarker
338 l.ignore()
339 l.parenDepth = 0
340 return l.emitItem(i)
341 }
342
343
344 func lexComment(l *lexer) stateFn {
345 l.pos += Pos(len(leftComment))
346 x := strings.Index(l.input[l.pos:], rightComment)
347 if x < 0 {
348 return l.errorf("unclosed comment")
349 }
350 l.pos += Pos(x + len(rightComment))
351 delim, trimSpace := l.atRightDelim()
352 if !delim {
353 return l.errorf("comment ends before closing delimiter")
354 }
355 i := l.thisItem(itemComment)
356 if trimSpace {
357 l.pos += trimMarkerLen
358 }
359 l.pos += Pos(len(l.rightDelim))
360 if trimSpace {
361 l.pos += leftTrimLength(l.input[l.pos:])
362 }
363 l.ignore()
364 if l.options.emitComment {
365 return l.emitItem(i)
366 }
367 return lexText
368 }
369
370
371 func lexRightDelim(l *lexer) stateFn {
372 _, trimSpace := l.atRightDelim()
373 if trimSpace {
374 l.pos += trimMarkerLen
375 l.ignore()
376 }
377 l.pos += Pos(len(l.rightDelim))
378 i := l.thisItem(itemRightDelim)
379 if trimSpace {
380 l.pos += leftTrimLength(l.input[l.pos:])
381 l.ignore()
382 }
383 l.insideAction = false
384 return l.emitItem(i)
385 }
386
387
// lexInsideAction scans the elements inside action delimiters. It either
// emits a single-rune token itself or dispatches to the state that scans the
// longer token starting at the current rune.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		// The action may only close when all parentheses are balanced.
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof:
		return l.errorf("unclosed action")
	case isSpace(r):
		l.backup() // Put the space back in case this is " -}}".
		return lexSpace
	case r == '=':
		return l.emit(itemAssign)
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		return l.emit(itemDeclare)
	case r == '|':
		return l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// Look ahead one byte without calling next, so the backup below
		// still steps over the '.' alone.
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' followed by a digit (or at end of input) is scanned as a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.parenDepth++
		return l.emit(itemLeftParen)
	case r == ')':
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren")
		}
		return l.emit(itemRightParen)
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		// Any other printable ASCII character is emitted as a one-rune itemChar.
		return l.emit(itemChar)
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
}
452
453
454
455
456 func lexSpace(l *lexer) stateFn {
457 var r rune
458 var numSpaces int
459 for {
460 r = l.peek()
461 if !isSpace(r) {
462 break
463 }
464 l.next()
465 numSpaces++
466 }
467
468
469 if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
470 l.backup()
471 if numSpaces == 1 {
472 return lexRightDelim
473 }
474 }
475 return l.emit(itemSpace)
476 }
477
478
479 func lexIdentifier(l *lexer) stateFn {
480 for {
481 switch r := l.next(); {
482 case isAlphaNumeric(r):
483
484 default:
485 l.backup()
486 word := l.input[l.start:l.pos]
487 if !l.atTerminator() {
488 return l.errorf("bad character %#U", r)
489 }
490 switch {
491 case key[word] > itemKeyword:
492 item := key[word]
493 if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK {
494 return l.emit(itemIdentifier)
495 }
496 return l.emit(item)
497 case word[0] == '.':
498 return l.emit(itemField)
499 case word == "true", word == "false":
500 return l.emit(itemBool)
501 default:
502 return l.emit(itemIdentifier)
503 }
504 }
505 }
506 }
507
508
509
// lexField scans a field such as ".x". The leading '.' has already been
// consumed by lexInsideAction; the rest is handled by lexFieldOrVariable.
func lexField(l *lexer) stateFn {
	return lexFieldOrVariable(l, itemField)
}
513
514
515
516 func lexVariable(l *lexer) stateFn {
517 if l.atTerminator() {
518 return l.emit(itemVariable)
519 }
520 return lexFieldOrVariable(l, itemVariable)
521 }
522
523
524
525 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
526 if l.atTerminator() {
527 if typ == itemVariable {
528 return l.emit(itemVariable)
529 }
530 return l.emit(itemDot)
531 }
532 var r rune
533 for {
534 r = l.next()
535 if !isAlphaNumeric(r) {
536 l.backup()
537 break
538 }
539 }
540 if !l.atTerminator() {
541 return l.errorf("bad character %#U", r)
542 }
543 return l.emit(typ)
544 }
545
546
547
548
549
550 func (l *lexer) atTerminator() bool {
551 r := l.peek()
552 if isSpace(r) {
553 return true
554 }
555 switch r {
556 case eof, '.', ',', '|', ':', ')', '(':
557 return true
558 }
559 return strings.HasPrefix(l.input[l.pos:], l.rightDelim)
560 }
561
562
563
564 func lexChar(l *lexer) stateFn {
565 Loop:
566 for {
567 switch l.next() {
568 case '\\':
569 if r := l.next(); r != eof && r != '\n' {
570 break
571 }
572 fallthrough
573 case eof, '\n':
574 return l.errorf("unterminated character constant")
575 case '\'':
576 break Loop
577 }
578 }
579 return l.emit(itemCharConstant)
580 }
581
582
583
584
585
586 func lexNumber(l *lexer) stateFn {
587 if !l.scanNumber() {
588 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
589 }
590 if sign := l.peek(); sign == '+' || sign == '-' {
591
592 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
593 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
594 }
595 return l.emit(itemComplex)
596 }
597 return l.emit(itemNumber)
598 }
599
600 func (l *lexer) scanNumber() bool {
601
602 l.accept("+-")
603
604 digits := "0123456789_"
605 if l.accept("0") {
606
607 if l.accept("xX") {
608 digits = "0123456789abcdefABCDEF_"
609 } else if l.accept("oO") {
610 digits = "01234567_"
611 } else if l.accept("bB") {
612 digits = "01_"
613 }
614 }
615 l.acceptRun(digits)
616 if l.accept(".") {
617 l.acceptRun(digits)
618 }
619 if len(digits) == 10+1 && l.accept("eE") {
620 l.accept("+-")
621 l.acceptRun("0123456789_")
622 }
623 if len(digits) == 16+6+1 && l.accept("pP") {
624 l.accept("+-")
625 l.acceptRun("0123456789_")
626 }
627
628 l.accept("i")
629
630 if isAlphaNumeric(l.peek()) {
631 l.next()
632 return false
633 }
634 return true
635 }
636
637
638 func lexQuote(l *lexer) stateFn {
639 Loop:
640 for {
641 switch l.next() {
642 case '\\':
643 if r := l.next(); r != eof && r != '\n' {
644 break
645 }
646 fallthrough
647 case eof, '\n':
648 return l.errorf("unterminated quoted string")
649 case '"':
650 break Loop
651 }
652 }
653 return l.emit(itemString)
654 }
655
656
657 func lexRawQuote(l *lexer) stateFn {
658 Loop:
659 for {
660 switch l.next() {
661 case eof:
662 return l.errorf("unterminated raw quoted string")
663 case '`':
664 break Loop
665 }
666 }
667 return l.emit(itemRawString)
668 }
669
670
// isSpace reports whether r is one of the four space characters the lexer
// recognizes: space, tab, carriage return or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
674
675
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
679
680 func hasLeftTrimMarker(s string) bool {
681 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
682 }
683
684 func hasRightTrimMarker(s string) bool {
685 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
686 }
687
View as plain text