1
2
3
4
5 package markdown
6
7 import (
8 "bytes"
9 "fmt"
10 "strings"
11 "unicode/utf8"
12
13 "golang.org/x/text/cases"
14 )
15
// parseLinkRefDef tries to parse a link reference definition of the form
//
//	[label]: destination "optional title"
//
// at the start of s. On success it registers the definition with p, unless
// a link with the same normalized label is already defined, and returns the
// number of bytes of s consumed (including the terminating newline, if any)
// and true. On failure it returns 0, false.
func parseLinkRefDef(p buildState, s string) (int, bool) {
	// Shape accepted below: a link label, a colon, optional space,
	// a link destination, and then an optional link title that must be
	// separated from the destination by spaces, tabs, or a line ending.
	// Nothing but spaces and tabs may follow on the final line.
	//
	// NOTE(review): exactly what skipSpace skips (and whether it crosses
	// line endings) is defined elsewhere; confirm against its definition.
	i := skipSpace(s, 0)
	label, i, ok := parseLinkLabel(p.(*parseState), s, i)
	if !ok || i >= len(s) || s[i] != ':' {
		return 0, false
	}
	i = skipSpace(s, i+1)
	suf := s[i:]
	dest, i, ok := parseLinkDest(s, i)
	if !ok {
		if suf != "" && suf[0] == '<' {
			// A malformed <...> destination is flagged as a corner case
			// on the parser state (presumably for comparison against
			// other implementations; verify where corner is read).
			p.(*parseState).corner = true
		}
		return 0, false
	}
	// moved records whether anything separates the destination from a
	// possible title: spaces/tabs here, or a line ending just below.
	moved := false
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		moved = true
		i++
	}

	// Take the title if it is present and does not break the parse.
	// j looks ahead for the title while i stays at the end of the
	// destination line, so that a bad title can simply be ignored.
	j := i
	if j >= len(s) || s[j] == '\n' {
		moved = true
		if j < len(s) {
			j++
		}
	}

	var title string
	var titleChar byte
	var corner bool
	if moved {
		for j < len(s) && (s[j] == ' ' || s[j] == '\t') {
			j++
		}
		// Note that j is shadowed here: inside this if statement it is the
		// index just past the parsed title.
		if t, c, j, ok := parseLinkTitle(s, j); ok {
			for j < len(s) && (s[j] == ' ' || s[j] == '\t') {
				j++
			}
			// Accept the title only if it is the last thing on its line.
			if j >= len(s) || s[j] == '\n' {
				i = j
				if t == "" {
					// An explicitly empty title is recorded as a
					// corner case on the resulting Link.
					corner = true
				}
				title = t
				titleChar = c
			}
		}
	}

	// The definition must end the line; trailing spaces and tabs have
	// already been skipped.
	if i < len(s) && s[i] != '\n' {
		return 0, false
	}
	if i < len(s) {
		i++
	}

	// The first definition of a label wins; later ones are still consumed
	// but do not replace it.
	label = normalizeLabel(label)
	if p.link(label) == nil {
		p.defineLink(label, &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner})
	}
	return i, true
}
94
95 func parseLinkTitle(s string, i int) (title string, char byte, next int, found bool) {
96 if i < len(s) && (s[i] == '"' || s[i] == '\'' || s[i] == '(') {
97 want := s[i]
98 if want == '(' {
99 want = ')'
100 }
101 j := i + 1
102 for ; j < len(s); j++ {
103 if s[j] == want {
104 title := s[i+1 : j]
105
106 return mdUnescaper.Replace(title), want, j + 1, true
107 }
108 if s[j] == '(' && want == ')' {
109 break
110 }
111 if s[j] == '\\' && j+1 < len(s) {
112 j++
113 }
114 }
115 }
116 return "", 0, 0, false
117 }
118
119 func parseLinkLabel(p *parseState, s string, i int) (string, int, bool) {
120
121
122
123
124
125
126
127 if i >= len(s) || s[i] != '[' {
128 return "", 0, false
129 }
130 j := i + 1
131 for ; j < len(s); j++ {
132 if s[j] == ']' {
133 if j-(i+1) > 999 {
134
135 p.corner = true
136 break
137 }
138 if label := trimSpaceTabNewline(s[i+1 : j]); label != "" {
139
140 return label, j + 1, true
141 }
142 break
143 }
144 if s[j] == '[' {
145 break
146 }
147 if s[j] == '\\' && j+1 < len(s) {
148 j++
149 }
150 }
151 return "", 0, false
152 }
153
154 func normalizeLabel(s string) string {
155 if strings.Contains(s, "[") || strings.Contains(s, "]") {
156
157
158
159
160 return ""
161 }
162
163
164
165
166 s = trimSpaceTabNewline(s)
167 var b strings.Builder
168 space := false
169 hi := false
170 for i := 0; i < len(s); i++ {
171 c := s[i]
172 switch c {
173 case ' ', '\t', '\n':
174 space = true
175 continue
176 default:
177 if space {
178 b.WriteByte(' ')
179 space = false
180 }
181 if 'A' <= c && c <= 'Z' {
182 c += 'a' - 'A'
183 }
184 if c >= 0x80 {
185 hi = true
186 }
187 b.WriteByte(c)
188 }
189 }
190 s = b.String()
191 if hi {
192 s = cases.Fold().String(s)
193 }
194 return s
195 }
196
197 func parseLinkDest(s string, i int) (string, int, bool) {
198 if i >= len(s) {
199 return "", 0, false
200 }
201
202
203
204 if s[i] == '<' {
205 for j := i + 1; ; j++ {
206 if j >= len(s) || s[j] == '\n' || s[j] == '<' {
207 return "", 0, false
208 }
209 if s[j] == '>' {
210
211 return mdUnescape(s[i+1 : j]), j + 1, true
212 }
213 if s[j] == '\\' {
214 j++
215 }
216 }
217 }
218
219
220
221
222
223 depth := 0
224 j := i
225 Loop:
226 for ; j < len(s); j++ {
227 switch s[j] {
228 case '(':
229 depth++
230 if depth > 32 {
231
232
233 return "", 0, false
234 }
235 case ')':
236 if depth == 0 {
237 break Loop
238 }
239 depth--
240 case '\\':
241 if j+1 < len(s) {
242 if s[j+1] == ' ' || s[j+1] == '\t' {
243 return "", 0, false
244 }
245 j++
246 }
247 case ' ', '\t', '\n':
248 break Loop
249 }
250 }
251
252 dest := s[i:j]
253
254
255
256 return mdUnescape(dest), j, true
257 }
258
259 func parseAutoLinkURI(s string, i int) (Inline, int, bool) {
260
261
262
263
264
265
266
267
268
269
270
271
272 j := i
273 if j+1 >= len(s) || s[j] != '<' || !isLetter(s[j+1]) {
274 return nil, 0, false
275 }
276 j++
277 for j < len(s) && isScheme(s[j]) && j-(i+1) <= 32 {
278 j++
279 }
280 if j-(i+1) < 2 || j-(i+1) > 32 || j >= len(s) || s[j] != ':' {
281 return nil, 0, false
282 }
283 j++
284 for j < len(s) && isURL(s[j]) {
285 j++
286 }
287 if j >= len(s) || s[j] != '>' {
288 return nil, 0, false
289 }
290 link := s[i+1 : j]
291
292 return &AutoLink{link, link}, j + 1, true
293 }
294
295 func parseAutoLinkEmail(s string, i int) (Inline, int, bool) {
296
297
298
299
300
301
302
303 j := i
304 if j+1 >= len(s) || s[j] != '<' || !isUser(s[j+1]) {
305 return nil, 0, false
306 }
307 j++
308 for j < len(s) && isUser(s[j]) {
309 j++
310 }
311 if j >= len(s) || s[j] != '@' {
312 return nil, 0, false
313 }
314 for {
315 j++
316 n, ok := skipDomainElem(s[j:])
317 if !ok {
318 return nil, 0, false
319 }
320 j += n
321 if j >= len(s) || s[j] != '.' && s[j] != '>' {
322 return nil, 0, false
323 }
324 if s[j] == '>' {
325 break
326 }
327 }
328 email := s[i+1 : j]
329 return &AutoLink{email, "mailto:" + email}, j + 1, true
330 }
331
332 func isUser(c byte) bool {
333 if isLetterDigit(c) {
334 return true
335 }
336 s := ".!#$%&'*+/=?^_`{|}~-"
337 for i := 0; i < len(s); i++ {
338 if c == s[i] {
339 return true
340 }
341 }
342 return false
343 }
344
// isHexDigit reports whether c is an ASCII hexadecimal digit
// (0-9, a-f, or A-F).
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
348
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes large.
	return c-'0' <= 9
}
352
353 func skipDomainElem(s string) (int, bool) {
354
355
356
357 if len(s) < 1 || !isLetterDigit(s[0]) {
358 return 0, false
359 }
360 i := 1
361 for i < len(s) && isLDH(s[i]) && i <= 63 {
362 i++
363 }
364 if i > 63 || !isLetterDigit(s[i-1]) {
365 return 0, false
366 }
367 return i, true
368 }
369
370 func isScheme(c byte) bool {
371 return isLetterDigit(c) || c == '+' || c == '.' || c == '-'
372 }
373
// isURL reports whether c may appear after the scheme in a URI autolink:
// any byte above the space character other than '<' and '>'.
func isURL(c byte) bool {
	if c <= ' ' {
		return false
	}
	return c != '<' && c != '>'
}
377
378 type AutoLink struct {
379 Text string
380 URL string
381 }
382
383 func (*AutoLink) Inline() {}
384
385 func (x *AutoLink) PrintHTML(buf *bytes.Buffer) {
386 fmt.Fprintf(buf, "<a href=\"%s\">%s</a>", htmlLinkEscaper.Replace(x.URL), htmlEscaper.Replace(x.Text))
387 }
388
389 func (x *AutoLink) printMarkdown(buf *bytes.Buffer) {
390 fmt.Fprintf(buf, "<%s>", x.Text)
391 }
392
393 func (x *AutoLink) PrintText(buf *bytes.Buffer) {
394 fmt.Fprintf(buf, "%s", htmlEscaper.Replace(x.Text))
395 }
396
397 type Link struct {
398 Inner []Inline
399 URL string
400 Title string
401 TitleChar byte
402 corner bool
403 }
404
405 func (*Link) Inline() {}
406
407 func (x *Link) PrintHTML(buf *bytes.Buffer) {
408 fmt.Fprintf(buf, "<a href=\"%s\"", htmlLinkEscaper.Replace(x.URL))
409 if x.Title != "" {
410 fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title))
411 }
412 buf.WriteString(">")
413 for _, c := range x.Inner {
414 c.PrintHTML(buf)
415 }
416 buf.WriteString("</a>")
417 }
418
419 func (x *Link) printMarkdown(buf *bytes.Buffer) {
420 buf.WriteByte('[')
421 x.printRemainingMarkdown(buf)
422 }
423
424 func (x *Link) printRemainingMarkdown(buf *bytes.Buffer) {
425 for _, c := range x.Inner {
426 c.printMarkdown(buf)
427 }
428 buf.WriteString("](")
429 buf.WriteString(x.URL)
430 printLinkTitleMarkdown(buf, x.Title, x.TitleChar)
431 buf.WriteByte(')')
432 }
433
// printLinkTitleMarkdown writes the title of a link or image to buf in
// Markdown form: a space followed by the delimited title.
// It writes nothing if title is empty.
//
// titleChar is the closing delimiter to use: '"', '\'' or ')' (the opening
// delimiter for ')' is '('). A zero titleChar, as found in a Link built by
// hand rather than by the parser, selects '"' instead of emitting NUL
// bytes, and '(' is accepted as a synonym for ')'.
//
// Backslashes and occurrences of the delimiters inside title are
// backslash-escaped, so that the output parses back to the same title:
// titles are stored with their escapes already removed, and an unescaped
// delimiter or a trailing backslash would otherwise end or corrupt the
// title when the output is parsed again.
func printLinkTitleMarkdown(buf *bytes.Buffer, title string, titleChar byte) {
	if title == "" {
		return
	}

	closeChar := titleChar
	switch closeChar {
	case 0:
		closeChar = '"'
	case '(':
		closeChar = ')'
	}
	openChar := closeChar
	if closeChar == ')' {
		openChar = '('
	}

	buf.WriteByte(' ')
	buf.WriteByte(openChar)
	for i := 0; i < len(title); i++ {
		c := title[i]
		if c == '\\' || c == openChar || c == closeChar {
			buf.WriteByte('\\')
		}
		buf.WriteByte(c)
	}
	buf.WriteByte(closeChar)
}
445
446 func (x *Link) PrintText(buf *bytes.Buffer) {
447 for _, c := range x.Inner {
448 c.PrintText(buf)
449 }
450 }
451
452 type Image struct {
453 Inner []Inline
454 URL string
455 Title string
456 TitleChar byte
457 corner bool
458 }
459
460 func (*Image) Inline() {}
461
462 func (x *Image) PrintHTML(buf *bytes.Buffer) {
463 fmt.Fprintf(buf, "<img src=\"%s\"", htmlLinkEscaper.Replace(x.URL))
464 fmt.Fprintf(buf, " alt=\"")
465 i := buf.Len()
466 for _, c := range x.Inner {
467 c.PrintText(buf)
468 }
469
470
471
472
473 out := buf.Bytes()
474 for ; i < len(out); i++ {
475 if out[i] == '\n' {
476 out[i] = ' '
477 }
478 }
479 fmt.Fprintf(buf, "\"")
480 if x.Title != "" {
481 fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title))
482 }
483 buf.WriteString(" />")
484 }
485
486 func (x *Image) printMarkdown(buf *bytes.Buffer) {
487 buf.WriteString("![")
488 (*Link)(x).printRemainingMarkdown(buf)
489 }
490
491 func (x *Image) PrintText(buf *bytes.Buffer) {
492 for _, c := range x.Inner {
493 c.PrintText(buf)
494 }
495 }
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518 func (p *parseState) autoLinkText(list []Inline) []Inline {
519 if !p.AutoLinkText {
520 return list
521 }
522
523 var out []Inline
524 for i, x := range list {
525 switch x := x.(type) {
526 case *Plain:
527 if rewrite := p.autoLinkPlain(x.Text); rewrite != nil {
528 if out == nil {
529 out = append(out, list[:i]...)
530 }
531 out = append(out, rewrite...)
532 continue
533 }
534 case *Strong:
535 x.Inner = p.autoLinkText(x.Inner)
536 case *Del:
537 x.Inner = p.autoLinkText(x.Inner)
538 case *Emph:
539 x.Inner = p.autoLinkText(x.Inner)
540 }
541 if out != nil {
542 out = append(out, x)
543 }
544 }
545 if out == nil {
546 return list
547 }
548 return out
549 }
550
// autoLinkPlain scans the plain text s for bare e-mail addresses and for
// links starting with http://, https://, www., mailto:, or xmpp:.
// If it finds any, it returns s split into alternating *Plain and *Link
// inlines, ending with a *Plain holding the text after the last link
// (which may be empty). If it finds none, it returns nil.
func (p *parseState) autoLinkPlain(s string) []Inline {
	// vd tracks the same text as s and is advanced in step with it below,
	// so that indexes into s remain valid indexes into vd.s.
	vd := &validDomainChecker{s: s}
	var out []Inline
Restart:
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '@' {
			// An e-mail address is recognized from its '@';
			// before is the text preceding the address.
			if before, link, after, ok := p.parseAutoEmail(s, i); ok {
				if before != "" {
					out = append(out, &Plain{Text: before})
				}
				out = append(out, link)
				// Continue with the text after the link,
				// restarting the scan from its first byte.
				vd.skip(len(s) - len(after))
				s = after
				goto Restart
			}
		}

		// The other link forms are recognized from their first letter,
		// which must not itself be preceded by a letter.
		if (c == 'h' || c == 'm' || c == 'x' || c == 'w') && (i == 0 || !isLetter(s[i-1])) {
			if link, after, ok := p.parseAutoProto(s, i, vd); ok {
				if i > 0 {
					out = append(out, &Plain{Text: s[:i]})
				}
				out = append(out, link)
				// Continue with the text after the link, as above.
				vd.skip(len(s) - len(after))
				s = after
				goto Restart
			}
		}
	}
	if out == nil {
		return nil
	}
	// Whatever is left of s follows the last link.
	out = append(out, &Plain{Text: s})
	return out
}
587
588 func (p *parseState) parseAutoProto(s string, i int, vd *validDomainChecker) (link *Link, after string, found bool) {
589 if s == "" {
590 return
591 }
592 switch s[i] {
593 case 'h':
594 var n int
595 if strings.HasPrefix(s[i:], "https://") {
596 n = len("https://")
597 } else if strings.HasPrefix(s[i:], "http://") {
598 n = len("http://")
599 } else {
600 return
601 }
602 return p.parseAutoHTTP(s[i:i+n], s, i, i+n, i+n+1, vd)
603 case 'w':
604 if !strings.HasPrefix(s[i:], "www.") {
605 return
606 }
607
608
609
610
611 scheme := "https://"
612 if p.AutoLinkAssumeHTTP {
613 scheme = "http://"
614 }
615 return p.parseAutoHTTP(scheme, s, i, i, i+3, vd)
616 case 'm':
617 if !strings.HasPrefix(s[i:], "mailto:") {
618 return
619 }
620 return p.parseAutoMailto(s, i)
621 case 'x':
622 if !strings.HasPrefix(s[i:], "xmpp:") {
623 return
624 }
625 return p.parseAutoXmpp(s, i)
626 }
627 return
628 }
629
630
631
// parseAutoHTTP parses an http, https, or www link in s.
//
// The link text begins at s[textstart] and the domain name begins at
// s[start]; the two are equal for a bare www. link and differ by the
// length of the scheme otherwise. The link's URL is scheme followed by the
// text from start to the end of the link. Trailing punctuation is never
// trimmed back past index min, and a link that would end before min is
// rejected. vd is the domain checker for s.
//
// It returns the link, the text of s following it, and true,
// or the zero values if there is no valid link.
func (p *parseState) parseAutoHTTP(scheme, s string, textstart, start, min int, vd *validDomainChecker) (link *Link, after string, found bool) {
	n, ok := vd.parseValidDomain(start)
	if !ok {
		return
	}
	i := start + n
	domEnd := i

	// After the domain, take everything up to the next Unicode space or '<'.
	// paren is the number of '(' minus the number of ')' in that text.
	paren := 0
	for i < len(s) {
		r, n := utf8.DecodeRuneInString(s[i:])
		if isUnicodeSpace(r) || r == '<' {
			break
		}
		if r == '(' {
			paren++
		}
		if r == ')' {
			paren--
		}
		i += n
	}

	// Trim trailing text that is unlikely to be part of the link.
	// Each case either shortens the link and continues, or falls out of
	// the switch to the final break, which ends the trimming.
Trim:
	for i > min {
		switch s[i-1] {
		case '?', '!', '.', ',', ':', '@', '_', '~':
			// Trim trailing punctuation.
			i--
			continue Trim

		case ')':
			// Trim trailing close parens that are unmatched (by count only).
			if paren < 0 {
				for s[i-1] == ')' && paren < 0 {
					paren++
					i--
				}
				continue Trim
			}

		case ';':
			// Trim a trailing entity-like suffix: '&', one or more
			// letters or digits, and ';'.
			// The scan backward from the ';' either finds the '&' and
			// cuts the link there, or hits some other byte and stops
			// the trimming altogether, so the same bytes are never
			// scanned again on a later iteration.
			for j := i - 2; j > start; j-- {
				if j < i-2 && s[j] == '&' {
					i = j
					continue Trim
				}
				if !isLetterDigit(s[j]) {
					break Trim
				}
			}
		}
		break Trim
	}

	// For a bare www. link (no scheme in the text), anything that follows
	// the domain must begin with a slash; otherwise the link is cut back
	// to the domain alone.
	if textstart == start && i > domEnd && s[domEnd] != '/' {
		i = domEnd
	}

	// The inner paren-trimming loop and the cut above can leave the link
	// shorter than the minimum.
	if i < min {
		return
	}

	link = &Link{
		Inner: []Inline{&Plain{Text: s[textstart:i]}},
		URL:   scheme + s[start:i],
	}
	return link, s[i:], true
}
719
// A validDomainChecker finds valid domain names in a text while
// remembering where earlier attempts failed, so that repeated calls do not
// rescan the same bytes.
type validDomainChecker struct {
	s   string // text being scanned
	cut int    // parseValidDomain rejects any start index before cut without scanning
}

// skip discards the first i bytes of the text, adjusting cut so that it
// refers to the same position in the remaining text.
func (v *validDomainChecker) skip(i int) {
	v.s, v.cut = v.s[i:], v.cut-i
}
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746 func (v *validDomainChecker) parseValidDomain(start int) (n int, found bool) {
747 if start < v.cut {
748 return 0, false
749 }
750 i := start
751 dots := 0
752 for ; i < len(v.s); i++ {
753 c := v.s[i]
754 if c == '_' {
755 dots = -2
756 continue
757 }
758 if c == '.' {
759 dots++
760 continue
761 }
762 if !isLDH(c) {
763 break
764 }
765 }
766 if dots >= 0 && i > start {
767 return i - start, true
768 }
769 v.cut = i
770 return 0, false
771 }
772
773 func (p *parseState) parseAutoEmail(s string, i int) (before string, link *Link, after string, ok bool) {
774 if s[i] != '@' {
775 return
776 }
777
778
779 j := i
780 for j > 0 && (isLDH(s[j-1]) || s[j-1] == '_' || s[j-1] == '+' || s[j-1] == '.') {
781 j--
782 }
783 if i-j < 1 {
784 return
785 }
786
787
788
789 dots := 0
790 k := i + 1
791 for k < len(s) && (isLDH(s[k]) || s[k] == '_' || s[k] == '.') {
792 if s[k] == '.' {
793 if s[k-1] == '.' {
794
795 break
796 }
797 dots++
798 }
799 k++
800 }
801
802
803
804 if s[k-1] == '.' {
805 dots--
806 k--
807 }
808 if s[k-1] == '-' || s[k-1] == '_' {
809 return
810 }
811 if k-(i+1)-dots < 2 || dots < 1 {
812 return
813 }
814
815 link = &Link{
816 Inner: []Inline{&Plain{Text: s[j:k]}},
817 URL: "mailto:" + s[j:k],
818 }
819 return s[:j], link, s[k:], true
820 }
821
822 func (p *parseState) parseAutoMailto(s string, i int) (link *Link, after string, ok bool) {
823 j := i + len("mailto:")
824 for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') {
825 j++
826 }
827 if j >= len(s) || s[j] != '@' {
828 return
829 }
830 before, link, after, ok := p.parseAutoEmail(s[i:], j-i)
831 if before != "mailto:" || !ok {
832 return nil, "", false
833 }
834 link.Inner[0] = &Plain{Text: s[i : len(s)-len(after)]}
835 return link, after, true
836 }
837
838 func (p *parseState) parseAutoXmpp(s string, i int) (link *Link, after string, ok bool) {
839 j := i + len("xmpp:")
840 for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') {
841 j++
842 }
843 if j >= len(s) || s[j] != '@' {
844 return
845 }
846 before, link, after, ok := p.parseAutoEmail(s[i:], j-i)
847 if before != "xmpp:" || !ok {
848 return nil, "", false
849 }
850 if after != "" && after[0] == '/' {
851 k := 1
852 for k < len(after) && (isLetterDigit(after[k]) || after[k] == '@' || after[k] == '.') {
853 k++
854 }
855 after = after[k:]
856 }
857 url := s[i : len(s)-len(after)]
858 link.Inner[0] = &Plain{Text: url}
859 link.URL = url
860 return link, after, true
861 }
862
View as plain text