1
2
3
4
5 package markdown
6
7 import (
8 "bytes"
9 "fmt"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13 )
14
15
67
// Inline is the interface satisfied by every node of the inline
// (span-level) syntax tree. A node can render itself in three forms.
type Inline interface {
	// PrintHTML appends the node's HTML form to buf.
	PrintHTML(buf *bytes.Buffer)
	// PrintText appends the node's text form to buf.
	PrintText(buf *bytes.Buffer)
	// printMarkdown appends the node's Markdown source form to buf.
	printMarkdown(buf *bytes.Buffer)
}
73
74 type Plain struct {
75 Text string
76 }
77
78 func (*Plain) Inline() {}
79
80 func (x *Plain) PrintHTML(buf *bytes.Buffer) {
81 htmlEscaper.WriteString(buf, x.Text)
82 }
83
84 func (x *Plain) printMarkdown(buf *bytes.Buffer) {
85 buf.WriteString(x.Text)
86 }
87
88 func (x *Plain) PrintText(buf *bytes.Buffer) {
89 htmlEscaper.WriteString(buf, x.Text)
90 }
91
92 type openPlain struct {
93 Plain
94 i int
95 }
96
97 type emphPlain struct {
98 Plain
99 canOpen bool
100 canClose bool
101 i int
102 n int
103 }
104
105 type Escaped struct {
106 Plain
107 }
108
109 func (x *Escaped) printMarkdown(buf *bytes.Buffer) {
110 buf.WriteByte('\\')
111 x.Plain.printMarkdown(buf)
112 }
113
114 type Code struct {
115 Text string
116 }
117
118 func (*Code) Inline() {}
119
120 func (x *Code) PrintHTML(buf *bytes.Buffer) {
121 fmt.Fprintf(buf, "<code>%s</code>", htmlEscaper.Replace(x.Text))
122 }
123
124 func (x *Code) printMarkdown(buf *bytes.Buffer) {
125 if len(x.Text) == 0 {
126 return
127 }
128
129 ticks := strings.Repeat("`", longestSequence(x.Text, '`')+1)
130 buf.WriteString(ticks)
131 if x.Text[0] == '`' {
132 buf.WriteByte(' ')
133 }
134 buf.WriteString(x.Text)
135 if x.Text[len(x.Text)-1] == '`' {
136 buf.WriteByte(' ')
137 }
138 buf.WriteString(ticks)
139 }
140
141
// longestSequence returns the length of the longest run of consecutive
// bytes equal to b in s. It returns 0 when b does not occur in s.
func longestSequence(s string, b byte) int {
	longest, run := 0, 0
	// Index byte by byte. Ranging over the string would advance one rune
	// at a time and never look at the continuation bytes of multi-byte
	// characters, which gives wrong counts for b >= 0x80.
	for i := 0; i < len(s); i++ {
		if s[i] != b {
			run = 0
			continue
		}
		run++
		if run > longest {
			longest = run
		}
	}
	return longest
}
160
161 func (x *Code) PrintText(buf *bytes.Buffer) {
162 htmlEscaper.WriteString(buf, x.Text)
163 }
164
165 type Strong struct {
166 Marker string
167 Inner []Inline
168 }
169
170 func (x *Strong) Inline() {
171 }
172
173 func (x *Strong) PrintHTML(buf *bytes.Buffer) {
174 buf.WriteString("<strong>")
175 for _, c := range x.Inner {
176 c.PrintHTML(buf)
177 }
178 buf.WriteString("</strong>")
179 }
180
181 func (x *Strong) printMarkdown(buf *bytes.Buffer) {
182 buf.WriteString(x.Marker)
183 for _, c := range x.Inner {
184 c.printMarkdown(buf)
185 }
186 buf.WriteString(x.Marker)
187 }
188
189 func (x *Strong) PrintText(buf *bytes.Buffer) {
190 for _, c := range x.Inner {
191 c.PrintText(buf)
192 }
193 }
194
195 type Del struct {
196 Marker string
197 Inner []Inline
198 }
199
200 func (x *Del) Inline() {
201
202 }
203
204 func (x *Del) PrintHTML(buf *bytes.Buffer) {
205 buf.WriteString("<del>")
206 for _, c := range x.Inner {
207 c.PrintHTML(buf)
208 }
209 buf.WriteString("</del>")
210 }
211
212 func (x *Del) printMarkdown(buf *bytes.Buffer) {
213 buf.WriteString(x.Marker)
214 for _, c := range x.Inner {
215 c.printMarkdown(buf)
216 }
217 buf.WriteString(x.Marker)
218 }
219
220 func (x *Del) PrintText(buf *bytes.Buffer) {
221 for _, c := range x.Inner {
222 c.PrintText(buf)
223 }
224 }
225
226 type Emph struct {
227 Marker string
228 Inner []Inline
229 }
230
231 func (*Emph) Inline() {}
232
233 func (x *Emph) PrintHTML(buf *bytes.Buffer) {
234 buf.WriteString("<em>")
235 for _, c := range x.Inner {
236 c.PrintHTML(buf)
237 }
238 buf.WriteString("</em>")
239 }
240
241 func (x *Emph) printMarkdown(buf *bytes.Buffer) {
242 buf.WriteString(x.Marker)
243 for _, c := range x.Inner {
244 c.printMarkdown(buf)
245 }
246 buf.WriteString(x.Marker)
247 }
248
249 func (x *Emph) PrintText(buf *bytes.Buffer) {
250 for _, c := range x.Inner {
251 c.PrintText(buf)
252 }
253 }
254
255 func (p *parseState) emit(i int) {
256 if p.emitted < i {
257 p.list = append(p.list, &Plain{p.s[p.emitted:i]})
258 p.emitted = i
259 }
260 }
261
262 func (p *parseState) skip(i int) {
263 p.emitted = i
264 }
265
// inline parses s as inline Markdown content and returns the resulting
// list of nodes. It overwrites p.s, p.list and p.emitted, and may set
// p.corner.
//
// The input is scanned byte by byte. A byte that can start an inline
// construct selects a parser; when that parser succeeds, the plain text
// before the construct is emitted, the new node is appended, and scanning
// resumes after it. A "]" is matched against the most recent pending "["
// or "![" to form a link or image. After the scan, emphasis delimiters are
// resolved, adjacent plain text is merged, and autoLinkText is applied.
func (p *parseState) inline(s string) []Inline {
	s = trimSpaceTab(s)

	// Reset the scanning state for this input.
	p.s = s
	p.list = nil
	p.emitted = 0
	// opens holds the indexes in p.list of pending openPlain nodes.
	var opens []int
	// lastLinkOpen is the open.i of the most recently completed "[" link.
	var lastLinkOpen int
	// backticks records whether p.backticks was reset during this call.
	backticks := false
	i := 0
	for i < len(s) {
		// Choose a parser from the current byte. Optional syntax is only
		// enabled when the matching parseState option is set.
		var parser func(*parseState, string, int) (Inline, int, int, bool)
		switch s[i] {
		case '\\':
			parser = parseEscape
		case '`':
			if !backticks {
				// Reset the backtick scanner once per call, on first use.
				backticks = true
				p.backticks.reset()
			}
			parser = p.backticks.parseCodeSpan
		case '<':
			parser = parseAutoLinkOrHTML
		case '[':
			parser = parseLinkOpen
		case '!':
			parser = parseImageOpen
		case '_', '*':
			parser = parseEmph
		case '.':
			if p.SmartDot {
				parser = parseDot
			}
		case '-':
			if p.SmartDash {
				parser = parseDash
			}
		case '"', '\'':
			if p.SmartQuote {
				parser = parseEmph
			}
		case '~':
			if p.Strikethrough {
				parser = parseEmph
			}
		case '\n':
			parser = parseBreak
		case '&':
			parser = parseHTMLEntity
		case ':':
			if p.Emoji {
				parser = parseEmoji
			}
		}
		if parser != nil {
			if x, start, end, ok := parser(p, s, i); ok {
				// Flush the plain text before the construct, then add it.
				p.emit(start)
				if _, ok := x.(*openPlain); ok {
					// Remember where this "[" or "![" sits in p.list.
					opens = append(opens, len(p.list))
				}
				p.list = append(p.list, x)
				i = end
				p.skip(i)
				continue
			}
		}
		if s[i] == ']' && len(opens) > 0 {
			// Pop the innermost pending opener. It is consumed by this
			// "]" whether or not a link results.
			oi := opens[len(opens)-1]
			open := p.list[oi].(*openPlain)
			opens = opens[:len(opens)-1]
			// An image opener is always eligible. A link opener is only
			// eligible if it does not start before the last completed
			// link's opener, which keeps links from containing links.
			if open.Text[0] == '!' || lastLinkOpen <= open.i {
				if x, end, ok := p.parseLinkClose(s, i, open); ok {
					p.corner = p.corner || x.corner || linkCorner(x.URL)
					p.emit(i)
					// Everything after the opener becomes the link content,
					// with its emphasis resolved separately.
					x.Inner = p.emph(nil, p.list[oi+1:])
					if open.Text[0] == '!' {
						p.list[oi] = (*Image)(x)
					} else {
						p.list[oi] = x
					}
					// The link or image replaces the opener and its content.
					p.list = p.list[:oi+1]
					p.skip(end)
					i = end
					if open.Text[0] == '[' {
						// Record this link so enclosing "[" openers are
						// rejected later.
						lastLinkOpen = open.i
					}
					continue
				}
			}
		}
		// Nothing matched here; the byte stays part of the pending text.
		i++
	}
	p.emit(len(s))
	p.list = p.emph(p.list[:0], p.list)
	p.list = p.mergePlain(p.list)
	p.list = p.autoLinkText(p.list)

	return p.list
}
369
// emph resolves the delimiter runs (emphPlain nodes) in src into Emph,
// Strong and Del nodes and curly quotes, appending the result to dst and
// returning it. openPlain nodes in src are replaced by their plain text,
// and all other nodes are copied unchanged.
//
// dst may share its backing array with src, as in emph(list[:0], list).
func (ps *parseState) emph(dst, src []Inline) []Inline {
	// One stack of pending openers per delimiter character.
	const chars = "_*~\"'"
	var stack [len(chars)][]*emphPlain
	stackOf := func(c byte) int {
		return strings.IndexByte(chars, c)
	}

	// trimStack drops, from every stack, the openers whose position in dst
	// is at or beyond the current end of dst, that is, openers that were
	// removed from dst when it was truncated.
	trimStack := func() {
		for i := range stack {
			stk := &stack[i]
			for len(*stk) > 0 && (*stk)[len(*stk)-1].i >= len(dst) {
				*stk = (*stk)[:len(*stk)-1]
			}
		}
	}

Src:
	for i := 0; i < len(src); i++ {
		if open, ok := src[i].(*openPlain); ok {
			// An opener that never became a link or image is just text.
			dst = append(dst, &open.Plain)
			continue
		}
		p, ok := src[i].(*emphPlain)
		if !ok {
			dst = append(dst, src[i])
			continue
		}
		if p.canClose {
			stk := &stack[stackOf(p.Text[0])]
		Loop:
			// Keep matching while the closer still has delimiter
			// characters left.
			for p.Text != "" {
				// Search for an opener, most recent first.
				for i := len(*stk) - 1; i >= 0; i-- {
					start := (*stk)[i]
					// For * and _: when either run can both open and close,
					// the pair is skipped if the two original lengths sum to
					// a multiple of 3 and are not both multiples of 3.
					if (p.Text[0] == '*' || p.Text[0] == '_') && (p.canOpen && p.canClose || start.canOpen && start.canClose) && (p.n+start.n)%3 == 0 && (p.n%3 != 0 || start.n%3 != 0) {
						continue
					}
					// For ~: opener and closer must have the same length.
					if p.Text[0] == '~' && len(p.Text) != len(start.Text) {
						continue
					}
					if p.Text[0] == '"' {
						// Matched double quotes: rewrite both ends as curly
						// quotes and discard this opener and any above it.
						dst[start.i].(*emphPlain).Text = "“"
						p.Text = "”"
						dst = append(dst, p)
						*stk = (*stk)[:i]

						continue Src
					}
					if p.Text[0] == '\'' {
						// Matched single quotes: same treatment.
						dst[start.i].(*emphPlain).Text = "‘"
						p.Text = "’"
						dst = append(dst, p)
						*stk = (*stk)[:i]

						continue Src
					}
					// d is the number of delimiter characters used from each
					// side: 2 when both sides have at least two, otherwise 1.
					var d int
					if len(p.Text) >= 2 && len(start.Text) >= 2 {

						d = 2
					} else {

						d = 1
					}
					del := p.Text[0] == '~'
					// Copy everything after the opener into the new node.
					x := &Emph{Marker: p.Text[:d], Inner: append([]Inline(nil), dst[start.i+1:]...)}
					start.Text = start.Text[:len(start.Text)-d]
					p.Text = p.Text[d:]
					// Truncate dst, keeping the opener only if it still has
					// unused delimiter characters.
					if start.Text == "" {
						dst = dst[:start.i]
					} else {
						dst = dst[:start.i+1]
					}
					trimStack()
					// Del, Strong and Emph are converted from the same
					// value; the node kind follows from the delimiter.
					if del {
						dst = append(dst, (*Del)(x))
					} else if d == 2 {
						dst = append(dst, (*Strong)(x))
					} else {
						dst = append(dst, x)
					}
					continue Loop
				}
				// No usable opener is left for this closer.
				break
			}
		}
		if p.Text != "" {
			// Leftover delimiter characters: unmatched quotes become curly
			// quotes, and the run is kept as a possible opener or as text.
			stk := &stack[stackOf(p.Text[0])]
			if p.Text == "'" {
				p.Text = "’"
			}
			if p.Text == "\"" {
				if p.canClose {
					p.Text = "”"
				} else {
					p.Text = "“"
				}
			}
			if p.canOpen {
				// Record the position in dst so a later closer can find it.
				p.i = len(dst)
				dst = append(dst, p)
				*stk = append(*stk, p)
			} else {
				dst = append(dst, &p.Plain)
			}
		}
	}
	return dst
}
480
481 func mdUnescape(s string) string {
482 if !strings.Contains(s, `\`) && !strings.Contains(s, `&`) {
483 return s
484 }
485 return mdUnescaper.Replace(s)
486 }
487
488 var mdUnescaper = func() *strings.Replacer {
489 var list = []string{
490 `\!`, `!`,
491 `\"`, `"`,
492 `\#`, `#`,
493 `\$`, `$`,
494 `\%`, `%`,
495 `\&`, `&`,
496 `\'`, `'`,
497 `\(`, `(`,
498 `\)`, `)`,
499 `\*`, `*`,
500 `\+`, `+`,
501 `\,`, `,`,
502 `\-`, `-`,
503 `\.`, `.`,
504 `\/`, `/`,
505 `\:`, `:`,
506 `\;`, `;`,
507 `\<`, `<`,
508 `\=`, `=`,
509 `\>`, `>`,
510 `\?`, `?`,
511 `\@`, `@`,
512 `\[`, `[`,
513 `\\`, `\`,
514 `\]`, `]`,
515 `\^`, `^`,
516 `\_`, `_`,
517 "\\`", "`",
518 `\{`, `{`,
519 `\|`, `|`,
520 `\}`, `}`,
521 `\~`, `~`,
522 }
523
524 for name, repl := range htmlEntity {
525 list = append(list, name, repl)
526 }
527 return strings.NewReplacer(list...)
528 }()
529
// isPunct reports whether c is an ASCII punctuation character: any
// printable ASCII character other than space, a letter, or a digit.
func isPunct(c byte) bool {
	switch {
	case '!' <= c && c <= '/',
		':' <= c && c <= '@',
		'[' <= c && c <= '`',
		'{' <= c && c <= '~':
		return true
	}
	return false
}
533
534 func parseEscape(p *parseState, s string, i int) (Inline, int, int, bool) {
535 if i+1 < len(s) {
536 c := s[i+1]
537 if isPunct(c) {
538 return &Escaped{Plain{s[i+1 : i+2]}}, i, i + 2, true
539 }
540 if c == '\n' {
541 if i > 0 && s[i-1] == '\\' {
542 p.corner = true
543 }
544 end := i + 2
545 for end < len(s) && (s[end] == ' ' || s[end] == '\t') {
546 end++
547 }
548 return &HardBreak{}, i, end, true
549 }
550 }
551 return nil, 0, 0, false
552 }
553
554 func parseDot(p *parseState, s string, i int) (Inline, int, int, bool) {
555 if i+2 < len(s) && s[i+1] == '.' && s[i+2] == '.' {
556 return &Plain{"…"}, i, i + 3, true
557 }
558 return nil, 0, 0, false
559 }
560
561 func parseDash(p *parseState, s string, i int) (Inline, int, int, bool) {
562 if i+1 >= len(s) || s[i+1] != '-' {
563 return nil, 0, 0, false
564 }
565
566 n := 2
567 for i+n < len(s) && s[i+n] == '-' {
568 n++
569 }
570
571
572 em, en := 0, 0
573 switch {
574 case n%3 == 0:
575 em = n / 3
576 case n%2 == 0:
577 en = n / 2
578 case n%3 == 2:
579 em = (n - 2) / 3
580 en = 1
581 case n%3 == 1:
582 em = (n - 4) / 3
583 en = 2
584 }
585 return &Plain{strings.Repeat("—", em) + strings.Repeat("–", en)}, i, i + n, true
586 }
587
588
// maxBackticks is the longest backtick run tracked by backtickParser.
const maxBackticks = 80

// backtickParser carries state across the code-span scans of one input
// string so that repeated failed scans can be cut short.
type backtickParser struct {
	// last[k-1] is the offset recorded for a run of exactly k backticks.
	last [maxBackticks]int
	// scanned is set once a scan has reached the end of the input
	// without finding a closing run.
	scanned bool
}

// reset returns b to its zero state, ready for a new input string.
func (b *backtickParser) reset() {
	b.last = [maxBackticks]int{}
	b.scanned = false
}
599
600 func (b *backtickParser) parseCodeSpan(p *parseState, s string, i int) (Inline, int, int, bool) {
601 start := i
602
603 n := 1
604 for i+n < len(s) && s[i+n] == '`' {
605 n++
606 }
607
608
609
610
611
612 if n > len(b.last) || b.scanned && b.last[n-1] < i+n {
613 goto NoMatch
614 }
615
616 for end := i + n; end < len(s); {
617 if s[end] != '`' {
618 end++
619 continue
620 }
621 estart := end
622 for end < len(s) && s[end] == '`' {
623 end++
624 }
625 m := end - estart
626 if !b.scanned && m < len(b.last) {
627 b.last[m-1] = estart
628 }
629 if m == n {
630
631
632 text := s[i+n : estart]
633 text = strings.ReplaceAll(text, "\n", " ")
634
635
636
637 if len(text) >= 2 && text[0] == ' ' && text[len(text)-1] == ' ' && trimSpace(text) != "" {
638 text = text[1 : len(text)-1]
639 }
640
641 return &Code{text}, start, end, true
642 }
643 }
644 b.scanned = true
645
646 NoMatch:
647
648
649
650 return &Plain{s[i : i+n]}, start, i + n, true
651 }
652
653 func parseAutoLinkOrHTML(p *parseState, s string, i int) (Inline, int, int, bool) {
654 if x, end, ok := parseAutoLinkURI(s, i); ok {
655 return x, i, end, true
656 }
657 if x, end, ok := parseAutoLinkEmail(s, i); ok {
658 return x, i, end, true
659 }
660 if x, end, ok := parseHTMLTag(p, s, i); ok {
661 return x, i, end, true
662 }
663 return nil, 0, 0, false
664 }
665
// isLetter reports whether c is an ASCII letter.
func isLetter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
669
670 func isLDH(c byte) bool {
671 return isLetterDigit(c) || c == '-'
672 }
673
// isLetterDigit reports whether c is an ASCII letter or an ASCII digit.
func isLetterDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9',
		'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z':
		return true
	}
	return false
}
677
678 func parseLinkOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
679 return &openPlain{Plain{s[i : i+1]}, i + 1}, i, i + 1, true
680 }
681
682 func parseImageOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
683 if i+1 < len(s) && s[i+1] == '[' {
684 return &openPlain{Plain{s[i : i+2]}, i + 2}, i, i + 2, true
685 }
686 return nil, 0, 0, false
687 }
688
689 func parseEmph(p *parseState, s string, i int) (Inline, int, int, bool) {
690 c := s[i]
691 j := i + 1
692 if c == '*' || c == '~' || c == '_' {
693 for j < len(s) && s[j] == c {
694 j++
695 }
696 }
697 if c == '~' && j-i != 2 {
698
699
700
701 p.corner = true
702 }
703 if c == '~' && j-i > 2 {
704 return &Plain{s[i:j]}, i, j, true
705 }
706
707 var before, after rune
708 if i == 0 {
709 before = ' '
710 } else {
711 before, _ = utf8.DecodeLastRuneInString(s[:i])
712 }
713 if j >= len(s) {
714 after = ' '
715 } else {
716 after, _ = utf8.DecodeRuneInString(s[j:])
717 }
718
719
720
721
722
723
724
725
726 leftFlank := !isUnicodeSpace(after) &&
727 (!isUnicodePunct(after) || isUnicodeSpace(before) || isUnicodePunct(before))
728
729
730
731
732
733
734
735
736 rightFlank := !isUnicodeSpace(before) &&
737 (!isUnicodePunct(before) || isUnicodeSpace(after) || isUnicodePunct(after))
738
739 var canOpen, canClose bool
740
741 switch c {
742 case '\'', '"':
743 canOpen = leftFlank && !rightFlank && before != ']' && before != ')'
744 canClose = rightFlank
745 case '*', '~':
746
747
748
749
750
751 canOpen = leftFlank
752
753
754
755
756
757
758 canClose = rightFlank
759 case '_':
760
761
762
763
764
765
766
767
768
769 canOpen = leftFlank && (!rightFlank || isUnicodePunct(before))
770
771
772
773
774
775
776
777
778
779
780 canClose = rightFlank && (!leftFlank || isUnicodePunct(after))
781 }
782
783 return &emphPlain{Plain: Plain{s[i:j]}, canOpen: canOpen, canClose: canClose, n: j - i}, i, j, true
784 }
785
// isUnicodeSpace reports whether r is a Unicode whitespace character as
// CommonMark defines it: a space, tab, line feed, form feed, carriage
// return, or any code point in the Unicode Zs (space separator) category.
func isUnicodeSpace(r rune) bool {
	if r < 0x80 {
		switch r {
		case ' ', '\t', '\n', '\f', '\r':
			return true
		}
		return false
	}
	return unicode.In(r, unicode.Zs)
}
792
793 func isUnicodePunct(r rune) bool {
794 if r < 0x80 {
795 return isPunct(byte(r))
796 }
797 return unicode.In(r, unicode.Punct)
798 }
799
// parseLinkClose tries to complete the link or image opened by open, given
// that s[i] is the closing "]". On success it returns the Link, the offset
// in s just past the whole construct, and true.
//
// Three forms are tried, depending on the byte after the "]":
//   - "(" starts an inline link: an optional destination and an optional
//     title, closed by ")".
//   - "[" starts a reference link whose label is looked up in p.links.
//   - otherwise, or when the forms above do not parse, the text between the
//     brackets is itself looked up in p.links as a label; a directly
//     following "[]" is consumed as part of the link.
func (p *parseState) parseLinkClose(s string, i int, open *openPlain) (*Link, int, bool) {
	if i+1 < len(s) {
		switch s[i+1] {
		case '(':
			// Inline link. This i shadows the parameter, so the offset of
			// the "]" is still available after a break out of the switch.
			i := skipSpace(s, i+2)
			var dest, title string
			var titleChar byte
			var corner bool
			if i < len(s) && s[i] != ')' {
				var ok bool
				dest, i, ok = parseLinkDest(s, i)
				if !ok {
					// Not an inline link; fall back to the label lookup below.
					break
				}
				i = skipSpace(s, i)
				if i < len(s) && s[i] != ')' {
					title, titleChar, i, ok = parseLinkTitle(s, i)
					// An empty title is flagged as a corner case.
					if title == "" {
						corner = true
					}
					if !ok {
						break
					}
					i = skipSpace(s, i)
				}
			}
			if i < len(s) && s[i] == ')' {
				return &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner}, i + 1, true
			}
			// No closing ")": leave the switch and try the text between
			// the brackets as a label.

		case '[':
			// Reference link with an explicit label. This i also shadows
			// the parameter.
			label, i, ok := parseLinkLabel(p, s, i+1)
			if !ok {
				break
			}
			if link, ok := p.links[normalizeLabel(label)]; ok {
				return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, i, true
			}
			// The label parsed but is not defined: this is not a link, and
			// the fallback below is deliberately not tried.
			return nil, 0, false
		}
	}

	// Use the bracketed text itself as the label. A trailing "[]" belongs
	// to the link.
	end := i + 1
	if strings.HasPrefix(s[end:], "[]") {
		end += 2
	}

	if link, ok := p.links[normalizeLabel(s[open.i:i])]; ok {
		return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, end, true
	}
	return nil, 0, false
}
860
// skipSpace returns the offset of the first byte at or after s[i] that is
// not a space, tab, or newline, or the end of s if there is none. If i is
// already at or past the end of s, it is returned unchanged.
func skipSpace(s string, i int) int {
	for ; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n':
			continue
		}
		return i
	}
	return i
}
868
869 func linkCorner(url string) bool {
870 for i := 0; i < len(url); i++ {
871 if url[i] == '%' {
872 if i+2 >= len(url) || !isHexDigit(url[i+1]) || !isHexDigit(url[i+2]) {
873
874
875 return true
876 }
877 }
878 }
879 return false
880 }
881
882 func (p *parseState) mergePlain(list []Inline) []Inline {
883 out := list[:0]
884 start := 0
885 for i := 0; ; i++ {
886 if i < len(list) && toPlain(list[i]) != nil {
887 continue
888 }
889
890 if start < i {
891 out = append(out, mergePlain1(list[start:i]))
892 }
893 if i >= len(list) {
894 break
895 }
896 out = append(out, list[i])
897 start = i + 1
898 }
899 return out
900 }
901
902 func toPlain(x Inline) *Plain {
903
904 switch x := x.(type) {
905 case *Plain:
906 return x
907 case *emphPlain:
908 return &x.Plain
909 case *openPlain:
910 return &x.Plain
911 }
912 return nil
913 }
914
915 func mergePlain1(list []Inline) *Plain {
916 if len(list) == 1 {
917 return toPlain(list[0])
918 }
919 var all []string
920 for _, pl := range list {
921 all = append(all, toPlain(pl).Text)
922 }
923 return &Plain{Text: strings.Join(all, "")}
924 }
925
926 func parseEmoji(p *parseState, s string, i int) (Inline, int, int, bool) {
927 for j := i + 1; ; j++ {
928 if j >= len(s) || j-i > 2+maxEmojiLen {
929 break
930 }
931 if s[j] == ':' {
932 name := s[i+1 : j]
933 if utf, ok := emoji[name]; ok {
934 return &Emoji{s[i : j+1], utf}, i, j + 1, true
935 }
936 break
937 }
938 }
939 return nil, 0, 0, false
940 }
941
942 type Emoji struct {
943 Name string
944 Text string
945 }
946
947 func (*Emoji) Inline() {}
948
949 func (x *Emoji) PrintHTML(buf *bytes.Buffer) {
950 htmlEscaper.WriteString(buf, x.Text)
951 }
952
953 func (x *Emoji) printMarkdown(buf *bytes.Buffer) {
954 buf.WriteString(x.Text)
955 }
956
957 func (x *Emoji) PrintText(buf *bytes.Buffer) {
958 htmlEscaper.WriteString(buf, x.Text)
959 }
960
View as plain text