1
2
3
4
5 package markdown
6
7 import (
8 "bytes"
9 "strconv"
10 "strings"
11 "unicode"
12 )
13
14 type HTMLBlock struct {
15 Position
16 Text []string
17 }
18
19 func (b *HTMLBlock) PrintHTML(buf *bytes.Buffer) {
20 for _, s := range b.Text {
21 buf.WriteString(s)
22 buf.WriteString("\n")
23 }
24 }
25
26 func (b *HTMLBlock) printMarkdown(buf *bytes.Buffer, s mdState) {
27 if s.prefix1 != "" {
28 buf.WriteString(s.prefix1)
29 } else {
30 buf.WriteString(s.prefix)
31 }
32 b.PrintHTML(buf)
33 }
34
// htmlBuilder accumulates the lines of an HTML block while it is being parsed.
type htmlBuilder struct {
	// endBlank, when set, makes extend stop the block at the first blank line
	// (the blank line itself is not added to text).
	endBlank bool

	// text is the list of lines collected so far.
	text []string

	// endFunc, when non-nil, is called by extend on each line it appends;
	// a true result means that line is the last line of the block.
	endFunc func(string) bool
}
40
41 func (c *htmlBuilder) extend(p *parseState, s line) (line, bool) {
42 if c.endBlank && s.isBlank() {
43 return s, false
44 }
45 t := s.string()
46 c.text = append(c.text, t)
47 if c.endFunc != nil && c.endFunc(t) {
48 return line{}, false
49 }
50 return line{}, true
51 }
52
53 func (c *htmlBuilder) build(p buildState) Block {
54 return &HTMLBlock{
55 p.pos(),
56 c.text,
57 }
58 }
59
60 func newHTML(p *parseState, s line) (line, bool) {
61 peek := s
62 if p.startHTML(&peek) {
63 return line{}, true
64 }
65 return s, false
66 }
67
// startHTML reports whether the line *s starts an HTML block. When it does,
// an htmlBuilder is registered with p.addBlock and seeded with s.string();
// blocks that also end on this same line are closed immediately with
// p.closeBlock.
//
// Recognition is done on a trimmed copy of the line (tt); the text stored in
// the builder is the untrimmed s.string(). The function never assigns
// through s.
//
// NOTE(review): the checks below appear to implement the seven "start
// conditions" for HTML blocks in the CommonMark spec, in the order
// 2-5, 6, 1, 7 - confirm against the spec version this package targets.
// NOTE(review): p.corner is set on several paths; its meaning is defined
// outside this function (presumably it flags corner-case input) - confirm.
func (p *parseState) startHTML(s *line) bool {
	// Work on a copy so that trimming does not affect the caller's line.
	// presumably trimSpace(0, 3, false) drops up to three leading spaces - TODO confirm.
	tt := *s
	tt.trimSpace(0, 3, false)
	if tt.peek() != '<' {
		return false
	}
	t := tt.string()

	// Constructs that run until a fixed terminator string: comment,
	// processing instruction, CDATA section, and "<!" followed by a letter.
	// "<![CDATA[" is tested before the generic "<!" case; '[' is expected
	// not to satisfy isLetter, so the two should not overlap.
	var end string
	switch {
	case strings.HasPrefix(t, "<!--"):
		end = "-->"
	case strings.HasPrefix(t, "<?"):
		end = "?>"
	case strings.HasPrefix(t, "<![CDATA["):
		end = "]]>"
	case strings.HasPrefix(t, "<!") && len(t) >= 3 && isLetter(t[2]):
		if 'a' <= t[2] && t[2] <= 'z' {
			// A lower-case letter after "<!" is accepted but marked via p.corner.
			p.corner = true
		}
		end = ">"
	}
	if end != "" {
		// The block continues until a line containing the terminator.
		b := &htmlBuilder{endFunc: func(s string) bool { return strings.Contains(s, end) }}
		p.addBlock(b)
		b.text = append(b.text, s.string())
		// The terminator may already be present on the opening line.
		if b.endFunc(t) {
			p.closeBlock()
		}
		return true
	}

	// Tag-name based detection. Skip "<" and an optional "/", then collect the
	// tag name: ASCII letters and digits, lower-cased, at most 16 bytes.
	// After the loop, i indexes the first byte that is not part of the name.
	i := 1
	if i < len(t) && t[i] == '/' {
		i++
	}
	buf := make([]byte, 0, 16)
	for ; i < len(t) && len(buf) < 16; i++ {
		c := t[i]
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if !('a' <= c && c <= 'z') && !('0' <= c && c <= '9') {
			break
		}
		buf = append(buf, c)
	}
	// The name must be followed by end of line, space, tab, ">" or "/>".
	// sep remembers the separator only for the space/tab/">" case; it stays 0
	// for "/>" and for end of line.
	var sep byte
	if i < len(t) {
		switch t[i] {
		default:
			goto Next
		case ' ', '\t', '>':
			// Acceptable separator.
			sep = t[i]
		case '/':
			if i+1 >= len(t) || t[i+1] != '>' {
				goto Next
			}
		}
	}

	if len(buf) == 0 {
		goto Next
	}
	// The braces below limit the scope of the variables declared inside them:
	// Go does not allow the "goto Next" statements above to jump over
	// variable declarations in the same block.
	{
		// Look the name up in htmlTags (declared elsewhere in the package).
		c := buf[0]
		var ok bool
		for _, name := range htmlTags {
			if name[0] == c && len(name) == len(buf) && name == string(buf) {
				if sep == '\t' {
					// A tab directly after a known tag name is accepted
					// but marked via p.corner.
					p.corner = true
				}
				ok = true
				break
			}
		}
		if !ok {
			goto Next
		}
	}

	{
		// Known tag name: the block runs until the next blank line.
		b := &htmlBuilder{endBlank: true}
		p.addBlock(b)
		b.text = append(b.text, s.string())
		return true
	}

Next:
	// Opening (not closing) pre/script/style/textarea tag. i and buf still hold
	// the result of the name scan above. The block runs until a line for which
	// hasEndPre is true, which may be this very line.
	if len(t) > 1 && t[1] != '/' && (i >= len(t) || t[i] == ' ' || t[i] == '\t' || t[i] == '>') {
		switch string(buf) {
		case "pre", "script", "style", "textarea":
			b := &htmlBuilder{endFunc: hasEndPre}
			p.addBlock(b)
			b.text = append(b.text, s.string())
			if hasEndPre(t) {
				p.closeBlock()
			}
			return true
		}
	}

	// Any other complete open or closing tag that is alone on its line
	// (followed only by what skipSpace skips). This is attempted only when
	// p.para() returns nil; presumably that means no paragraph is currently
	// open, so such a tag cannot interrupt a paragraph - TODO confirm.
	if p.para() == nil {
		if _, e, ok := parseHTMLOpenTag(p, t, 0); ok && skipSpace(t, e) == len(t) {
			if e != len(t) {
				// Trailing space after the tag is accepted but marked via p.corner.
				p.corner = true
			}
			b := &htmlBuilder{endBlank: true}
			p.addBlock(b)
			b.text = append(b.text, s.string())
			return true
		}
		if _, e, ok := parseHTMLClosingTag(p, t, 0); ok && skipSpace(t, e) == len(t) {
			b := &htmlBuilder{endBlank: true}
			p.addBlock(b)
			b.text = append(b.text, s.string())
			return true
		}
	}

	return false
}
198
// hasEndPre reports whether s contains a closing tag for one of the elements
// pre, script, style or textarea, for example "</pre>" or "</SCRIPT>".
//
// The tag name is compared case-insensitively and must be followed directly
// by '>'; no whitespace is allowed between the name and '>'.
//
// The scan uses its own index j so that the byte that stops a failed match is
// examined again by the outer loop. Previously the outer loop's increment
// skipped that byte, so a '<' directly after an aborted candidate was missed
// and inputs such as "</</pre>" or "</div</pre>" were wrongly rejected.
func hasEndPre(s string) bool {
	for i := 0; i+1 < len(s); i++ {
		if s[i] != '<' || s[i+1] != '/' {
			continue
		}

		// Collect the lower-cased ASCII letters after "</".
		// Eight bytes is enough for the longest accepted name, "textarea".
		var name [8]byte
		n := 0
		j := i + 2
		for ; j < len(s) && n < len(name); j++ {
			c := s[j]
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
			}
			if c < 'a' || 'z' < c {
				break
			}
			name[n] = c
			n++
		}

		if j < len(s) && s[j] == '>' {
			switch string(name[:n]) {
			case "pre", "script", "style", "textarea":
				return true
			}
		}

		// Everything in s[i+2:j] is a letter and cannot start a tag, so resume
		// at s[j] itself (the loop increment turns j-1 into j).
		i = j - 1
	}
	return false
}
223
224 func parseHTMLTag(p *parseState, s string, i int) (Inline, int, bool) {
225
226
227 if i+3 <= len(s) && s[i] == '<' {
228 switch s[i+1] {
229 default:
230 return parseHTMLOpenTag(p, s, i)
231 case '/':
232 return parseHTMLClosingTag(p, s, i)
233 case '!':
234 switch s[i+2] {
235 case '-':
236 return parseHTMLComment(s, i)
237 case '[':
238 return parseHTMLCDATA(s, i)
239 default:
240 return parseHTMLDecl(p, s, i)
241 }
242 case '?':
243 return parseHTMLProcInst(s, i)
244 }
245 }
246 return nil, 0, false
247 }
248
249 func parseHTMLOpenTag(p *parseState, s string, i int) (Inline, int, bool) {
250 if i >= len(s) || s[i] != '<' {
251 return nil, 0, false
252 }
253
254
255 if name, j, ok := parseTagName(s, i+1); ok {
256 switch name {
257 case "pre", "script", "style", "textarea":
258
259
260 p.corner = true
261 }
262 for {
263 if j >= len(s) || s[j] != ' ' && s[j] != '\t' && s[j] != '\n' && s[j] != '/' && s[j] != '>' {
264 return nil, 0, false
265 }
266 _, k, ok := parseAttr(p, s, j)
267 if !ok {
268 break
269 }
270 j = k
271 }
272 k := skipSpace(s, j)
273 if k != j {
274
275 p.corner = true
276 }
277 j = k
278 if j < len(s) && s[j] == '/' {
279 j++
280 }
281 if j < len(s) && s[j] == '>' {
282 return &HTMLTag{s[i : j+1]}, j + 1, true
283 }
284 }
285 return nil, 0, false
286 }
287
288 func parseHTMLClosingTag(p *parseState, s string, i int) (Inline, int, bool) {
289
290
291 if i+2 >= len(s) || s[i] != '<' || s[i+1] != '/' {
292 return nil, 0, false
293 }
294 if skipSpace(s, i+2) != i+2 {
295
296 p.corner = true
297 }
298
299 if _, j, ok := parseTagName(s, i+2); ok {
300 j = skipSpace(s, j)
301 if j < len(s) && s[j] == '>' {
302 return &HTMLTag{s[i : j+1]}, j + 1, true
303 }
304 }
305 return nil, 0, false
306 }
307
308 func parseTagName(s string, i int) (string, int, bool) {
309
310 if i < len(s) && isLetter(s[i]) {
311 j := i + 1
312 for j < len(s) && isLDH(s[j]) {
313 j++
314 }
315 return s[i:j], j, true
316 }
317 return "", 0, false
318 }
319
320 func parseAttr(p *parseState, s string, i int) (string, int, bool) {
321
322
323 i = skipSpace(s, i)
324 if _, j, ok := parseAttrName(s, i); ok {
325 if _, k, ok := parseAttrValueSpec(p, s, j); ok {
326 j = k
327 }
328 return s[i:j], j, true
329 }
330 return "", 0, false
331 }
332
333 func parseAttrName(s string, i int) (string, int, bool) {
334
335
336 if i+1 < len(s) && (isLetter(s[i]) || s[i] == '_' || s[i] == ':') {
337 j := i + 1
338 for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '.' || s[j] == ':') {
339 j++
340 }
341 return s[i:j], j, true
342 }
343 return "", 0, false
344 }
345
346 func parseAttrValueSpec(p *parseState, s string, i int) (string, int, bool) {
347
348
349
350
351
352 i = skipSpace(s, i)
353 if i+1 < len(s) && s[i] == '=' {
354 i = skipSpace(s, i+1)
355 if _, j, ok := parseAttrValue(s, i); ok {
356 p.corner = p.corner || strings.Contains(s[i:j], "\ufffd")
357 return s[i:j], j, true
358 }
359 }
360 return "", 0, false
361 }
362
// parseAttrValue parses an attribute value starting at s[i].
//
// A value is either quoted - a ' or " followed by any bytes up to the next
// occurrence of the same quote - or unquoted: a non-empty run of bytes that
// contains none of space, tab, newline, ", ', =, <, > or `.
// No escape sequences are recognised inside quoted values.
//
// It returns the value text (including the quotes, if any), the index just
// past it, and true, or "", 0, false when there is no value at i.
func parseAttrValue(s string, i int) (string, int, bool) {
	if i >= len(s) {
		return "", 0, false
	}

	if quote := s[i]; quote == '\'' || quote == '"' {
		n := strings.IndexByte(s[i+1:], quote)
		if n < 0 {
			// Unterminated quote. A quote can never begin an unquoted value
			// either, so there is nothing to parse here.
			return "", 0, false
		}
		end := i + n + 2 // opening quote + n content bytes + closing quote
		return s[i:end], end, true
	}

	end := i
scan:
	for ; end < len(s); end++ {
		switch s[end] {
		case ' ', '\t', '\n', '"', '\'', '=', '<', '>', '`':
			break scan
		}
	}
	if end == i {
		return "", 0, false
	}
	return s[i:end], end, true
}
391
392 func parseHTMLComment(s string, i int) (Inline, int, bool) {
393
394
395
396 if !strings.HasPrefix(s[i:], "<!-->") &&
397 !strings.HasPrefix(s[i:], "<!--->") {
398 if x, end, ok := parseHTMLMarker(s, i, "<!--", "-->"); ok {
399 if t := x.(*HTMLTag).Text; !strings.Contains(t[len("<!--"):len(t)-len("->")], "--") {
400 return x, end, ok
401 }
402 }
403 }
404 return nil, 0, false
405 }
406
407 func parseHTMLCDATA(s string, i int) (Inline, int, bool) {
408
409
410 return parseHTMLMarker(s, i, "<![CDATA[", "]]>")
411 }
412
413 func parseHTMLDecl(p *parseState, s string, i int) (Inline, int, bool) {
414
415
416 if i+2 < len(s) && isLetter(s[i+2]) {
417 if 'a' <= s[i+2] && s[i+2] <= 'z' {
418 p.corner = true
419 }
420 return parseHTMLMarker(s, i, "<!", ">")
421 }
422 return nil, 0, false
423 }
424
425 func parseHTMLProcInst(s string, i int) (Inline, int, bool) {
426
427
428 return parseHTMLMarker(s, i, "<?", "?>")
429 }
430
431 func parseHTMLMarker(s string, i int, prefix, suffix string) (Inline, int, bool) {
432 if strings.HasPrefix(s[i:], prefix) {
433 if j := strings.Index(s[i+len(prefix):], suffix); j >= 0 {
434 end := i + len(prefix) + j + len(suffix)
435 return &HTMLTag{s[i:end]}, end, true
436 }
437 }
438 return nil, 0, false
439 }
440
441 func parseHTMLEntity(_ *parseState, s string, i int) (Inline, int, int, bool) {
442 start := i
443 if i+1 < len(s) && s[i+1] == '#' {
444 i += 2
445 var r, end int
446 if i < len(s) && (s[i] == 'x' || s[i] == 'X') {
447
448 i++
449 j := i
450 for j < len(s) && isHexDigit(s[j]) {
451 j++
452 }
453 if j-i < 1 || j-i > 6 || j >= len(s) || s[j] != ';' {
454 return nil, 0, 0, false
455 }
456 r64, _ := strconv.ParseInt(s[i:j], 16, 0)
457 r = int(r64)
458 end = j + 1
459 } else {
460
461 j := i
462 for j < len(s) && isDigit(s[j]) {
463 j++
464 }
465 if j-i < 1 || j-i > 7 || j >= len(s) || s[j] != ';' {
466 return nil, 0, 0, false
467 }
468 r, _ = strconv.Atoi(s[i:j])
469 end = j + 1
470 }
471 if r > unicode.MaxRune || r == 0 {
472 r = unicode.ReplacementChar
473 }
474 return &Plain{string(rune(r))}, start, end, true
475 }
476
477
478 for j := i + 1; j < len(s) && j-i < 64; j++ {
479 if s[j] == '&' {
480 break
481 }
482 if s[j] == ';' {
483 if r, ok := htmlEntity[s[i:j+1]]; ok {
484 return &Plain{r}, start, j + 1, true
485 }
486 break
487 }
488 }
489
490 return nil, 0, 0, false
491 }
492
// HTMLTag is an inline node holding the literal source text of a raw HTML
// construct (tag, comment, CDATA section, declaration or processing
// instruction) as produced by the parseHTML* functions.
type HTMLTag struct {
	Text string
}

// Inline is an empty marker method.
// NOTE(review): presumably it makes *HTMLTag satisfy the package's Inline
// interface - confirm against that interface's definition.
func (*HTMLTag) Inline() {}

// PrintHTML writes the tag's text to buf unchanged (no escaping).
func (x *HTMLTag) PrintHTML(buf *bytes.Buffer) {
	buf.WriteString(x.Text)
}

// printMarkdown writes the tag's text to buf unchanged; the Markdown form of
// a raw HTML tag is identical to its HTML form.
func (x *HTMLTag) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Text)
}

// PrintText writes nothing: a raw HTML tag contributes no plain text.
func (x *HTMLTag) PrintText(buf *bytes.Buffer) {}
508
View as plain text