Source file
src/strconv/quote.go
1
2
3
4
5
6
7 package strconv
8
9 import (
10 "unicode/utf8"
11 )
12
const (
	// lowerhex lists the lowercase hexadecimal digits; indexing it with a
	// 4-bit value yields the corresponding digit character.
	lowerhex = "0123456789abcdef"
	// upperhex is the uppercase counterpart of lowerhex. It is not
	// referenced in this part of the file.
	upperhex = "0123456789ABCDEF"
)
17
18
19 func contains(s string, c byte) bool {
20 return index(s, c) != -1
21 }
22
23 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
24 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
25 }
26
27 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
28 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
29 }
30
31 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
32
33
34 if cap(buf)-len(buf) < len(s) {
35 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
36 copy(nBuf, buf)
37 buf = nBuf
38 }
39 buf = append(buf, quote)
40 for width := 0; len(s) > 0; s = s[width:] {
41 r := rune(s[0])
42 width = 1
43 if r >= utf8.RuneSelf {
44 r, width = utf8.DecodeRuneInString(s)
45 }
46 if width == 1 && r == utf8.RuneError {
47 buf = append(buf, `\x`...)
48 buf = append(buf, lowerhex[s[0]>>4])
49 buf = append(buf, lowerhex[s[0]&0xF])
50 continue
51 }
52 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
53 }
54 buf = append(buf, quote)
55 return buf
56 }
57
58 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
59 buf = append(buf, quote)
60 if !utf8.ValidRune(r) {
61 r = utf8.RuneError
62 }
63 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
64 buf = append(buf, quote)
65 return buf
66 }
67
68 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
69 if r == rune(quote) || r == '\\' {
70 buf = append(buf, '\\')
71 buf = append(buf, byte(r))
72 return buf
73 }
74 if ASCIIonly {
75 if r < utf8.RuneSelf && IsPrint(r) {
76 buf = append(buf, byte(r))
77 return buf
78 }
79 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
80 return utf8.AppendRune(buf, r)
81 }
82 switch r {
83 case '\a':
84 buf = append(buf, `\a`...)
85 case '\b':
86 buf = append(buf, `\b`...)
87 case '\f':
88 buf = append(buf, `\f`...)
89 case '\n':
90 buf = append(buf, `\n`...)
91 case '\r':
92 buf = append(buf, `\r`...)
93 case '\t':
94 buf = append(buf, `\t`...)
95 case '\v':
96 buf = append(buf, `\v`...)
97 default:
98 switch {
99 case r < ' ' || r == 0x7f:
100 buf = append(buf, `\x`...)
101 buf = append(buf, lowerhex[byte(r)>>4])
102 buf = append(buf, lowerhex[byte(r)&0xF])
103 case !utf8.ValidRune(r):
104 r = 0xFFFD
105 fallthrough
106 case r < 0x10000:
107 buf = append(buf, `\u`...)
108 for s := 12; s >= 0; s -= 4 {
109 buf = append(buf, lowerhex[r>>uint(s)&0xF])
110 }
111 default:
112 buf = append(buf, `\U`...)
113 for s := 28; s >= 0; s -= 4 {
114 buf = append(buf, lowerhex[r>>uint(s)&0xF])
115 }
116 }
117 }
118 return buf
119 }
120
121
122
123
124
125 func Quote(s string) string {
126 return quoteWith(s, '"', false, false)
127 }
128
129
130
131 func AppendQuote(dst []byte, s string) []byte {
132 return appendQuotedWith(dst, s, '"', false, false)
133 }
134
135
136
137
138 func QuoteToASCII(s string) string {
139 return quoteWith(s, '"', true, false)
140 }
141
142
143
144 func AppendQuoteToASCII(dst []byte, s string) []byte {
145 return appendQuotedWith(dst, s, '"', true, false)
146 }
147
148
149
150
151
152 func QuoteToGraphic(s string) string {
153 return quoteWith(s, '"', false, true)
154 }
155
156
157
158 func AppendQuoteToGraphic(dst []byte, s string) []byte {
159 return appendQuotedWith(dst, s, '"', false, true)
160 }
161
162
163
164
165
166
167 func QuoteRune(r rune) string {
168 return quoteRuneWith(r, '\'', false, false)
169 }
170
171
172
173 func AppendQuoteRune(dst []byte, r rune) []byte {
174 return appendQuotedRuneWith(dst, r, '\'', false, false)
175 }
176
177
178
179
180
181
182
183 func QuoteRuneToASCII(r rune) string {
184 return quoteRuneWith(r, '\'', true, false)
185 }
186
187
188
189 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
190 return appendQuotedRuneWith(dst, r, '\'', true, false)
191 }
192
193
194
195
196
197
198
199 func QuoteRuneToGraphic(r rune) string {
200 return quoteRuneWith(r, '\'', false, true)
201 }
202
203
204
205 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
206 return appendQuotedRuneWith(dst, r, '\'', false, true)
207 }
208
209
210
211
// CanBackquote reports whether the string s can be represented unchanged as
// a single-line backquoted string: it must be free of invalid UTF-8, of
// backquotes, of the byte order mark U+FEFF, of DEL (U+007F), and of control
// characters other than tab.
func CanBackquote(s string) bool {
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		rest = rest[size:]

		switch {
		case size > 1:
			// Multi-byte runes are all acceptable except the byte order mark.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A width-1 RuneError here is an invalid UTF-8 byte.
			return false
		case r == '`' || r == '\u007F' || (r < ' ' && r != '\t'):
			return false
		}
	}
	return true
}
231
// unhex converts a single hexadecimal digit (either case) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	}
	return 0, false
}
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
260
261 if len(s) == 0 {
262 err = ErrSyntax
263 return
264 }
265 switch c := s[0]; {
266 case c == quote && (quote == '\'' || quote == '"'):
267 err = ErrSyntax
268 return
269 case c >= utf8.RuneSelf:
270 r, size := utf8.DecodeRuneInString(s)
271 return r, true, s[size:], nil
272 case c != '\\':
273 return rune(s[0]), false, s[1:], nil
274 }
275
276
277 if len(s) <= 1 {
278 err = ErrSyntax
279 return
280 }
281 c := s[1]
282 s = s[2:]
283
284 switch c {
285 case 'a':
286 value = '\a'
287 case 'b':
288 value = '\b'
289 case 'f':
290 value = '\f'
291 case 'n':
292 value = '\n'
293 case 'r':
294 value = '\r'
295 case 't':
296 value = '\t'
297 case 'v':
298 value = '\v'
299 case 'x', 'u', 'U':
300 n := 0
301 switch c {
302 case 'x':
303 n = 2
304 case 'u':
305 n = 4
306 case 'U':
307 n = 8
308 }
309 var v rune
310 if len(s) < n {
311 err = ErrSyntax
312 return
313 }
314 for j := 0; j < n; j++ {
315 x, ok := unhex(s[j])
316 if !ok {
317 err = ErrSyntax
318 return
319 }
320 v = v<<4 | x
321 }
322 s = s[n:]
323 if c == 'x' {
324
325 value = v
326 break
327 }
328 if !utf8.ValidRune(v) {
329 err = ErrSyntax
330 return
331 }
332 value = v
333 multibyte = true
334 case '0', '1', '2', '3', '4', '5', '6', '7':
335 v := rune(c) - '0'
336 if len(s) < 2 {
337 err = ErrSyntax
338 return
339 }
340 for j := 0; j < 2; j++ {
341 x := rune(s[j]) - '0'
342 if x < 0 || x > 7 {
343 err = ErrSyntax
344 return
345 }
346 v = (v << 3) | x
347 }
348 s = s[2:]
349 if v > 255 {
350 err = ErrSyntax
351 return
352 }
353 value = v
354 case '\\':
355 value = '\\'
356 case '\'', '"':
357 if c != quote {
358 err = ErrSyntax
359 return
360 }
361 value = rune(c)
362 default:
363 err = ErrSyntax
364 return
365 }
366 tail = s
367 return
368 }
369
370
371
372 func QuotedPrefix(s string) (string, error) {
373 out, _, err := unquote(s, false)
374 return out, err
375 }
376
377
378
379
380
381
382
383 func Unquote(s string) (string, error) {
384 out, rem, err := unquote(s, true)
385 if len(rem) > 0 {
386 return "", ErrSyntax
387 }
388 return out, err
389 }
390
391
392
393
394
395 func unquote(in string, unescape bool) (out, rem string, err error) {
396
397 if len(in) < 2 {
398 return "", in, ErrSyntax
399 }
400 quote := in[0]
401 end := index(in[1:], quote)
402 if end < 0 {
403 return "", in, ErrSyntax
404 }
405 end += 2
406
407 switch quote {
408 case '`':
409 switch {
410 case !unescape:
411 out = in[:end]
412 case !contains(in[:end], '\r'):
413 out = in[len("`") : end-len("`")]
414 default:
415
416
417 buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
418 for i := len("`"); i < end-len("`"); i++ {
419 if in[i] != '\r' {
420 buf = append(buf, in[i])
421 }
422 }
423 out = string(buf)
424 }
425
426
427
428
429
430 return out, in[end:], nil
431 case '"', '\'':
432
433 if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
434 var valid bool
435 switch quote {
436 case '"':
437 valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
438 case '\'':
439 r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
440 valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
441 }
442 if valid {
443 out = in[:end]
444 if unescape {
445 out = out[1 : end-1]
446 }
447 return out, in[end:], nil
448 }
449 }
450
451
452 var buf []byte
453 in0 := in
454 in = in[1:]
455 if unescape {
456 buf = make([]byte, 0, 3*end/2)
457 }
458 for len(in) > 0 && in[0] != quote {
459
460
461 r, multibyte, rem, err := UnquoteChar(in, quote)
462 if in[0] == '\n' || err != nil {
463 return "", in0, ErrSyntax
464 }
465 in = rem
466
467
468 if unescape {
469 if r < utf8.RuneSelf || !multibyte {
470 buf = append(buf, byte(r))
471 } else {
472 buf = utf8.AppendRune(buf, r)
473 }
474 }
475
476
477 if quote == '\'' {
478 break
479 }
480 }
481
482
483 if !(len(in) > 0 && in[0] == quote) {
484 return "", in0, ErrSyntax
485 }
486 in = in[1:]
487
488 if unescape {
489 return string(buf), in, nil
490 }
491 return in0[:len(in0)-len(in)], in, nil
492 default:
493 return "", in, ErrSyntax
494 }
495 }
496
497
498
// bsearch performs a binary search over s, which must be sorted in ascending
// order. It returns the smallest index i such that s[i] >= v (len(s) when no
// such element exists) and whether s[i] == v.
func bsearch[S ~[]E, E ~uint16 | ~uint32](s S, v E) (int, bool) {
	lo, hi := 0, len(s)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if s[mid] >= v {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	if lo == len(s) {
		return lo, false
	}
	return lo, s[lo] == v
}
512
513
514
515
516
517
518
519
520
521
522 func IsPrint(r rune) bool {
523
524 if r <= 0xFF {
525 if 0x20 <= r && r <= 0x7E {
526
527 return true
528 }
529 if 0xA1 <= r && r <= 0xFF {
530
531 return r != 0xAD
532 }
533 return false
534 }
535
536
537
538
539
540
541
542 if 0 <= r && r < 1<<16 {
543 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
544 i, _ := bsearch(isPrint, rr)
545 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
546 return false
547 }
548 _, found := bsearch(isNotPrint, rr)
549 return !found
550 }
551
552 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
553 i, _ := bsearch(isPrint, rr)
554 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
555 return false
556 }
557 if r >= 0x20000 {
558 return true
559 }
560 r -= 0x10000
561 _, found := bsearch(isNotPrint, uint16(r))
562 return !found
563 }
564
565
566
567
568 func IsGraphic(r rune) bool {
569 if IsPrint(r) {
570 return true
571 }
572 return isInGraphicList(r)
573 }
574
575
576
577
578 func isInGraphicList(r rune) bool {
579
580 if r > 0xFFFF {
581 return false
582 }
583 _, found := bsearch(isGraphic, uint16(r))
584 return found
585 }
586
View as plain text