Source file src/net/textproto/reader.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textproto
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  	_ "unsafe" // for linkname
    18  )
    19  
// errMessageTooLarge is returned by readLineSlice, readContinuedLineSlice
// and readMIMEHeader when the input exceeds the limit they were given.
//
// TODO: This should be a distinguishable error (ErrMessageTooLarge)
// to allow mime/multipart to detect it.
var errMessageTooLarge = errors.New("message too large")
    23  
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	R   *bufio.Reader // buffered source that every method reads from
	dot *dotReader    // DotReader currently attached to this Reader, or nil; drained by closeDot
	buf []byte        // a re-usable buffer for readContinuedLineSlice
}
    31  
    32  // NewReader returns a new [Reader] reading from r.
    33  //
    34  // To avoid denial of service attacks, the provided [bufio.Reader]
    35  // should be reading from an [io.LimitReader] or similar Reader to bound
    36  // the size of responses.
    37  func NewReader(r *bufio.Reader) *Reader {
    38  	return &Reader{R: r}
    39  }
    40  
    41  // ReadLine reads a single line from r,
    42  // eliding the final \n or \r\n from the returned string.
    43  func (r *Reader) ReadLine() (string, error) {
    44  	line, err := r.readLineSlice(-1)
    45  	return string(line), err
    46  }
    47  
    48  // ReadLineBytes is like [Reader.ReadLine] but returns a []byte instead of a string.
    49  func (r *Reader) ReadLineBytes() ([]byte, error) {
    50  	line, err := r.readLineSlice(-1)
    51  	if line != nil {
    52  		line = bytes.Clone(line)
    53  	}
    54  	return line, err
    55  }
    56  
    57  // readLineSlice reads a single line from r,
    58  // up to lim bytes long (or unlimited if lim is less than 0),
    59  // eliding the final \r or \r\n from the returned string.
    60  func (r *Reader) readLineSlice(lim int64) ([]byte, error) {
    61  	r.closeDot()
    62  	var line []byte
    63  	for {
    64  		l, more, err := r.R.ReadLine()
    65  		if err != nil {
    66  			return nil, err
    67  		}
    68  		if lim >= 0 && int64(len(line))+int64(len(l)) > lim {
    69  			return nil, errMessageTooLarge
    70  		}
    71  		// Avoid the copy if the first call produced a full line.
    72  		if line == nil && !more {
    73  			return l, nil
    74  		}
    75  		line = append(line, l...)
    76  		if !more {
    77  			break
    78  		}
    79  	}
    80  	return line, nil
    81  }
    82  
    83  // ReadContinuedLine reads a possibly continued line from r,
    84  // eliding the final trailing ASCII white space.
    85  // Lines after the first are considered continuations if they
    86  // begin with a space or tab character. In the returned data,
    87  // continuation lines are separated from the previous line
    88  // only by a single space: the newline and leading white space
    89  // are removed.
    90  //
    91  // For example, consider this input:
    92  //
    93  //	Line 1
    94  //	  continued...
    95  //	Line 2
    96  //
    97  // The first call to ReadContinuedLine will return "Line 1 continued..."
    98  // and the second will return "Line 2".
    99  //
   100  // Empty lines are never continued.
   101  func (r *Reader) ReadContinuedLine() (string, error) {
   102  	line, err := r.readContinuedLineSlice(-1, noValidation)
   103  	return string(line), err
   104  }
   105  
   106  // trim returns s with leading and trailing spaces and tabs removed.
   107  // It does not assume Unicode or UTF-8.
   108  func trim(s []byte) []byte {
   109  	i := 0
   110  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   111  		i++
   112  	}
   113  	s = s[i:]
   114  	n := len(s) - 1
   115  	for n >= 0 && (s[n] == ' ' || s[n] == '\t') {
   116  		n--
   117  	}
   118  	return s[:n+1]
   119  }
   120  
   121  // ReadContinuedLineBytes is like [Reader.ReadContinuedLine] but
   122  // returns a []byte instead of a string.
   123  func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
   124  	line, err := r.readContinuedLineSlice(-1, noValidation)
   125  	if line != nil {
   126  		line = bytes.Clone(line)
   127  	}
   128  	return line, err
   129  }
   130  
// readContinuedLineSlice reads continued lines from the reader buffer,
// returning a byte slice with all lines. The validateFirstLine function
// is run on the first read line, and if it returns an error then this
// error is returned from readContinuedLineSlice.
// It reads up to lim bytes of data (or unlimited if lim is less than 0).
//
// The returned slice is either a sub-slice of what readLineSlice
// returned or r.buf itself; callers that keep it must copy it.
// A nil validateFirstLine is rejected with an error.
func (r *Reader) readContinuedLineSlice(lim int64, validateFirstLine func([]byte) error) ([]byte, error) {
	if validateFirstLine == nil {
		return nil, fmt.Errorf("missing validateFirstLine func")
	}

	// Read the first line.
	line, err := r.readLineSlice(lim)
	if err != nil {
		return nil, err
	}
	if len(line) == 0 { // blank line - no continuation
		return line, nil
	}

	if err := validateFirstLine(line); err != nil {
		return nil, err
	}

	// Optimistically assume that we have started to buffer the next line
	// and it starts with an ASCII letter (the next header key), or a blank
	// line, so we can avoid copying that buffered data around in memory
	// and skipping over non-existent whitespace.
	if r.R.Buffered() > 1 {
		// Buffered() > 1 was just checked, so the Peek error is ignored.
		peek, _ := r.R.Peek(2)
		if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') ||
			len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' {
			return trim(line), nil
		}
	}

	// ReadByte or the next readLineSlice will flush the read buffer;
	// copy the slice into buf.
	r.buf = append(r.buf[:0], trim(line)...)

	// From here on lim is treated as a remaining budget; "unlimited"
	// becomes the largest representable budget.
	if lim < 0 {
		lim = math.MaxInt64
	}
	lim -= int64(len(r.buf))
	// NOTE(review): len(r.buf) is subtracted from lim here and is also
	// compared against / subtracted from lim inside the loop below, so the
	// first line's length is counted more than once. Confirm this is the
	// intended accounting before changing it.

	// Read continuation lines.
	// skipSpace consumes leading spaces and tabs; a count of zero means
	// the next line is not a continuation.
	for r.skipSpace() > 0 {
		r.buf = append(r.buf, ' ')
		if int64(len(r.buf)) >= lim {
			return nil, errMessageTooLarge
		}
		line, err := r.readLineSlice(lim - int64(len(r.buf)))
		if err != nil {
			// The error is dropped here: the loop ends and the data
			// gathered so far is returned with a nil error.
			break
		}
		r.buf = append(r.buf, trim(line)...)
	}
	return r.buf, nil
}
   189  
   190  // skipSpace skips R over all spaces and returns the number of bytes skipped.
   191  func (r *Reader) skipSpace() int {
   192  	n := 0
   193  	for {
   194  		c, err := r.R.ReadByte()
   195  		if err != nil {
   196  			// Bufio will keep err until next read.
   197  			break
   198  		}
   199  		if c != ' ' && c != '\t' {
   200  			r.R.UnreadByte()
   201  			break
   202  		}
   203  		n++
   204  	}
   205  	return n
   206  }
   207  
   208  func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
   209  	line, err := r.ReadLine()
   210  	if err != nil {
   211  		return
   212  	}
   213  	return parseCodeLine(line, expectCode)
   214  }
   215  
   216  func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
   217  	if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   218  		err = ProtocolError("short response: " + line)
   219  		return
   220  	}
   221  	continued = line[3] == '-'
   222  	code, err = strconv.Atoi(line[0:3])
   223  	if err != nil || code < 100 {
   224  		err = ProtocolError("invalid response code: " + line)
   225  		return
   226  	}
   227  	message = line[4:]
   228  	if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   229  		10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   230  		100 <= expectCode && expectCode < 1000 && code != expectCode {
   231  		err = &Error{code, message}
   232  	}
   233  	return
   234  }
   235  
   236  // ReadCodeLine reads a response code line of the form
   237  //
   238  //	code message
   239  //
   240  // where code is a three-digit status code and the message
   241  // extends to the rest of the line. An example of such a line is:
   242  //
   243  //	220 plan9.bell-labs.com ESMTP
   244  //
   245  // If the prefix of the status does not match the digits in expectCode,
   246  // ReadCodeLine returns with err set to &Error{code, message}.
   247  // For example, if expectCode is 31, an error will be returned if
   248  // the status is not in the range [310,319].
   249  //
   250  // If the response is multi-line, ReadCodeLine returns an error.
   251  //
   252  // An expectCode <= 0 disables the check of the status code.
   253  func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
   254  	code, continued, message, err := r.readCodeLine(expectCode)
   255  	if err == nil && continued {
   256  		err = ProtocolError("unexpected multi-line response: " + message)
   257  	}
   258  	return
   259  }
   260  
   261  // ReadResponse reads a multi-line response of the form:
   262  //
   263  //	code-message line 1
   264  //	code-message line 2
   265  //	...
   266  //	code message line n
   267  //
   268  // where code is a three-digit status code. The first line starts with the
   269  // code and a hyphen. The response is terminated by a line that starts
   270  // with the same code followed by a space. Each line in message is
   271  // separated by a newline (\n).
   272  //
   273  // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
   274  // details of another form of response accepted:
   275  //
   276  //	code-message line 1
   277  //	message line 2
   278  //	...
   279  //	code message line n
   280  //
   281  // If the prefix of the status does not match the digits in expectCode,
   282  // ReadResponse returns with err set to &Error{code, message}.
   283  // For example, if expectCode is 31, an error will be returned if
   284  // the status is not in the range [310,319].
   285  //
   286  // An expectCode <= 0 disables the check of the status code.
   287  func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
   288  	code, continued, first, err := r.readCodeLine(expectCode)
   289  	multi := continued
   290  	var messageBuilder strings.Builder
   291  	messageBuilder.WriteString(first)
   292  	for continued {
   293  		line, err := r.ReadLine()
   294  		if err != nil {
   295  			return 0, "", err
   296  		}
   297  
   298  		var code2 int
   299  		var moreMessage string
   300  		code2, continued, moreMessage, err = parseCodeLine(line, 0)
   301  		if err != nil || code2 != code {
   302  			messageBuilder.WriteByte('\n')
   303  			messageBuilder.WriteString(strings.TrimRight(line, "\r\n"))
   304  			continued = true
   305  			continue
   306  		}
   307  		messageBuilder.WriteByte('\n')
   308  		messageBuilder.WriteString(moreMessage)
   309  	}
   310  	message = messageBuilder.String()
   311  	if err != nil && multi && message != "" {
   312  		// replace one line error message with all lines (full message)
   313  		err = &Error{code, message}
   314  	}
   315  	return
   316  }
   317  
   318  // DotReader returns a new [Reader] that satisfies Reads using the
   319  // decoded text of a dot-encoded block read from r.
   320  // The returned Reader is only valid until the next call
   321  // to a method on r.
   322  //
   323  // Dot encoding is a common framing used for data blocks
   324  // in text protocols such as SMTP.  The data consists of a sequence
   325  // of lines, each of which ends in "\r\n".  The sequence itself
   326  // ends at a line containing just a dot: ".\r\n".  Lines beginning
   327  // with a dot are escaped with an additional dot to avoid
   328  // looking like the end of the sequence.
   329  //
   330  // The decoded form returned by the Reader's Read method
   331  // rewrites the "\r\n" line endings into the simpler "\n",
   332  // removes leading dot escapes if present, and stops with error [io.EOF]
   333  // after consuming (and discarding) the end-of-sequence line.
   334  func (r *Reader) DotReader() io.Reader {
   335  	r.closeDot()
   336  	r.dot = &dotReader{r: r}
   337  	return r.dot
   338  }
   339  
// dotReader is the io.Reader returned by Reader.DotReader. It decodes
// a dot-encoded block from its parent Reader.
type dotReader struct {
	r     *Reader // parent Reader whose R supplies the encoded bytes
	state int     // current state of the decoding state machine in Read; zero is the initial state
}
   344  
// Read satisfies reads by decoding dot-encoded data read from d.r.
//
// It fills b with decoded bytes and returns how many were written.
// io.EOF is returned once the ".\r\n" (or ".\n") end marker has been
// consumed; an io.EOF from the underlying reader before that point is
// reported as io.ErrUnexpectedEOF. On any error, including io.EOF, the
// dotReader detaches itself from its parent Reader.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			if err == io.EOF {
				// The source ended before the end marker was seen.
				err = io.ErrUnexpectedEOF
			}
			break
		}
		// In the switch below, "continue" consumes c without emitting
		// anything; falling out of the switch emits c into b.
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// A bare ".\n" is accepted as the end marker too.
				d.state = stateEOF
				continue
			}
			// The leading dot was an escape; it is dropped and c is emitted.
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// This break leaves the switch, not the loop, so the \n
				// is emitted below in place of the \r\n pair.
				break
			}
			// Not part of \r\n. Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	if err != nil && d.r.dot == d {
		// Detach from the parent so that closeDot's loop terminates.
		d.r.dot = nil
	}
	return
}
   432  
   433  // closeDot drains the current DotReader if any,
   434  // making sure that it reads until the ending dot line.
   435  func (r *Reader) closeDot() {
   436  	if r.dot == nil {
   437  		return
   438  	}
   439  	buf := make([]byte, 128)
   440  	for r.dot != nil {
   441  		// When Read reaches EOF or an error,
   442  		// it will set r.dot == nil.
   443  		r.dot.Read(buf)
   444  	}
   445  }
   446  
   447  // ReadDotBytes reads a dot-encoding and returns the decoded data.
   448  //
   449  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   450  func (r *Reader) ReadDotBytes() ([]byte, error) {
   451  	return io.ReadAll(r.DotReader())
   452  }
   453  
   454  // ReadDotLines reads a dot-encoding and returns a slice
   455  // containing the decoded lines, with the final \r\n or \n elided from each.
   456  //
   457  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   458  func (r *Reader) ReadDotLines() ([]string, error) {
   459  	// We could use ReadDotBytes and then Split it,
   460  	// but reading a line at a time avoids needing a
   461  	// large contiguous block of memory and is simpler.
   462  	var v []string
   463  	var err error
   464  	for {
   465  		var line string
   466  		line, err = r.ReadLine()
   467  		if err != nil {
   468  			if err == io.EOF {
   469  				err = io.ErrUnexpectedEOF
   470  			}
   471  			break
   472  		}
   473  
   474  		// Dot by itself marks end; otherwise cut one dot.
   475  		if len(line) > 0 && line[0] == '.' {
   476  			if len(line) == 1 {
   477  				break
   478  			}
   479  			line = line[1:]
   480  		}
   481  		v = append(v, line)
   482  	}
   483  	return v, err
   484  }
   485  
// colon is the separator readMIMEHeader passes to bytes.Cut to split
// a header line into key and value.
var colon = []byte(":")
   487  
   488  // ReadMIMEHeader reads a MIME-style header from r.
   489  // The header is a sequence of possibly continued Key: Value lines
   490  // ending in a blank line.
   491  // The returned map m maps [CanonicalMIMEHeaderKey](key) to a
   492  // sequence of values in the same order encountered in the input.
   493  //
   494  // For example, consider this input:
   495  //
   496  //	My-Key: Value 1
   497  //	Long-Key: Even
   498  //	       Longer Value
   499  //	My-Key: Value 2
   500  //
   501  // Given that input, ReadMIMEHeader returns the map:
   502  //
   503  //	map[string][]string{
   504  //		"My-Key": {"Value 1", "Value 2"},
   505  //		"Long-Key": {"Even Longer Value"},
   506  //	}
   507  func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
   508  	return readMIMEHeader(r, math.MaxInt64, math.MaxInt64)
   509  }
   510  
   511  // readMIMEHeader is accessed from mime/multipart.
   512  //go:linkname readMIMEHeader
   513  
// readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size.
// It is called by the mime/multipart package.
//
// maxMemory is an estimated byte budget for the resulting MIMEHeader
// (fixed overhead, per-entry overhead, key bytes and value bytes);
// maxHeaders caps the number of header lines. Exceeding either yields
// errMessageTooLarge. On most error paths the partially filled map is
// returned together with the error; when maxHeaders is exceeded the
// returned map is nil.
func readMIMEHeader(r *Reader, maxMemory, maxHeaders int64) (MIMEHeader, error) {
	// Avoid lots of small slice allocations later by allocating one
	// large one ahead of time which we'll cut up into smaller
	// slices. If this isn't big enough later, we allocate small ones.
	var strs []string
	hint := r.upcomingHeaderKeys()
	if hint > 0 {
		if hint > 1000 {
			hint = 1000 // set a cap to avoid overallocation
		}
		strs = make([]string, hint)
	}

	m := make(MIMEHeader, hint)

	// Account for 400 bytes of overhead for the MIMEHeader, plus 200 bytes per entry.
	// Benchmarking map creation as of go1.20, a one-entry MIMEHeader is 416 bytes and large
	// MIMEHeaders average about 200 bytes per entry.
	maxMemory -= 400
	const mapEntryOverhead = 200

	// The first line cannot start with a leading space.
	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
		const errorLimit = 80 // arbitrary limit on how much of the line we'll quote
		line, err := r.readLineSlice(errorLimit)
		if err != nil {
			return m, err
		}
		return m, ProtocolError("malformed MIME header initial line: " + string(line))
	}

	for {
		// The remaining memory budget doubles as the length limit for
		// this (possibly continued) line.
		kv, err := r.readContinuedLineSlice(maxMemory, mustHaveFieldNameColon)
		if len(kv) == 0 {
			// Blank line (end of header) or a read/validation error.
			return m, err
		}

		// Key ends at first colon.
		k, v, ok := bytes.Cut(kv, colon)
		if !ok {
			return m, ProtocolError("malformed MIME header line: " + string(kv))
		}
		key, ok := canonicalMIMEHeaderKey(k)
		if !ok {
			return m, ProtocolError("malformed MIME header line: " + string(kv))
		}
		for _, c := range v {
			if !validHeaderValueByte(c) {
				return m, ProtocolError("malformed MIME header line: " + string(kv))
			}
		}

		maxHeaders--
		if maxHeaders < 0 {
			// Unlike the other error paths, this one returns a nil map.
			return nil, errMessageTooLarge
		}

		// Skip initial spaces in value.
		value := string(bytes.TrimLeft(v, " \t"))

		vv := m[key]
		if vv == nil {
			// First value for this key: charge the key and the map entry.
			maxMemory -= int64(len(key))
			maxMemory -= mapEntryOverhead
		}
		maxMemory -= int64(len(value))
		if maxMemory < 0 {
			return m, errMessageTooLarge
		}
		if vv == nil && len(strs) > 0 {
			// More than likely this will be a single-element key.
			// Most headers aren't multi-valued.
			// Set the capacity on strs[0] to 1, so any future append
			// won't extend the slice into the other strings.
			vv, strs = strs[:1:1], strs[1:]
			vv[0] = value
			m[key] = vv
		} else {
			m[key] = append(vv, value)
		}

		// kv was non-empty, so the line has been stored; an error that
		// accompanied it is reported only now.
		if err != nil {
			return m, err
		}
	}
}
   602  
   603  // noValidation is a no-op validation func for readContinuedLineSlice
   604  // that permits any lines.
   605  func noValidation(_ []byte) error { return nil }
   606  
   607  // mustHaveFieldNameColon ensures that, per RFC 7230, the
   608  // field-name is on a single line, so the first line must
   609  // contain a colon.
   610  func mustHaveFieldNameColon(line []byte) error {
   611  	if bytes.IndexByte(line, ':') < 0 {
   612  		return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line))
   613  	}
   614  	return nil
   615  }
   616  
// nl is the line separator upcomingHeaderKeys passes to bytes.Cut
// when scanning the buffered input.
var nl = []byte("\n")
   618  
   619  // upcomingHeaderKeys returns an approximation of the number of keys
   620  // that will be in this header. If it gets confused, it returns 0.
   621  func (r *Reader) upcomingHeaderKeys() (n int) {
   622  	// Try to determine the 'hint' size.
   623  	r.R.Peek(1) // force a buffer load if empty
   624  	s := r.R.Buffered()
   625  	if s == 0 {
   626  		return
   627  	}
   628  	peek, _ := r.R.Peek(s)
   629  	for len(peek) > 0 && n < 1000 {
   630  		var line []byte
   631  		line, peek, _ = bytes.Cut(peek, nl)
   632  		if len(line) == 0 || (len(line) == 1 && line[0] == '\r') {
   633  			// Blank line separating headers from the body.
   634  			break
   635  		}
   636  		if line[0] == ' ' || line[0] == '\t' {
   637  			// Folded continuation of the previous line.
   638  			continue
   639  		}
   640  		n++
   641  	}
   642  	return n
   643  }
   644  
   645  // CanonicalMIMEHeaderKey returns the canonical format of the
   646  // MIME header key s. The canonicalization converts the first
   647  // letter and any letter following a hyphen to upper case;
   648  // the rest are converted to lowercase. For example, the
   649  // canonical key for "accept-encoding" is "Accept-Encoding".
   650  // MIME header keys are assumed to be ASCII only.
   651  // If s contains a space or invalid header field bytes as
   652  // defined by RFC 9112, it is returned without modifications.
   653  func CanonicalMIMEHeaderKey(s string) string {
   654  	// Quick check for canonical encoding.
   655  	upper := true
   656  	for i := 0; i < len(s); i++ {
   657  		c := s[i]
   658  		if !validHeaderFieldByte(c) {
   659  			return s
   660  		}
   661  		if upper && 'a' <= c && c <= 'z' {
   662  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   663  			return s
   664  		}
   665  		if !upper && 'A' <= c && c <= 'Z' {
   666  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   667  			return s
   668  		}
   669  		upper = c == '-'
   670  	}
   671  	return s
   672  }
   673  
// toLower is the distance between an ASCII upper-case letter and its
// lower-case form; canonicalMIMEHeaderKey adds or subtracts it to
// change the case of a letter.
const toLower = 'a' - 'A'
   675  
   676  // validHeaderFieldByte reports whether c is a valid byte in a header
   677  // field name. RFC 7230 says:
   678  //
   679  //	header-field   = field-name ":" OWS field-value OWS
   680  //	field-name     = token
   681  //	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   682  //	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   683  //	token = 1*tchar
   684  func validHeaderFieldByte(c byte) bool {
   685  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   686  	// so that the byte c can be tested with a shift and an and.
   687  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
   688  	// and this function will return false.
   689  	const mask = 0 |
   690  		(1<<(10)-1)<<'0' |
   691  		(1<<(26)-1)<<'a' |
   692  		(1<<(26)-1)<<'A' |
   693  		1<<'!' |
   694  		1<<'#' |
   695  		1<<'$' |
   696  		1<<'%' |
   697  		1<<'&' |
   698  		1<<'\'' |
   699  		1<<'*' |
   700  		1<<'+' |
   701  		1<<'-' |
   702  		1<<'.' |
   703  		1<<'^' |
   704  		1<<'_' |
   705  		1<<'`' |
   706  		1<<'|' |
   707  		1<<'~'
   708  	return ((uint64(1)<<c)&(mask&(1<<64-1)) |
   709  		(uint64(1)<<(c-64))&(mask>>64)) != 0
   710  }
   711  
   712  // validHeaderValueByte reports whether c is a valid byte in a header
   713  // field value. RFC 7230 says:
   714  //
   715  //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   716  //	field-vchar    = VCHAR / obs-text
   717  //	obs-text       = %x80-FF
   718  //
   719  // RFC 5234 says:
   720  //
   721  //	HTAB           =  %x09
   722  //	SP             =  %x20
   723  //	VCHAR          =  %x21-7E
   724  func validHeaderValueByte(c byte) bool {
   725  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   726  	// so that the byte c can be tested with a shift and an and.
   727  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
   728  	// Since this is the obs-text range, we invert the mask to
   729  	// create a bitmap with 1s for disallowed bytes.
   730  	const mask = 0 |
   731  		(1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
   732  		1<<0x20 | // SP: %x20
   733  		1<<0x09 // HTAB: %x09
   734  	return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
   735  		(uint64(1)<<(c-64))&^(mask>>64)) == 0
   736  }
   737  
   738  // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
   739  // allowed to mutate the provided byte slice before returning the
   740  // string.
   741  //
   742  // For invalid inputs (if a contains spaces or non-token bytes), a
   743  // is unchanged and a string copy is returned.
   744  //
   745  // ok is true if the header key contains only valid characters and spaces.
   746  // ReadMIMEHeader accepts header keys containing spaces, but does not
   747  // canonicalize them.
   748  func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) {
   749  	if len(a) == 0 {
   750  		return "", false
   751  	}
   752  
   753  	// See if a looks like a header key. If not, return it unchanged.
   754  	noCanon := false
   755  	for _, c := range a {
   756  		if validHeaderFieldByte(c) {
   757  			continue
   758  		}
   759  		// Don't canonicalize.
   760  		if c == ' ' {
   761  			// We accept invalid headers with a space before the
   762  			// colon, but must not canonicalize them.
   763  			// See https://go.dev/issue/34540.
   764  			noCanon = true
   765  			continue
   766  		}
   767  		return string(a), false
   768  	}
   769  	if noCanon {
   770  		return string(a), true
   771  	}
   772  
   773  	upper := true
   774  	for i, c := range a {
   775  		// Canonicalize: first letter upper case
   776  		// and upper case after each dash.
   777  		// (Host, User-Agent, If-Modified-Since).
   778  		// MIME headers are ASCII only, so no Unicode issues.
   779  		if upper && 'a' <= c && c <= 'z' {
   780  			c -= toLower
   781  		} else if !upper && 'A' <= c && c <= 'Z' {
   782  			c += toLower
   783  		}
   784  		a[i] = c
   785  		upper = c == '-' // for next time
   786  	}
   787  	commonHeaderOnce.Do(initCommonHeader)
   788  	// The compiler recognizes m[string(byteSlice)] as a special
   789  	// case, so a copy of a's bytes into a new string does not
   790  	// happen in this map lookup:
   791  	if v := commonHeader[string(a)]; v != "" {
   792  		return v, true
   793  	}
   794  	return string(a), true
   795  }
   796  
// commonHeader interns common header strings.
// It maps each canonical key to itself and is populated by
// initCommonHeader; canonicalMIMEHeaderKey consults it after running
// commonHeaderOnce.
var commonHeader map[string]string

// commonHeaderOnce guards the one-time call to initCommonHeader.
var commonHeaderOnce sync.Once
   801  
   802  func initCommonHeader() {
   803  	commonHeader = make(map[string]string)
   804  	for _, v := range []string{
   805  		"Accept",
   806  		"Accept-Charset",
   807  		"Accept-Encoding",
   808  		"Accept-Language",
   809  		"Accept-Ranges",
   810  		"Cache-Control",
   811  		"Cc",
   812  		"Connection",
   813  		"Content-Id",
   814  		"Content-Language",
   815  		"Content-Length",
   816  		"Content-Transfer-Encoding",
   817  		"Content-Type",
   818  		"Cookie",
   819  		"Date",
   820  		"Dkim-Signature",
   821  		"Etag",
   822  		"Expires",
   823  		"From",
   824  		"Host",
   825  		"If-Modified-Since",
   826  		"If-None-Match",
   827  		"In-Reply-To",
   828  		"Last-Modified",
   829  		"Location",
   830  		"Message-Id",
   831  		"Mime-Version",
   832  		"Pragma",
   833  		"Received",
   834  		"Return-Path",
   835  		"Server",
   836  		"Set-Cookie",
   837  		"Subject",
   838  		"To",
   839  		"User-Agent",
   840  		"Via",
   841  		"X-Forwarded-For",
   842  		"X-Imforwards",
   843  		"X-Powered-By",
   844  	} {
   845  		commonHeader[v] = v
   846  	}
   847  }
   848  

View as plain text