reader.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textproto
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  	_ "unsafe" // for linkname
    18  )
    19  
// errMessageTooLarge is the error returned by the size-limited readers in
// this file (readLineSlice, readContinuedLineSlice, readMIMEHeader) when the
// input exceeds the limit they were given.
//
// TODO: This should be a distinguishable error (ErrMessageTooLarge)
// to allow mime/multipart to detect it.
var errMessageTooLarge = errors.New("message too large")
    23  
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	// R is the buffered reader that every method reads from.
	R *bufio.Reader
	// dot is the dotReader most recently handed out by DotReader, or nil
	// once it has reported EOF or an error. closeDot drains it.
	dot *dotReader
	buf []byte // a re-usable buffer for readContinuedLineSlice
}
    31  
    32  // NewReader returns a new [Reader] reading from r.
    33  //
    34  // To avoid denial of service attacks, the provided [bufio.Reader]
    35  // should be reading from an [io.LimitReader] or similar Reader to bound
    36  // the size of responses.
    37  func NewReader(r *bufio.Reader) *Reader {
    38  	return &Reader{R: r}
    39  }
    40  
    41  // ReadLine reads a single line from r,
    42  // eliding the final \n or \r\n from the returned string.
    43  func (r *Reader) ReadLine() (string, error) {
    44  	line, err := r.readLineSlice(-1)
    45  	return string(line), err
    46  }
    47  
    48  // ReadLineBytes is like [Reader.ReadLine] but returns a []byte instead of a string.
    49  func (r *Reader) ReadLineBytes() ([]byte, error) {
    50  	line, err := r.readLineSlice(-1)
    51  	if line != nil {
    52  		line = bytes.Clone(line)
    53  	}
    54  	return line, err
    55  }
    56  
    57  // readLineSlice reads a single line from r,
    58  // up to lim bytes long (or unlimited if lim is less than 0),
    59  // eliding the final \r or \r\n from the returned string.
    60  func (r *Reader) readLineSlice(lim int64) ([]byte, error) {
    61  	r.closeDot()
    62  	var line []byte
    63  	for {
    64  		l, more, err := r.R.ReadLine()
    65  		if err != nil {
    66  			return nil, err
    67  		}
    68  		if lim >= 0 && int64(len(line))+int64(len(l)) > lim {
    69  			return nil, errMessageTooLarge
    70  		}
    71  		// Avoid the copy if the first call produced a full line.
    72  		if line == nil && !more {
    73  			return l, nil
    74  		}
    75  		line = append(line, l...)
    76  		if !more {
    77  			break
    78  		}
    79  	}
    80  	return line, nil
    81  }
    82  
    83  // ReadContinuedLine reads a possibly continued line from r,
    84  // eliding the final trailing ASCII white space.
    85  // Lines after the first are considered continuations if they
    86  // begin with a space or tab character. In the returned data,
    87  // continuation lines are separated from the previous line
    88  // only by a single space: the newline and leading white space
    89  // are removed.
    90  //
    91  // For example, consider this input:
    92  //
    93  //	Line 1
    94  //	  continued...
    95  //	Line 2
    96  //
    97  // The first call to ReadContinuedLine will return "Line 1 continued..."
    98  // and the second will return "Line 2".
    99  //
   100  // Empty lines are never continued.
   101  func (r *Reader) ReadContinuedLine() (string, error) {
   102  	line, err := r.readContinuedLineSlice(-1, noValidation)
   103  	return string(line), err
   104  }
   105  
   106  // trim returns s with leading and trailing spaces and tabs removed.
   107  // It does not assume Unicode or UTF-8.
   108  func trim(s []byte) []byte {
   109  	i := 0
   110  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   111  		i++
   112  	}
   113  	n := len(s)
   114  	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
   115  		n--
   116  	}
   117  	return s[i:n]
   118  }
   119  
   120  // ReadContinuedLineBytes is like [Reader.ReadContinuedLine] but
   121  // returns a []byte instead of a string.
   122  func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
   123  	line, err := r.readContinuedLineSlice(-1, noValidation)
   124  	if line != nil {
   125  		line = bytes.Clone(line)
   126  	}
   127  	return line, err
   128  }
   129  
   130  // readContinuedLineSlice reads continued lines from the reader buffer,
   131  // returning a byte slice with all lines. The validateFirstLine function
   132  // is run on the first read line, and if it returns an error then this
   133  // error is returned from readContinuedLineSlice.
   134  // It reads up to lim bytes of data (or unlimited if lim is less than 0).
   135  func (r *Reader) readContinuedLineSlice(lim int64, validateFirstLine func([]byte) error) ([]byte, error) {
   136  	if validateFirstLine == nil {
   137  		return nil, fmt.Errorf("missing validateFirstLine func")
   138  	}
   139  
   140  	// Read the first line.
   141  	line, err := r.readLineSlice(lim)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	if len(line) == 0 { // blank line - no continuation
   146  		return line, nil
   147  	}
   148  
   149  	if err := validateFirstLine(line); err != nil {
   150  		return nil, err
   151  	}
   152  
   153  	// Optimistically assume that we have started to buffer the next line
   154  	// and it starts with an ASCII letter (the next header key), or a blank
   155  	// line, so we can avoid copying that buffered data around in memory
   156  	// and skipping over non-existent whitespace.
   157  	if r.R.Buffered() > 1 {
   158  		peek, _ := r.R.Peek(2)
   159  		if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') ||
   160  			len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' {
   161  			return trim(line), nil
   162  		}
   163  	}
   164  
   165  	// ReadByte or the next readLineSlice will flush the read buffer;
   166  	// copy the slice into buf.
   167  	r.buf = append(r.buf[:0], trim(line)...)
   168  
   169  	if lim < 0 {
   170  		lim = math.MaxInt64
   171  	}
   172  	lim -= int64(len(r.buf))
   173  
   174  	// Read continuation lines.
   175  	for r.skipSpace() > 0 {
   176  		r.buf = append(r.buf, ' ')
   177  		if int64(len(r.buf)) >= lim {
   178  			return nil, errMessageTooLarge
   179  		}
   180  		line, err := r.readLineSlice(lim - int64(len(r.buf)))
   181  		if err != nil {
   182  			break
   183  		}
   184  		r.buf = append(r.buf, trim(line)...)
   185  	}
   186  	return r.buf, nil
   187  }
   188  
   189  // skipSpace skips R over all spaces and returns the number of bytes skipped.
   190  func (r *Reader) skipSpace() int {
   191  	n := 0
   192  	for {
   193  		c, err := r.R.ReadByte()
   194  		if err != nil {
   195  			// Bufio will keep err until next read.
   196  			break
   197  		}
   198  		if c != ' ' && c != '\t' {
   199  			r.R.UnreadByte()
   200  			break
   201  		}
   202  		n++
   203  	}
   204  	return n
   205  }
   206  
   207  func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
   208  	line, err := r.ReadLine()
   209  	if err != nil {
   210  		return
   211  	}
   212  	return parseCodeLine(line, expectCode)
   213  }
   214  
   215  func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
   216  	if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   217  		err = ProtocolError("short response: " + line)
   218  		return
   219  	}
   220  	continued = line[3] == '-'
   221  	code, err = strconv.Atoi(line[0:3])
   222  	if err != nil || code < 100 {
   223  		err = ProtocolError("invalid response code: " + line)
   224  		return
   225  	}
   226  	message = line[4:]
   227  	if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   228  		10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   229  		100 <= expectCode && expectCode < 1000 && code != expectCode {
   230  		err = &Error{code, message}
   231  	}
   232  	return
   233  }
   234  
   235  // ReadCodeLine reads a response code line of the form
   236  //
   237  //	code message
   238  //
   239  // where code is a three-digit status code and the message
   240  // extends to the rest of the line. An example of such a line is:
   241  //
   242  //	220 plan9.bell-labs.com ESMTP
   243  //
   244  // If the prefix of the status does not match the digits in expectCode,
   245  // ReadCodeLine returns with err set to &Error{code, message}.
   246  // For example, if expectCode is 31, an error will be returned if
   247  // the status is not in the range [310,319].
   248  //
   249  // If the response is multi-line, ReadCodeLine returns an error.
   250  //
   251  // An expectCode <= 0 disables the check of the status code.
   252  func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
   253  	code, continued, message, err := r.readCodeLine(expectCode)
   254  	if err == nil && continued {
   255  		err = ProtocolError("unexpected multi-line response: " + message)
   256  	}
   257  	return
   258  }
   259  
   260  // ReadResponse reads a multi-line response of the form:
   261  //
   262  //	code-message line 1
   263  //	code-message line 2
   264  //	...
   265  //	code message line n
   266  //
   267  // where code is a three-digit status code. The first line starts with the
   268  // code and a hyphen. The response is terminated by a line that starts
   269  // with the same code followed by a space. Each line in message is
   270  // separated by a newline (\n).
   271  //
   272  // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
   273  // details of another form of response accepted:
   274  //
   275  //	code-message line 1
   276  //	message line 2
   277  //	...
   278  //	code message line n
   279  //
   280  // If the prefix of the status does not match the digits in expectCode,
   281  // ReadResponse returns with err set to &Error{code, message}.
   282  // For example, if expectCode is 31, an error will be returned if
   283  // the status is not in the range [310,319].
   284  //
   285  // An expectCode <= 0 disables the check of the status code.
   286  func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
   287  	code, continued, first, err := r.readCodeLine(expectCode)
   288  	multi := continued
   289  	var messageBuilder strings.Builder
   290  	messageBuilder.WriteString(first)
   291  	for continued {
   292  		line, err := r.ReadLine()
   293  		if err != nil {
   294  			return 0, "", err
   295  		}
   296  
   297  		var code2 int
   298  		var moreMessage string
   299  		code2, continued, moreMessage, err = parseCodeLine(line, 0)
   300  		if err != nil || code2 != code {
   301  			messageBuilder.WriteByte('\n')
   302  			messageBuilder.WriteString(strings.TrimRight(line, "\r\n"))
   303  			continued = true
   304  			continue
   305  		}
   306  		messageBuilder.WriteByte('\n')
   307  		messageBuilder.WriteString(moreMessage)
   308  	}
   309  	message = messageBuilder.String()
   310  	if err != nil && multi && message != "" {
   311  		// replace one line error message with all lines (full message)
   312  		err = &Error{code, message}
   313  	}
   314  	return
   315  }
   316  
   317  // DotReader returns a new [Reader] that satisfies Reads using the
   318  // decoded text of a dot-encoded block read from r.
   319  // The returned Reader is only valid until the next call
   320  // to a method on r.
   321  //
   322  // Dot encoding is a common framing used for data blocks
   323  // in text protocols such as SMTP.  The data consists of a sequence
   324  // of lines, each of which ends in "\r\n".  The sequence itself
   325  // ends at a line containing just a dot: ".\r\n".  Lines beginning
   326  // with a dot are escaped with an additional dot to avoid
   327  // looking like the end of the sequence.
   328  //
   329  // The decoded form returned by the Reader's Read method
   330  // rewrites the "\r\n" line endings into the simpler "\n",
   331  // removes leading dot escapes if present, and stops with error [io.EOF]
   332  // after consuming (and discarding) the end-of-sequence line.
   333  func (r *Reader) DotReader() io.Reader {
   334  	r.closeDot()
   335  	r.dot = &dotReader{r: r}
   336  	return r.dot
   337  }
   338  
// dotReader is the io.Reader handed out by Reader.DotReader. It decodes a
// dot-encoded block read from r.
type dotReader struct {
	r     *Reader // reader supplying the encoded bytes
	state int     // position in Read's state machine, kept across calls; zero is beginning-of-line
}
   343  
   344  // Read satisfies reads by decoding dot-encoded data read from d.r.
   345  func (d *dotReader) Read(b []byte) (n int, err error) {
   346  	// Run data through a simple state machine to
   347  	// elide leading dots, rewrite trailing \r\n into \n,
   348  	// and detect ending .\r\n line.
   349  	const (
   350  		stateBeginLine = iota // beginning of line; initial state; must be zero
   351  		stateDot              // read . at beginning of line
   352  		stateDotCR            // read .\r at beginning of line
   353  		stateCR               // read \r (possibly at end of line)
   354  		stateData             // reading data in middle of line
   355  		stateEOF              // reached .\r\n end marker line
   356  	)
   357  	br := d.r.R
   358  	for n < len(b) && d.state != stateEOF {
   359  		var c byte
   360  		c, err = br.ReadByte()
   361  		if err != nil {
   362  			if err == io.EOF {
   363  				err = io.ErrUnexpectedEOF
   364  			}
   365  			break
   366  		}
   367  		switch d.state {
   368  		case stateBeginLine:
   369  			if c == '.' {
   370  				d.state = stateDot
   371  				continue
   372  			}
   373  			if c == '\r' {
   374  				d.state = stateCR
   375  				continue
   376  			}
   377  			d.state = stateData
   378  
   379  		case stateDot:
   380  			if c == '\r' {
   381  				d.state = stateDotCR
   382  				continue
   383  			}
   384  			if c == '\n' {
   385  				d.state = stateEOF
   386  				continue
   387  			}
   388  			d.state = stateData
   389  
   390  		case stateDotCR:
   391  			if c == '\n' {
   392  				d.state = stateEOF
   393  				continue
   394  			}
   395  			// Not part of .\r\n.
   396  			// Consume leading dot and emit saved \r.
   397  			br.UnreadByte()
   398  			c = '\r'
   399  			d.state = stateData
   400  
   401  		case stateCR:
   402  			if c == '\n' {
   403  				d.state = stateBeginLine
   404  				break
   405  			}
   406  			// Not part of \r\n. Emit saved \r
   407  			br.UnreadByte()
   408  			c = '\r'
   409  			d.state = stateData
   410  
   411  		case stateData:
   412  			if c == '\r' {
   413  				d.state = stateCR
   414  				continue
   415  			}
   416  			if c == '\n' {
   417  				d.state = stateBeginLine
   418  			}
   419  		}
   420  		b[n] = c
   421  		n++
   422  	}
   423  	if err == nil && d.state == stateEOF {
   424  		err = io.EOF
   425  	}
   426  	if err != nil && d.r.dot == d {
   427  		d.r.dot = nil
   428  	}
   429  	return
   430  }
   431  
   432  // closeDot drains the current DotReader if any,
   433  // making sure that it reads until the ending dot line.
   434  func (r *Reader) closeDot() {
   435  	if r.dot == nil {
   436  		return
   437  	}
   438  	buf := make([]byte, 128)
   439  	for r.dot != nil {
   440  		// When Read reaches EOF or an error,
   441  		// it will set r.dot == nil.
   442  		r.dot.Read(buf)
   443  	}
   444  }
   445  
   446  // ReadDotBytes reads a dot-encoding and returns the decoded data.
   447  //
   448  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   449  func (r *Reader) ReadDotBytes() ([]byte, error) {
   450  	return io.ReadAll(r.DotReader())
   451  }
   452  
   453  // ReadDotLines reads a dot-encoding and returns a slice
   454  // containing the decoded lines, with the final \r\n or \n elided from each.
   455  //
   456  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   457  func (r *Reader) ReadDotLines() ([]string, error) {
   458  	// We could use ReadDotBytes and then Split it,
   459  	// but reading a line at a time avoids needing a
   460  	// large contiguous block of memory and is simpler.
   461  	var v []string
   462  	var err error
   463  	for {
   464  		var line string
   465  		line, err = r.ReadLine()
   466  		if err != nil {
   467  			if err == io.EOF {
   468  				err = io.ErrUnexpectedEOF
   469  			}
   470  			break
   471  		}
   472  
   473  		// Dot by itself marks end; otherwise cut one dot.
   474  		if len(line) > 0 && line[0] == '.' {
   475  			if len(line) == 1 {
   476  				break
   477  			}
   478  			line = line[1:]
   479  		}
   480  		v = append(v, line)
   481  	}
   482  	return v, err
   483  }
   484  
// colon is the separator readMIMEHeader passes to bytes.Cut to split a
// header line into key and value.
var colon = []byte(":")
   486  
   487  // ReadMIMEHeader reads a MIME-style header from r.
   488  // The header is a sequence of possibly continued Key: Value lines
   489  // ending in a blank line.
   490  // The returned map m maps [CanonicalMIMEHeaderKey](key) to a
   491  // sequence of values in the same order encountered in the input.
   492  //
   493  // For example, consider this input:
   494  //
   495  //	My-Key: Value 1
   496  //	Long-Key: Even
   497  //	       Longer Value
   498  //	My-Key: Value 2
   499  //
   500  // Given that input, ReadMIMEHeader returns the map:
   501  //
   502  //	map[string][]string{
   503  //		"My-Key": {"Value 1", "Value 2"},
   504  //		"Long-Key": {"Even Longer Value"},
   505  //	}
   506  func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
   507  	return readMIMEHeader(r, math.MaxInt64, math.MaxInt64)
   508  }
   509  
   510  // readMIMEHeader is accessed from mime/multipart.
   511  //go:linkname readMIMEHeader
   512  
   513  // readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size.
   514  // It is called by the mime/multipart package.
   515  func readMIMEHeader(r *Reader, maxMemory, maxHeaders int64) (MIMEHeader, error) {
   516  	// Avoid lots of small slice allocations later by allocating one
   517  	// large one ahead of time which we'll cut up into smaller
   518  	// slices. If this isn't big enough later, we allocate small ones.
   519  	var strs []string
   520  	hint := r.upcomingHeaderKeys()
   521  	if hint > 0 {
   522  		if hint > 1000 {
   523  			hint = 1000 // set a cap to avoid overallocation
   524  		}
   525  		strs = make([]string, hint)
   526  	}
   527  
   528  	m := make(MIMEHeader, hint)
   529  
   530  	// Account for 400 bytes of overhead for the MIMEHeader, plus 200 bytes per entry.
   531  	// Benchmarking map creation as of go1.20, a one-entry MIMEHeader is 416 bytes and large
   532  	// MIMEHeaders average about 200 bytes per entry.
   533  	maxMemory -= 400
   534  	const mapEntryOverhead = 200
   535  
   536  	// The first line cannot start with a leading space.
   537  	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
   538  		const errorLimit = 80 // arbitrary limit on how much of the line we'll quote
   539  		line, err := r.readLineSlice(errorLimit)
   540  		if err != nil {
   541  			return m, err
   542  		}
   543  		return m, ProtocolError("malformed MIME header initial line: " + string(line))
   544  	}
   545  
   546  	for {
   547  		kv, err := r.readContinuedLineSlice(maxMemory, mustHaveFieldNameColon)
   548  		if len(kv) == 0 {
   549  			return m, err
   550  		}
   551  
   552  		// Key ends at first colon.
   553  		k, v, ok := bytes.Cut(kv, colon)
   554  		if !ok {
   555  			return m, ProtocolError("malformed MIME header line: " + string(kv))
   556  		}
   557  		key, ok := canonicalMIMEHeaderKey(k)
   558  		if !ok {
   559  			return m, ProtocolError("malformed MIME header line: " + string(kv))
   560  		}
   561  		for _, c := range v {
   562  			if !validHeaderValueByte(c) {
   563  				return m, ProtocolError("malformed MIME header line: " + string(kv))
   564  			}
   565  		}
   566  
   567  		maxHeaders--
   568  		if maxHeaders < 0 {
   569  			return nil, errMessageTooLarge
   570  		}
   571  
   572  		// Skip initial spaces in value.
   573  		value := string(bytes.TrimLeft(v, " \t"))
   574  
   575  		vv := m[key]
   576  		if vv == nil {
   577  			maxMemory -= int64(len(key))
   578  			maxMemory -= mapEntryOverhead
   579  		}
   580  		maxMemory -= int64(len(value))
   581  		if maxMemory < 0 {
   582  			return m, errMessageTooLarge
   583  		}
   584  		if vv == nil && len(strs) > 0 {
   585  			// More than likely this will be a single-element key.
   586  			// Most headers aren't multi-valued.
   587  			// Set the capacity on strs[0] to 1, so any future append
   588  			// won't extend the slice into the other strings.
   589  			vv, strs = strs[:1:1], strs[1:]
   590  			vv[0] = value
   591  			m[key] = vv
   592  		} else {
   593  			m[key] = append(vv, value)
   594  		}
   595  
   596  		if err != nil {
   597  			return m, err
   598  		}
   599  	}
   600  }
   601  
   602  // noValidation is a no-op validation func for readContinuedLineSlice
   603  // that permits any lines.
   604  func noValidation(_ []byte) error { return nil }
   605  
   606  // mustHaveFieldNameColon ensures that, per RFC 7230, the
   607  // field-name is on a single line, so the first line must
   608  // contain a colon.
   609  func mustHaveFieldNameColon(line []byte) error {
   610  	if bytes.IndexByte(line, ':') < 0 {
   611  		return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line))
   612  	}
   613  	return nil
   614  }
   615  
// nl is the line separator upcomingHeaderKeys passes to bytes.Cut when
// it splits the peeked buffer into lines.
var nl = []byte("\n")
   617  
   618  // upcomingHeaderKeys returns an approximation of the number of keys
   619  // that will be in this header. If it gets confused, it returns 0.
   620  func (r *Reader) upcomingHeaderKeys() (n int) {
   621  	// Try to determine the 'hint' size.
   622  	r.R.Peek(1) // force a buffer load if empty
   623  	s := r.R.Buffered()
   624  	if s == 0 {
   625  		return
   626  	}
   627  	peek, _ := r.R.Peek(s)
   628  	for len(peek) > 0 && n < 1000 {
   629  		var line []byte
   630  		line, peek, _ = bytes.Cut(peek, nl)
   631  		if len(line) == 0 || (len(line) == 1 && line[0] == '\r') {
   632  			// Blank line separating headers from the body.
   633  			break
   634  		}
   635  		if line[0] == ' ' || line[0] == '\t' {
   636  			// Folded continuation of the previous line.
   637  			continue
   638  		}
   639  		n++
   640  	}
   641  	return n
   642  }
   643  
   644  // CanonicalMIMEHeaderKey returns the canonical format of the
   645  // MIME header key s. The canonicalization converts the first
   646  // letter and any letter following a hyphen to upper case;
   647  // the rest are converted to lowercase. For example, the
   648  // canonical key for "accept-encoding" is "Accept-Encoding".
   649  // MIME header keys are assumed to be ASCII only.
   650  // If s contains a space or invalid header field bytes as
   651  // defined by RFC 9112, it is returned without modifications.
   652  func CanonicalMIMEHeaderKey(s string) string {
   653  	// Quick check for canonical encoding.
   654  	upper := true
   655  	for i := 0; i < len(s); i++ {
   656  		c := s[i]
   657  		if !validHeaderFieldByte(c) {
   658  			return s
   659  		}
   660  		if upper && 'a' <= c && c <= 'z' {
   661  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   662  			return s
   663  		}
   664  		if !upper && 'A' <= c && c <= 'Z' {
   665  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   666  			return s
   667  		}
   668  		upper = c == '-'
   669  	}
   670  	return s
   671  }
   672  
// toLower is the offset between an ASCII upper-case letter and its
// lower-case form. canonicalMIMEHeaderKey adds it to lower-case a byte
// and subtracts it to upper-case one.
const toLower = 'a' - 'A'
   674  
   675  // validHeaderFieldByte reports whether c is a valid byte in a header
   676  // field name. RFC 7230 says:
   677  //
   678  //	header-field   = field-name ":" OWS field-value OWS
   679  //	field-name     = token
   680  //	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   681  //	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   682  //	token = 1*tchar
   683  func validHeaderFieldByte(c byte) bool {
   684  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   685  	// so that the byte c can be tested with a shift and an and.
   686  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
   687  	// and this function will return false.
   688  	const mask = 0 |
   689  		(1<<(10)-1)<<'0' |
   690  		(1<<(26)-1)<<'a' |
   691  		(1<<(26)-1)<<'A' |
   692  		1<<'!' |
   693  		1<<'#' |
   694  		1<<'$' |
   695  		1<<'%' |
   696  		1<<'&' |
   697  		1<<'\'' |
   698  		1<<'*' |
   699  		1<<'+' |
   700  		1<<'-' |
   701  		1<<'.' |
   702  		1<<'^' |
   703  		1<<'_' |
   704  		1<<'`' |
   705  		1<<'|' |
   706  		1<<'~'
   707  	return ((uint64(1)<<c)&(mask&(1<<64-1)) |
   708  		(uint64(1)<<(c-64))&(mask>>64)) != 0
   709  }
   710  
   711  // validHeaderValueByte reports whether c is a valid byte in a header
   712  // field value. RFC 7230 says:
   713  //
   714  //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   715  //	field-vchar    = VCHAR / obs-text
   716  //	obs-text       = %x80-FF
   717  //
   718  // RFC 5234 says:
   719  //
   720  //	HTAB           =  %x09
   721  //	SP             =  %x20
   722  //	VCHAR          =  %x21-7E
   723  func validHeaderValueByte(c byte) bool {
   724  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   725  	// so that the byte c can be tested with a shift and an and.
   726  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
   727  	// Since this is the obs-text range, we invert the mask to
   728  	// create a bitmap with 1s for disallowed bytes.
   729  	const mask = 0 |
   730  		(1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
   731  		1<<0x20 | // SP: %x20
   732  		1<<0x09 // HTAB: %x09
   733  	return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
   734  		(uint64(1)<<(c-64))&^(mask>>64)) == 0
   735  }
   736  
   737  // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
   738  // allowed to mutate the provided byte slice before returning the
   739  // string.
   740  //
   741  // For invalid inputs (if a contains spaces or non-token bytes), a
   742  // is unchanged and a string copy is returned.
   743  //
   744  // ok is true if the header key contains only valid characters and spaces.
   745  // ReadMIMEHeader accepts header keys containing spaces, but does not
   746  // canonicalize them.
   747  func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) {
   748  	if len(a) == 0 {
   749  		return "", false
   750  	}
   751  
   752  	// See if a looks like a header key. If not, return it unchanged.
   753  	noCanon := false
   754  	for _, c := range a {
   755  		if validHeaderFieldByte(c) {
   756  			continue
   757  		}
   758  		// Don't canonicalize.
   759  		if c == ' ' {
   760  			// We accept invalid headers with a space before the
   761  			// colon, but must not canonicalize them.
   762  			// See https://go.dev/issue/34540.
   763  			noCanon = true
   764  			continue
   765  		}
   766  		return string(a), false
   767  	}
   768  	if noCanon {
   769  		return string(a), true
   770  	}
   771  
   772  	upper := true
   773  	for i, c := range a {
   774  		// Canonicalize: first letter upper case
   775  		// and upper case after each dash.
   776  		// (Host, User-Agent, If-Modified-Since).
   777  		// MIME headers are ASCII only, so no Unicode issues.
   778  		if upper && 'a' <= c && c <= 'z' {
   779  			c -= toLower
   780  		} else if !upper && 'A' <= c && c <= 'Z' {
   781  			c += toLower
   782  		}
   783  		a[i] = c
   784  		upper = c == '-' // for next time
   785  	}
   786  	commonHeaderOnce.Do(initCommonHeader)
   787  	// The compiler recognizes m[string(byteSlice)] as a special
   788  	// case, so a copy of a's bytes into a new string does not
   789  	// happen in this map lookup:
   790  	if v := commonHeader[string(a)]; v != "" {
   791  		return v, true
   792  	}
   793  	return string(a), true
   794  }
   795  
// commonHeader interns common header strings.
// It is filled in by initCommonHeader and looked up by canonicalMIMEHeaderKey.
var commonHeader map[string]string

// commonHeaderOnce guards the one-time call to initCommonHeader made
// from canonicalMIMEHeaderKey.
var commonHeaderOnce sync.Once
   800  
   801  func initCommonHeader() {
   802  	commonHeader = make(map[string]string)
   803  	for _, v := range []string{
   804  		"Accept",
   805  		"Accept-Charset",
   806  		"Accept-Encoding",
   807  		"Accept-Language",
   808  		"Accept-Ranges",
   809  		"Cache-Control",
   810  		"Cc",
   811  		"Connection",
   812  		"Content-Id",
   813  		"Content-Language",
   814  		"Content-Length",
   815  		"Content-Transfer-Encoding",
   816  		"Content-Type",
   817  		"Cookie",
   818  		"Date",
   819  		"Dkim-Signature",
   820  		"Etag",
   821  		"Expires",
   822  		"From",
   823  		"Host",
   824  		"If-Modified-Since",
   825  		"If-None-Match",
   826  		"In-Reply-To",
   827  		"Last-Modified",
   828  		"Location",
   829  		"Message-Id",
   830  		"Mime-Version",
   831  		"Pragma",
   832  		"Received",
   833  		"Return-Path",
   834  		"Server",
   835  		"Set-Cookie",
   836  		"Subject",
   837  		"To",
   838  		"User-Agent",
   839  		"Via",
   840  		"X-Forwarded-For",
   841  		"X-Imforwards",
   842  		"X-Powered-By",
   843  	} {
   844  		commonHeader[v] = v
   845  	}
   846  }
   847
View as plain text