Source file src/net/mail/message.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322 and
     9  extended by RFC 6532.
    10  Notable divergences:
    11    - Obsolete address formats are not parsed, including addresses with
    12      embedded route information.
    13    - The full range of spacing (the CFWS syntax element) is not supported,
    14      such as breaking addresses across lines.
    15    - No unicode normalization is performed.
    16    - The special characters ()[]:;@\, are allowed to appear unquoted in names.
    17  */
    18  package mail
    19  
    20  import (
    21  	"bufio"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"log"
    26  	"mime"
    27  	"net/textproto"
    28  	"strings"
    29  	"sync"
    30  	"time"
    31  	"unicode/utf8"
    32  )
    33  
    34  var debug = debugT(false)
    35  
    36  type debugT bool
    37  
    38  func (d debugT) Printf(format string, args ...any) {
    39  	if d {
    40  		log.Printf(format, args...)
    41  	}
    42  }
    43  
// A Message represents a parsed mail message.
// ReadMessage is the function in this file that constructs one.
type Message struct {
	Header Header    // parsed header fields
	Body   io.Reader // remainder of the input following the header; see ReadMessage
}
    49  
    50  // ReadMessage reads a message from r.
    51  // The headers are parsed, and the body of the message will be available
    52  // for reading from msg.Body.
    53  func ReadMessage(r io.Reader) (msg *Message, err error) {
    54  	tp := textproto.NewReader(bufio.NewReader(r))
    55  
    56  	hdr, err := tp.ReadMIMEHeader()
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	return &Message{
    62  		Header: Header(hdr),
    63  		Body:   tp.R,
    64  	}, nil
    65  }
    66  
    67  // Layouts suitable for passing to time.Parse.
    68  // These are tried in order.
    69  var (
    70  	dateLayoutsBuildOnce sync.Once
    71  	dateLayouts          []string
    72  )
    73  
    74  func buildDateLayouts() {
    75  	// Generate layouts based on RFC 5322, section 3.3.
    76  
    77  	dows := [...]string{"", "Mon, "}   // day-of-week
    78  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    79  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    80  	seconds := [...]string{":05", ""}  // second
    81  	// "-0700 (MST)" is not in RFC 5322, but is common.
    82  	zones := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...
    83  
    84  	for _, dow := range dows {
    85  		for _, day := range days {
    86  			for _, year := range years {
    87  				for _, second := range seconds {
    88  					for _, zone := range zones {
    89  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    90  						dateLayouts = append(dateLayouts, s)
    91  					}
    92  				}
    93  			}
    94  		}
    95  	}
    96  }
    97  
    98  // ParseDate parses an RFC 5322 date string.
    99  func ParseDate(date string) (time.Time, error) {
   100  	dateLayoutsBuildOnce.Do(buildDateLayouts)
   101  	// CR and LF must match and are tolerated anywhere in the date field.
   102  	date = strings.ReplaceAll(date, "\r\n", "")
   103  	if strings.Contains(date, "\r") {
   104  		return time.Time{}, errors.New("mail: header has a CR without LF")
   105  	}
   106  	// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
   107  	p := addrParser{date, nil}
   108  	p.skipSpace()
   109  
   110  	// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
   111  	// zone length is always 5 chars unless obsolete (obs-zone)
   112  	if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
   113  		date = p.s[:ind+5]
   114  		p.s = p.s[ind+5:]
   115  	} else {
   116  		ind := strings.Index(p.s, "T")
   117  		if ind == 0 {
   118  			// In this case we have the following date formats:
   119  			// * Thu, 20 Nov 1997 09:55:06 MDT
   120  			// * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
   121  			// * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
   122  			ind = strings.Index(p.s[1:], "T")
   123  			if ind != -1 {
   124  				ind++
   125  			}
   126  		}
   127  
   128  		if ind != -1 && len(p.s) >= ind+5 {
   129  			// The last letter T of the obsolete time zone is checked when no standard time zone is found.
   130  			// If T is misplaced, the date to parse is garbage.
   131  			date = p.s[:ind+1]
   132  			p.s = p.s[ind+1:]
   133  		}
   134  	}
   135  	if !p.skipCFWS() {
   136  		return time.Time{}, errors.New("mail: misformatted parenthetical comment")
   137  	}
   138  	for _, layout := range dateLayouts {
   139  		t, err := time.Parse(layout, date)
   140  		if err == nil {
   141  			return t, nil
   142  		}
   143  	}
   144  	return time.Time{}, errors.New("mail: header could not be parsed")
   145  }
   146  
   147  // A Header represents the key-value pairs in a mail message header.
   148  type Header map[string][]string
   149  
   150  // Get gets the first value associated with the given key.
   151  // It is case insensitive; CanonicalMIMEHeaderKey is used
   152  // to canonicalize the provided key.
   153  // If there are no values associated with the key, Get returns "".
   154  // To access multiple values of a key, or to use non-canonical keys,
   155  // access the map directly.
   156  func (h Header) Get(key string) string {
   157  	return textproto.MIMEHeader(h).Get(key)
   158  }
   159  
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when Get yields an empty string for the requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
   161  
   162  // Date parses the Date header field.
   163  func (h Header) Date() (time.Time, error) {
   164  	hdr := h.Get("Date")
   165  	if hdr == "" {
   166  		return time.Time{}, ErrHeaderNotPresent
   167  	}
   168  	return ParseDate(hdr)
   169  }
   170  
   171  // AddressList parses the named header field as a list of addresses.
   172  func (h Header) AddressList(key string) ([]*Address, error) {
   173  	hdr := h.Get(key)
   174  	if hdr == "" {
   175  		return nil, ErrHeaderNotPresent
   176  	}
   177  	return ParseAddressList(hdr)
   178  }
   179  
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
// The String method renders it back into header form.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
   187  
   188  // ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   189  func ParseAddress(address string) (*Address, error) {
   190  	return (&addrParser{s: address}).parseSingleAddress()
   191  }
   192  
   193  // ParseAddressList parses the given string as a list of addresses.
   194  func ParseAddressList(list string) ([]*Address, error) {
   195  	return (&addrParser{s: list}).parseAddressList()
   196  }
   197  
// An AddressParser is an RFC 5322 address parser.
// With a nil WordDecoder, encoded-words are decoded by the package's
// own rfc2047Decoder instead.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	WordDecoder *mime.WordDecoder
}
   203  
   204  // Parse parses a single RFC 5322 address of the
   205  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
   206  func (p *AddressParser) Parse(address string) (*Address, error) {
   207  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   208  }
   209  
   210  // ParseList parses the given string as a list of comma-separated addresses
   211  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   212  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   213  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   214  }
   215  
   216  // String formats the address as a valid RFC 5322 address.
   217  // If the address's name contains non-ASCII characters
   218  // the name will be rendered according to RFC 2047.
   219  func (a *Address) String() string {
   220  	// Format address local@domain
   221  	at := strings.LastIndex(a.Address, "@")
   222  	var local, domain string
   223  	if at < 0 {
   224  		// This is a malformed address ("@" is required in addr-spec);
   225  		// treat the whole address as local-part.
   226  		local = a.Address
   227  	} else {
   228  		local, domain = a.Address[:at], a.Address[at+1:]
   229  	}
   230  
   231  	// Add quotes if needed
   232  	quoteLocal := false
   233  	for i, r := range local {
   234  		if isAtext(r, false, false) {
   235  			continue
   236  		}
   237  		if r == '.' {
   238  			// Dots are okay if they are surrounded by atext.
   239  			// We only need to check that the previous byte is
   240  			// not a dot, and this isn't the end of the string.
   241  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   242  				continue
   243  			}
   244  		}
   245  		quoteLocal = true
   246  		break
   247  	}
   248  	if quoteLocal {
   249  		local = quoteString(local)
   250  
   251  	}
   252  
   253  	s := "<" + local + "@" + domain + ">"
   254  
   255  	if a.Name == "" {
   256  		return s
   257  	}
   258  
   259  	// If every character is printable ASCII, quoting is simple.
   260  	allPrintable := true
   261  	for _, r := range a.Name {
   262  		// isWSP here should actually be isFWS,
   263  		// but we don't support folding yet.
   264  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
   265  			allPrintable = false
   266  			break
   267  		}
   268  	}
   269  	if allPrintable {
   270  		return quoteString(a.Name) + " " + s
   271  	}
   272  
   273  	// Text in an encoded-word in a display-name must not contain certain
   274  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
   275  	// When this is the case encode the name using base64 encoding.
   276  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   277  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   278  	}
   279  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   280  }
   281  
// addrParser holds the state of an in-progress parse.
// Its consume* and skip* methods advance s past the input they accept.
type addrParser struct {
	s   string            // input not yet consumed
	dec *mime.WordDecoder // may be nil
}
   286  
   287  func (p *addrParser) parseAddressList() ([]*Address, error) {
   288  	var list []*Address
   289  	for {
   290  		p.skipSpace()
   291  
   292  		// allow skipping empty entries (RFC5322 obs-addr-list)
   293  		if p.consume(',') {
   294  			continue
   295  		}
   296  
   297  		addrs, err := p.parseAddress(true)
   298  		if err != nil {
   299  			return nil, err
   300  		}
   301  		list = append(list, addrs...)
   302  
   303  		if !p.skipCFWS() {
   304  			return nil, errors.New("mail: misformatted parenthetical comment")
   305  		}
   306  		if p.empty() {
   307  			break
   308  		}
   309  		if p.peek() != ',' {
   310  			return nil, errors.New("mail: expected comma")
   311  		}
   312  
   313  		// Skip empty entries for obs-addr-list.
   314  		for p.consume(',') {
   315  			p.skipSpace()
   316  		}
   317  		if p.empty() {
   318  			break
   319  		}
   320  	}
   321  	return list, nil
   322  }
   323  
   324  func (p *addrParser) parseSingleAddress() (*Address, error) {
   325  	addrs, err := p.parseAddress(true)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	if !p.skipCFWS() {
   330  		return nil, errors.New("mail: misformatted parenthetical comment")
   331  	}
   332  	if !p.empty() {
   333  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   334  	}
   335  	if len(addrs) == 0 {
   336  		return nil, errors.New("mail: empty group")
   337  	}
   338  	if len(addrs) > 1 {
   339  		return nil, errors.New("mail: group with multiple addresses")
   340  	}
   341  	return addrs[0], nil
   342  }
   343  
// parseAddress parses a single RFC 5322 address at the start of p.
//
// A mailbox yields a one-element slice. When handleGroup is true a
// group ("display-name: ... ;") is also accepted and the slice holds
// its members, which may be none; consumeGroupList passes false so
// that groups cannot nest.
func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
	debug.Printf("parseAddress: %q", p.s)
	p.skipSpace()
	if p.empty() {
		return nil, errors.New("mail: no address")
	}

	// address = mailbox / group
	// mailbox = name-addr / addr-spec
	// group = display-name ":" [group-list] ";" [CFWS]

	// addr-spec has a more restricted grammar than name-addr,
	// so try parsing it first, and fallback to name-addr.
	// TODO(dsymonds): Is this really correct?
	spec, err := p.consumeAddrSpec()
	if err == nil {
		// Bare addr-spec. A comment right after it, as in
		// "user@example.com (Full Name)", supplies the display name.
		var displayName string
		p.skipSpace()
		if !p.empty() && p.peek() == '(' {
			displayName, err = p.consumeDisplayNameComment()
			if err != nil {
				return nil, err
			}
		}

		// err is always nil on this path.
		return []*Address{{
			Name:    displayName,
			Address: spec,
		}}, err
	}
	// consumeAddrSpec restored p on failure, so the name-addr and group
	// forms are parsed from the same starting point.
	debug.Printf("parseAddress: not an addr-spec: %v", err)
	debug.Printf("parseAddress: state is now %q", p.s)

	// display-name
	var displayName string
	if p.peek() != '<' {
		displayName, err = p.consumePhrase()
		if err != nil {
			return nil, err
		}
	}
	debug.Printf("parseAddress: displayName=%q", displayName)

	p.skipSpace()
	if handleGroup {
		// A ':' after the display name introduces a group.
		if p.consume(':') {
			return p.consumeGroupList()
		}
	}
	// angle-addr = "<" addr-spec ">"
	if !p.consume('<') {
		// No angle-addr follows; pick the error message by checking
		// whether the text read so far could have been a local part.
		atext := true
		for _, r := range displayName {
			if !isAtext(r, true, false) {
				atext = false
				break
			}
		}
		if atext {
			// The input is like "foo.bar"; it's possible the input
			// meant to be "foo.bar@domain", or "foo.bar <...>".
			return nil, errors.New("mail: missing '@' or angle-addr")
		}
		// The input is like "Full Name", which couldn't possibly be a
		// valid email address if followed by "@domain"; the input
		// likely meant to be "Full Name <...>".
		return nil, errors.New("mail: no angle-addr")
	}
	spec, err = p.consumeAddrSpec()
	if err != nil {
		return nil, err
	}
	if !p.consume('>') {
		return nil, errors.New("mail: unclosed angle-addr")
	}
	debug.Printf("parseAddress: spec=%q", spec)

	return []*Address{{
		Name:    displayName,
		Address: spec,
	}}, nil
}
   427  
   428  func (p *addrParser) consumeGroupList() ([]*Address, error) {
   429  	var group []*Address
   430  	// handle empty group.
   431  	p.skipSpace()
   432  	if p.consume(';') {
   433  		p.skipCFWS()
   434  		return group, nil
   435  	}
   436  
   437  	for {
   438  		p.skipSpace()
   439  		// embedded groups not allowed.
   440  		addrs, err := p.parseAddress(false)
   441  		if err != nil {
   442  			return nil, err
   443  		}
   444  		group = append(group, addrs...)
   445  
   446  		if !p.skipCFWS() {
   447  			return nil, errors.New("mail: misformatted parenthetical comment")
   448  		}
   449  		if p.consume(';') {
   450  			p.skipCFWS()
   451  			break
   452  		}
   453  		if !p.consume(',') {
   454  			return nil, errors.New("mail: expected comma")
   455  		}
   456  	}
   457  	return group, nil
   458  }
   459  
   460  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   461  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   462  	debug.Printf("consumeAddrSpec: %q", p.s)
   463  
   464  	orig := *p
   465  	defer func() {
   466  		if err != nil {
   467  			*p = orig
   468  		}
   469  	}()
   470  
   471  	// local-part = dot-atom / quoted-string
   472  	var localPart string
   473  	p.skipSpace()
   474  	if p.empty() {
   475  		return "", errors.New("mail: no addr-spec")
   476  	}
   477  	if p.peek() == '"' {
   478  		// quoted-string
   479  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   480  		localPart, err = p.consumeQuotedString()
   481  		if localPart == "" {
   482  			err = errors.New("mail: empty quoted string in addr-spec")
   483  		}
   484  	} else {
   485  		// dot-atom
   486  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   487  		localPart, err = p.consumeAtom(true, false)
   488  	}
   489  	if err != nil {
   490  		debug.Printf("consumeAddrSpec: failed: %v", err)
   491  		return "", err
   492  	}
   493  
   494  	if !p.consume('@') {
   495  		return "", errors.New("mail: missing @ in addr-spec")
   496  	}
   497  
   498  	// domain = dot-atom / domain-literal
   499  	var domain string
   500  	p.skipSpace()
   501  	if p.empty() {
   502  		return "", errors.New("mail: no domain in addr-spec")
   503  	}
   504  	// TODO(dsymonds): Handle domain-literal
   505  	domain, err = p.consumeAtom(true, false)
   506  	if err != nil {
   507  		return "", err
   508  	}
   509  
   510  	return localPart + "@" + domain, nil
   511  }
   512  
   513  // consumePhrase parses the RFC 5322 phrase at the start of p.
   514  func (p *addrParser) consumePhrase() (phrase string, err error) {
   515  	debug.Printf("consumePhrase: [%s]", p.s)
   516  	// phrase = 1*word
   517  	var words []string
   518  	var isPrevEncoded bool
   519  	for {
   520  		// word = atom / quoted-string
   521  		var word string
   522  		p.skipSpace()
   523  		if p.empty() {
   524  			break
   525  		}
   526  		isEncoded := false
   527  		if p.peek() == '"' {
   528  			// quoted-string
   529  			word, err = p.consumeQuotedString()
   530  		} else {
   531  			// atom
   532  			// We actually parse dot-atom here to be more permissive
   533  			// than what RFC 5322 specifies.
   534  			word, err = p.consumeAtom(true, true)
   535  			if err == nil {
   536  				word, isEncoded, err = p.decodeRFC2047Word(word)
   537  			}
   538  		}
   539  
   540  		if err != nil {
   541  			break
   542  		}
   543  		debug.Printf("consumePhrase: consumed %q", word)
   544  		if isPrevEncoded && isEncoded {
   545  			words[len(words)-1] += word
   546  		} else {
   547  			words = append(words, word)
   548  		}
   549  		isPrevEncoded = isEncoded
   550  	}
   551  	// Ignore any error if we got at least one word.
   552  	if err != nil && len(words) == 0 {
   553  		debug.Printf("consumePhrase: hit err: %v", err)
   554  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   555  	}
   556  	phrase = strings.Join(words, " ")
   557  	return phrase, nil
   558  }
   559  
   560  // consumeQuotedString parses the quoted string at the start of p.
   561  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   562  	// Assume first byte is '"'.
   563  	i := 1
   564  	qsb := make([]rune, 0, 10)
   565  
   566  	escaped := false
   567  
   568  Loop:
   569  	for {
   570  		r, size := utf8.DecodeRuneInString(p.s[i:])
   571  
   572  		switch {
   573  		case size == 0:
   574  			return "", errors.New("mail: unclosed quoted-string")
   575  
   576  		case size == 1 && r == utf8.RuneError:
   577  			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
   578  
   579  		case escaped:
   580  			//  quoted-pair = ("\" (VCHAR / WSP))
   581  
   582  			if !isVchar(r) && !isWSP(r) {
   583  				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   584  			}
   585  
   586  			qsb = append(qsb, r)
   587  			escaped = false
   588  
   589  		case isQtext(r) || isWSP(r):
   590  			// qtext (printable US-ASCII excluding " and \), or
   591  			// FWS (almost; we're ignoring CRLF)
   592  			qsb = append(qsb, r)
   593  
   594  		case r == '"':
   595  			break Loop
   596  
   597  		case r == '\\':
   598  			escaped = true
   599  
   600  		default:
   601  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   602  
   603  		}
   604  
   605  		i += size
   606  	}
   607  	p.s = p.s[i+1:]
   608  	return string(qsb), nil
   609  }
   610  
   611  // consumeAtom parses an RFC 5322 atom at the start of p.
   612  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   613  // If permissive is true, consumeAtom will not fail on:
   614  // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
   615  // - special characters (RFC 5322 3.2.3) except '<', '>', ':' and '"' (see golang.org/issue/21018)
   616  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   617  	i := 0
   618  
   619  Loop:
   620  	for {
   621  		r, size := utf8.DecodeRuneInString(p.s[i:])
   622  		switch {
   623  		case size == 1 && r == utf8.RuneError:
   624  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
   625  
   626  		case size == 0 || !isAtext(r, dot, permissive):
   627  			break Loop
   628  
   629  		default:
   630  			i += size
   631  
   632  		}
   633  	}
   634  
   635  	if i == 0 {
   636  		return "", errors.New("mail: invalid string")
   637  	}
   638  	atom, p.s = p.s[:i], p.s[i:]
   639  	if !permissive {
   640  		if strings.HasPrefix(atom, ".") {
   641  			return "", errors.New("mail: leading dot in atom")
   642  		}
   643  		if strings.Contains(atom, "..") {
   644  			return "", errors.New("mail: double dot in atom")
   645  		}
   646  		if strings.HasSuffix(atom, ".") {
   647  			return "", errors.New("mail: trailing dot in atom")
   648  		}
   649  	}
   650  	return atom, nil
   651  }
   652  
   653  func (p *addrParser) consumeDisplayNameComment() (string, error) {
   654  	if !p.consume('(') {
   655  		return "", errors.New("mail: comment does not start with (")
   656  	}
   657  	comment, ok := p.consumeComment()
   658  	if !ok {
   659  		return "", errors.New("mail: misformatted parenthetical comment")
   660  	}
   661  
   662  	// TODO(stapelberg): parse quoted-string within comment
   663  	words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
   664  	for idx, word := range words {
   665  		decoded, isEncoded, err := p.decodeRFC2047Word(word)
   666  		if err != nil {
   667  			return "", err
   668  		}
   669  		if isEncoded {
   670  			words[idx] = decoded
   671  		}
   672  	}
   673  
   674  	return strings.Join(words, " "), nil
   675  }
   676  
   677  func (p *addrParser) consume(c byte) bool {
   678  	if p.empty() || p.peek() != c {
   679  		return false
   680  	}
   681  	p.s = p.s[1:]
   682  	return true
   683  }
   684  
   685  // skipSpace skips the leading space and tab characters.
   686  func (p *addrParser) skipSpace() {
   687  	p.s = strings.TrimLeft(p.s, " \t")
   688  }
   689  
   690  func (p *addrParser) peek() byte {
   691  	return p.s[0]
   692  }
   693  
   694  func (p *addrParser) empty() bool {
   695  	return p.len() == 0
   696  }
   697  
   698  func (p *addrParser) len() int {
   699  	return len(p.s)
   700  }
   701  
   702  // skipCFWS skips CFWS as defined in RFC5322.
   703  func (p *addrParser) skipCFWS() bool {
   704  	p.skipSpace()
   705  
   706  	for {
   707  		if !p.consume('(') {
   708  			break
   709  		}
   710  
   711  		if _, ok := p.consumeComment(); !ok {
   712  			return false
   713  		}
   714  
   715  		p.skipSpace()
   716  	}
   717  
   718  	return true
   719  }
   720  
   721  func (p *addrParser) consumeComment() (string, bool) {
   722  	// '(' already consumed.
   723  	depth := 1
   724  
   725  	var comment string
   726  	for {
   727  		if p.empty() || depth == 0 {
   728  			break
   729  		}
   730  
   731  		if p.peek() == '\\' && p.len() > 1 {
   732  			p.s = p.s[1:]
   733  		} else if p.peek() == '(' {
   734  			depth++
   735  		} else if p.peek() == ')' {
   736  			depth--
   737  		}
   738  		if depth > 0 {
   739  			comment += p.s[:1]
   740  		}
   741  		p.s = p.s[1:]
   742  	}
   743  
   744  	return comment, depth == 0
   745  }
   746  
   747  func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
   748  	dec := p.dec
   749  	if dec == nil {
   750  		dec = &rfc2047Decoder
   751  	}
   752  
   753  	// Substitute our own CharsetReader function so that we can tell
   754  	// whether an error from the Decode method was due to the
   755  	// CharsetReader (meaning the charset is invalid).
   756  	// We used to look for the charsetError type in the error result,
   757  	// but that behaves badly with CharsetReaders other than the
   758  	// one in rfc2047Decoder.
   759  	adec := *dec
   760  	charsetReaderError := false
   761  	adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   762  		if dec.CharsetReader == nil {
   763  			charsetReaderError = true
   764  			return nil, charsetError(charset)
   765  		}
   766  		r, err := dec.CharsetReader(charset, input)
   767  		if err != nil {
   768  			charsetReaderError = true
   769  		}
   770  		return r, err
   771  	}
   772  	word, err = adec.Decode(s)
   773  	if err == nil {
   774  		return word, true, nil
   775  	}
   776  
   777  	// If the error came from the character set reader
   778  	// (meaning the character set itself is invalid
   779  	// but the decoding worked fine until then),
   780  	// return the original text and the error,
   781  	// with isEncoded=true.
   782  	if charsetReaderError {
   783  		return s, true, err
   784  	}
   785  
   786  	// Ignore invalid RFC 2047 encoded-word errors.
   787  	return s, false, nil
   788  }
   789  
   790  var rfc2047Decoder = mime.WordDecoder{
   791  	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
   792  		return nil, charsetError(charset)
   793  	},
   794  }
   795  
   796  type charsetError string
   797  
   798  func (e charsetError) Error() string {
   799  	return fmt.Sprintf("charset not supported: %q", string(e))
   800  }
   801  
   802  // isAtext reports whether r is an RFC 5322 atext character.
   803  // If dot is true, period is included.
   804  // If permissive is true, RFC 5322 3.2.3 specials is included,
   805  // except '<', '>', ':' and '"'.
   806  func isAtext(r rune, dot, permissive bool) bool {
   807  	switch r {
   808  	case '.':
   809  		return dot
   810  
   811  	// RFC 5322 3.2.3. specials
   812  	case '(', ')', '[', ']', ';', '@', '\\', ',':
   813  		return permissive
   814  
   815  	case '<', '>', '"', ':':
   816  		return false
   817  	}
   818  	return isVchar(r)
   819  }
   820  
   821  // isQtext reports whether r is an RFC 5322 qtext character.
   822  func isQtext(r rune) bool {
   823  	// Printable US-ASCII, excluding backslash or quote.
   824  	if r == '\\' || r == '"' {
   825  		return false
   826  	}
   827  	return isVchar(r)
   828  }
   829  
   830  // quoteString renders a string as an RFC 5322 quoted-string.
   831  func quoteString(s string) string {
   832  	var buf strings.Builder
   833  	buf.WriteByte('"')
   834  	for _, r := range s {
   835  		if isQtext(r) || isWSP(r) {
   836  			buf.WriteRune(r)
   837  		} else if isVchar(r) {
   838  			buf.WriteByte('\\')
   839  			buf.WriteRune(r)
   840  		}
   841  	}
   842  	buf.WriteByte('"')
   843  	return buf.String()
   844  }
   845  
   846  // isVchar reports whether r is an RFC 5322 VCHAR character.
   847  func isVchar(r rune) bool {
   848  	// Visible (printing) characters.
   849  	return '!' <= r && r <= '~' || isMultibyte(r)
   850  }
   851  
   852  // isMultibyte reports whether r is a multi-byte UTF-8 character
   853  // as supported by RFC 6532
   854  func isMultibyte(r rune) bool {
   855  	return r >= utf8.RuneSelf
   856  }
   857  
   858  // isWSP reports whether r is a WSP (white space).
   859  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
   860  func isWSP(r rune) bool {
   861  	return r == ' ' || r == '\t'
   862  }
   863  

View as plain text