...

Source file src/net/mail/message.go

Documentation: net/mail

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  /*
		 6  Package mail implements parsing of mail messages.
		 7  
		 8  For the most part, this package follows the syntax as specified by RFC 5322 and
		 9  extended by RFC 6532.
		10  Notable divergences:
		11  	* Obsolete address formats are not parsed, including addresses with
		12  		embedded route information.
		13  	* The full range of spacing (the CFWS syntax element) is not supported,
		14  		such as breaking addresses across lines.
		15  	* No unicode normalization is performed.
		16  	* The special characters ()[];@\, are allowed to appear unquoted in names.
		17  */
		18  package mail
		19  
		20  import (
		21  	"bufio"
		22  	"errors"
		23  	"fmt"
		24  	"io"
		25  	"log"
		26  	"mime"
		27  	"net/textproto"
		28  	"strings"
		29  	"sync"
		30  	"time"
		31  	"unicode/utf8"
		32  )
		33  
		34  var debug = debugT(false)
		35  
		36  type debugT bool
		37  
		38  func (d debugT) Printf(format string, args ...interface{}) {
		39  	if d {
		40  		log.Printf(format, args...)
		41  	}
		42  }
		43  
		44  // A Message represents a parsed mail message.
		45  type Message struct {
		46  	Header Header
		47  	Body	 io.Reader
		48  }
		49  
		50  // ReadMessage reads a message from r.
		51  // The headers are parsed, and the body of the message will be available
		52  // for reading from msg.Body.
		53  func ReadMessage(r io.Reader) (msg *Message, err error) {
		54  	tp := textproto.NewReader(bufio.NewReader(r))
		55  
		56  	hdr, err := tp.ReadMIMEHeader()
		57  	if err != nil {
		58  		return nil, err
		59  	}
		60  
		61  	return &Message{
		62  		Header: Header(hdr),
		63  		Body:	 tp.R,
		64  	}, nil
		65  }
		66  
		67  // Layouts suitable for passing to time.Parse.
		68  // These are tried in order.
		69  var (
		70  	dateLayoutsBuildOnce sync.Once
		71  	dateLayouts					[]string
		72  )
		73  
		74  func buildDateLayouts() {
		75  	// Generate layouts based on RFC 5322, section 3.3.
		76  
		77  	dows := [...]string{"", "Mon, "}	 // day-of-week
		78  	days := [...]string{"2", "02"}		 // day = 1*2DIGIT
		79  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
		80  	seconds := [...]string{":05", ""}	// second
		81  	// "-0700 (MST)" is not in RFC 5322, but is common.
		82  	zones := [...]string{"-0700", "MST"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
		83  
		84  	for _, dow := range dows {
		85  		for _, day := range days {
		86  			for _, year := range years {
		87  				for _, second := range seconds {
		88  					for _, zone := range zones {
		89  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
		90  						dateLayouts = append(dateLayouts, s)
		91  					}
		92  				}
		93  			}
		94  		}
		95  	}
		96  }
		97  
		98  // ParseDate parses an RFC 5322 date string.
		99  func ParseDate(date string) (time.Time, error) {
	 100  	dateLayoutsBuildOnce.Do(buildDateLayouts)
	 101  	// CR and LF must match and are tolerated anywhere in the date field.
	 102  	date = strings.ReplaceAll(date, "\r\n", "")
	 103  	if strings.Index(date, "\r") != -1 {
	 104  		return time.Time{}, errors.New("mail: header has a CR without LF")
	 105  	}
	 106  	// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
	 107  	p := addrParser{date, nil}
	 108  	p.skipSpace()
	 109  
	 110  	// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
	 111  	// zone length is always 5 chars unless obsolete (obs-zone)
	 112  	if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
	 113  		date = p.s[:ind+5]
	 114  		p.s = p.s[ind+5:]
	 115  	} else {
	 116  		ind := strings.Index(p.s, "T")
	 117  		if ind == 0 {
	 118  			// In this case we have the following date formats:
	 119  			// * Thu, 20 Nov 1997 09:55:06 MDT
	 120  			// * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
	 121  			// * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
	 122  			ind = strings.Index(p.s[1:], "T")
	 123  			if ind != -1 {
	 124  				ind++
	 125  			}
	 126  		}
	 127  
	 128  		if ind != -1 && len(p.s) >= ind+5 {
	 129  			// The last letter T of the obsolete time zone is checked when no standard time zone is found.
	 130  			// If T is misplaced, the date to parse is garbage.
	 131  			date = p.s[:ind+1]
	 132  			p.s = p.s[ind+1:]
	 133  		}
	 134  	}
	 135  	if !p.skipCFWS() {
	 136  		return time.Time{}, errors.New("mail: misformatted parenthetical comment")
	 137  	}
	 138  	for _, layout := range dateLayouts {
	 139  		t, err := time.Parse(layout, date)
	 140  		if err == nil {
	 141  			return t, nil
	 142  		}
	 143  	}
	 144  	return time.Time{}, errors.New("mail: header could not be parsed")
	 145  }
	 146  
	 147  // A Header represents the key-value pairs in a mail message header.
	 148  type Header map[string][]string
	 149  
	 150  // Get gets the first value associated with the given key.
	 151  // It is case insensitive; CanonicalMIMEHeaderKey is used
	 152  // to canonicalize the provided key.
	 153  // If there are no values associated with the key, Get returns "".
	 154  // To access multiple values of a key, or to use non-canonical keys,
	 155  // access the map directly.
	 156  func (h Header) Get(key string) string {
	 157  	return textproto.MIMEHeader(h).Get(key)
	 158  }
	 159  
	 160  var ErrHeaderNotPresent = errors.New("mail: header not in message")
	 161  
	 162  // Date parses the Date header field.
	 163  func (h Header) Date() (time.Time, error) {
	 164  	hdr := h.Get("Date")
	 165  	if hdr == "" {
	 166  		return time.Time{}, ErrHeaderNotPresent
	 167  	}
	 168  	return ParseDate(hdr)
	 169  }
	 170  
	 171  // AddressList parses the named header field as a list of addresses.
	 172  func (h Header) AddressList(key string) ([]*Address, error) {
	 173  	hdr := h.Get(key)
	 174  	if hdr == "" {
	 175  		return nil, ErrHeaderNotPresent
	 176  	}
	 177  	return ParseAddressList(hdr)
	 178  }
	 179  
	 180  // Address represents a single mail address.
	 181  // An address such as "Barry Gibbs <[email protected]>" is represented
	 182  // as Address{Name: "Barry Gibbs", Address: "[email protected]"}.
	 183  type Address struct {
	 184  	Name		string // Proper name; may be empty.
	 185  	Address string // user@domain
	 186  }
	 187  
	 188  // ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <[email protected]>"
	 189  func ParseAddress(address string) (*Address, error) {
	 190  	return (&addrParser{s: address}).parseSingleAddress()
	 191  }
	 192  
	 193  // ParseAddressList parses the given string as a list of addresses.
	 194  func ParseAddressList(list string) ([]*Address, error) {
	 195  	return (&addrParser{s: list}).parseAddressList()
	 196  }
	 197  
	 198  // An AddressParser is an RFC 5322 address parser.
	 199  type AddressParser struct {
	 200  	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	 201  	WordDecoder *mime.WordDecoder
	 202  }
	 203  
	 204  // Parse parses a single RFC 5322 address of the
	 205  // form "Gogh Fir <[email protected]>" or "[email protected]".
	 206  func (p *AddressParser) Parse(address string) (*Address, error) {
	 207  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
	 208  }
	 209  
	 210  // ParseList parses the given string as a list of comma-separated addresses
	 211  // of the form "Gogh Fir <[email protected]>" or "[email protected]".
	 212  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
	 213  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
	 214  }
	 215  
	 216  // String formats the address as a valid RFC 5322 address.
	 217  // If the address's name contains non-ASCII characters
	 218  // the name will be rendered according to RFC 2047.
	 219  func (a *Address) String() string {
	 220  	// Format address local@domain
	 221  	at := strings.LastIndex(a.Address, "@")
	 222  	var local, domain string
	 223  	if at < 0 {
	 224  		// This is a malformed address ("@" is required in addr-spec);
	 225  		// treat the whole address as local-part.
	 226  		local = a.Address
	 227  	} else {
	 228  		local, domain = a.Address[:at], a.Address[at+1:]
	 229  	}
	 230  
	 231  	// Add quotes if needed
	 232  	quoteLocal := false
	 233  	for i, r := range local {
	 234  		if isAtext(r, false, false) {
	 235  			continue
	 236  		}
	 237  		if r == '.' {
	 238  			// Dots are okay if they are surrounded by atext.
	 239  			// We only need to check that the previous byte is
	 240  			// not a dot, and this isn't the end of the string.
	 241  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
	 242  				continue
	 243  			}
	 244  		}
	 245  		quoteLocal = true
	 246  		break
	 247  	}
	 248  	if quoteLocal {
	 249  		local = quoteString(local)
	 250  
	 251  	}
	 252  
	 253  	s := "<" + local + "@" + domain + ">"
	 254  
	 255  	if a.Name == "" {
	 256  		return s
	 257  	}
	 258  
	 259  	// If every character is printable ASCII, quoting is simple.
	 260  	allPrintable := true
	 261  	for _, r := range a.Name {
	 262  		// isWSP here should actually be isFWS,
	 263  		// but we don't support folding yet.
	 264  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
	 265  			allPrintable = false
	 266  			break
	 267  		}
	 268  	}
	 269  	if allPrintable {
	 270  		return quoteString(a.Name) + " " + s
	 271  	}
	 272  
	 273  	// Text in an encoded-word in a display-name must not contain certain
	 274  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
	 275  	// When this is the case encode the name using base64 encoding.
	 276  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
	 277  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
	 278  	}
	 279  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
	 280  }
	 281  
	 282  type addrParser struct {
	 283  	s	 string
	 284  	dec *mime.WordDecoder // may be nil
	 285  }
	 286  
	 287  func (p *addrParser) parseAddressList() ([]*Address, error) {
	 288  	var list []*Address
	 289  	for {
	 290  		p.skipSpace()
	 291  
	 292  		// allow skipping empty entries (RFC5322 obs-addr-list)
	 293  		if p.consume(',') {
	 294  			continue
	 295  		}
	 296  
	 297  		addrs, err := p.parseAddress(true)
	 298  		if err != nil {
	 299  			return nil, err
	 300  		}
	 301  		list = append(list, addrs...)
	 302  
	 303  		if !p.skipCFWS() {
	 304  			return nil, errors.New("mail: misformatted parenthetical comment")
	 305  		}
	 306  		if p.empty() {
	 307  			break
	 308  		}
	 309  		if p.peek() != ',' {
	 310  			return nil, errors.New("mail: expected comma")
	 311  		}
	 312  
	 313  		// Skip empty entries for obs-addr-list.
	 314  		for p.consume(',') {
	 315  			p.skipSpace()
	 316  		}
	 317  		if p.empty() {
	 318  			break
	 319  		}
	 320  	}
	 321  	return list, nil
	 322  }
	 323  
	 324  func (p *addrParser) parseSingleAddress() (*Address, error) {
	 325  	addrs, err := p.parseAddress(true)
	 326  	if err != nil {
	 327  		return nil, err
	 328  	}
	 329  	if !p.skipCFWS() {
	 330  		return nil, errors.New("mail: misformatted parenthetical comment")
	 331  	}
	 332  	if !p.empty() {
	 333  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
	 334  	}
	 335  	if len(addrs) == 0 {
	 336  		return nil, errors.New("mail: empty group")
	 337  	}
	 338  	if len(addrs) > 1 {
	 339  		return nil, errors.New("mail: group with multiple addresses")
	 340  	}
	 341  	return addrs[0], nil
	 342  }
	 343  
	 344  // parseAddress parses a single RFC 5322 address at the start of p.
	 345  func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
	 346  	debug.Printf("parseAddress: %q", p.s)
	 347  	p.skipSpace()
	 348  	if p.empty() {
	 349  		return nil, errors.New("mail: no address")
	 350  	}
	 351  
	 352  	// address = mailbox / group
	 353  	// mailbox = name-addr / addr-spec
	 354  	// group = display-name ":" [group-list] ";" [CFWS]
	 355  
	 356  	// addr-spec has a more restricted grammar than name-addr,
	 357  	// so try parsing it first, and fallback to name-addr.
	 358  	// TODO(dsymonds): Is this really correct?
	 359  	spec, err := p.consumeAddrSpec()
	 360  	if err == nil {
	 361  		var displayName string
	 362  		p.skipSpace()
	 363  		if !p.empty() && p.peek() == '(' {
	 364  			displayName, err = p.consumeDisplayNameComment()
	 365  			if err != nil {
	 366  				return nil, err
	 367  			}
	 368  		}
	 369  
	 370  		return []*Address{{
	 371  			Name:		displayName,
	 372  			Address: spec,
	 373  		}}, err
	 374  	}
	 375  	debug.Printf("parseAddress: not an addr-spec: %v", err)
	 376  	debug.Printf("parseAddress: state is now %q", p.s)
	 377  
	 378  	// display-name
	 379  	var displayName string
	 380  	if p.peek() != '<' {
	 381  		displayName, err = p.consumePhrase()
	 382  		if err != nil {
	 383  			return nil, err
	 384  		}
	 385  	}
	 386  	debug.Printf("parseAddress: displayName=%q", displayName)
	 387  
	 388  	p.skipSpace()
	 389  	if handleGroup {
	 390  		if p.consume(':') {
	 391  			return p.consumeGroupList()
	 392  		}
	 393  	}
	 394  	// angle-addr = "<" addr-spec ">"
	 395  	if !p.consume('<') {
	 396  		atext := true
	 397  		for _, r := range displayName {
	 398  			if !isAtext(r, true, false) {
	 399  				atext = false
	 400  				break
	 401  			}
	 402  		}
	 403  		if atext {
	 404  			// The input is like "foo.bar"; it's possible the input
	 405  			// meant to be "foo.bar@domain", or "foo.bar <...>".
	 406  			return nil, errors.New("mail: missing '@' or angle-addr")
	 407  		}
	 408  		// The input is like "Full Name", which couldn't possibly be a
	 409  		// valid email address if followed by "@domain"; the input
	 410  		// likely meant to be "Full Name <...>".
	 411  		return nil, errors.New("mail: no angle-addr")
	 412  	}
	 413  	spec, err = p.consumeAddrSpec()
	 414  	if err != nil {
	 415  		return nil, err
	 416  	}
	 417  	if !p.consume('>') {
	 418  		return nil, errors.New("mail: unclosed angle-addr")
	 419  	}
	 420  	debug.Printf("parseAddress: spec=%q", spec)
	 421  
	 422  	return []*Address{{
	 423  		Name:		displayName,
	 424  		Address: spec,
	 425  	}}, nil
	 426  }
	 427  
	 428  func (p *addrParser) consumeGroupList() ([]*Address, error) {
	 429  	var group []*Address
	 430  	// handle empty group.
	 431  	p.skipSpace()
	 432  	if p.consume(';') {
	 433  		p.skipCFWS()
	 434  		return group, nil
	 435  	}
	 436  
	 437  	for {
	 438  		p.skipSpace()
	 439  		// embedded groups not allowed.
	 440  		addrs, err := p.parseAddress(false)
	 441  		if err != nil {
	 442  			return nil, err
	 443  		}
	 444  		group = append(group, addrs...)
	 445  
	 446  		if !p.skipCFWS() {
	 447  			return nil, errors.New("mail: misformatted parenthetical comment")
	 448  		}
	 449  		if p.consume(';') {
	 450  			p.skipCFWS()
	 451  			break
	 452  		}
	 453  		if !p.consume(',') {
	 454  			return nil, errors.New("mail: expected comma")
	 455  		}
	 456  	}
	 457  	return group, nil
	 458  }
	 459  
	 460  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
	 461  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
	 462  	debug.Printf("consumeAddrSpec: %q", p.s)
	 463  
	 464  	orig := *p
	 465  	defer func() {
	 466  		if err != nil {
	 467  			*p = orig
	 468  		}
	 469  	}()
	 470  
	 471  	// local-part = dot-atom / quoted-string
	 472  	var localPart string
	 473  	p.skipSpace()
	 474  	if p.empty() {
	 475  		return "", errors.New("mail: no addr-spec")
	 476  	}
	 477  	if p.peek() == '"' {
	 478  		// quoted-string
	 479  		debug.Printf("consumeAddrSpec: parsing quoted-string")
	 480  		localPart, err = p.consumeQuotedString()
	 481  		if localPart == "" {
	 482  			err = errors.New("mail: empty quoted string in addr-spec")
	 483  		}
	 484  	} else {
	 485  		// dot-atom
	 486  		debug.Printf("consumeAddrSpec: parsing dot-atom")
	 487  		localPart, err = p.consumeAtom(true, false)
	 488  	}
	 489  	if err != nil {
	 490  		debug.Printf("consumeAddrSpec: failed: %v", err)
	 491  		return "", err
	 492  	}
	 493  
	 494  	if !p.consume('@') {
	 495  		return "", errors.New("mail: missing @ in addr-spec")
	 496  	}
	 497  
	 498  	// domain = dot-atom / domain-literal
	 499  	var domain string
	 500  	p.skipSpace()
	 501  	if p.empty() {
	 502  		return "", errors.New("mail: no domain in addr-spec")
	 503  	}
	 504  	// TODO(dsymonds): Handle domain-literal
	 505  	domain, err = p.consumeAtom(true, false)
	 506  	if err != nil {
	 507  		return "", err
	 508  	}
	 509  
	 510  	return localPart + "@" + domain, nil
	 511  }
	 512  
	 513  // consumePhrase parses the RFC 5322 phrase at the start of p.
	 514  func (p *addrParser) consumePhrase() (phrase string, err error) {
	 515  	debug.Printf("consumePhrase: [%s]", p.s)
	 516  	// phrase = 1*word
	 517  	var words []string
	 518  	var isPrevEncoded bool
	 519  	for {
	 520  		// word = atom / quoted-string
	 521  		var word string
	 522  		p.skipSpace()
	 523  		if p.empty() {
	 524  			break
	 525  		}
	 526  		isEncoded := false
	 527  		if p.peek() == '"' {
	 528  			// quoted-string
	 529  			word, err = p.consumeQuotedString()
	 530  		} else {
	 531  			// atom
	 532  			// We actually parse dot-atom here to be more permissive
	 533  			// than what RFC 5322 specifies.
	 534  			word, err = p.consumeAtom(true, true)
	 535  			if err == nil {
	 536  				word, isEncoded, err = p.decodeRFC2047Word(word)
	 537  			}
	 538  		}
	 539  
	 540  		if err != nil {
	 541  			break
	 542  		}
	 543  		debug.Printf("consumePhrase: consumed %q", word)
	 544  		if isPrevEncoded && isEncoded {
	 545  			words[len(words)-1] += word
	 546  		} else {
	 547  			words = append(words, word)
	 548  		}
	 549  		isPrevEncoded = isEncoded
	 550  	}
	 551  	// Ignore any error if we got at least one word.
	 552  	if err != nil && len(words) == 0 {
	 553  		debug.Printf("consumePhrase: hit err: %v", err)
	 554  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
	 555  	}
	 556  	phrase = strings.Join(words, " ")
	 557  	return phrase, nil
	 558  }
	 559  
	 560  // consumeQuotedString parses the quoted string at the start of p.
	 561  func (p *addrParser) consumeQuotedString() (qs string, err error) {
	 562  	// Assume first byte is '"'.
	 563  	i := 1
	 564  	qsb := make([]rune, 0, 10)
	 565  
	 566  	escaped := false
	 567  
	 568  Loop:
	 569  	for {
	 570  		r, size := utf8.DecodeRuneInString(p.s[i:])
	 571  
	 572  		switch {
	 573  		case size == 0:
	 574  			return "", errors.New("mail: unclosed quoted-string")
	 575  
	 576  		case size == 1 && r == utf8.RuneError:
	 577  			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
	 578  
	 579  		case escaped:
	 580  			//	quoted-pair = ("\" (VCHAR / WSP))
	 581  
	 582  			if !isVchar(r) && !isWSP(r) {
	 583  				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
	 584  			}
	 585  
	 586  			qsb = append(qsb, r)
	 587  			escaped = false
	 588  
	 589  		case isQtext(r) || isWSP(r):
	 590  			// qtext (printable US-ASCII excluding " and \), or
	 591  			// FWS (almost; we're ignoring CRLF)
	 592  			qsb = append(qsb, r)
	 593  
	 594  		case r == '"':
	 595  			break Loop
	 596  
	 597  		case r == '\\':
	 598  			escaped = true
	 599  
	 600  		default:
	 601  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
	 602  
	 603  		}
	 604  
	 605  		i += size
	 606  	}
	 607  	p.s = p.s[i+1:]
	 608  	return string(qsb), nil
	 609  }
	 610  
	 611  // consumeAtom parses an RFC 5322 atom at the start of p.
	 612  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
	 613  // If permissive is true, consumeAtom will not fail on:
	 614  // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
	 615  // - special characters (RFC 5322 3.2.3) except '<', '>', ':' and '"' (see golang.org/issue/21018)
	 616  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
	 617  	i := 0
	 618  
	 619  Loop:
	 620  	for {
	 621  		r, size := utf8.DecodeRuneInString(p.s[i:])
	 622  		switch {
	 623  		case size == 1 && r == utf8.RuneError:
	 624  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
	 625  
	 626  		case size == 0 || !isAtext(r, dot, permissive):
	 627  			break Loop
	 628  
	 629  		default:
	 630  			i += size
	 631  
	 632  		}
	 633  	}
	 634  
	 635  	if i == 0 {
	 636  		return "", errors.New("mail: invalid string")
	 637  	}
	 638  	atom, p.s = p.s[:i], p.s[i:]
	 639  	if !permissive {
	 640  		if strings.HasPrefix(atom, ".") {
	 641  			return "", errors.New("mail: leading dot in atom")
	 642  		}
	 643  		if strings.Contains(atom, "..") {
	 644  			return "", errors.New("mail: double dot in atom")
	 645  		}
	 646  		if strings.HasSuffix(atom, ".") {
	 647  			return "", errors.New("mail: trailing dot in atom")
	 648  		}
	 649  	}
	 650  	return atom, nil
	 651  }
	 652  
	 653  func (p *addrParser) consumeDisplayNameComment() (string, error) {
	 654  	if !p.consume('(') {
	 655  		return "", errors.New("mail: comment does not start with (")
	 656  	}
	 657  	comment, ok := p.consumeComment()
	 658  	if !ok {
	 659  		return "", errors.New("mail: misformatted parenthetical comment")
	 660  	}
	 661  
	 662  	// TODO(stapelberg): parse quoted-string within comment
	 663  	words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
	 664  	for idx, word := range words {
	 665  		decoded, isEncoded, err := p.decodeRFC2047Word(word)
	 666  		if err != nil {
	 667  			return "", err
	 668  		}
	 669  		if isEncoded {
	 670  			words[idx] = decoded
	 671  		}
	 672  	}
	 673  
	 674  	return strings.Join(words, " "), nil
	 675  }
	 676  
	 677  func (p *addrParser) consume(c byte) bool {
	 678  	if p.empty() || p.peek() != c {
	 679  		return false
	 680  	}
	 681  	p.s = p.s[1:]
	 682  	return true
	 683  }
	 684  
	 685  // skipSpace skips the leading space and tab characters.
	 686  func (p *addrParser) skipSpace() {
	 687  	p.s = strings.TrimLeft(p.s, " \t")
	 688  }
	 689  
	 690  func (p *addrParser) peek() byte {
	 691  	return p.s[0]
	 692  }
	 693  
	 694  func (p *addrParser) empty() bool {
	 695  	return p.len() == 0
	 696  }
	 697  
	 698  func (p *addrParser) len() int {
	 699  	return len(p.s)
	 700  }
	 701  
	 702  // skipCFWS skips CFWS as defined in RFC5322.
	 703  func (p *addrParser) skipCFWS() bool {
	 704  	p.skipSpace()
	 705  
	 706  	for {
	 707  		if !p.consume('(') {
	 708  			break
	 709  		}
	 710  
	 711  		if _, ok := p.consumeComment(); !ok {
	 712  			return false
	 713  		}
	 714  
	 715  		p.skipSpace()
	 716  	}
	 717  
	 718  	return true
	 719  }
	 720  
	 721  func (p *addrParser) consumeComment() (string, bool) {
	 722  	// '(' already consumed.
	 723  	depth := 1
	 724  
	 725  	var comment string
	 726  	for {
	 727  		if p.empty() || depth == 0 {
	 728  			break
	 729  		}
	 730  
	 731  		if p.peek() == '\\' && p.len() > 1 {
	 732  			p.s = p.s[1:]
	 733  		} else if p.peek() == '(' {
	 734  			depth++
	 735  		} else if p.peek() == ')' {
	 736  			depth--
	 737  		}
	 738  		if depth > 0 {
	 739  			comment += p.s[:1]
	 740  		}
	 741  		p.s = p.s[1:]
	 742  	}
	 743  
	 744  	return comment, depth == 0
	 745  }
	 746  
	 747  func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
	 748  	if p.dec != nil {
	 749  		word, err = p.dec.Decode(s)
	 750  	} else {
	 751  		word, err = rfc2047Decoder.Decode(s)
	 752  	}
	 753  
	 754  	if err == nil {
	 755  		return word, true, nil
	 756  	}
	 757  
	 758  	if _, ok := err.(charsetError); ok {
	 759  		return s, true, err
	 760  	}
	 761  
	 762  	// Ignore invalid RFC 2047 encoded-word errors.
	 763  	return s, false, nil
	 764  }
	 765  
	 766  var rfc2047Decoder = mime.WordDecoder{
	 767  	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
	 768  		return nil, charsetError(charset)
	 769  	},
	 770  }
	 771  
	 772  type charsetError string
	 773  
	 774  func (e charsetError) Error() string {
	 775  	return fmt.Sprintf("charset not supported: %q", string(e))
	 776  }
	 777  
	 778  // isAtext reports whether r is an RFC 5322 atext character.
	 779  // If dot is true, period is included.
	 780  // If permissive is true, RFC 5322 3.2.3 specials is included,
	 781  // except '<', '>', ':' and '"'.
	 782  func isAtext(r rune, dot, permissive bool) bool {
	 783  	switch r {
	 784  	case '.':
	 785  		return dot
	 786  
	 787  	// RFC 5322 3.2.3. specials
	 788  	case '(', ')', '[', ']', ';', '@', '\\', ',':
	 789  		return permissive
	 790  
	 791  	case '<', '>', '"', ':':
	 792  		return false
	 793  	}
	 794  	return isVchar(r)
	 795  }
	 796  
	 797  // isQtext reports whether r is an RFC 5322 qtext character.
	 798  func isQtext(r rune) bool {
	 799  	// Printable US-ASCII, excluding backslash or quote.
	 800  	if r == '\\' || r == '"' {
	 801  		return false
	 802  	}
	 803  	return isVchar(r)
	 804  }
	 805  
	 806  // quoteString renders a string as an RFC 5322 quoted-string.
	 807  func quoteString(s string) string {
	 808  	var buf strings.Builder
	 809  	buf.WriteByte('"')
	 810  	for _, r := range s {
	 811  		if isQtext(r) || isWSP(r) {
	 812  			buf.WriteRune(r)
	 813  		} else if isVchar(r) {
	 814  			buf.WriteByte('\\')
	 815  			buf.WriteRune(r)
	 816  		}
	 817  	}
	 818  	buf.WriteByte('"')
	 819  	return buf.String()
	 820  }
	 821  
	 822  // isVchar reports whether r is an RFC 5322 VCHAR character.
	 823  func isVchar(r rune) bool {
	 824  	// Visible (printing) characters.
	 825  	return '!' <= r && r <= '~' || isMultibyte(r)
	 826  }
	 827  
	 828  // isMultibyte reports whether r is a multi-byte UTF-8 character
	 829  // as supported by RFC 6532
	 830  func isMultibyte(r rune) bool {
	 831  	return r >= utf8.RuneSelf
	 832  }
	 833  
	 834  // isWSP reports whether r is a WSP (white space).
	 835  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
	 836  func isWSP(r rune) bool {
	 837  	return r == ' ' || r == '\t'
	 838  }
	 839  

View as plain text