...

Source file src/mime/quotedprintable/reader.go

Documentation: mime/quotedprintable

		 1  // Copyright 2012 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Package quotedprintable implements quoted-printable encoding as specified by
		 6  // RFC 2045.
		 7  package quotedprintable
		 8  
		 9  import (
		10  	"bufio"
		11  	"bytes"
		12  	"fmt"
		13  	"io"
		14  )
		15  
		16  // Reader is a quoted-printable decoder.
		17  type Reader struct {
		18  	br	 *bufio.Reader
		19  	rerr error	// last read error
		20  	line []byte // to be consumed before more of br
		21  }
		22  
		23  // NewReader returns a quoted-printable reader, decoding from r.
		24  func NewReader(r io.Reader) *Reader {
		25  	return &Reader{
		26  		br: bufio.NewReader(r),
		27  	}
		28  }
		29  
		30  func fromHex(b byte) (byte, error) {
		31  	switch {
		32  	case b >= '0' && b <= '9':
		33  		return b - '0', nil
		34  	case b >= 'A' && b <= 'F':
		35  		return b - 'A' + 10, nil
		36  	// Accept badly encoded bytes.
		37  	case b >= 'a' && b <= 'f':
		38  		return b - 'a' + 10, nil
		39  	}
		40  	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
		41  }
		42  
		43  func readHexByte(v []byte) (b byte, err error) {
		44  	if len(v) < 2 {
		45  		return 0, io.ErrUnexpectedEOF
		46  	}
		47  	var hb, lb byte
		48  	if hb, err = fromHex(v[0]); err != nil {
		49  		return 0, err
		50  	}
		51  	if lb, err = fromHex(v[1]); err != nil {
		52  		return 0, err
		53  	}
		54  	return hb<<4 | lb, nil
		55  }
		56  
		57  func isQPDiscardWhitespace(r rune) bool {
		58  	switch r {
		59  	case '\n', '\r', ' ', '\t':
		60  		return true
		61  	}
		62  	return false
		63  }
		64  
		65  var (
		66  	crlf			 = []byte("\r\n")
		67  	lf				 = []byte("\n")
		68  	softSuffix = []byte("=")
		69  )
		70  
		71  // Read reads and decodes quoted-printable data from the underlying reader.
		72  func (r *Reader) Read(p []byte) (n int, err error) {
		73  	// Deviations from RFC 2045:
		74  	// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
		75  	// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
		76  	//		with other broken QP encoders & decoders.
		77  	// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
		78  	//		the final byte read from the underlying reader is allowed to be '=',
		79  	//		and it will be silently ignored.
		80  	// 4. it takes = as literal = if not followed by two hex digits
		81  	//		but not at end of line (issue 13219).
		82  	for len(p) > 0 {
		83  		if len(r.line) == 0 {
		84  			if r.rerr != nil {
		85  				return n, r.rerr
		86  			}
		87  			r.line, r.rerr = r.br.ReadSlice('\n')
		88  
		89  			// Does the line end in CRLF instead of just LF?
		90  			hasLF := bytes.HasSuffix(r.line, lf)
		91  			hasCR := bytes.HasSuffix(r.line, crlf)
		92  			wholeLine := r.line
		93  			r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
		94  			if bytes.HasSuffix(r.line, softSuffix) {
		95  				rightStripped := wholeLine[len(r.line):]
		96  				r.line = r.line[:len(r.line)-1]
		97  				if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
		98  					!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
		99  					r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
	 100  				}
	 101  			} else if hasLF {
	 102  				if hasCR {
	 103  					r.line = append(r.line, '\r', '\n')
	 104  				} else {
	 105  					r.line = append(r.line, '\n')
	 106  				}
	 107  			}
	 108  			continue
	 109  		}
	 110  		b := r.line[0]
	 111  
	 112  		switch {
	 113  		case b == '=':
	 114  			b, err = readHexByte(r.line[1:])
	 115  			if err != nil {
	 116  				if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
	 117  					// Take the = as a literal =.
	 118  					b = '='
	 119  					break
	 120  				}
	 121  				return n, err
	 122  			}
	 123  			r.line = r.line[2:] // 2 of the 3; other 1 is done below
	 124  		case b == '\t' || b == '\r' || b == '\n':
	 125  			break
	 126  		case b >= 0x80:
	 127  			// As an extension to RFC 2045, we accept
	 128  			// values >= 0x80 without complaint. Issue 22597.
	 129  			break
	 130  		case b < ' ' || b > '~':
	 131  			return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
	 132  		}
	 133  		p[0] = b
	 134  		p = p[1:]
	 135  		r.line = r.line[1:]
	 136  		n++
	 137  	}
	 138  	return n, nil
	 139  }
	 140  

View as plain text