base64.go

Documentation: encoding/base64

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Package base64 implements base64 encoding as specified by RFC 4648.
		 6  package base64
		 7  
		 8  import (
		 9  	"encoding/binary"
		10  	"io"
		11  	"strconv"
		12  )
		13  
		14  /*
		15   * Encodings
		16   */
		17  
		// An Encoding is a radix 64 encoding/decoding scheme, defined by a
		// 64-character alphabet. The most common encoding is the "base64"
		// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM
		// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
		// the standard encoding with - and _ substituted for + and /.
		type Encoding struct {
			// encode maps each 6-bit value (0-63) to its alphabet byte.
			encode [64]byte
			// decodeMap maps an input byte back to its 6-bit value; bytes that
			// are not part of the alphabet hold 0xFF.
			decodeMap [256]byte
			// padChar is the padding character, or NoPadding if padding is disabled.
			padChar rune
			// strict makes the decoder reject non-zero trailing padding bits.
			strict bool
		}

		// Padding choices understood by WithPadding.
		const (
			StdPadding rune = '=' // Standard padding character
			NoPadding  rune = -1  // No padding
		)

		// Alphabets of the standard and the URL-safe encodings of RFC 4648.
		const (
			encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
			encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
		)
		37  
		38  // NewEncoding returns a new padded Encoding defined by the given alphabet,
		39  // which must be a 64-byte string that does not contain the padding character
		40  // or CR / LF ('\r', '\n').
		41  // The resulting Encoding uses the default padding character ('='),
		42  // which may be changed or disabled via WithPadding.
		43  func NewEncoding(encoder string) *Encoding {
		44  	if len(encoder) != 64 {
		45  		panic("encoding alphabet is not 64-bytes long")
		46  	}
		47  	for i := 0; i < len(encoder); i++ {
		48  		if encoder[i] == '\n' || encoder[i] == '\r' {
		49  			panic("encoding alphabet contains newline character")
		50  		}
		51  	}
		52  
		53  	e := new(Encoding)
		54  	e.padChar = StdPadding
		55  	copy(e.encode[:], encoder)
		56  
		57  	for i := 0; i < len(e.decodeMap); i++ {
		58  		e.decodeMap[i] = 0xFF
		59  	}
		60  	for i := 0; i < len(encoder); i++ {
		61  		e.decodeMap[encoder[i]] = byte(i)
		62  	}
		63  	return e
		64  }
		65  
		66  // WithPadding creates a new encoding identical to enc except
		67  // with a specified padding character, or NoPadding to disable padding.
		68  // The padding character must not be '\r' or '\n', must not
		69  // be contained in the encoding's alphabet and must be a rune equal or
		70  // below '\xff'.
		71  func (enc Encoding) WithPadding(padding rune) *Encoding {
		72  	if padding == '\r' || padding == '\n' || padding > 0xff {
		73  		panic("invalid padding")
		74  	}
		75  
		76  	for i := 0; i < len(enc.encode); i++ {
		77  		if rune(enc.encode[i]) == padding {
		78  			panic("padding contained in alphabet")
		79  		}
		80  	}
		81  
		82  	enc.padChar = padding
		83  	return &enc
		84  }
		85  
		86  // Strict creates a new encoding identical to enc except with
		87  // strict decoding enabled. In this mode, the decoder requires that
		88  // trailing padding bits are zero, as described in RFC 4648 section 3.5.
		89  //
		90  // Note that the input is still malleable, as new line characters
		91  // (CR and LF) are still ignored.
		92  func (enc Encoding) Strict() *Encoding {
		93  	enc.strict = true
		94  	return &enc
		95  }
		96  
		97  // StdEncoding is the standard base64 encoding, as defined in
		98  // RFC 4648.
		99  var StdEncoding = NewEncoding(encodeStd)
	 100  
	 101  // URLEncoding is the alternate base64 encoding defined in RFC 4648.
	 102  // It is typically used in URLs and file names.
	 103  var URLEncoding = NewEncoding(encodeURL)
	 104  
	 105  // RawStdEncoding is the standard raw, unpadded base64 encoding,
	 106  // as defined in RFC 4648 section 3.2.
	 107  // This is the same as StdEncoding but omits padding characters.
	 108  var RawStdEncoding = StdEncoding.WithPadding(NoPadding)
	 109  
	 110  // RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648.
	 111  // It is typically used in URLs and file names.
	 112  // This is the same as URLEncoding but omits padding characters.
	 113  var RawURLEncoding = URLEncoding.WithPadding(NoPadding)
	 114  
	 115  /*
	 116   * Encoder
	 117   */
	 118  
	 119  // Encode encodes src using the encoding enc, writing
	 120  // EncodedLen(len(src)) bytes to dst.
	 121  //
	 122  // The encoding pads the output to a multiple of 4 bytes,
	 123  // so Encode is not appropriate for use on individual blocks
	 124  // of a large data stream. Use NewEncoder() instead.
	 125  func (enc *Encoding) Encode(dst, src []byte) {
	 126  	if len(src) == 0 {
	 127  		return
	 128  	}
	 129  	// enc is a pointer receiver, so the use of enc.encode within the hot
	 130  	// loop below means a nil check at every operation. Lift that nil check
	 131  	// outside of the loop to speed up the encoder.
	 132  	_ = enc.encode
	 133  
	 134  	di, si := 0, 0
	 135  	n := (len(src) / 3) * 3
	 136  	for si < n {
	 137  		// Convert 3x 8bit source bytes into 4 bytes
	 138  		val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
	 139  
	 140  		dst[di+0] = enc.encode[val>>18&0x3F]
	 141  		dst[di+1] = enc.encode[val>>12&0x3F]
	 142  		dst[di+2] = enc.encode[val>>6&0x3F]
	 143  		dst[di+3] = enc.encode[val&0x3F]
	 144  
	 145  		si += 3
	 146  		di += 4
	 147  	}
	 148  
	 149  	remain := len(src) - si
	 150  	if remain == 0 {
	 151  		return
	 152  	}
	 153  	// Add the remaining small block
	 154  	val := uint(src[si+0]) << 16
	 155  	if remain == 2 {
	 156  		val |= uint(src[si+1]) << 8
	 157  	}
	 158  
	 159  	dst[di+0] = enc.encode[val>>18&0x3F]
	 160  	dst[di+1] = enc.encode[val>>12&0x3F]
	 161  
	 162  	switch remain {
	 163  	case 2:
	 164  		dst[di+2] = enc.encode[val>>6&0x3F]
	 165  		if enc.padChar != NoPadding {
	 166  			dst[di+3] = byte(enc.padChar)
	 167  		}
	 168  	case 1:
	 169  		if enc.padChar != NoPadding {
	 170  			dst[di+2] = byte(enc.padChar)
	 171  			dst[di+3] = byte(enc.padChar)
	 172  		}
	 173  	}
	 174  }
	 175  
	 176  // EncodeToString returns the base64 encoding of src.
	 177  func (enc *Encoding) EncodeToString(src []byte) string {
	 178  	buf := make([]byte, enc.EncodedLen(len(src)))
	 179  	enc.Encode(buf, src)
	 180  	return string(buf)
	 181  }
	 182  
	 183  type encoder struct {
	 184  	err	error
	 185  	enc	*Encoding
	 186  	w		io.Writer
	 187  	buf	[3]byte		// buffered data waiting to be encoded
	 188  	nbuf int				// number of bytes in buf
	 189  	out	[1024]byte // output buffer
	 190  }
	 191  
	 192  func (e *encoder) Write(p []byte) (n int, err error) {
	 193  	if e.err != nil {
	 194  		return 0, e.err
	 195  	}
	 196  
	 197  	// Leading fringe.
	 198  	if e.nbuf > 0 {
	 199  		var i int
	 200  		for i = 0; i < len(p) && e.nbuf < 3; i++ {
	 201  			e.buf[e.nbuf] = p[i]
	 202  			e.nbuf++
	 203  		}
	 204  		n += i
	 205  		p = p[i:]
	 206  		if e.nbuf < 3 {
	 207  			return
	 208  		}
	 209  		e.enc.Encode(e.out[:], e.buf[:])
	 210  		if _, e.err = e.w.Write(e.out[:4]); e.err != nil {
	 211  			return n, e.err
	 212  		}
	 213  		e.nbuf = 0
	 214  	}
	 215  
	 216  	// Large interior chunks.
	 217  	for len(p) >= 3 {
	 218  		nn := len(e.out) / 4 * 3
	 219  		if nn > len(p) {
	 220  			nn = len(p)
	 221  			nn -= nn % 3
	 222  		}
	 223  		e.enc.Encode(e.out[:], p[:nn])
	 224  		if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil {
	 225  			return n, e.err
	 226  		}
	 227  		n += nn
	 228  		p = p[nn:]
	 229  	}
	 230  
	 231  	// Trailing fringe.
	 232  	for i := 0; i < len(p); i++ {
	 233  		e.buf[i] = p[i]
	 234  	}
	 235  	e.nbuf = len(p)
	 236  	n += len(p)
	 237  	return
	 238  }
	 239  
	 240  // Close flushes any pending output from the encoder.
	 241  // It is an error to call Write after calling Close.
	 242  func (e *encoder) Close() error {
	 243  	// If there's anything left in the buffer, flush it out
	 244  	if e.err == nil && e.nbuf > 0 {
	 245  		e.enc.Encode(e.out[:], e.buf[:e.nbuf])
	 246  		_, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)])
	 247  		e.nbuf = 0
	 248  	}
	 249  	return e.err
	 250  }
	 251  
	 252  // NewEncoder returns a new base64 stream encoder. Data written to
	 253  // the returned writer will be encoded using enc and then written to w.
	 254  // Base64 encodings operate in 4-byte blocks; when finished
	 255  // writing, the caller must Close the returned encoder to flush any
	 256  // partially written blocks.
	 257  func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser {
	 258  	return &encoder{enc: enc, w: w}
	 259  }
	 260  
	 261  // EncodedLen returns the length in bytes of the base64 encoding
	 262  // of an input buffer of length n.
	 263  func (enc *Encoding) EncodedLen(n int) int {
	 264  	if enc.padChar == NoPadding {
	 265  		return (n*8 + 5) / 6 // minimum # chars at 6 bits per char
	 266  	}
	 267  	return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each
	 268  }
	 269  
	 270  /*
	 271   * Decoder
	 272   */
	 273  
	 // CorruptInputError is returned when base64 input is malformed. Its
	 // integer value is the input byte offset reported in the error message.
	 type CorruptInputError int64

	 // Error implements the error interface.
	 func (e CorruptInputError) Error() string {
	 	const prefix = "illegal base64 data at input byte "
	 	offset := strconv.FormatInt(int64(e), 10)
	 	return prefix + offset
	 }
	 279  
	 280  // decodeQuantum decodes up to 4 base64 bytes. The received parameters are
	 281  // the destination buffer dst, the source buffer src and an index in the
	 282  // source buffer si.
	 283  // It returns the updated index into src (just past the last byte it
	 284  // consumed), the number of bytes written to dst, and an error, if any.
	 //
	 // Newline characters ('\r', '\n') in src are skipped and do not count
	 // towards the 4 digits. When the input ends or padding is reached before
	 // 4 digits were collected, only the bytes covered by the digits seen are
	 // reported as written. In strict mode, non-zero leftover bits of such a
	 // short quantum are reported as a CorruptInputError.
	 285  func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) {
	 286  	// Decode quantum using the base64 alphabet
	 287  	var dbuf [4]byte
	 	// dlen is the number of base64 digits collected; a full quantum has 4.
	 288  	dlen := 4
	 289  
	 290  	// Lift the nil check outside of the loop.
	 291  	_ = enc.decodeMap
	 292  
	 293  	for j := 0; j < len(dbuf); j++ {
	 294  		if len(src) == si {
	 	 		// The input ran out before the quantum was complete.
	 295  			switch {
	 296  			case j == 0:
	 	 			// Nothing was consumed for this quantum: clean end of input.
	 297  				return si, 0, nil
	 298  			case j == 1, enc.padChar != NoPadding:
	 	 			// A single digit cannot form a byte, and a padded encoding
	 	 			// must not stop in the middle of a quantum.
	 299  				return si, 0, CorruptInputError(si - j)
	 300  			}
	 	 		// Unpadded encoding ending after 2 or 3 digits.
	 301  			dlen = j
	 302  			break
	 303  		}
	 304  		in := src[si]
	 305  		si++
	 306  
	 307  		out := enc.decodeMap[in]
	 308  		if out != 0xff {
	 309  			dbuf[j] = out
	 310  			continue
	 311  		}
	 312  
	 313  		if in == '\n' || in == '\r' {
	 	 		// Newlines are skipped: undo the loop increment so that this
	 	 		// digit slot is filled by the next input byte.
	 314  			j--
	 315  			continue
	 316  		}
	 317  
	 318  		if rune(in) != enc.padChar {
	 319  			return si, 0, CorruptInputError(si - 1)
	 320  		}
	 321  
	 322  		// We've reached the end and there's padding
	 323  		switch j {
	 324  		case 0, 1:
	 325  			// incorrect padding
	 326  			return si, 0, CorruptInputError(si - 1)
	 327  		case 2:
	 328  			// "==" is expected, the first "=" is already consumed.
	 329  			// skip over newlines
	 330  			for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
	 331  				si++
	 332  			}
	 333  			if si == len(src) {
	 334  				// not enough padding
	 335  				return si, 0, CorruptInputError(len(src))
	 336  			}
	 337  			if rune(src[si]) != enc.padChar {
	 338  				// incorrect padding
	 339  				return si, 0, CorruptInputError(si - 1)
	 340  			}
	 341  
	 342  			si++
	 343  		}
	 344  
	 345  		// skip over newlines
	 346  		for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
	 347  			si++
	 348  		}
	 349  		if si < len(src) {
	 350  			// trailing garbage
	 	 		// The error is only recorded here; the digits collected so far
	 	 		// are still converted below and err is returned with them.
	 351  			err = CorruptInputError(si)
	 352  		}
	 353  		dlen = j
	 354  		break
	 355  	}
	 356  
	 357  	// Convert 4x 6bit source bytes into 3 bytes
	 358  	val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
	 359  	dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
	 	// Emit bytes from last to first via fallthrough. Each slot is zeroed
	 	// once its byte has been written, so the strict checks below only see
	 	// bits belonging to bytes that were not emitted.
	 360  	switch dlen {
	 361  	case 4:
	 362  		dst[2] = dbuf[2]
	 363  		dbuf[2] = 0
	 364  		fallthrough
	 365  	case 3:
	 366  		dst[1] = dbuf[1]
	 367  		if enc.strict && dbuf[2] != 0 {
	 368  			return si, 0, CorruptInputError(si - 1)
	 369  		}
	 370  		dbuf[1] = 0
	 371  		fallthrough
	 372  	case 2:
	 373  		dst[0] = dbuf[0]
	 374  		if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
	 375  			return si, 0, CorruptInputError(si - 2)
	 376  		}
	 377  	}
	 378  
	 	// dlen digits yield dlen-1 decoded bytes.
	 379  	return si, dlen - 1, err
	 380  }
	 381  
	 382  // DecodeString returns the bytes represented by the base64 string s.
	 383  func (enc *Encoding) DecodeString(s string) ([]byte, error) {
	 384  	dbuf := make([]byte, enc.DecodedLen(len(s)))
	 385  	n, err := enc.Decode(dbuf, []byte(s))
	 386  	return dbuf[:n], err
	 387  }
	 388  
	 389  type decoder struct {
	 390  	err		 error
	 391  	readErr error // error from r.Read
	 392  	enc		 *Encoding
	 393  	r			 io.Reader
	 394  	buf		 [1024]byte // leftover input
	 395  	nbuf		int
	 396  	out		 []byte // leftover decoded output
	 397  	outbuf	[1024 / 4 * 3]byte
	 398  }
	 399  
	 // Read decodes base64 data from the underlying reader into p.
	 // Decoded bytes that did not fit into p on an earlier call are returned
	 // first. Once an error has been stored in d.err and the leftover output
	 // has been drained, that error is returned by every later call.
	 400  func (d *decoder) Read(p []byte) (n int, err error) {
	 401  	// Use leftover decoded output from last read.
	 402  	if len(d.out) > 0 {
	 403  		n = copy(p, d.out)
	 404  		d.out = d.out[n:]
	 405  		return n, nil
	 406  	}
	 407  
	 408  	if d.err != nil {
	 409  		return 0, d.err
	 410  	}
	 411  
	 412  	// This code assumes that d.r strips supported whitespace ('\r' and '\n').
	 413  
	 414  	// Refill buffer.
	 	// Keep reading until at least one full 4-byte quantum is buffered or the
	 	// reader reports an error. The read size is derived from len(p) and
	 	// clamped to the range [4, len(d.buf)].
	 415  	for d.nbuf < 4 && d.readErr == nil {
	 416  		nn := len(p) / 3 * 4
	 417  		if nn < 4 {
	 418  			nn = 4
	 419  		}
	 420  		if nn > len(d.buf) {
	 421  			nn = len(d.buf)
	 422  		}
	 423  		nn, d.readErr = d.r.Read(d.buf[d.nbuf:nn])
	 424  		d.nbuf += nn
	 425  	}
	 426  
	 	// Fewer than 4 bytes are buffered, so the reader has failed or ended.
	 427  	if d.nbuf < 4 {
	 428  		if d.enc.padChar == NoPadding && d.nbuf > 0 {
	 429  			// Decode final fragment, without padding.
	 430  			var nw int
	 431  			nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf])
	 432  			d.nbuf = 0
	 433  			d.out = d.outbuf[:nw]
	 434  			n = copy(p, d.out)
	 435  			d.out = d.out[n:]
	 	 		// Hand back decoded data first; a pending d.err is reported by
	 	 		// a later call.
	 436  			if n > 0 || len(p) == 0 && len(d.out) > 0 {
	 437  				return n, nil
	 438  			}
	 439  			if d.err != nil {
	 440  				return 0, d.err
	 441  			}
	 442  		}
	 443  		d.err = d.readErr
	 	 	// End of input with an incomplete quantum still buffered.
	 444  		if d.err == io.EOF && d.nbuf > 0 {
	 445  			d.err = io.ErrUnexpectedEOF
	 446  		}
	 447  		return 0, d.err
	 448  	}
	 449  
	 450  	// Decode chunk into p, or d.out and then p if p is too small.
	 	// nr is the number of buffered input bytes forming whole quanta and nw
	 	// is the number of bytes they can decode to.
	 451  	nr := d.nbuf / 4 * 4
	 452  	nw := d.nbuf / 4 * 3
	 453  	if nw > len(p) {
	 454  		nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr])
	 455  		d.out = d.outbuf[:nw]
	 456  		n = copy(p, d.out)
	 457  		d.out = d.out[n:]
	 458  	} else {
	 459  		n, d.err = d.enc.Decode(p, d.buf[:nr])
	 460  	}
	 	// Move the undecoded tail of the input to the front of the buffer.
	 461  	d.nbuf -= nr
	 462  	copy(d.buf[:d.nbuf], d.buf[nr:])
	 463  	return n, d.err
	 464  }
	 465  
	 466  // Decode decodes src using the encoding enc. It writes at most
	 467  // DecodedLen(len(src)) bytes to dst and returns the number of bytes
	 468  // written. If src contains invalid base64 data, it will return the
	 469  // number of bytes successfully written and CorruptInputError.
	 470  // New line characters (\r and \n) are ignored.
	 471  func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
	 472  	if len(src) == 0 {
	 473  		return 0, nil
	 474  	}
	 475  
	 476  	// Lift the nil check outside of the loop. enc.decodeMap is directly
	 477  	// used later in this function, to let the compiler know that the
	 478  	// receiver can't be nil.
	 479  	_ = enc.decodeMap
	 480  
	 481  	si := 0
	 482  	for strconv.IntSize >= 64 && len(src)-si >= 8 && len(dst)-n >= 8 {
	 483  		src2 := src[si : si+8]
	 484  		if dn, ok := assemble64(
	 485  			enc.decodeMap[src2[0]],
	 486  			enc.decodeMap[src2[1]],
	 487  			enc.decodeMap[src2[2]],
	 488  			enc.decodeMap[src2[3]],
	 489  			enc.decodeMap[src2[4]],
	 490  			enc.decodeMap[src2[5]],
	 491  			enc.decodeMap[src2[6]],
	 492  			enc.decodeMap[src2[7]],
	 493  		); ok {
	 494  			binary.BigEndian.PutUint64(dst[n:], dn)
	 495  			n += 6
	 496  			si += 8
	 497  		} else {
	 498  			var ninc int
	 499  			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
	 500  			n += ninc
	 501  			if err != nil {
	 502  				return n, err
	 503  			}
	 504  		}
	 505  	}
	 506  
	 507  	for len(src)-si >= 4 && len(dst)-n >= 4 {
	 508  		src2 := src[si : si+4]
	 509  		if dn, ok := assemble32(
	 510  			enc.decodeMap[src2[0]],
	 511  			enc.decodeMap[src2[1]],
	 512  			enc.decodeMap[src2[2]],
	 513  			enc.decodeMap[src2[3]],
	 514  		); ok {
	 515  			binary.BigEndian.PutUint32(dst[n:], dn)
	 516  			n += 3
	 517  			si += 4
	 518  		} else {
	 519  			var ninc int
	 520  			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
	 521  			n += ninc
	 522  			if err != nil {
	 523  				return n, err
	 524  			}
	 525  		}
	 526  	}
	 527  
	 528  	for si < len(src) {
	 529  		var ninc int
	 530  		si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
	 531  		n += ninc
	 532  		if err != nil {
	 533  			return n, err
	 534  		}
	 535  	}
	 536  	return n, err
	 537  }
	 538  
	 // assemble32 packs 4 base64 digits into the upper 3 bytes of a uint32.
	 // Each digit comes from the decode map, and will be 0xff
	 // if it came from an invalid character; ok is false in that case.
	 func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) {
	 	// Every input is either a valid digit or the 0xff marker, so the
	 	// bitwise OR of all inputs is 0xff exactly when one of them is invalid.
	 	if (n1 | n2 | n3 | n4) == 0xff {
	 		return 0, false
	 	}
	 	// Build the 24-bit value first, then move it into the top three bytes.
	 	dn = uint32(n1)<<18 | uint32(n2)<<12 | uint32(n3)<<6 | uint32(n4)
	 	return dn << 8, true
	 }
	 554  
	 // assemble64 packs 8 base64 digits into the upper 6 bytes of a uint64.
	 // Each digit comes from the decode map, and will be 0xff
	 // if it came from an invalid character; ok is false in that case.
	 func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) {
	 	// Every input is either a valid digit or the 0xff marker, so the
	 	// bitwise OR of all inputs is 0xff exactly when one of them is invalid.
	 	if (n1 | n2 | n3 | n4 | n5 | n6 | n7 | n8) == 0xff {
	 		return 0, false
	 	}
	 	// Build the 48-bit value first, then move it into the top six bytes.
	 	dn = uint64(n1)<<42 |
	 		uint64(n2)<<36 |
	 		uint64(n3)<<30 |
	 		uint64(n4)<<24 |
	 		uint64(n5)<<18 |
	 		uint64(n6)<<12 |
	 		uint64(n7)<<6 |
	 		uint64(n8)
	 	return dn << 16, true
	 }
	 574  
	 // newlineFilteringReader wraps an io.Reader and removes '\r' and '\n'
	 // bytes from the data it returns.
	 type newlineFilteringReader struct {
	 	wrapped io.Reader
	 }

	 // Read reads from the wrapped reader into p and compacts the data in place
	 // so that no '\r' or '\n' bytes remain. It returns the number of bytes
	 // kept and the error reported by the wrapped reader.
	 //
	 // A chunk consisting only of newlines is skipped by reading again, but
	 // only while the wrapped reader reports no error: an error is always
	 // passed on right away, so it is never dropped and the wrapped reader is
	 // not called again after it has failed.
	 func (r *newlineFilteringReader) Read(p []byte) (int, error) {
	 	n, err := r.wrapped.Read(p)
	 	for n > 0 {
	 		kept := 0
	 		for i, b := range p[:n] {
	 			if b == '\r' || b == '\n' {
	 				continue
	 			}
	 			if i != kept {
	 				p[kept] = b
	 			}
	 			kept++
	 		}
	 		if err != nil || kept > 0 {
	 			return kept, err
	 		}
	 		// Previous buffer entirely whitespace, read again
	 		n, err = r.wrapped.Read(p)
	 	}
	 	return n, err
	 }
	 599  
	 600  // NewDecoder constructs a new base64 stream decoder.
	 601  func NewDecoder(enc *Encoding, r io.Reader) io.Reader {
	 602  	return &decoder{enc: enc, r: &newlineFilteringReader{r}}
	 603  }
	 604  
	 605  // DecodedLen returns the maximum length in bytes of the decoded data
	 606  // corresponding to n bytes of base64-encoded data.
	 607  func (enc *Encoding) DecodedLen(n int) int {
	 608  	if enc.padChar == NoPadding {
	 609  		// Unpadded data may end with partial block of 2-3 characters.
	 610  		return n * 6 / 8
	 611  	}
	 612  	// Padded base64 should always be a multiple of 4 characters in length.
	 613  	return n / 4 * 3
	 614  }
	 615
View as plain text