...

Source file src/compress/flate/inflate.go

Documentation: compress/flate

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Package flate implements the DEFLATE compressed data format, described in
		 6  // RFC 1951.	The gzip and zlib packages implement access to DEFLATE-based file
		 7  // formats.
		 8  package flate
		 9  
		10  import (
		11  	"bufio"
		12  	"io"
		13  	"math/bits"
		14  	"strconv"
		15  	"sync"
		16  )
		17  
		18  const (
		19  	maxCodeLen = 16 // max length of Huffman code
		20  	// The next three numbers come from the RFC section 3.2.7, with the
		21  	// additional proviso in section 3.2.5 which implies that distance codes
		22  	// 30 and 31 should never occur in compressed data.
		23  	maxNumLit	= 286
		24  	maxNumDist = 30
		25  	numCodes	 = 19 // number of codes in Huffman meta-code
		26  )
		27  
		28  // Initialize the fixedHuffmanDecoder only once upon first use.
		29  var fixedOnce sync.Once
		30  var fixedHuffmanDecoder huffmanDecoder
		31  
		32  // A CorruptInputError reports the presence of corrupt input at a given offset.
		33  type CorruptInputError int64
		34  
		35  func (e CorruptInputError) Error() string {
		36  	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
		37  }
		38  
		39  // An InternalError reports an error in the flate code itself.
		40  type InternalError string
		41  
		42  func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
		43  
		44  // A ReadError reports an error encountered while reading input.
		45  //
		46  // Deprecated: No longer returned.
		47  type ReadError struct {
		48  	Offset int64 // byte offset where error occurred
		49  	Err		error // error returned by underlying Read
		50  }
		51  
		52  func (e *ReadError) Error() string {
		53  	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
		54  }
		55  
		56  // A WriteError reports an error encountered while writing output.
		57  //
		58  // Deprecated: No longer returned.
		59  type WriteError struct {
		60  	Offset int64 // byte offset where error occurred
		61  	Err		error // error returned by underlying Write
		62  }
		63  
		64  func (e *WriteError) Error() string {
		65  	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
		66  }
		67  
		68  // Resetter resets a ReadCloser returned by NewReader or NewReaderDict
		69  // to switch to a new underlying Reader. This permits reusing a ReadCloser
		70  // instead of allocating a new one.
		71  type Resetter interface {
		72  	// Reset discards any buffered data and resets the Resetter as if it was
		73  	// newly initialized with the given reader.
		74  	Reset(r io.Reader, dict []byte) error
		75  }
		76  
		77  // The data structure for decoding Huffman tables is based on that of
		78  // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
		79  // For codes smaller than the table width, there are multiple entries
		80  // (each combination of trailing bits has the same value). For codes
		81  // larger than the table width, the table contains a link to an overflow
		82  // table. The width of each entry in the link table is the maximum code
		83  // size minus the chunk width.
		84  //
		85  // Note that you can do a lookup in the table even without all bits
		86  // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
		87  // have the property that shorter codes come before longer ones, the
		88  // bit length estimate in the result is a lower bound on the actual
		89  // number of bits.
		90  //
		91  // See the following:
		92  //	https://github.com/madler/zlib/raw/master/doc/algorithm.txt
		93  
		94  // chunk & 15 is number of bits
		95  // chunk >> 4 is value, including table link
		96  
		97  const (
		98  	huffmanChunkBits	= 9
		99  	huffmanNumChunks	= 1 << huffmanChunkBits
	 100  	huffmanCountMask	= 15
	 101  	huffmanValueShift = 4
	 102  )
	 103  
	 104  type huffmanDecoder struct {
	 105  	min			int											// the minimum code length
	 106  	chunks	 [huffmanNumChunks]uint32 // chunks as described above
	 107  	links		[][]uint32							 // overflow links
	 108  	linkMask uint32									 // mask the width of the link table
	 109  }
	 110  
	 111  // Initialize Huffman decoding tables from array of code lengths.
	 112  // Following this function, h is guaranteed to be initialized into a complete
	 113  // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
	 114  // degenerate case where the tree has only a single symbol with length 1. Empty
	 115  // trees are permitted.
	 116  func (h *huffmanDecoder) init(lengths []int) bool {
	 117  	// Sanity enables additional runtime tests during Huffman
	 118  	// table construction. It's intended to be used during
	 119  	// development to supplement the currently ad-hoc unit tests.
	 120  	const sanity = false
	 121  
	 122  	if h.min != 0 {
	 123  		*h = huffmanDecoder{}
	 124  	}
	 125  
	 126  	// Count number of codes of each length,
	 127  	// compute min and max length.
	 128  	var count [maxCodeLen]int
	 129  	var min, max int
	 130  	for _, n := range lengths {
	 131  		if n == 0 {
	 132  			continue
	 133  		}
	 134  		if min == 0 || n < min {
	 135  			min = n
	 136  		}
	 137  		if n > max {
	 138  			max = n
	 139  		}
	 140  		count[n]++
	 141  	}
	 142  
	 143  	// Empty tree. The decompressor.huffSym function will fail later if the tree
	 144  	// is used. Technically, an empty tree is only valid for the HDIST tree and
	 145  	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
	 146  	// is guaranteed to fail since it will attempt to use the tree to decode the
	 147  	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
	 148  	// guaranteed to fail later since the compressed data section must be
	 149  	// composed of at least one symbol (the end-of-block marker).
	 150  	if max == 0 {
	 151  		return true
	 152  	}
	 153  
	 154  	code := 0
	 155  	var nextcode [maxCodeLen]int
	 156  	for i := min; i <= max; i++ {
	 157  		code <<= 1
	 158  		nextcode[i] = code
	 159  		code += count[i]
	 160  	}
	 161  
	 162  	// Check that the coding is complete (i.e., that we've
	 163  	// assigned all 2-to-the-max possible bit sequences).
	 164  	// Exception: To be compatible with zlib, we also need to
	 165  	// accept degenerate single-code codings. See also
	 166  	// TestDegenerateHuffmanCoding.
	 167  	if code != 1<<uint(max) && !(code == 1 && max == 1) {
	 168  		return false
	 169  	}
	 170  
	 171  	h.min = min
	 172  	if max > huffmanChunkBits {
	 173  		numLinks := 1 << (uint(max) - huffmanChunkBits)
	 174  		h.linkMask = uint32(numLinks - 1)
	 175  
	 176  		// create link tables
	 177  		link := nextcode[huffmanChunkBits+1] >> 1
	 178  		h.links = make([][]uint32, huffmanNumChunks-link)
	 179  		for j := uint(link); j < huffmanNumChunks; j++ {
	 180  			reverse := int(bits.Reverse16(uint16(j)))
	 181  			reverse >>= uint(16 - huffmanChunkBits)
	 182  			off := j - uint(link)
	 183  			if sanity && h.chunks[reverse] != 0 {
	 184  				panic("impossible: overwriting existing chunk")
	 185  			}
	 186  			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
	 187  			h.links[off] = make([]uint32, numLinks)
	 188  		}
	 189  	}
	 190  
	 191  	for i, n := range lengths {
	 192  		if n == 0 {
	 193  			continue
	 194  		}
	 195  		code := nextcode[n]
	 196  		nextcode[n]++
	 197  		chunk := uint32(i<<huffmanValueShift | n)
	 198  		reverse := int(bits.Reverse16(uint16(code)))
	 199  		reverse >>= uint(16 - n)
	 200  		if n <= huffmanChunkBits {
	 201  			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
	 202  				// We should never need to overwrite
	 203  				// an existing chunk. Also, 0 is
	 204  				// never a valid chunk, because the
	 205  				// lower 4 "count" bits should be
	 206  				// between 1 and 15.
	 207  				if sanity && h.chunks[off] != 0 {
	 208  					panic("impossible: overwriting existing chunk")
	 209  				}
	 210  				h.chunks[off] = chunk
	 211  			}
	 212  		} else {
	 213  			j := reverse & (huffmanNumChunks - 1)
	 214  			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
	 215  				// Longer codes should have been
	 216  				// associated with a link table above.
	 217  				panic("impossible: not an indirect chunk")
	 218  			}
	 219  			value := h.chunks[j] >> huffmanValueShift
	 220  			linktab := h.links[value]
	 221  			reverse >>= huffmanChunkBits
	 222  			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
	 223  				if sanity && linktab[off] != 0 {
	 224  					panic("impossible: overwriting existing chunk")
	 225  				}
	 226  				linktab[off] = chunk
	 227  			}
	 228  		}
	 229  	}
	 230  
	 231  	if sanity {
	 232  		// Above we've sanity checked that we never overwrote
	 233  		// an existing entry. Here we additionally check that
	 234  		// we filled the tables completely.
	 235  		for i, chunk := range h.chunks {
	 236  			if chunk == 0 {
	 237  				// As an exception, in the degenerate
	 238  				// single-code case, we allow odd
	 239  				// chunks to be missing.
	 240  				if code == 1 && i%2 == 1 {
	 241  					continue
	 242  				}
	 243  				panic("impossible: missing chunk")
	 244  			}
	 245  		}
	 246  		for _, linktab := range h.links {
	 247  			for _, chunk := range linktab {
	 248  				if chunk == 0 {
	 249  					panic("impossible: missing chunk")
	 250  				}
	 251  			}
	 252  		}
	 253  	}
	 254  
	 255  	return true
	 256  }
	 257  
	 258  // The actual read interface needed by NewReader.
	 259  // If the passed in io.Reader does not also have ReadByte,
	 260  // the NewReader will introduce its own buffering.
	 261  type Reader interface {
	 262  	io.Reader
	 263  	io.ByteReader
	 264  }
	 265  
	 266  // Decompress state.
	 267  type decompressor struct {
	 268  	// Input source.
	 269  	r			 Reader
	 270  	roffset int64
	 271  
	 272  	// Input bits, in top of b.
	 273  	b	uint32
	 274  	nb uint
	 275  
	 276  	// Huffman decoders for literal/length, distance.
	 277  	h1, h2 huffmanDecoder
	 278  
	 279  	// Length arrays used to define Huffman codes.
	 280  	bits		 *[maxNumLit + maxNumDist]int
	 281  	codebits *[numCodes]int
	 282  
	 283  	// Output history, buffer.
	 284  	dict dictDecoder
	 285  
	 286  	// Temporary buffer (avoids repeated allocation).
	 287  	buf [4]byte
	 288  
	 289  	// Next step in the decompression,
	 290  	// and decompression state.
	 291  	step			func(*decompressor)
	 292  	stepState int
	 293  	final		 bool
	 294  	err			 error
	 295  	toRead		[]byte
	 296  	hl, hd		*huffmanDecoder
	 297  	copyLen	 int
	 298  	copyDist	int
	 299  }
	 300  
	 301  func (f *decompressor) nextBlock() {
	 302  	for f.nb < 1+2 {
	 303  		if f.err = f.moreBits(); f.err != nil {
	 304  			return
	 305  		}
	 306  	}
	 307  	f.final = f.b&1 == 1
	 308  	f.b >>= 1
	 309  	typ := f.b & 3
	 310  	f.b >>= 2
	 311  	f.nb -= 1 + 2
	 312  	switch typ {
	 313  	case 0:
	 314  		f.dataBlock()
	 315  	case 1:
	 316  		// compressed, fixed Huffman tables
	 317  		f.hl = &fixedHuffmanDecoder
	 318  		f.hd = nil
	 319  		f.huffmanBlock()
	 320  	case 2:
	 321  		// compressed, dynamic Huffman tables
	 322  		if f.err = f.readHuffman(); f.err != nil {
	 323  			break
	 324  		}
	 325  		f.hl = &f.h1
	 326  		f.hd = &f.h2
	 327  		f.huffmanBlock()
	 328  	default:
	 329  		// 3 is reserved.
	 330  		f.err = CorruptInputError(f.roffset)
	 331  	}
	 332  }
	 333  
	 334  func (f *decompressor) Read(b []byte) (int, error) {
	 335  	for {
	 336  		if len(f.toRead) > 0 {
	 337  			n := copy(b, f.toRead)
	 338  			f.toRead = f.toRead[n:]
	 339  			if len(f.toRead) == 0 {
	 340  				return n, f.err
	 341  			}
	 342  			return n, nil
	 343  		}
	 344  		if f.err != nil {
	 345  			return 0, f.err
	 346  		}
	 347  		f.step(f)
	 348  		if f.err != nil && len(f.toRead) == 0 {
	 349  			f.toRead = f.dict.readFlush() // Flush what's left in case of error
	 350  		}
	 351  	}
	 352  }
	 353  
	 354  func (f *decompressor) Close() error {
	 355  	if f.err == io.EOF {
	 356  		return nil
	 357  	}
	 358  	return f.err
	 359  }
	 360  
	 361  // RFC 1951 section 3.2.7.
	 362  // Compression with dynamic Huffman codes
	 363  
	 364  var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
	 365  
	 366  func (f *decompressor) readHuffman() error {
	 367  	// HLIT[5], HDIST[5], HCLEN[4].
	 368  	for f.nb < 5+5+4 {
	 369  		if err := f.moreBits(); err != nil {
	 370  			return err
	 371  		}
	 372  	}
	 373  	nlit := int(f.b&0x1F) + 257
	 374  	if nlit > maxNumLit {
	 375  		return CorruptInputError(f.roffset)
	 376  	}
	 377  	f.b >>= 5
	 378  	ndist := int(f.b&0x1F) + 1
	 379  	if ndist > maxNumDist {
	 380  		return CorruptInputError(f.roffset)
	 381  	}
	 382  	f.b >>= 5
	 383  	nclen := int(f.b&0xF) + 4
	 384  	// numCodes is 19, so nclen is always valid.
	 385  	f.b >>= 4
	 386  	f.nb -= 5 + 5 + 4
	 387  
	 388  	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
	 389  	for i := 0; i < nclen; i++ {
	 390  		for f.nb < 3 {
	 391  			if err := f.moreBits(); err != nil {
	 392  				return err
	 393  			}
	 394  		}
	 395  		f.codebits[codeOrder[i]] = int(f.b & 0x7)
	 396  		f.b >>= 3
	 397  		f.nb -= 3
	 398  	}
	 399  	for i := nclen; i < len(codeOrder); i++ {
	 400  		f.codebits[codeOrder[i]] = 0
	 401  	}
	 402  	if !f.h1.init(f.codebits[0:]) {
	 403  		return CorruptInputError(f.roffset)
	 404  	}
	 405  
	 406  	// HLIT + 257 code lengths, HDIST + 1 code lengths,
	 407  	// using the code length Huffman code.
	 408  	for i, n := 0, nlit+ndist; i < n; {
	 409  		x, err := f.huffSym(&f.h1)
	 410  		if err != nil {
	 411  			return err
	 412  		}
	 413  		if x < 16 {
	 414  			// Actual length.
	 415  			f.bits[i] = x
	 416  			i++
	 417  			continue
	 418  		}
	 419  		// Repeat previous length or zero.
	 420  		var rep int
	 421  		var nb uint
	 422  		var b int
	 423  		switch x {
	 424  		default:
	 425  			return InternalError("unexpected length code")
	 426  		case 16:
	 427  			rep = 3
	 428  			nb = 2
	 429  			if i == 0 {
	 430  				return CorruptInputError(f.roffset)
	 431  			}
	 432  			b = f.bits[i-1]
	 433  		case 17:
	 434  			rep = 3
	 435  			nb = 3
	 436  			b = 0
	 437  		case 18:
	 438  			rep = 11
	 439  			nb = 7
	 440  			b = 0
	 441  		}
	 442  		for f.nb < nb {
	 443  			if err := f.moreBits(); err != nil {
	 444  				return err
	 445  			}
	 446  		}
	 447  		rep += int(f.b & uint32(1<<nb-1))
	 448  		f.b >>= nb
	 449  		f.nb -= nb
	 450  		if i+rep > n {
	 451  			return CorruptInputError(f.roffset)
	 452  		}
	 453  		for j := 0; j < rep; j++ {
	 454  			f.bits[i] = b
	 455  			i++
	 456  		}
	 457  	}
	 458  
	 459  	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
	 460  		return CorruptInputError(f.roffset)
	 461  	}
	 462  
	 463  	// As an optimization, we can initialize the min bits to read at a time
	 464  	// for the HLIT tree to the length of the EOB marker since we know that
	 465  	// every block must terminate with one. This preserves the property that
	 466  	// we never read any extra bytes after the end of the DEFLATE stream.
	 467  	if f.h1.min < f.bits[endBlockMarker] {
	 468  		f.h1.min = f.bits[endBlockMarker]
	 469  	}
	 470  
	 471  	return nil
	 472  }
	 473  
	 474  // Decode a single Huffman block from f.
	 475  // hl and hd are the Huffman states for the lit/length values
	 476  // and the distance values, respectively. If hd == nil, using the
	 477  // fixed distance encoding associated with fixed Huffman blocks.
	 478  func (f *decompressor) huffmanBlock() {
	 479  	const (
	 480  		stateInit = iota // Zero value must be stateInit
	 481  		stateDict
	 482  	)
	 483  
	 484  	switch f.stepState {
	 485  	case stateInit:
	 486  		goto readLiteral
	 487  	case stateDict:
	 488  		goto copyHistory
	 489  	}
	 490  
	 491  readLiteral:
	 492  	// Read literal and/or (length, distance) according to RFC section 3.2.3.
	 493  	{
	 494  		v, err := f.huffSym(f.hl)
	 495  		if err != nil {
	 496  			f.err = err
	 497  			return
	 498  		}
	 499  		var n uint // number of bits extra
	 500  		var length int
	 501  		switch {
	 502  		case v < 256:
	 503  			f.dict.writeByte(byte(v))
	 504  			if f.dict.availWrite() == 0 {
	 505  				f.toRead = f.dict.readFlush()
	 506  				f.step = (*decompressor).huffmanBlock
	 507  				f.stepState = stateInit
	 508  				return
	 509  			}
	 510  			goto readLiteral
	 511  		case v == 256:
	 512  			f.finishBlock()
	 513  			return
	 514  		// otherwise, reference to older data
	 515  		case v < 265:
	 516  			length = v - (257 - 3)
	 517  			n = 0
	 518  		case v < 269:
	 519  			length = v*2 - (265*2 - 11)
	 520  			n = 1
	 521  		case v < 273:
	 522  			length = v*4 - (269*4 - 19)
	 523  			n = 2
	 524  		case v < 277:
	 525  			length = v*8 - (273*8 - 35)
	 526  			n = 3
	 527  		case v < 281:
	 528  			length = v*16 - (277*16 - 67)
	 529  			n = 4
	 530  		case v < 285:
	 531  			length = v*32 - (281*32 - 131)
	 532  			n = 5
	 533  		case v < maxNumLit:
	 534  			length = 258
	 535  			n = 0
	 536  		default:
	 537  			f.err = CorruptInputError(f.roffset)
	 538  			return
	 539  		}
	 540  		if n > 0 {
	 541  			for f.nb < n {
	 542  				if err = f.moreBits(); err != nil {
	 543  					f.err = err
	 544  					return
	 545  				}
	 546  			}
	 547  			length += int(f.b & uint32(1<<n-1))
	 548  			f.b >>= n
	 549  			f.nb -= n
	 550  		}
	 551  
	 552  		var dist int
	 553  		if f.hd == nil {
	 554  			for f.nb < 5 {
	 555  				if err = f.moreBits(); err != nil {
	 556  					f.err = err
	 557  					return
	 558  				}
	 559  			}
	 560  			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
	 561  			f.b >>= 5
	 562  			f.nb -= 5
	 563  		} else {
	 564  			if dist, err = f.huffSym(f.hd); err != nil {
	 565  				f.err = err
	 566  				return
	 567  			}
	 568  		}
	 569  
	 570  		switch {
	 571  		case dist < 4:
	 572  			dist++
	 573  		case dist < maxNumDist:
	 574  			nb := uint(dist-2) >> 1
	 575  			// have 1 bit in bottom of dist, need nb more.
	 576  			extra := (dist & 1) << nb
	 577  			for f.nb < nb {
	 578  				if err = f.moreBits(); err != nil {
	 579  					f.err = err
	 580  					return
	 581  				}
	 582  			}
	 583  			extra |= int(f.b & uint32(1<<nb-1))
	 584  			f.b >>= nb
	 585  			f.nb -= nb
	 586  			dist = 1<<(nb+1) + 1 + extra
	 587  		default:
	 588  			f.err = CorruptInputError(f.roffset)
	 589  			return
	 590  		}
	 591  
	 592  		// No check on length; encoding can be prescient.
	 593  		if dist > f.dict.histSize() {
	 594  			f.err = CorruptInputError(f.roffset)
	 595  			return
	 596  		}
	 597  
	 598  		f.copyLen, f.copyDist = length, dist
	 599  		goto copyHistory
	 600  	}
	 601  
	 602  copyHistory:
	 603  	// Perform a backwards copy according to RFC section 3.2.3.
	 604  	{
	 605  		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
	 606  		if cnt == 0 {
	 607  			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
	 608  		}
	 609  		f.copyLen -= cnt
	 610  
	 611  		if f.dict.availWrite() == 0 || f.copyLen > 0 {
	 612  			f.toRead = f.dict.readFlush()
	 613  			f.step = (*decompressor).huffmanBlock // We need to continue this work
	 614  			f.stepState = stateDict
	 615  			return
	 616  		}
	 617  		goto readLiteral
	 618  	}
	 619  }
	 620  
	 621  // Copy a single uncompressed data block from input to output.
	 622  func (f *decompressor) dataBlock() {
	 623  	// Uncompressed.
	 624  	// Discard current half-byte.
	 625  	f.nb = 0
	 626  	f.b = 0
	 627  
	 628  	// Length then ones-complement of length.
	 629  	nr, err := io.ReadFull(f.r, f.buf[0:4])
	 630  	f.roffset += int64(nr)
	 631  	if err != nil {
	 632  		f.err = noEOF(err)
	 633  		return
	 634  	}
	 635  	n := int(f.buf[0]) | int(f.buf[1])<<8
	 636  	nn := int(f.buf[2]) | int(f.buf[3])<<8
	 637  	if uint16(nn) != uint16(^n) {
	 638  		f.err = CorruptInputError(f.roffset)
	 639  		return
	 640  	}
	 641  
	 642  	if n == 0 {
	 643  		f.toRead = f.dict.readFlush()
	 644  		f.finishBlock()
	 645  		return
	 646  	}
	 647  
	 648  	f.copyLen = n
	 649  	f.copyData()
	 650  }
	 651  
	 652  // copyData copies f.copyLen bytes from the underlying reader into f.hist.
	 653  // It pauses for reads when f.hist is full.
	 654  func (f *decompressor) copyData() {
	 655  	buf := f.dict.writeSlice()
	 656  	if len(buf) > f.copyLen {
	 657  		buf = buf[:f.copyLen]
	 658  	}
	 659  
	 660  	cnt, err := io.ReadFull(f.r, buf)
	 661  	f.roffset += int64(cnt)
	 662  	f.copyLen -= cnt
	 663  	f.dict.writeMark(cnt)
	 664  	if err != nil {
	 665  		f.err = noEOF(err)
	 666  		return
	 667  	}
	 668  
	 669  	if f.dict.availWrite() == 0 || f.copyLen > 0 {
	 670  		f.toRead = f.dict.readFlush()
	 671  		f.step = (*decompressor).copyData
	 672  		return
	 673  	}
	 674  	f.finishBlock()
	 675  }
	 676  
	 677  func (f *decompressor) finishBlock() {
	 678  	if f.final {
	 679  		if f.dict.availRead() > 0 {
	 680  			f.toRead = f.dict.readFlush()
	 681  		}
	 682  		f.err = io.EOF
	 683  	}
	 684  	f.step = (*decompressor).nextBlock
	 685  }
	 686  
	 687  // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
	 688  func noEOF(e error) error {
	 689  	if e == io.EOF {
	 690  		return io.ErrUnexpectedEOF
	 691  	}
	 692  	return e
	 693  }
	 694  
	 695  func (f *decompressor) moreBits() error {
	 696  	c, err := f.r.ReadByte()
	 697  	if err != nil {
	 698  		return noEOF(err)
	 699  	}
	 700  	f.roffset++
	 701  	f.b |= uint32(c) << f.nb
	 702  	f.nb += 8
	 703  	return nil
	 704  }
	 705  
	 706  // Read the next Huffman-encoded symbol from f according to h.
	 707  func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
	 708  	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
	 709  	// with single element, huffSym must error on these two edge cases. In both
	 710  	// cases, the chunks slice will be 0 for the invalid sequence, leading it
	 711  	// satisfy the n == 0 check below.
	 712  	n := uint(h.min)
	 713  	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
	 714  	// but is smart enough to keep local variables in registers, so use nb and b,
	 715  	// inline call to moreBits and reassign b,nb back to f on return.
	 716  	nb, b := f.nb, f.b
	 717  	for {
	 718  		for nb < n {
	 719  			c, err := f.r.ReadByte()
	 720  			if err != nil {
	 721  				f.b = b
	 722  				f.nb = nb
	 723  				return 0, noEOF(err)
	 724  			}
	 725  			f.roffset++
	 726  			b |= uint32(c) << (nb & 31)
	 727  			nb += 8
	 728  		}
	 729  		chunk := h.chunks[b&(huffmanNumChunks-1)]
	 730  		n = uint(chunk & huffmanCountMask)
	 731  		if n > huffmanChunkBits {
	 732  			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
	 733  			n = uint(chunk & huffmanCountMask)
	 734  		}
	 735  		if n <= nb {
	 736  			if n == 0 {
	 737  				f.b = b
	 738  				f.nb = nb
	 739  				f.err = CorruptInputError(f.roffset)
	 740  				return 0, f.err
	 741  			}
	 742  			f.b = b >> (n & 31)
	 743  			f.nb = nb - n
	 744  			return int(chunk >> huffmanValueShift), nil
	 745  		}
	 746  	}
	 747  }
	 748  
	 749  func makeReader(r io.Reader) Reader {
	 750  	if rr, ok := r.(Reader); ok {
	 751  		return rr
	 752  	}
	 753  	return bufio.NewReader(r)
	 754  }
	 755  
	 756  func fixedHuffmanDecoderInit() {
	 757  	fixedOnce.Do(func() {
	 758  		// These come from the RFC section 3.2.6.
	 759  		var bits [288]int
	 760  		for i := 0; i < 144; i++ {
	 761  			bits[i] = 8
	 762  		}
	 763  		for i := 144; i < 256; i++ {
	 764  			bits[i] = 9
	 765  		}
	 766  		for i := 256; i < 280; i++ {
	 767  			bits[i] = 7
	 768  		}
	 769  		for i := 280; i < 288; i++ {
	 770  			bits[i] = 8
	 771  		}
	 772  		fixedHuffmanDecoder.init(bits[:])
	 773  	})
	 774  }
	 775  
	 776  func (f *decompressor) Reset(r io.Reader, dict []byte) error {
	 777  	*f = decompressor{
	 778  		r:				makeReader(r),
	 779  		bits:		 f.bits,
	 780  		codebits: f.codebits,
	 781  		dict:		 f.dict,
	 782  		step:		 (*decompressor).nextBlock,
	 783  	}
	 784  	f.dict.init(maxMatchOffset, dict)
	 785  	return nil
	 786  }
	 787  
	 788  // NewReader returns a new ReadCloser that can be used
	 789  // to read the uncompressed version of r.
	 790  // If r does not also implement io.ByteReader,
	 791  // the decompressor may read more data than necessary from r.
	 792  // It is the caller's responsibility to call Close on the ReadCloser
	 793  // when finished reading.
	 794  //
	 795  // The ReadCloser returned by NewReader also implements Resetter.
	 796  func NewReader(r io.Reader) io.ReadCloser {
	 797  	fixedHuffmanDecoderInit()
	 798  
	 799  	var f decompressor
	 800  	f.r = makeReader(r)
	 801  	f.bits = new([maxNumLit + maxNumDist]int)
	 802  	f.codebits = new([numCodes]int)
	 803  	f.step = (*decompressor).nextBlock
	 804  	f.dict.init(maxMatchOffset, nil)
	 805  	return &f
	 806  }
	 807  
	 808  // NewReaderDict is like NewReader but initializes the reader
	 809  // with a preset dictionary. The returned Reader behaves as if
	 810  // the uncompressed data stream started with the given dictionary,
	 811  // which has already been read. NewReaderDict is typically used
	 812  // to read data compressed by NewWriterDict.
	 813  //
	 814  // The ReadCloser returned by NewReader also implements Resetter.
	 815  func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
	 816  	fixedHuffmanDecoderInit()
	 817  
	 818  	var f decompressor
	 819  	f.r = makeReader(r)
	 820  	f.bits = new([maxNumLit + maxNumDist]int)
	 821  	f.codebits = new([numCodes]int)
	 822  	f.step = (*decompressor).nextBlock
	 823  	f.dict.init(maxMatchOffset, dict)
	 824  	return &f
	 825  }
	 826  

View as plain text