dict_decoder.go

Documentation: compress/flate

		 1  // Copyright 2016 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package flate
		 6  
		 7  // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
		 8  // LZ77 decompresses data through sequences of two forms of commands:
		 9  //
		10  //	* Literal insertions: Runs of one or more symbols are inserted into the data
		11  //	stream as is. This is accomplished through the writeByte method for a
		12  //	single symbol, or combinations of writeSlice/writeMark for multiple symbols.
		13  //	Any valid stream must start with a literal insertion if no preset dictionary
		14  //	is used.
		15  //
		16  //	* Backward copies: Runs of one or more symbols are copied from previously
		17  //	emitted data. Backward copies come as the tuple (dist, length) where dist
		18  //	determines how far back in the stream to copy from and length determines how
		19  //	many bytes to copy. Note that it is valid for the length to be greater than
		20  //	the distance. Since LZ77 uses forward copies, that situation is used to
		21  //	perform a form of run-length encoding on repeated runs of symbols.
		22  //	The writeCopy and tryWriteCopy are used to implement this command.
		23  //
		24  // For performance reasons, this implementation performs little to no sanity
		25  // checks about the arguments. As such, the invariants documented for each
		26  // method call must be respected.
		27  type dictDecoder struct {
		28  	hist []byte // Sliding window history
		29  
		30  	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
		31  	wrPos int	// Current output position in buffer
		32  	rdPos int	// Have emitted hist[:rdPos] already
		33  	full	bool // Has a full window length been written yet?
		34  }
		35  
		36  // init initializes dictDecoder to have a sliding window dictionary of the given
		37  // size. If a preset dict is provided, it will initialize the dictionary with
		38  // the contents of dict.
		39  func (dd *dictDecoder) init(size int, dict []byte) {
		40  	*dd = dictDecoder{hist: dd.hist}
		41  
		42  	if cap(dd.hist) < size {
		43  		dd.hist = make([]byte, size)
		44  	}
		45  	dd.hist = dd.hist[:size]
		46  
		47  	if len(dict) > len(dd.hist) {
		48  		dict = dict[len(dict)-len(dd.hist):]
		49  	}
		50  	dd.wrPos = copy(dd.hist, dict)
		51  	if dd.wrPos == len(dd.hist) {
		52  		dd.wrPos = 0
		53  		dd.full = true
		54  	}
		55  	dd.rdPos = dd.wrPos
		56  }
		57  
		58  // histSize reports the total amount of historical data in the dictionary.
		59  func (dd *dictDecoder) histSize() int {
		60  	if dd.full {
		61  		return len(dd.hist)
		62  	}
		63  	return dd.wrPos
		64  }
		65  
		66  // availRead reports the number of bytes that can be flushed by readFlush.
		67  func (dd *dictDecoder) availRead() int {
		68  	return dd.wrPos - dd.rdPos
		69  }
		70  
		71  // availWrite reports the available amount of output buffer space.
		72  func (dd *dictDecoder) availWrite() int {
		73  	return len(dd.hist) - dd.wrPos
		74  }
		75  
		76  // writeSlice returns a slice of the available buffer to write data to.
		77  //
		78  // This invariant will be kept: len(s) <= availWrite()
		79  func (dd *dictDecoder) writeSlice() []byte {
		80  	return dd.hist[dd.wrPos:]
		81  }
		82  
		83  // writeMark advances the writer pointer by cnt.
		84  //
		85  // This invariant must be kept: 0 <= cnt <= availWrite()
		86  func (dd *dictDecoder) writeMark(cnt int) {
		87  	dd.wrPos += cnt
		88  }
		89  
		90  // writeByte writes a single byte to the dictionary.
		91  //
		92  // This invariant must be kept: 0 < availWrite()
		93  func (dd *dictDecoder) writeByte(c byte) {
		94  	dd.hist[dd.wrPos] = c
		95  	dd.wrPos++
		96  }
		97  
		98  // writeCopy copies a string at a given (dist, length) to the output.
		99  // This returns the number of bytes copied and may be less than the requested
	 100  // length if the available space in the output buffer is too small.
	 101  //
	 102  // This invariant must be kept: 0 < dist <= histSize()
	 103  func (dd *dictDecoder) writeCopy(dist, length int) int {
	 104  	dstBase := dd.wrPos
	 105  	dstPos := dstBase
	 106  	srcPos := dstPos - dist
	 107  	endPos := dstPos + length
	 108  	if endPos > len(dd.hist) {
	 109  		endPos = len(dd.hist)
	 110  	}
	 111  
	 112  	// Copy non-overlapping section after destination position.
	 113  	//
	 114  	// This section is non-overlapping in that the copy length for this section
	 115  	// is always less than or equal to the backwards distance. This can occur
	 116  	// if a distance refers to data that wraps-around in the buffer.
	 117  	// Thus, a backwards copy is performed here; that is, the exact bytes in
	 118  	// the source prior to the copy is placed in the destination.
	 119  	if srcPos < 0 {
	 120  		srcPos += len(dd.hist)
	 121  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
	 122  		srcPos = 0
	 123  	}
	 124  
	 125  	// Copy possibly overlapping section before destination position.
	 126  	//
	 127  	// This section can overlap if the copy length for this section is larger
	 128  	// than the backwards distance. This is allowed by LZ77 so that repeated
	 129  	// strings can be succinctly represented using (dist, length) pairs.
	 130  	// Thus, a forwards copy is performed here; that is, the bytes copied is
	 131  	// possibly dependent on the resulting bytes in the destination as the copy
	 132  	// progresses along. This is functionally equivalent to the following:
	 133  	//
	 134  	//	for i := 0; i < endPos-dstPos; i++ {
	 135  	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
	 136  	//	}
	 137  	//	dstPos = endPos
	 138  	//
	 139  	for dstPos < endPos {
	 140  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
	 141  	}
	 142  
	 143  	dd.wrPos = dstPos
	 144  	return dstPos - dstBase
	 145  }
	 146  
	 147  // tryWriteCopy tries to copy a string at a given (distance, length) to the
	 148  // output. This specialized version is optimized for short distances.
	 149  //
	 150  // This method is designed to be inlined for performance reasons.
	 151  //
	 152  // This invariant must be kept: 0 < dist <= histSize()
	 153  func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
	 154  	dstPos := dd.wrPos
	 155  	endPos := dstPos + length
	 156  	if dstPos < dist || endPos > len(dd.hist) {
	 157  		return 0
	 158  	}
	 159  	dstBase := dstPos
	 160  	srcPos := dstPos - dist
	 161  
	 162  	// Copy possibly overlapping section before destination position.
	 163  	for dstPos < endPos {
	 164  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
	 165  	}
	 166  
	 167  	dd.wrPos = dstPos
	 168  	return dstPos - dstBase
	 169  }
	 170  
	 171  // readFlush returns a slice of the historical buffer that is ready to be
	 172  // emitted to the user. The data returned by readFlush must be fully consumed
	 173  // before calling any other dictDecoder methods.
	 174  func (dd *dictDecoder) readFlush() []byte {
	 175  	toRead := dd.hist[dd.rdPos:dd.wrPos]
	 176  	dd.rdPos = dd.wrPos
	 177  	if dd.wrPos == len(dd.hist) {
	 178  		dd.wrPos, dd.rdPos = 0, 0
	 179  		dd.full = true
	 180  	}
	 181  	return toRead
	 182  }
	 183
View as plain text