...

Source file src/compress/flate/deflatefast.go

Documentation: compress/flate

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import "math"

// This encoding algorithm, which prioritizes speed over output size, is
// based on Snappy's LZ77-style encoder: github.com/golang/snappy

const (
	tableBits  = 14             // Bits used in the table.
	tableSize  = 1 << tableBits // Size of the table.
	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.

	// Reset the buffer offset when reaching this.
	// Offsets are stored between blocks as int32 values.
	// Since the offset we are checking against is at the beginning
	// of the buffer, we need to subtract the current and input
	// buffer sizes to avoid any risk of overflowing the int32.
	bufferReset = math.MaxInt32 - maxStoreBlockSize*2
)
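To make the headroom concrete: maxStoreBlockSize is defined elsewhere in the package (65535 is assumed here), so bufferReset leaves room for two further maximum-size blocks before an int32 offset could overflow. A standalone sketch of the arithmetic, not part of the package:

package main

import (
	"fmt"
	"math"
)

func main() {
	const maxStoreBlockSize = 65535 // assumed value, defined elsewhere in compress/flate
	const bufferReset = math.MaxInt32 - maxStoreBlockSize*2

	fmt.Println(bufferReset)                                      // 2147352577
	fmt.Println(bufferReset+2*maxStoreBlockSize == math.MaxInt32) // true: two more blocks still fit
}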
func load32(b []byte, i int32) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int32) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// hash maps the four packed bytes in u to a table index by keeping the
// tableBits most significant bits of a multiplicative hash.
func hash(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> tableShift
}
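As an illustration of how a 4-byte window turns into a table index, here is a minimal standalone sketch (not part of the package) that repeats the load32 packing and the multiplicative hash; the result is always smaller than tableSize:

package main

import "fmt"

const (
	tableBits  = 14
	tableSize  = 1 << tableBits
	tableShift = 32 - tableBits
)

func main() {
	b := []byte("Go Gopher")
	// Pack the first four bytes little-endian, as load32 does.
	u := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
	// Multiply and keep the top tableBits bits, as hash does.
	idx := (u * 0x1e35a7bd) >> tableShift
	fmt.Printf("table index %d (always < %d)\n", idx, tableSize)
}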
// These constants are defined by the Snappy implementation so that its
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
// aren't necessary in the pure Go implementation, as we don't use those same
// optimizations, but using the same thresholds doesn't really hurt.
const (
	inputMargin            = 16 - 1
	minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
	val    uint32 // Value at destination
	offset int32
}

// deflateFast maintains the table for matches,
// and the previous byte block for cross block matching.
type deflateFast struct {
	table [tableSize]tableEntry
	prev  []byte // Previous block, zero length if unknown.
	cur   int32  // Current match offset.
}

func newDeflateFast() *deflateFast {
	return &deflateFast{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}
}
// encode encodes a block given in src and appends tokens
// to dst and returns the result.
func (e *deflateFast) encode(dst []token, src []byte) []token {
	// Ensure that e.cur doesn't wrap.
	if e.cur >= bufferReset {
		e.shiftOffsets()
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return emitLiteral(dst, src)
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load32(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc.. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash&tableMask]
			now := load32(src, nextS)
			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
			nextHash = hash(now)

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || cv != candidate.val {
				// Out of range or not matched.
				cv = now
				continue
			}
			break
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		dst = emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			//
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchLen(s, t, src)

			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
			dst = append(dst, matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)))
			s += l
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
			x >>= 8
			currHash := hash(uint32(x))
			candidate = e.table[currHash&tableMask]
			e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x) != candidate.val {
				cv = uint32(x >> 8)
				nextHash = hash(cv)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		dst = emitLiteral(dst, src[nextEmit:])
	}
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
	return dst
}
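Since everything here is unexported, encode is only reachable from inside package flate, where the level-1 compressor and the tests call it. A hedged sketch of how an in-package test might drive it (illustrative only, not an existing test); the second call can find matches through e.prev as well as within its own block:

package flate

import "testing"

// TestDeflateFastSketch is illustrative: it encodes the same block twice so
// the second call can also match against the copy kept in e.prev.
func TestDeflateFastSketch(t *testing.T) {
	block := []byte("a repetitive payload, a repetitive payload, a repetitive payload")

	e := newDeflateFast()
	first := e.encode(nil, block)
	second := e.encode(nil, block)

	if len(first) == 0 || len(second) == 0 {
		t.Fatal("expected tokens from both blocks")
	}
	t.Logf("first block: %d tokens, second block: %d tokens", len(first), len(second))
}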
// emitLiteral appends one literal token to dst for every byte in lit.
func emitLiteral(dst []token, lit []byte) []token {
	for _, v := range lit {
		dst = append(dst, literalToken(uint32(v)))
	}
	return dst
}
// matchLen returns the match length between src[s:] and src[t:].
// t can be negative to indicate the match is starting in e.prev.
// We assume that src[s-4:s] and src[t-4:t] already match.
func (e *deflateFast) matchLen(s, t int32, src []byte) int32 {
	s1 := int(s) + maxMatchLength - 4
	if s1 > len(src) {
		s1 = len(src)
	}

	// If we are inside the current block
	if t >= 0 {
		b := src[t:]
		a := src[s:s1]
		b = b[:len(a)]
		// Extend the match to be as long as possible.
		for i := range a {
			if a[i] != b[i] {
				return int32(i)
			}
		}
		return int32(len(a))
	}

	// We found a match in the previous block.
	tp := int32(len(e.prev)) + t
	if tp < 0 {
		return 0
	}

	// Extend the match to be as long as possible.
	a := src[s:s1]
	b := e.prev[tp:]
	if len(b) > len(a) {
		b = b[:len(a)]
	}
	a = a[:len(b)]
	for i := range b {
		if a[i] != b[i] {
			return int32(i)
		}
	}

	// If we reached our limit, we matched everything we are
	// allowed to in the previous block and we return.
	n := int32(len(b))
	if int(s+n) == s1 {
		return n
	}

	// Continue looking for more matches in the current block.
	a = src[s+n : s1]
	b = src[:len(a)]
	for i := range a {
		if a[i] != b[i] {
			return int32(i) + n
		}
	}
	return int32(len(a)) + n
}
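The negative-t convention treats e.prev and src as one concatenated buffer: t = -1 is the last byte of the previous block, and once the previous block is exhausted the comparison continues at the start of src. A standalone sketch of the same index arithmetic (hypothetical helper name, and without the maxMatchLength cap):

package main

import "fmt"

// matchLenAcross counts how many bytes of src[s:] match the concatenation
// prev+src starting at position t, where t < 0 points into prev.
func matchLenAcross(prev, src []byte, s, t int) int {
	buf := append(append([]byte(nil), prev...), src...)
	a := src[s:]
	b := buf[len(prev)+t:]
	n := 0
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	return n
}

func main() {
	prev := []byte("hello world")
	src := []byte("world again")
	// t = -5 points at the "world" that ends the previous block.
	fmt.Println(matchLenAcross(prev, src, 0, -5)) // 5
}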
// reset resets the encoding history.
// This ensures that no matches are made to the previous block.
func (e *deflateFast) reset() {
	e.prev = e.prev[:0]
	// Bump the offset, so all matches will fail distance check.
	// Nothing should be >= e.cur in the table.
	e.cur += maxMatchOffset

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		e.shiftOffsets()
	}
}
// shiftOffsets will shift down all match offsets.
// This is only called in rare situations to prevent integer overflow.
//
// See https://golang.org/issue/18636 and https://github.com/golang/go/issues/34121.
func (e *deflateFast) shiftOffsets() {
	if len(e.prev) == 0 {
		// We have no history; just clear the table.
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = maxMatchOffset + 1
		return
	}

	// Shift down everything in the table that isn't already too far away.
	for i := range e.table[:] {
		v := e.table[i].offset - e.cur + maxMatchOffset + 1
		if v < 0 {
			// We want to reset e.cur to maxMatchOffset + 1, so we need to shift
			// all table entries down by (e.cur - (maxMatchOffset + 1)).
			// Because we ignore matches > maxMatchOffset, we can cap
			// any negative offsets at 0.
			v = 0
		}
		e.table[i].offset = v
	}
	e.cur = maxMatchOffset + 1
}
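The rebasing keeps match distances intact: a live entry ends up exactly as far below the new e.cur (maxMatchOffset + 1) as it was below the old one; only the absolute values shrink. A standalone sketch of that arithmetic, assuming maxMatchOffset is 1 << 15 as defined elsewhere in the package:

package main

import "fmt"

func main() {
	const maxMatchOffset = 1 << 15 // assumed value from the package

	cur := int32(2147352577) // a large e.cur, as when bufferReset has been reached
	offset := cur - 1000     // a table entry written 1000 bytes ago

	// The same rebasing shiftOffsets applies to every live entry.
	newOffset := offset - cur + maxMatchOffset + 1
	newCur := int32(maxMatchOffset + 1)

	fmt.Println(cur-offset, newCur-newOffset) // 1000 1000: the distance is preserved
}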
