...

Source file src/compress/lzw/writer.go

Documentation: compress/lzw

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package lzw
		 6  
		 7  import (
		 8  	"bufio"
		 9  	"errors"
		10  	"fmt"
		11  	"io"
		12  )
		13  
		14  // A writer is a buffered, flushable writer.
		15  type writer interface {
		16  	io.ByteWriter
		17  	Flush() error
		18  }
		19  
		20  const (
		21  	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
		22  	// type conversions when shifting bits.
		23  	maxCode		 = 1<<12 - 1
		24  	invalidCode = 1<<32 - 1
		25  	// There are 1<<12 possible codes, which is an upper bound on the number of
		26  	// valid hash table entries at any given point in time. tableSize is 4x that.
		27  	tableSize = 4 * 1 << 12
		28  	tableMask = tableSize - 1
		29  	// A hash table entry is a uint32. Zero is an invalid entry since the
		30  	// lower 12 bits of a valid entry must be a non-literal code.
		31  	invalidEntry = 0
		32  )
		33  
		34  // Writer is an LZW compressor. It writes the compressed form of the data
		35  // to an underlying writer (see NewWriter).
		36  type Writer struct {
		37  	// w is the writer that compressed bytes are written to.
		38  	w writer
		39  	// order, write, bits, nBits and width are the state for
		40  	// converting a code stream into a byte stream.
		41  	order Order
		42  	write func(*Writer, uint32) error
		43  	bits	uint32
		44  	nBits uint
		45  	width uint
		46  	// litWidth is the width in bits of literal codes.
		47  	litWidth uint
		48  	// hi is the code implied by the next code emission.
		49  	// overflow is the code at which hi overflows the code width.
		50  	hi, overflow uint32
		51  	// savedCode is the accumulated code at the end of the most recent Write
		52  	// call. It is equal to invalidCode if there was no such call.
		53  	savedCode uint32
		54  	// err is the first error encountered during writing. Closing the writer
		55  	// will make any future Write calls return errClosed
		56  	err error
		57  	// table is the hash table from 20-bit keys to 12-bit values. Each table
		58  	// entry contains key<<12|val and collisions resolve by linear probing.
		59  	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
		60  	// The values are a 12-bit code.
		61  	table [tableSize]uint32
		62  }
		63  
		64  // writeLSB writes the code c for "Least Significant Bits first" data.
		65  func (w *Writer) writeLSB(c uint32) error {
		66  	w.bits |= c << w.nBits
		67  	w.nBits += w.width
		68  	for w.nBits >= 8 {
		69  		if err := w.w.WriteByte(uint8(w.bits)); err != nil {
		70  			return err
		71  		}
		72  		w.bits >>= 8
		73  		w.nBits -= 8
		74  	}
		75  	return nil
		76  }
		77  
		78  // writeMSB writes the code c for "Most Significant Bits first" data.
		79  func (w *Writer) writeMSB(c uint32) error {
		80  	w.bits |= c << (32 - w.width - w.nBits)
		81  	w.nBits += w.width
		82  	for w.nBits >= 8 {
		83  		if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil {
		84  			return err
		85  		}
		86  		w.bits <<= 8
		87  		w.nBits -= 8
		88  	}
		89  	return nil
		90  }
		91  
		92  // errOutOfCodes is an internal error that means that the writer has run out
		93  // of unused codes and a clear code needs to be sent next.
		94  var errOutOfCodes = errors.New("lzw: out of codes")
		95  
		96  // incHi increments e.hi and checks for both overflow and running out of
		97  // unused codes. In the latter case, incHi sends a clear code, resets the
		98  // writer state and returns errOutOfCodes.
		99  func (w *Writer) incHi() error {
	 100  	w.hi++
	 101  	if w.hi == w.overflow {
	 102  		w.width++
	 103  		w.overflow <<= 1
	 104  	}
	 105  	if w.hi == maxCode {
	 106  		clear := uint32(1) << w.litWidth
	 107  		if err := w.write(w, clear); err != nil {
	 108  			return err
	 109  		}
	 110  		w.width = w.litWidth + 1
	 111  		w.hi = clear + 1
	 112  		w.overflow = clear << 1
	 113  		for i := range w.table {
	 114  			w.table[i] = invalidEntry
	 115  		}
	 116  		return errOutOfCodes
	 117  	}
	 118  	return nil
	 119  }
	 120  
	 121  // Write writes a compressed representation of p to w's underlying writer.
	 122  func (w *Writer) Write(p []byte) (n int, err error) {
	 123  	if w.err != nil {
	 124  		return 0, w.err
	 125  	}
	 126  	if len(p) == 0 {
	 127  		return 0, nil
	 128  	}
	 129  	if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff {
	 130  		for _, x := range p {
	 131  			if x > maxLit {
	 132  				w.err = errors.New("lzw: input byte too large for the litWidth")
	 133  				return 0, w.err
	 134  			}
	 135  		}
	 136  	}
	 137  	n = len(p)
	 138  	code := w.savedCode
	 139  	if code == invalidCode {
	 140  		// The first code sent is always a literal code.
	 141  		code, p = uint32(p[0]), p[1:]
	 142  	}
	 143  loop:
	 144  	for _, x := range p {
	 145  		literal := uint32(x)
	 146  		key := code<<8 | literal
	 147  		// If there is a hash table hit for this key then we continue the loop
	 148  		// and do not emit a code yet.
	 149  		hash := (key>>12 ^ key) & tableMask
	 150  		for h, t := hash, w.table[hash]; t != invalidEntry; {
	 151  			if key == t>>12 {
	 152  				code = t & maxCode
	 153  				continue loop
	 154  			}
	 155  			h = (h + 1) & tableMask
	 156  			t = w.table[h]
	 157  		}
	 158  		// Otherwise, write the current code, and literal becomes the start of
	 159  		// the next emitted code.
	 160  		if w.err = w.write(w, code); w.err != nil {
	 161  			return 0, w.err
	 162  		}
	 163  		code = literal
	 164  		// Increment e.hi, the next implied code. If we run out of codes, reset
	 165  		// the writer state (including clearing the hash table) and continue.
	 166  		if err1 := w.incHi(); err1 != nil {
	 167  			if err1 == errOutOfCodes {
	 168  				continue
	 169  			}
	 170  			w.err = err1
	 171  			return 0, w.err
	 172  		}
	 173  		// Otherwise, insert key -> e.hi into the map that e.table represents.
	 174  		for {
	 175  			if w.table[hash] == invalidEntry {
	 176  				w.table[hash] = (key << 12) | w.hi
	 177  				break
	 178  			}
	 179  			hash = (hash + 1) & tableMask
	 180  		}
	 181  	}
	 182  	w.savedCode = code
	 183  	return n, nil
	 184  }
	 185  
	 186  // Close closes the Writer, flushing any pending output. It does not close
	 187  // w's underlying writer.
	 188  func (w *Writer) Close() error {
	 189  	if w.err != nil {
	 190  		if w.err == errClosed {
	 191  			return nil
	 192  		}
	 193  		return w.err
	 194  	}
	 195  	// Make any future calls to Write return errClosed.
	 196  	w.err = errClosed
	 197  	// Write the savedCode if valid.
	 198  	if w.savedCode != invalidCode {
	 199  		if err := w.write(w, w.savedCode); err != nil {
	 200  			return err
	 201  		}
	 202  		if err := w.incHi(); err != nil && err != errOutOfCodes {
	 203  			return err
	 204  		}
	 205  	}
	 206  	// Write the eof code.
	 207  	eof := uint32(1)<<w.litWidth + 1
	 208  	if err := w.write(w, eof); err != nil {
	 209  		return err
	 210  	}
	 211  	// Write the final bits.
	 212  	if w.nBits > 0 {
	 213  		if w.order == MSB {
	 214  			w.bits >>= 24
	 215  		}
	 216  		if err := w.w.WriteByte(uint8(w.bits)); err != nil {
	 217  			return err
	 218  		}
	 219  	}
	 220  	return w.w.Flush()
	 221  }
	 222  
	 223  // Reset clears the Writer's state and allows it to be reused again
	 224  // as a new Writer.
	 225  func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) {
	 226  	*w = Writer{}
	 227  	w.init(dst, order, litWidth)
	 228  }
	 229  
	 230  // NewWriter creates a new io.WriteCloser.
	 231  // Writes to the returned io.WriteCloser are compressed and written to w.
	 232  // It is the caller's responsibility to call Close on the WriteCloser when
	 233  // finished writing.
	 234  // The number of bits to use for literal codes, litWidth, must be in the
	 235  // range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
	 236  //
	 237  // It is guaranteed that the underlying type of the returned io.WriteCloser
	 238  // is a *Writer.
	 239  func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
	 240  	return newWriter(w, order, litWidth)
	 241  }
	 242  
	 243  func newWriter(dst io.Writer, order Order, litWidth int) *Writer {
	 244  	w := new(Writer)
	 245  	w.init(dst, order, litWidth)
	 246  	return w
	 247  }
	 248  
	 249  func (w *Writer) init(dst io.Writer, order Order, litWidth int) {
	 250  	switch order {
	 251  	case LSB:
	 252  		w.write = (*Writer).writeLSB
	 253  	case MSB:
	 254  		w.write = (*Writer).writeMSB
	 255  	default:
	 256  		w.err = errors.New("lzw: unknown order")
	 257  		return
	 258  	}
	 259  	if litWidth < 2 || 8 < litWidth {
	 260  		w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
	 261  		return
	 262  	}
	 263  	bw, ok := dst.(writer)
	 264  	if !ok && dst != nil {
	 265  		bw = bufio.NewWriter(dst)
	 266  	}
	 267  	w.w = bw
	 268  	lw := uint(litWidth)
	 269  	w.order = order
	 270  	w.width = 1 + lw
	 271  	w.litWidth = lw
	 272  	w.hi = 1<<lw + 1
	 273  	w.overflow = 1 << (lw + 1)
	 274  	w.savedCode = invalidCode
	 275  }
	 276  

View as plain text