...

Source file src/compress/bzip2/huffman.go

Documentation: compress/bzip2

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package bzip2
		 6  
		 7  import "sort"
		 8  
		 9  // A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
		10  // symbol.
		11  type huffmanTree struct {
		12  	// nodes contains all the non-leaf nodes in the tree. nodes[0] is the
		13  	// root of the tree and nextNode contains the index of the next element
		14  	// of nodes to use when the tree is being constructed.
		15  	nodes		[]huffmanNode
		16  	nextNode int
		17  }
		18  
		19  // A huffmanNode is a node in the tree. left and right contain indexes into the
		20  // nodes slice of the tree. If left or right is invalidNodeValue then the child
		21  // is a left node and its value is in leftValue/rightValue.
		22  //
		23  // The symbols are uint16s because bzip2 encodes not only MTF indexes in the
		24  // tree, but also two magic values for run-length encoding and an EOF symbol.
		25  // Thus there are more than 256 possible symbols.
		26  type huffmanNode struct {
		27  	left, right					 uint16
		28  	leftValue, rightValue uint16
		29  }
		30  
		31  // invalidNodeValue is an invalid index which marks a leaf node in the tree.
		32  const invalidNodeValue = 0xffff
		33  
		34  // Decode reads bits from the given bitReader and navigates the tree until a
		35  // symbol is found.
		36  func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
		37  	nodeIndex := uint16(0) // node 0 is the root of the tree.
		38  
		39  	for {
		40  		node := &t.nodes[nodeIndex]
		41  
		42  		var bit uint16
		43  		if br.bits > 0 {
		44  			// Get next bit - fast path.
		45  			br.bits--
		46  			bit = uint16(br.n>>(br.bits&63)) & 1
		47  		} else {
		48  			// Get next bit - slow path.
		49  			// Use ReadBits to retrieve a single bit
		50  			// from the underling io.ByteReader.
		51  			bit = uint16(br.ReadBits(1))
		52  		}
		53  
		54  		// Trick a compiler into generating conditional move instead of branch,
		55  		// by making both loads unconditional.
		56  		l, r := node.left, node.right
		57  
		58  		if bit == 1 {
		59  			nodeIndex = l
		60  		} else {
		61  			nodeIndex = r
		62  		}
		63  
		64  		if nodeIndex == invalidNodeValue {
		65  			// We found a leaf. Use the value of bit to decide
		66  			// whether is a left or a right value.
		67  			l, r := node.leftValue, node.rightValue
		68  			if bit == 1 {
		69  				v = l
		70  			} else {
		71  				v = r
		72  			}
		73  			return
		74  		}
		75  	}
		76  }
		77  
		78  // newHuffmanTree builds a Huffman tree from a slice containing the code
		79  // lengths of each symbol. The maximum code length is 32 bits.
		80  func newHuffmanTree(lengths []uint8) (huffmanTree, error) {
		81  	// There are many possible trees that assign the same code length to
		82  	// each symbol (consider reflecting a tree down the middle, for
		83  	// example). Since the code length assignments determine the
		84  	// efficiency of the tree, each of these trees is equally good. In
		85  	// order to minimize the amount of information needed to build a tree
		86  	// bzip2 uses a canonical tree so that it can be reconstructed given
		87  	// only the code length assignments.
		88  
		89  	if len(lengths) < 2 {
		90  		panic("newHuffmanTree: too few symbols")
		91  	}
		92  
		93  	var t huffmanTree
		94  
		95  	// First we sort the code length assignments by ascending code length,
		96  	// using the symbol value to break ties.
		97  	pairs := make([]huffmanSymbolLengthPair, len(lengths))
		98  	for i, length := range lengths {
		99  		pairs[i].value = uint16(i)
	 100  		pairs[i].length = length
	 101  	}
	 102  
	 103  	sort.Slice(pairs, func(i, j int) bool {
	 104  		if pairs[i].length < pairs[j].length {
	 105  			return true
	 106  		}
	 107  		if pairs[i].length > pairs[j].length {
	 108  			return false
	 109  		}
	 110  		if pairs[i].value < pairs[j].value {
	 111  			return true
	 112  		}
	 113  		return false
	 114  	})
	 115  
	 116  	// Now we assign codes to the symbols, starting with the longest code.
	 117  	// We keep the codes packed into a uint32, at the most-significant end.
	 118  	// So branches are taken from the MSB downwards. This makes it easy to
	 119  	// sort them later.
	 120  	code := uint32(0)
	 121  	length := uint8(32)
	 122  
	 123  	codes := make([]huffmanCode, len(lengths))
	 124  	for i := len(pairs) - 1; i >= 0; i-- {
	 125  		if length > pairs[i].length {
	 126  			length = pairs[i].length
	 127  		}
	 128  		codes[i].code = code
	 129  		codes[i].codeLen = length
	 130  		codes[i].value = pairs[i].value
	 131  		// We need to 'increment' the code, which means treating |code|
	 132  		// like a |length| bit number.
	 133  		code += 1 << (32 - length)
	 134  	}
	 135  
	 136  	// Now we can sort by the code so that the left half of each branch are
	 137  	// grouped together, recursively.
	 138  	sort.Slice(codes, func(i, j int) bool {
	 139  		return codes[i].code < codes[j].code
	 140  	})
	 141  
	 142  	t.nodes = make([]huffmanNode, len(codes))
	 143  	_, err := buildHuffmanNode(&t, codes, 0)
	 144  	return t, err
	 145  }
	 146  
	 147  // huffmanSymbolLengthPair contains a symbol and its code length.
	 148  type huffmanSymbolLengthPair struct {
	 149  	value	uint16
	 150  	length uint8
	 151  }
	 152  
	 153  // huffmanCode contains a symbol, its code and code length.
	 154  type huffmanCode struct {
	 155  	code		uint32
	 156  	codeLen uint8
	 157  	value	 uint16
	 158  }
	 159  
	 160  // buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
	 161  // the Huffman tree at the given level. It returns the index of the newly
	 162  // constructed node.
	 163  func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error) {
	 164  	test := uint32(1) << (31 - level)
	 165  
	 166  	// We have to search the list of codes to find the divide between the left and right sides.
	 167  	firstRightIndex := len(codes)
	 168  	for i, code := range codes {
	 169  		if code.code&test != 0 {
	 170  			firstRightIndex = i
	 171  			break
	 172  		}
	 173  	}
	 174  
	 175  	left := codes[:firstRightIndex]
	 176  	right := codes[firstRightIndex:]
	 177  
	 178  	if len(left) == 0 || len(right) == 0 {
	 179  		// There is a superfluous level in the Huffman tree indicating
	 180  		// a bug in the encoder. However, this bug has been observed in
	 181  		// the wild so we handle it.
	 182  
	 183  		// If this function was called recursively then we know that
	 184  		// len(codes) >= 2 because, otherwise, we would have hit the
	 185  		// "leaf node" case, below, and not recursed.
	 186  		//
	 187  		// However, for the initial call it's possible that len(codes)
	 188  		// is zero or one. Both cases are invalid because a zero length
	 189  		// tree cannot encode anything and a length-1 tree can only
	 190  		// encode EOF and so is superfluous. We reject both.
	 191  		if len(codes) < 2 {
	 192  			return 0, StructuralError("empty Huffman tree")
	 193  		}
	 194  
	 195  		// In this case the recursion doesn't always reduce the length
	 196  		// of codes so we need to ensure termination via another
	 197  		// mechanism.
	 198  		if level == 31 {
	 199  			// Since len(codes) >= 2 the only way that the values
	 200  			// can match at all 32 bits is if they are equal, which
	 201  			// is invalid. This ensures that we never enter
	 202  			// infinite recursion.
	 203  			return 0, StructuralError("equal symbols in Huffman tree")
	 204  		}
	 205  
	 206  		if len(left) == 0 {
	 207  			return buildHuffmanNode(t, right, level+1)
	 208  		}
	 209  		return buildHuffmanNode(t, left, level+1)
	 210  	}
	 211  
	 212  	nodeIndex = uint16(t.nextNode)
	 213  	node := &t.nodes[t.nextNode]
	 214  	t.nextNode++
	 215  
	 216  	if len(left) == 1 {
	 217  		// leaf node
	 218  		node.left = invalidNodeValue
	 219  		node.leftValue = left[0].value
	 220  	} else {
	 221  		node.left, err = buildHuffmanNode(t, left, level+1)
	 222  	}
	 223  
	 224  	if err != nil {
	 225  		return
	 226  	}
	 227  
	 228  	if len(right) == 1 {
	 229  		// leaf node
	 230  		node.right = invalidNodeValue
	 231  		node.rightValue = right[0].value
	 232  	} else {
	 233  		node.right, err = buildHuffmanNode(t, right, level+1)
	 234  	}
	 235  
	 236  	return
	 237  }
	 238  

View as plain text