maphash.go

Documentation: hash/maphash

		 1  // Copyright 2019 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Package maphash provides hash functions on byte sequences.
		 6  // These hash functions are intended to be used to implement hash tables or
		 7  // other data structures that need to map arbitrary strings or byte
		 8  // sequences to a uniform distribution on unsigned 64-bit integers.
		 9  // Each different instance of a hash table or data structure should use its own Seed.
		10  //
		11  // The hash functions are not cryptographically secure.
		12  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
		13  //
		14  package maphash
		15  
		16  import (
		17  	"internal/unsafeheader"
		18  	"unsafe"
		19  )
		20  
		21  // A Seed is a random value that selects the specific hash function
		22  // computed by a Hash. If two Hashes use the same Seeds, they
		23  // will compute the same hash values for any given input.
		24  // If two Hashes use different Seeds, they are very likely to compute
		25  // distinct hash values for any given input.
		26  //
		27  // A Seed must be initialized by calling MakeSeed.
		28  // The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
		29  //
		30  // Each Seed value is local to a single process and cannot be serialized
		31  // or otherwise recreated in a different process.
		32  type Seed struct {
		33  	s uint64
		34  }
		35  
		36  // A Hash computes a seeded hash of a byte sequence.
		37  //
		38  // The zero Hash is a valid Hash ready to use.
		39  // A zero Hash chooses a random seed for itself during
		40  // the first call to a Reset, Write, Seed, or Sum64 method.
		41  // For control over the seed, use SetSeed.
		42  //
		43  // The computed hash values depend only on the initial seed and
		44  // the sequence of bytes provided to the Hash object, not on the way
		45  // in which the bytes are provided. For example, the three sequences
		46  //
		47  //		 h.Write([]byte{'f','o','o'})
		48  //		 h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
		49  //		 h.WriteString("foo")
		50  //
		51  // all have the same effect.
		52  //
		53  // Hashes are intended to be collision-resistant, even for situations
		54  // where an adversary controls the byte sequences being hashed.
		55  //
		56  // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
		57  // If multiple goroutines must compute the same seeded hash,
		58  // each can declare its own Hash and call SetSeed with a common Seed.
		59  type Hash struct {
		60  	_		 [0]func()		 // not comparable
		61  	seed	Seed					// initial seed used for this hash
		62  	state Seed					// current hash of all flushed bytes
		63  	buf	 [bufSize]byte // unflushed byte buffer
		64  	n		 int					 // number of unflushed bytes
		65  }
		66  
		67  // bufSize is the size of the Hash write buffer.
		68  // The buffer ensures that writes depend only on the sequence of bytes,
		69  // not the sequence of WriteByte/Write/WriteString calls,
		70  // by always calling rthash with a full buffer (except for the tail).
		71  const bufSize = 128
		72  
		73  // initSeed seeds the hash if necessary.
		74  // initSeed is called lazily before any operation that actually uses h.seed/h.state.
		75  // Note that this does not include Write/WriteByte/WriteString in the case
		76  // where they only add to h.buf. (If they write too much, they call h.flush,
		77  // which does call h.initSeed.)
		78  func (h *Hash) initSeed() {
		79  	if h.seed.s == 0 {
		80  		seed := MakeSeed()
		81  		h.seed = seed
		82  		h.state = seed
		83  	}
		84  }
		85  
		86  // WriteByte adds b to the sequence of bytes hashed by h.
		87  // It never fails; the error result is for implementing io.ByteWriter.
		88  func (h *Hash) WriteByte(b byte) error {
		89  	if h.n == len(h.buf) {
		90  		h.flush()
		91  	}
		92  	h.buf[h.n] = b
		93  	h.n++
		94  	return nil
		95  }
		96  
		97  // Write adds b to the sequence of bytes hashed by h.
		98  // It always writes all of b and never fails; the count and error result are for implementing io.Writer.
		99  func (h *Hash) Write(b []byte) (int, error) {
	 100  	size := len(b)
	 101  	// Deal with bytes left over in h.buf.
	 102  	// h.n <= bufSize is always true.
	 103  	// Checking it is ~free and it lets the compiler eliminate a bounds check.
	 104  	if h.n > 0 && h.n <= bufSize {
	 105  		k := copy(h.buf[h.n:], b)
	 106  		h.n += k
	 107  		if h.n < bufSize {
	 108  			// Copied the entirety of b to h.buf.
	 109  			return size, nil
	 110  		}
	 111  		b = b[k:]
	 112  		h.flush()
	 113  		// No need to set h.n = 0 here; it happens just before exit.
	 114  	}
	 115  	// Process as many full buffers as possible, without copying, and calling initSeed only once.
	 116  	if len(b) > bufSize {
	 117  		h.initSeed()
	 118  		for len(b) > bufSize {
	 119  			h.state.s = rthash(&b[0], bufSize, h.state.s)
	 120  			b = b[bufSize:]
	 121  		}
	 122  	}
	 123  	// Copy the tail.
	 124  	copy(h.buf[:], b)
	 125  	h.n = len(b)
	 126  	return size, nil
	 127  }
	 128  
	 129  // WriteString adds the bytes of s to the sequence of bytes hashed by h.
	 130  // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
	 131  func (h *Hash) WriteString(s string) (int, error) {
	 132  	// WriteString mirrors Write. See Write for comments.
	 133  	size := len(s)
	 134  	if h.n > 0 && h.n <= bufSize {
	 135  		k := copy(h.buf[h.n:], s)
	 136  		h.n += k
	 137  		if h.n < bufSize {
	 138  			return size, nil
	 139  		}
	 140  		s = s[k:]
	 141  		h.flush()
	 142  	}
	 143  	if len(s) > bufSize {
	 144  		h.initSeed()
	 145  		for len(s) > bufSize {
	 146  			ptr := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
	 147  			h.state.s = rthash(ptr, bufSize, h.state.s)
	 148  			s = s[bufSize:]
	 149  		}
	 150  	}
	 151  	copy(h.buf[:], s)
	 152  	h.n = len(s)
	 153  	return size, nil
	 154  }
	 155  
	 156  // Seed returns h's seed value.
	 157  func (h *Hash) Seed() Seed {
	 158  	h.initSeed()
	 159  	return h.seed
	 160  }
	 161  
	 162  // SetSeed sets h to use seed, which must have been returned by MakeSeed
	 163  // or by another Hash's Seed method.
	 164  // Two Hash objects with the same seed behave identically.
	 165  // Two Hash objects with different seeds will very likely behave differently.
	 166  // Any bytes added to h before this call will be discarded.
	 167  func (h *Hash) SetSeed(seed Seed) {
	 168  	if seed.s == 0 {
	 169  		panic("maphash: use of uninitialized Seed")
	 170  	}
	 171  	h.seed = seed
	 172  	h.state = seed
	 173  	h.n = 0
	 174  }
	 175  
	 176  // Reset discards all bytes added to h.
	 177  // (The seed remains the same.)
	 178  func (h *Hash) Reset() {
	 179  	h.initSeed()
	 180  	h.state = h.seed
	 181  	h.n = 0
	 182  }
	 183  
	 184  // precondition: buffer is full.
	 185  func (h *Hash) flush() {
	 186  	if h.n != len(h.buf) {
	 187  		panic("maphash: flush of partially full buffer")
	 188  	}
	 189  	h.initSeed()
	 190  	h.state.s = rthash(&h.buf[0], h.n, h.state.s)
	 191  	h.n = 0
	 192  }
	 193  
	 194  // Sum64 returns h's current 64-bit value, which depends on
	 195  // h's seed and the sequence of bytes added to h since the
	 196  // last call to Reset or SetSeed.
	 197  //
	 198  // All bits of the Sum64 result are close to uniformly and
	 199  // independently distributed, so it can be safely reduced
	 200  // by using bit masking, shifting, or modular arithmetic.
	 201  func (h *Hash) Sum64() uint64 {
	 202  	h.initSeed()
	 203  	return rthash(&h.buf[0], h.n, h.state.s)
	 204  }
	 205  
	 206  // MakeSeed returns a new random seed.
	 207  func MakeSeed() Seed {
	 208  	var s1, s2 uint64
	 209  	for {
	 210  		s1 = uint64(runtime_fastrand())
	 211  		s2 = uint64(runtime_fastrand())
	 212  		// We use seed 0 to indicate an uninitialized seed/hash,
	 213  		// so keep trying until we get a non-zero seed.
	 214  		if s1|s2 != 0 {
	 215  			break
	 216  		}
	 217  	}
	 218  	return Seed{s: s1<<32 + s2}
	 219  }
	 220  
	 221  //go:linkname runtime_fastrand runtime.fastrand
	 222  func runtime_fastrand() uint32
	 223  
	 224  func rthash(ptr *byte, len int, seed uint64) uint64 {
	 225  	if len == 0 {
	 226  		return seed
	 227  	}
	 228  	// The runtime hasher only works on uintptr. For 64-bit
	 229  	// architectures, we use the hasher directly. Otherwise,
	 230  	// we use two parallel hashers on the lower and upper 32 bits.
	 231  	if unsafe.Sizeof(uintptr(0)) == 8 {
	 232  		return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)))
	 233  	}
	 234  	lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))
	 235  	hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len))
	 236  	return uint64(hi)<<32 | uint64(lo)
	 237  }
	 238  
	 239  //go:linkname runtime_memhash runtime.memhash
	 240  //go:noescape
	 241  func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
	 242  
	 243  // Sum appends the hash's current 64-bit value to b.
	 244  // It exists for implementing hash.Hash.
	 245  // For direct calls, it is more efficient to use Sum64.
	 246  func (h *Hash) Sum(b []byte) []byte {
	 247  	x := h.Sum64()
	 248  	return append(b,
	 249  		byte(x>>0),
	 250  		byte(x>>8),
	 251  		byte(x>>16),
	 252  		byte(x>>24),
	 253  		byte(x>>32),
	 254  		byte(x>>40),
	 255  		byte(x>>48),
	 256  		byte(x>>56))
	 257  }
	 258  
	 259  // Size returns h's hash value size, 8 bytes.
	 260  func (h *Hash) Size() int { return 8 }
	 261  
	 262  // BlockSize returns h's block size.
	 263  func (h *Hash) BlockSize() int { return len(h.buf) }
	 264
View as plain text