Source file src/hash/maphash/maphash.go
1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences and comparable values. 6 // These hash functions are intended to be used to implement hash tables or 7 // other data structures that need to map arbitrary strings or byte 8 // sequences to a uniform distribution on unsigned 64-bit integers. 9 // Each different instance of a hash table or data structure should use its own [Seed]. 10 // 11 // The hash functions are not cryptographically secure. 12 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 13 package maphash 14 15 import ( 16 "internal/byteorder" 17 "math" 18 ) 19 20 // A Seed is a random value that selects the specific hash function 21 // computed by a [Hash]. If two Hashes use the same Seeds, they 22 // will compute the same hash values for any given input. 23 // If two Hashes use different Seeds, they are very likely to compute 24 // distinct hash values for any given input. 25 // 26 // A Seed must be initialized by calling [MakeSeed]. 27 // The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method. 28 // 29 // Each Seed value is local to a single process and cannot be serialized 30 // or otherwise recreated in a different process. 31 type Seed struct { 32 s uint64 33 } 34 35 // Bytes returns the hash of b with the given seed. 36 // 37 // Bytes is equivalent to, but more convenient and efficient than: 38 // 39 // var h Hash 40 // h.SetSeed(seed) 41 // h.Write(b) 42 // return h.Sum64() 43 func Bytes(seed Seed, b []byte) uint64 { 44 state := seed.s 45 if state == 0 { 46 panic("maphash: use of uninitialized Seed") 47 } 48 49 if len(b) > bufSize { 50 b = b[:len(b):len(b)] // merge len and cap calculations when reslicing 51 for len(b) > bufSize { 52 state = rthash(b[:bufSize], state) 53 b = b[bufSize:] 54 } 55 } 56 return rthash(b, state) 57 } 58 59 // String returns the hash of s with the given seed. 60 // 61 // String is equivalent to, but more convenient and efficient than: 62 // 63 // var h Hash 64 // h.SetSeed(seed) 65 // h.WriteString(s) 66 // return h.Sum64() 67 func String(seed Seed, s string) uint64 { 68 state := seed.s 69 if state == 0 { 70 panic("maphash: use of uninitialized Seed") 71 } 72 for len(s) > bufSize { 73 state = rthashString(s[:bufSize], state) 74 s = s[bufSize:] 75 } 76 return rthashString(s, state) 77 } 78 79 // A Hash computes a seeded hash of a byte sequence. 80 // 81 // The zero Hash is a valid Hash ready to use. 82 // A zero Hash chooses a random seed for itself during 83 // the first call to a Reset, Write, Seed, or Sum64 method. 84 // For control over the seed, use SetSeed. 85 // 86 // The computed hash values depend only on the initial seed and 87 // the sequence of bytes provided to the Hash object, not on the way 88 // in which the bytes are provided. For example, the three sequences 89 // 90 // h.Write([]byte{'f','o','o'}) 91 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 92 // h.WriteString("foo") 93 // 94 // all have the same effect. 95 // 96 // Hashes are intended to be collision-resistant, even for situations 97 // where an adversary controls the byte sequences being hashed. 98 // 99 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 100 // If multiple goroutines must compute the same seeded hash, 101 // each can declare its own Hash and call SetSeed with a common Seed. 102 type Hash struct { 103 _ [0]func() // not comparable 104 seed Seed // initial seed used for this hash 105 state Seed // current hash of all flushed bytes 106 buf [bufSize]byte // unflushed byte buffer 107 n int // number of unflushed bytes 108 } 109 110 // bufSize is the size of the Hash write buffer. 111 // The buffer ensures that writes depend only on the sequence of bytes, 112 // not the sequence of WriteByte/Write/WriteString calls, 113 // by always calling rthash with a full buffer (except for the tail). 114 const bufSize = 128 115 116 // initSeed seeds the hash if necessary. 117 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 118 // Note that this does not include Write/WriteByte/WriteString in the case 119 // where they only add to h.buf. (If they write too much, they call h.flush, 120 // which does call h.initSeed.) 121 func (h *Hash) initSeed() { 122 if h.seed.s == 0 { 123 seed := MakeSeed() 124 h.seed = seed 125 h.state = seed 126 } 127 } 128 129 // WriteByte adds b to the sequence of bytes hashed by h. 130 // It never fails; the error result is for implementing [io.ByteWriter]. 131 func (h *Hash) WriteByte(b byte) error { 132 if h.n == len(h.buf) { 133 h.flush() 134 } 135 h.buf[h.n] = b 136 h.n++ 137 return nil 138 } 139 140 // Write adds b to the sequence of bytes hashed by h. 141 // It always writes all of b and never fails; the count and error result are for implementing [io.Writer]. 142 func (h *Hash) Write(b []byte) (int, error) { 143 size := len(b) 144 // Deal with bytes left over in h.buf. 145 // h.n <= bufSize is always true. 146 // Checking it is ~free and it lets the compiler eliminate a bounds check. 147 if h.n > 0 && h.n <= bufSize { 148 k := copy(h.buf[h.n:], b) 149 h.n += k 150 if h.n < bufSize { 151 // Copied the entirety of b to h.buf. 152 return size, nil 153 } 154 b = b[k:] 155 h.flush() 156 // No need to set h.n = 0 here; it happens just before exit. 157 } 158 // Process as many full buffers as possible, without copying, and calling initSeed only once. 159 if len(b) > bufSize { 160 h.initSeed() 161 for len(b) > bufSize { 162 h.state.s = rthash(b[:bufSize], h.state.s) 163 b = b[bufSize:] 164 } 165 } 166 // Copy the tail. 167 copy(h.buf[:], b) 168 h.n = len(b) 169 return size, nil 170 } 171 172 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 173 // It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter]. 174 func (h *Hash) WriteString(s string) (int, error) { 175 // WriteString mirrors Write. See Write for comments. 176 size := len(s) 177 if h.n > 0 && h.n <= bufSize { 178 k := copy(h.buf[h.n:], s) 179 h.n += k 180 if h.n < bufSize { 181 return size, nil 182 } 183 s = s[k:] 184 h.flush() 185 } 186 if len(s) > bufSize { 187 h.initSeed() 188 for len(s) > bufSize { 189 h.state.s = rthashString(s[:bufSize], h.state.s) 190 s = s[bufSize:] 191 } 192 } 193 copy(h.buf[:], s) 194 h.n = len(s) 195 return size, nil 196 } 197 198 // Seed returns h's seed value. 199 func (h *Hash) Seed() Seed { 200 h.initSeed() 201 return h.seed 202 } 203 204 // SetSeed sets h to use seed, which must have been returned by [MakeSeed] 205 // or by another [Hash.Seed] method. 206 // Two [Hash] objects with the same seed behave identically. 207 // Two [Hash] objects with different seeds will very likely behave differently. 208 // Any bytes added to h before this call will be discarded. 209 func (h *Hash) SetSeed(seed Seed) { 210 if seed.s == 0 { 211 panic("maphash: use of uninitialized Seed") 212 } 213 h.seed = seed 214 h.state = seed 215 h.n = 0 216 } 217 218 // Reset discards all bytes added to h. 219 // (The seed remains the same.) 220 func (h *Hash) Reset() { 221 h.initSeed() 222 h.state = h.seed 223 h.n = 0 224 } 225 226 // precondition: buffer is full. 227 func (h *Hash) flush() { 228 if h.n != len(h.buf) { 229 panic("maphash: flush of partially full buffer") 230 } 231 h.initSeed() 232 h.state.s = rthash(h.buf[:h.n], h.state.s) 233 h.n = 0 234 } 235 236 // Sum64 returns h's current 64-bit value, which depends on 237 // h's seed and the sequence of bytes added to h since the 238 // last call to [Hash.Reset] or [Hash.SetSeed]. 239 // 240 // All bits of the Sum64 result are close to uniformly and 241 // independently distributed, so it can be safely reduced 242 // by using bit masking, shifting, or modular arithmetic. 243 func (h *Hash) Sum64() uint64 { 244 h.initSeed() 245 return rthash(h.buf[:h.n], h.state.s) 246 } 247 248 // MakeSeed returns a new random seed. 249 func MakeSeed() Seed { 250 var s uint64 251 for { 252 s = randUint64() 253 // We use seed 0 to indicate an uninitialized seed/hash, 254 // so keep trying until we get a non-zero seed. 255 if s != 0 { 256 break 257 } 258 } 259 return Seed{s: s} 260 } 261 262 // Sum appends the hash's current 64-bit value to b. 263 // It exists for implementing [hash.Hash]. 264 // For direct calls, it is more efficient to use [Hash.Sum64]. 265 func (h *Hash) Sum(b []byte) []byte { 266 x := h.Sum64() 267 return append(b, 268 byte(x>>0), 269 byte(x>>8), 270 byte(x>>16), 271 byte(x>>24), 272 byte(x>>32), 273 byte(x>>40), 274 byte(x>>48), 275 byte(x>>56)) 276 } 277 278 // Size returns h's hash value size, 8 bytes. 279 func (h *Hash) Size() int { return 8 } 280 281 // BlockSize returns h's block size. 282 func (h *Hash) BlockSize() int { return len(h.buf) } 283 284 // Comparable returns the hash of comparable value v with the given seed 285 // such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2. 286 // If v != v, then the resulting hash is randomly distributed. 287 func Comparable[T comparable](seed Seed, v T) uint64 { 288 escapeForHash(v) 289 return comparableHash(v, seed) 290 } 291 292 // escapeForHash forces v to be on the heap, if v contains a 293 // non-string pointer. We cannot hash pointers to local variables, 294 // as the address of the local variable might change on stack growth. 295 // Strings are okay as the hash depends on only the content, not 296 // the pointer. 297 // 298 // This is essentially 299 // 300 // if hasNonStringPointers(T) { abi.Escape(v) } 301 // 302 // Implemented as a compiler intrinsic. 303 func escapeForHash[T comparable](v T) { panic("intrinsic") } 304 305 // WriteComparable adds x to the data hashed by h. 306 func WriteComparable[T comparable](h *Hash, x T) { 307 escapeForHash(x) 308 // writeComparable (not in purego mode) directly operates on h.state 309 // without using h.buf. Mix in the buffer length so it won't 310 // commute with a buffered write, which either changes h.n or changes 311 // h.state. 312 if h.n != 0 { 313 writeComparable(h, h.n) 314 } 315 writeComparable(h, x) 316 } 317 318 func (h *Hash) float64(f float64) { 319 if f == 0 { 320 h.WriteByte(0) 321 return 322 } 323 var buf [8]byte 324 if f != f { 325 byteorder.LEPutUint64(buf[:], randUint64()) 326 h.Write(buf[:]) 327 return 328 } 329 byteorder.LEPutUint64(buf[:], math.Float64bits(f)) 330 h.Write(buf[:]) 331 } 332 333 func btoi(b bool) byte { 334 if b { 335 return 1 336 } 337 return 0 338 } 339