Source file src/hash/maphash/maphash.go
1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences. 6 // These hash functions are intended to be used to implement hash tables or 7 // other data structures that need to map arbitrary strings or byte 8 // sequences to a uniform distribution on unsigned 64-bit integers. 9 // Each different instance of a hash table or data structure should use its own Seed. 10 // 11 // The hash functions are not cryptographically secure. 12 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 13 package maphash 14 15 import ( 16 "unsafe" 17 ) 18 19 // A Seed is a random value that selects the specific hash function 20 // computed by a Hash. If two Hashes use the same Seeds, they 21 // will compute the same hash values for any given input. 22 // If two Hashes use different Seeds, they are very likely to compute 23 // distinct hash values for any given input. 24 // 25 // A Seed must be initialized by calling MakeSeed. 26 // The zero seed is uninitialized and not valid for use with Hash's SetSeed method. 27 // 28 // Each Seed value is local to a single process and cannot be serialized 29 // or otherwise recreated in a different process. 30 type Seed struct { 31 s uint64 32 } 33 34 // Bytes returns the hash of b with the given seed. 35 // 36 // Bytes is equivalent to, but more convenient and efficient than: 37 // 38 // var h Hash 39 // h.SetSeed(seed) 40 // h.Write(b) 41 // return h.Sum64() 42 func Bytes(seed Seed, b []byte) uint64 { 43 state := seed.s 44 if state == 0 { 45 panic("maphash: use of uninitialized Seed") 46 } 47 if len(b) == 0 { 48 return rthash(nil, 0, state) // avoid &b[0] index panic below 49 } 50 if len(b) > bufSize { 51 b = b[:len(b):len(b)] // merge len and cap calculations when reslicing 52 for len(b) > bufSize { 53 state = rthash(&b[0], bufSize, state) 54 b = b[bufSize:] 55 } 56 } 57 return rthash(&b[0], len(b), state) 58 } 59 60 // String returns the hash of s with the given seed. 61 // 62 // String is equivalent to, but more convenient and efficient than: 63 // 64 // var h Hash 65 // h.SetSeed(seed) 66 // h.WriteString(s) 67 // return h.Sum64() 68 func String(seed Seed, s string) uint64 { 69 state := seed.s 70 if state == 0 { 71 panic("maphash: use of uninitialized Seed") 72 } 73 for len(s) > bufSize { 74 p := (*byte)(unsafe.StringData(s)) 75 state = rthash(p, bufSize, state) 76 s = s[bufSize:] 77 } 78 p := (*byte)(unsafe.StringData(s)) 79 return rthash(p, len(s), state) 80 } 81 82 // A Hash computes a seeded hash of a byte sequence. 83 // 84 // The zero Hash is a valid Hash ready to use. 85 // A zero Hash chooses a random seed for itself during 86 // the first call to a Reset, Write, Seed, or Sum64 method. 87 // For control over the seed, use SetSeed. 88 // 89 // The computed hash values depend only on the initial seed and 90 // the sequence of bytes provided to the Hash object, not on the way 91 // in which the bytes are provided. For example, the three sequences 92 // 93 // h.Write([]byte{'f','o','o'}) 94 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 95 // h.WriteString("foo") 96 // 97 // all have the same effect. 98 // 99 // Hashes are intended to be collision-resistant, even for situations 100 // where an adversary controls the byte sequences being hashed. 101 // 102 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 103 // If multiple goroutines must compute the same seeded hash, 104 // each can declare its own Hash and call SetSeed with a common Seed. 105 type Hash struct { 106 _ [0]func() // not comparable 107 seed Seed // initial seed used for this hash 108 state Seed // current hash of all flushed bytes 109 buf [bufSize]byte // unflushed byte buffer 110 n int // number of unflushed bytes 111 } 112 113 // bufSize is the size of the Hash write buffer. 114 // The buffer ensures that writes depend only on the sequence of bytes, 115 // not the sequence of WriteByte/Write/WriteString calls, 116 // by always calling rthash with a full buffer (except for the tail). 117 const bufSize = 128 118 119 // initSeed seeds the hash if necessary. 120 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 121 // Note that this does not include Write/WriteByte/WriteString in the case 122 // where they only add to h.buf. (If they write too much, they call h.flush, 123 // which does call h.initSeed.) 124 func (h *Hash) initSeed() { 125 if h.seed.s == 0 { 126 seed := MakeSeed() 127 h.seed = seed 128 h.state = seed 129 } 130 } 131 132 // WriteByte adds b to the sequence of bytes hashed by h. 133 // It never fails; the error result is for implementing io.ByteWriter. 134 func (h *Hash) WriteByte(b byte) error { 135 if h.n == len(h.buf) { 136 h.flush() 137 } 138 h.buf[h.n] = b 139 h.n++ 140 return nil 141 } 142 143 // Write adds b to the sequence of bytes hashed by h. 144 // It always writes all of b and never fails; the count and error result are for implementing io.Writer. 145 func (h *Hash) Write(b []byte) (int, error) { 146 size := len(b) 147 // Deal with bytes left over in h.buf. 148 // h.n <= bufSize is always true. 149 // Checking it is ~free and it lets the compiler eliminate a bounds check. 150 if h.n > 0 && h.n <= bufSize { 151 k := copy(h.buf[h.n:], b) 152 h.n += k 153 if h.n < bufSize { 154 // Copied the entirety of b to h.buf. 155 return size, nil 156 } 157 b = b[k:] 158 h.flush() 159 // No need to set h.n = 0 here; it happens just before exit. 160 } 161 // Process as many full buffers as possible, without copying, and calling initSeed only once. 162 if len(b) > bufSize { 163 h.initSeed() 164 for len(b) > bufSize { 165 h.state.s = rthash(&b[0], bufSize, h.state.s) 166 b = b[bufSize:] 167 } 168 } 169 // Copy the tail. 170 copy(h.buf[:], b) 171 h.n = len(b) 172 return size, nil 173 } 174 175 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 176 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. 177 func (h *Hash) WriteString(s string) (int, error) { 178 // WriteString mirrors Write. See Write for comments. 179 size := len(s) 180 if h.n > 0 && h.n <= bufSize { 181 k := copy(h.buf[h.n:], s) 182 h.n += k 183 if h.n < bufSize { 184 return size, nil 185 } 186 s = s[k:] 187 h.flush() 188 } 189 if len(s) > bufSize { 190 h.initSeed() 191 for len(s) > bufSize { 192 ptr := (*byte)(unsafe.StringData(s)) 193 h.state.s = rthash(ptr, bufSize, h.state.s) 194 s = s[bufSize:] 195 } 196 } 197 copy(h.buf[:], s) 198 h.n = len(s) 199 return size, nil 200 } 201 202 // Seed returns h's seed value. 203 func (h *Hash) Seed() Seed { 204 h.initSeed() 205 return h.seed 206 } 207 208 // SetSeed sets h to use seed, which must have been returned by MakeSeed 209 // or by another Hash's Seed method. 210 // Two Hash objects with the same seed behave identically. 211 // Two Hash objects with different seeds will very likely behave differently. 212 // Any bytes added to h before this call will be discarded. 213 func (h *Hash) SetSeed(seed Seed) { 214 if seed.s == 0 { 215 panic("maphash: use of uninitialized Seed") 216 } 217 h.seed = seed 218 h.state = seed 219 h.n = 0 220 } 221 222 // Reset discards all bytes added to h. 223 // (The seed remains the same.) 224 func (h *Hash) Reset() { 225 h.initSeed() 226 h.state = h.seed 227 h.n = 0 228 } 229 230 // precondition: buffer is full. 231 func (h *Hash) flush() { 232 if h.n != len(h.buf) { 233 panic("maphash: flush of partially full buffer") 234 } 235 h.initSeed() 236 h.state.s = rthash(&h.buf[0], h.n, h.state.s) 237 h.n = 0 238 } 239 240 // Sum64 returns h's current 64-bit value, which depends on 241 // h's seed and the sequence of bytes added to h since the 242 // last call to Reset or SetSeed. 243 // 244 // All bits of the Sum64 result are close to uniformly and 245 // independently distributed, so it can be safely reduced 246 // by using bit masking, shifting, or modular arithmetic. 247 func (h *Hash) Sum64() uint64 { 248 h.initSeed() 249 return rthash(&h.buf[0], h.n, h.state.s) 250 } 251 252 // MakeSeed returns a new random seed. 253 func MakeSeed() Seed { 254 var s uint64 255 for { 256 s = runtime_fastrand64() 257 // We use seed 0 to indicate an uninitialized seed/hash, 258 // so keep trying until we get a non-zero seed. 259 if s != 0 { 260 break 261 } 262 } 263 return Seed{s: s} 264 } 265 266 //go:linkname runtime_fastrand64 runtime.fastrand64 267 func runtime_fastrand64() uint64 268 269 func rthash(ptr *byte, len int, seed uint64) uint64 { 270 if len == 0 { 271 return seed 272 } 273 // The runtime hasher only works on uintptr. For 64-bit 274 // architectures, we use the hasher directly. Otherwise, 275 // we use two parallel hashers on the lower and upper 32 bits. 276 if unsafe.Sizeof(uintptr(0)) == 8 { 277 return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))) 278 } 279 lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)) 280 hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len)) 281 return uint64(hi)<<32 | uint64(lo) 282 } 283 284 //go:linkname runtime_memhash runtime.memhash 285 //go:noescape 286 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 287 288 // Sum appends the hash's current 64-bit value to b. 289 // It exists for implementing hash.Hash. 290 // For direct calls, it is more efficient to use Sum64. 291 func (h *Hash) Sum(b []byte) []byte { 292 x := h.Sum64() 293 return append(b, 294 byte(x>>0), 295 byte(x>>8), 296 byte(x>>16), 297 byte(x>>24), 298 byte(x>>32), 299 byte(x>>40), 300 byte(x>>48), 301 byte(x>>56)) 302 } 303 304 // Size returns h's hash value size, 8 bytes. 305 func (h *Hash) Size() int { return 8 } 306 307 // BlockSize returns h's block size. 308 func (h *Hash) BlockSize() int { return len(h.buf) } 309