1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 : * vim: set ts=8 sw=4 et tw=99 ft=cpp:
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this file,
6 : * You can obtain one at http://mozilla.org/MPL/2.0/. */
7 :
8 : /* Utilities for hashing */
9 :
10 : /*
11 : * This file exports functions for hashing data down to a 32-bit value,
12 : * including:
13 : *
14 : * - HashString Hash a char* or uint16_t/wchar_t* of known or unknown
15 : * length.
16 : *
17 : * - HashBytes Hash a byte array of known length.
18 : *
 * - HashGeneric    Hash one or more values.  Currently, we support uint32_t,
 *                  types which can be implicitly converted to uint32_t, data
 *                  pointers, and function pointers.
22 : *
23 : * - AddToHash Add one or more values to the given hash. This supports the
24 : * same list of types as HashGeneric.
25 : *
26 : *
27 : * You can chain these functions together to hash complex objects. For example:
28 : *
29 : * class ComplexObject {
30 : * char* str;
31 : * uint32_t uint1, uint2;
32 : * void (*callbackFn)();
33 : *
34 : * uint32_t Hash() {
35 : * uint32_t hash = HashString(str);
36 : * hash = AddToHash(hash, uint1, uint2);
37 : * return AddToHash(hash, callbackFn);
38 : * }
39 : * };
40 : *
41 : * If you want to hash an nsAString or nsACString, use the HashString functions
42 : * in nsHashKey.h.
43 : */
44 :
45 : #ifndef mozilla_HashFunctions_h_
46 : #define mozilla_HashFunctions_h_
47 :
48 : #include "mozilla/Assertions.h"
49 : #include "mozilla/Attributes.h"
50 : #include "mozilla/StandardInteger.h"
51 :
52 : #ifdef __cplusplus
53 : namespace mozilla {
54 :
/**
 * The golden ratio expressed as a 32-bit fixed-point fraction, i.e. 2^32
 * divided by the golden ratio and truncated to an integer.  The value is odd.
 */
static const uint32_t GoldenRatioU32 = 0x9e3779b9u;
59 :
60 : inline uint32_t
61 532171804 : RotateLeft32(uint32_t value, uint8_t bits)
62 : {
63 532171804 : MOZ_ASSERT(bits < 32);
64 532171804 : return (value << bits) | (value >> (32 - bits));
65 : }
66 :
67 : namespace detail {
68 :
69 : inline uint32_t
70 532171783 : AddU32ToHash(uint32_t hash, uint32_t value)
71 : {
72 : /*
73 : * This is the meat of all our hash routines. This hash function is not
74 : * particularly sophisticated, but it seems to work well for our mostly
75 : * plain-text inputs. Implementation notes follow.
76 : *
77 : * Our use of the golden ratio here is arbitrary; we could pick almost any
78 : * number which:
79 : *
80 : * * is odd (because otherwise, all our hash values will be even)
81 : *
82 : * * has a reasonably-even mix of 1's and 0's (consider the extreme case
83 : * where we multiply by 0x3 or 0xeffffff -- this will not produce good
84 : * mixing across all bits of the hash).
85 : *
86 : * The rotation length of 5 is also arbitrary, although an odd number is again
87 : * preferable so our hash explores the whole universe of possible rotations.
88 : *
89 : * Finally, we multiply by the golden ratio *after* xor'ing, not before.
90 : * Otherwise, if |hash| is 0 (as it often is for the beginning of a message),
91 : * the expression
92 : *
93 : * (GoldenRatioU32 * RotateLeft(hash, 5)) |xor| value
94 : *
95 : * evaluates to |value|.
96 : *
97 : * (Number-theoretic aside: Because any odd number |m| is relatively prime to
98 : * our modulus (2^32), the list
99 : *
100 : * [x * m (mod 2^32) for 0 <= x < 2^32]
101 : *
102 : * has no duplicate elements. This means that multiplying by |m| does not
103 : * cause us to skip any possible hash values.
104 : *
105 : * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
106 : * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
107 : * multiply our hash value by |m| a few times without negating the
108 : * multiplicative effect. Our golden ratio constant has order 2^29, which is
109 : * more than enough for our purposes.)
110 : */
111 532171783 : return GoldenRatioU32 * (RotateLeft32(hash, 5) ^ value);
112 : }
113 :
114 : /**
115 : * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter.
116 : */
117 : template<size_t PtrSize>
118 : inline uint32_t
119 : AddUintptrToHash(uint32_t hash, uintptr_t value);
120 :
121 : template<>
122 : inline uint32_t
123 429163 : AddUintptrToHash<4>(uint32_t hash, uintptr_t value)
124 : {
125 429163 : return AddU32ToHash(hash, static_cast<uint32_t>(value));
126 : }
127 :
128 : template<>
129 : inline uint32_t
130 : AddUintptrToHash<8>(uint32_t hash, uintptr_t value)
131 : {
132 : /*
133 : * The static cast to uint64_t below is necessary because this function
134 : * sometimes gets compiled on 32-bit platforms (yes, even though it's a
135 : * template and we never call this particular override in a 32-bit build). If
136 : * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
137 : * right 32 bits, and the compiler throws an error.
138 : */
139 : uint32_t v1 = static_cast<uint32_t>(value);
140 : uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
141 : return AddU32ToHash(AddU32ToHash(hash, v1), v2);
142 : }
143 :
144 : } /* namespace detail */
145 :
146 : /**
147 : * AddToHash takes a hash and some values and returns a new hash based on the
148 : * inputs.
149 : *
150 : * Currently, we support hashing uint32_t's, values which we can implicitly
151 : * convert to uint32_t, data pointers, and function pointers.
152 : */
153 : template<typename A>
154 : MOZ_WARN_UNUSED_RESULT
155 : inline uint32_t
156 531742570 : AddToHash(uint32_t hash, A a)
157 : {
158 : /*
159 : * Try to convert |A| to uint32_t implicitly. If this works, great. If not,
160 : * we'll error out.
161 : */
162 531742570 : return detail::AddU32ToHash(hash, a);
163 : }
164 :
165 : template<typename A>
166 : MOZ_WARN_UNUSED_RESULT
167 : inline uint32_t
168 429163 : AddToHash(uint32_t hash, A* a)
169 : {
170 : /*
171 : * You might think this function should just take a void*. But then we'd only
172 : * catch data pointers and couldn't handle function pointers.
173 : */
174 :
175 : MOZ_STATIC_ASSERT(sizeof(a) == sizeof(uintptr_t),
176 : "Strange pointer!");
177 :
178 429163 : return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a));
179 : }
180 :
181 : template<typename A, typename B>
182 : MOZ_WARN_UNUSED_RESULT
183 : uint32_t
184 34221387 : AddToHash(uint32_t hash, A a, B b)
185 : {
186 34221387 : return AddToHash(AddToHash(hash, a), b);
187 : }
188 :
189 : template<typename A, typename B, typename C>
190 : MOZ_WARN_UNUSED_RESULT
191 : uint32_t
192 0 : AddToHash(uint32_t hash, A a, B b, C c)
193 : {
194 0 : return AddToHash(AddToHash(hash, a, b), c);
195 : }
196 :
197 : template<typename A, typename B, typename C, typename D>
198 : MOZ_WARN_UNUSED_RESULT
199 : uint32_t
200 : AddToHash(uint32_t hash, A a, B b, C c, D d)
201 : {
202 : return AddToHash(AddToHash(hash, a, b, c), d);
203 : }
204 :
205 : template<typename A, typename B, typename C, typename D, typename E>
206 : MOZ_WARN_UNUSED_RESULT
207 : uint32_t
208 : AddToHash(uint32_t hash, A a, B b, C c, D d, E e)
209 : {
210 : return AddToHash(AddToHash(hash, a, b, c, d), e);
211 : }
212 :
213 : /**
214 : * The HashGeneric class of functions let you hash one or more values.
215 : *
216 : * If you want to hash together two values x and y, calling HashGeneric(x, y) is
217 : * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes
218 : * that x has already been hashed.
219 : */
220 : template<typename A>
221 : MOZ_WARN_UNUSED_RESULT
222 : inline uint32_t
223 0 : HashGeneric(A a)
224 : {
225 0 : return AddToHash(0, a);
226 : }
227 :
228 : template<typename A, typename B>
229 : MOZ_WARN_UNUSED_RESULT
230 : inline uint32_t
231 1922 : HashGeneric(A a, B b)
232 : {
233 1922 : return AddToHash(0, a, b);
234 : }
235 :
236 : template<typename A, typename B, typename C>
237 : MOZ_WARN_UNUSED_RESULT
238 : inline uint32_t
239 0 : HashGeneric(A a, B b, C c)
240 : {
241 0 : return AddToHash(0, a, b, c);
242 : }
243 :
244 : template<typename A, typename B, typename C, typename D>
245 : MOZ_WARN_UNUSED_RESULT
246 : inline uint32_t
247 : HashGeneric(A a, B b, C c, D d)
248 : {
249 : return AddToHash(0, a, b, c, d);
250 : }
251 :
252 : template<typename A, typename B, typename C, typename D, typename E>
253 : MOZ_WARN_UNUSED_RESULT
254 : inline uint32_t
255 : HashGeneric(A a, B b, C c, D d, E e)
256 : {
257 : return AddToHash(0, a, b, c, d, e);
258 : }
259 :
260 : namespace detail {
261 :
/**
 * Hash the elements of |str| one at a time, stopping at (and not hashing)
 * the first element that compares equal to zero.  An input whose first
 * element is zero hashes to 0.
 */
template<typename T>
uint32_t
HashUntilZero(const T* str)
{
  uint32_t result = 0;
  while (T unit = *str) {
    result = AddToHash(result, unit);
    ++str;
  }
  return result;
}
271 :
/**
 * Hash exactly |length| elements of |str|, one at a time and in order.  Zero
 * elements are hashed like any other.  A |length| of 0 yields a hash of 0.
 */
template<typename T>
uint32_t
HashKnownLength(const T* str, size_t length)
{
  uint32_t result = 0;
  const T* const end = str + length;
  for (const T* cursor = str; cursor != end; ++cursor) {
    result = AddToHash(result, *cursor);
  }
  return result;
}
281 :
282 : } /* namespace detail */
283 :
284 : /**
285 : * The HashString overloads below do just what you'd expect.
286 : *
287 : * If you have the string's length, you might as well call the overload which
288 : * includes the length. It may be marginally faster.
289 : */
290 : MOZ_WARN_UNUSED_RESULT
291 : inline uint32_t
292 8675341 : HashString(const char* str)
293 : {
294 8675341 : return detail::HashUntilZero(str);
295 : }
296 :
297 : MOZ_WARN_UNUSED_RESULT
298 : inline uint32_t
299 4411104 : HashString(const char* str, size_t length)
300 : {
301 4411104 : return detail::HashKnownLength(str, length);
302 : }
303 :
304 : MOZ_WARN_UNUSED_RESULT
305 : inline uint32_t
306 193964 : HashString(const uint16_t* str)
307 : {
308 193964 : return detail::HashUntilZero(str);
309 : }
310 :
311 : MOZ_WARN_UNUSED_RESULT
312 : inline uint32_t
313 8949848 : HashString(const uint16_t* str, size_t length)
314 : {
315 8949848 : return detail::HashKnownLength(str, length);
316 : }
317 :
/*
 * On Windows, wchar_t (PRUnichar) and uint16_t are distinct types even though
 * they have the same width, so wchar_t strings need overloads of their own.
 */
#ifdef WIN32
/** Hash a wchar_t string, up to its terminating zero element. */
MOZ_WARN_UNUSED_RESULT
inline uint32_t
HashString(const wchar_t* str)
{
  return detail::HashUntilZero<wchar_t>(str);
}

/** Hash the first |length| wchar_t elements of |str|. */
MOZ_WARN_UNUSED_RESULT
inline uint32_t
HashString(const wchar_t* str, size_t length)
{
  return detail::HashKnownLength<wchar_t>(str, length);
}
#endif
337 :
/**
 * Hash |length| bytes starting at |bytes| down to a 32-bit value.
 *
 * This hash walks its input word-by-word rather than byte-by-byte, so hashing
 * the same data with HashBytes and with HashString does not give the same
 * result.
 *
 * Only the declaration lives in this header; the function is defined
 * elsewhere.
 */
MOZ_WARN_UNUSED_RESULT
extern MFBT_API(uint32_t)
HashBytes(const void* bytes, size_t length);
347 :
348 : } /* namespace mozilla */
349 : #endif /* __cplusplus */
350 : #endif /* mozilla_HashFunctions_h_ */
|