LCOV - code coverage report
Current view: directory - objdir/dist/include/mozilla - HashFunctions.h (source / functions) Found Hit Coverage
Test: app.info Lines: 39 33 84.6 %
Date: 2012-06-02 Functions: 57 25 43.9 %

       1                 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
       2                 :  * vim: set ts=8 sw=4 et tw=99 ft=cpp:
       3                 :  *
       4                 :  * This Source Code Form is subject to the terms of the Mozilla Public
       5                 :  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
       6                 :  * You can obtain one at http://mozilla.org/MPL/2.0/. */
       7                 : 
       8                 : /* Utilities for hashing */
       9                 : 
      10                 : /*
      11                 :  * This file exports functions for hashing data down to a 32-bit value,
      12                 :  * including:
      13                 :  *
      14                 :  *  - HashString    Hash a char* or uint16_t/wchar_t* of known or unknown
      15                 :  *                  length.
      16                 :  *
      17                 :  *  - HashBytes     Hash a byte array of known length.
      18                 :  *
      19                 :  *  - HashGeneric   Hash one or more values.  Currently, we support uint32_t,
      20                 :  *                  types which can be implicitly converted to uint32_t, data
      21                 :  *                  pointers, and function pointers.
      22                 :  *
      23                 :  *  - AddToHash     Add one or more values to the given hash.  This supports the
      24                 :  *                  same list of types as HashGeneric.
      25                 :  *
      26                 :  *
      27                 :  * You can chain these functions together to hash complex objects.  For example:
      28                 :  *
      29                 :  *  class ComplexObject {
      30                 :  *    char* str;
      31                 :  *    uint32_t uint1, uint2;
      32                 :  *    void (*callbackFn)();
      33                 :  *
      34                 :  *    uint32_t Hash() {
      35                 :  *      uint32_t hash = HashString(str);
      36                 :  *      hash = AddToHash(hash, uint1, uint2);
      37                 :  *      return AddToHash(hash, callbackFn);
      38                 :  *    }
      39                 :  *  };
      40                 :  *
      41                 :  * If you want to hash an nsAString or nsACString, use the HashString functions
      42                 :  * in nsHashKey.h.
      43                 :  */
      44                 : 
      45                 : #ifndef mozilla_HashFunctions_h_
      46                 : #define mozilla_HashFunctions_h_
      47                 : 
      48                 : #include "mozilla/Assertions.h"
      49                 : #include "mozilla/Attributes.h"
      50                 : #include "mozilla/StandardInteger.h"
      51                 : 
      52                 : #ifdef __cplusplus
      53                 : namespace mozilla {
      54                 : 
      55                 : /**
      56                 :  * The golden ratio as a 32-bit fixed-point value: 0x9E3779B9 / 2^32 is about
      57                 :  * 0.618, the ratio's fractional part.  detail::AddU32ToHash multiplies by it. */
      58                 : static const uint32_t GoldenRatioU32 = 0x9E3779B9U;
      59                 : 
      60                 : inline uint32_t /* Rotates the 32-bit |value| left by |bits| bits; requires bits < 32. */
      61       532171804 : RotateLeft32(uint32_t value, uint8_t bits)
      62                 : { /* The right-shift count is masked with 31 so bits == 0 shifts by 0, not by 32 (undefined). */
      63       532171804 :   MOZ_ASSERT(bits < 32);
      64       532171804 :   return (value << bits) | (value >> ((32 - bits) & 31));
      65                 : }
      66                 : 
      67                 : namespace detail {
      68                 : 
      69                 : inline uint32_t /* Folds one 32-bit |value| into |hash| and returns the updated hash. */
      70       532171783 : AddU32ToHash(uint32_t hash, uint32_t value)
      71                 : {
      72                 :   /*
      73                 :    * This is the meat of all our hash routines.  This hash function is not
      74                 :    * particularly sophisticated, but it seems to work well for our mostly
      75                 :    * plain-text inputs.  Implementation notes follow.
      76                 :    *
      77                 :    * Our use of the golden ratio here is arbitrary; we could pick almost any
      78                 :    * number which:
      79                 :    *
      80                 :    *  * is odd (because otherwise, all our hash values will be even)
      81                 :    *
      82                 :    *  * has a reasonably-even mix of 1's and 0's (consider the extreme case
      83                 :    *    where we multiply by 0x3 or 0xeffffff -- this will not produce good
      84                 :    *    mixing across all bits of the hash).
      85                 :    *
      86                 :    * The rotation length of 5 is also arbitrary, although an odd number is again
      87                 :    * preferable so our hash explores the whole universe of possible rotations.
      88                 :    *
      89                 :    * Finally, we multiply by the golden ratio *after* xor'ing, not before.
      90                 :    * Otherwise, if |hash| is 0 (as it often is for the beginning of a message),
      91                 :    * the expression
      92                 :    *
      93                 :    *   (GoldenRatioU32 * RotateLeft32(hash, 5)) |xor| value
      94                 :    *
      95                 :    * evaluates to |value|.
      96                 :    *
      97                 :    * (Number-theoretic aside: Because any odd number |m| is relatively prime to
      98                 :    * our modulus (2^32), the list
      99                 :    *
     100                 :    *    [x * m (mod 2^32) for 0 <= x < 2^32]
     101                 :    *
     102                 :    * has no duplicate elements.  This means that multiplying by |m| does not
     103                 :    * cause us to skip any possible hash values.
     104                 :    *
     105                 :    * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
     106                 :    * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
     107                 :    * multiply our hash value by |m| a few times without negating the
     108                 :    * multiplicative effect.  Our golden ratio constant has order 2^29, which is
     109                 :    * more than enough for our purposes.)
     110                 :    */
     111       532171783 :   return GoldenRatioU32 * (RotateLeft32(hash, 5) ^ value);
     112                 : }
     113                 : 
     114                 : /**
     115                 :  * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter; only the
     116                 :  * PtrSize == 4 and PtrSize == 8 specializations that follow are defined. */
     117                 : template<size_t PtrSize>
     118                 : inline uint32_t
     119                 : AddUintptrToHash(uint32_t hash, uintptr_t value);
     120                 : 
     121                 : template<> /* 4-byte uintptr_t: add |value| to the hash as a single 32-bit word. */
     122                 : inline uint32_t
     123          429163 : AddUintptrToHash<4>(uint32_t hash, uintptr_t value)
     124                 : {
     125          429163 :   return AddU32ToHash(hash, static_cast<uint32_t>(value));
     126                 : }
     127                 : 
     128                 : template<> /* 8-byte uintptr_t: add the low 32 bits of |value|, then the high 32 bits. */
     129                 : inline uint32_t
     130                 : AddUintptrToHash<8>(uint32_t hash, uintptr_t value)
     131                 : {
     132                 :   /*
     133                 :    * The static cast to uint64_t below is necessary because this function
     134                 :    * sometimes gets compiled on 32-bit platforms (yes, even though it's a
     135                 :    * template and we never call this specialization in a 32-bit build).  If
     136                 :    * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
     137                 :    * right 32 bits, and the compiler throws an error.
     138                 :    */
     139                 :   uint32_t v1 = static_cast<uint32_t>(value);
     140                 :   uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
     141                 :   return AddU32ToHash(AddU32ToHash(hash, v1), v2);
     142                 : }
     143                 : 
     144                 : } /* namespace detail */
     145                 : 
     146                 : /**
     147                 :  * AddToHash takes a hash and some values and returns a new hash based on the
     148                 :  * inputs.
     149                 :  *
     150                 :  * Currently, we support hashing uint32_t's, values which we can implicitly
     151                 :  * convert to uint32_t, data pointers, and function pointers.
     152                 :  */
     153                 : template<typename A>
     154                 : MOZ_WARN_UNUSED_RESULT
     155                 : inline uint32_t
     156       531742570 : AddToHash(uint32_t hash, A a)
     157                 : {
     158                 :   /*
     159                 :    * Try to convert |A| to uint32_t implicitly.  If this works, great.  If not,
     160                 :    * we'll error out at compile time, because the call below takes a uint32_t.
     161                 :    */
     162       531742570 :   return detail::AddU32ToHash(hash, a);
     163                 : }
     164                 : 
     165                 : template<typename A> /* Pointer overload: hashes the pointer's own value, not what it points to. */
     166                 : MOZ_WARN_UNUSED_RESULT
     167                 : inline uint32_t
     168          429163 : AddToHash(uint32_t hash, A* a)
     169                 : {
     170                 :   /*
     171                 :    * You might think this function should just take a void*.  But then we'd only
     172                 :    * catch data pointers and couldn't handle function pointers.
     173                 :    */
     174                 : 
     175                 :   MOZ_STATIC_ASSERT(sizeof(a) == sizeof(uintptr_t),
     176                 :                     "Strange pointer!");
     177                 : 
     178          429163 :   return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a)); /* selects the 4- or 8-byte specialization */
     179                 : }
     180                 : 
     181                 : template<typename A, typename B> /* Adds |a| and then |b| to |hash|. */
     182                 : MOZ_WARN_UNUSED_RESULT
     183                 : uint32_t
     184        34221387 : AddToHash(uint32_t hash, A a, B b)
     185                 : {
     186        34221387 :   return AddToHash(AddToHash(hash, a), b);
     187                 : }
     188                 : 
     189                 : template<typename A, typename B, typename C> /* Adds |a|, |b| and |c| to |hash|, in that order. */
     190                 : MOZ_WARN_UNUSED_RESULT
     191                 : uint32_t
     192               0 : AddToHash(uint32_t hash, A a, B b, C c)
     193                 : {
     194               0 :   return AddToHash(AddToHash(hash, a, b), c);
     195                 : }
     196                 : 
     197                 : template<typename A, typename B, typename C, typename D> /* Adds |a| through |d| to |hash|, in order. */
     198                 : MOZ_WARN_UNUSED_RESULT
     199                 : uint32_t
     200                 : AddToHash(uint32_t hash, A a, B b, C c, D d)
     201                 : {
     202                 :   return AddToHash(AddToHash(hash, a, b, c), d);
     203                 : }
     204                 : 
     205                 : template<typename A, typename B, typename C, typename D, typename E> /* Adds |a| through |e| to |hash|, in order. */
     206                 : MOZ_WARN_UNUSED_RESULT
     207                 : uint32_t
     208                 : AddToHash(uint32_t hash, A a, B b, C c, D d, E e)
     209                 : {
     210                 :   return AddToHash(AddToHash(hash, a, b, c, d), e);
     211                 : }
     212                 : 
     213                 : /**
     214                 :  * The HashGeneric class of functions let you hash one or more values.  Each
     215                 :  * overload starts from a hash of 0 and adds its arguments in order.
     216                 :  * If you want to hash together two values x and y, calling HashGeneric(x, y) is
     217                 :  * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes
     218                 :  * that x has already been hashed.
     219                 :  */
     220                 : template<typename A>
     221                 : MOZ_WARN_UNUSED_RESULT
     222                 : inline uint32_t
     223               0 : HashGeneric(A a)
     224                 : {
     225               0 :   return AddToHash(0, a);
     226                 : }
     227                 : 
     228                 : template<typename A, typename B>
     229                 : MOZ_WARN_UNUSED_RESULT
     230                 : inline uint32_t
     231            1922 : HashGeneric(A a, B b)
     232                 : {
     233            1922 :   return AddToHash(0, a, b);
     234                 : }
     235                 : 
     236                 : template<typename A, typename B, typename C>
     237                 : MOZ_WARN_UNUSED_RESULT
     238                 : inline uint32_t
     239               0 : HashGeneric(A a, B b, C c)
     240                 : {
     241               0 :   return AddToHash(0, a, b, c);
     242                 : }
     243                 : 
     244                 : template<typename A, typename B, typename C, typename D>
     245                 : MOZ_WARN_UNUSED_RESULT
     246                 : inline uint32_t
     247                 : HashGeneric(A a, B b, C c, D d)
     248                 : {
     249                 :   return AddToHash(0, a, b, c, d);
     250                 : }
     251                 : 
     252                 : template<typename A, typename B, typename C, typename D, typename E>
     253                 : MOZ_WARN_UNUSED_RESULT
     254                 : inline uint32_t
     255                 : HashGeneric(A a, B b, C c, D d, E e)
     256                 : {
     257                 :   return AddToHash(0, a, b, c, d, e);
     258                 : }
     259                 : 
     260                 : namespace detail {
     261                 : 
     262                 : template<typename T> /* Hashes the elements of |str| up to, but not including, the first zero element. */
     263                 : uint32_t
     264         8869305 : HashUntilZero(const T* str)
     265                 : {
     266         8869305 :   uint32_t hash = 0;
     267       197589467 :   for (T c; (c = *str); str++) /* the assignment doubles as the loop test: a zero element ends the loop */
     268       188720162 :     hash = AddToHash(hash, c);
     269         8869305 :   return hash;
     270                 : }
     271                 : 
     272                 : template<typename T> /* Hashes exactly |length| elements of |str|; zero elements are hashed like any other. */
     273                 : uint32_t
     274        13360971 : HashKnownLength(const T* str, size_t length)
     275                 : {
     276        13360971 :   uint32_t hash = 0; /* so a |length| of 0 returns 0 */
     277       269266395 :   for (size_t i = 0; i < length; i++)
     278       255905438 :     hash = AddToHash(hash, str[i]);
     279        13360957 :   return hash;
     280                 : }
     281                 : 
     282                 : } /* namespace detail */
     283                 : 
     284                 : /**
     285                 :  * HashString hashes a string element by element.  The one-argument overloads
     286                 :  * stop at the first zero element; the others hash exactly |length| elements.
     287                 :  * If you have the string's length, you might as well call the overload which
     288                 :  * includes the length.  It may be marginally faster.
     289                 :  */
     290                 : MOZ_WARN_UNUSED_RESULT
     291                 : inline uint32_t
     292         8675341 : HashString(const char* str)
     293                 : {
     294         8675341 :   return detail::HashUntilZero(str);
     295                 : }
     296                 : 
     297                 : MOZ_WARN_UNUSED_RESULT
     298                 : inline uint32_t
     299         4411104 : HashString(const char* str, size_t length)
     300                 : {
     301         4411104 :   return detail::HashKnownLength(str, length);
     302                 : }
     303                 : 
     304                 : MOZ_WARN_UNUSED_RESULT
     305                 : inline uint32_t
     306          193964 : HashString(const uint16_t* str)
     307                 : {
     308          193964 :   return detail::HashUntilZero(str);
     309                 : }
     310                 : 
     311                 : MOZ_WARN_UNUSED_RESULT
     312                 : inline uint32_t
     313         8949848 : HashString(const uint16_t* str, size_t length)
     314                 : {
     315         8949848 :   return detail::HashKnownLength(str, length);
     316                 : }
     317                 : 
     318                 : /*
     319                 :  * On Windows, wchar_t (PRUnichar) is not the same as uint16_t, even though it's
     320                 :  * the same width, so wchar_t strings get their own overloads when WIN32 is defined.
     321                 :  */
     322                 : #ifdef WIN32
     323                 : MOZ_WARN_UNUSED_RESULT
     324                 : inline uint32_t
     325                 : HashString(const wchar_t* str)
     326                 : {
     327                 :   return detail::HashUntilZero(str);
     328                 : }
     329                 : 
     330                 : MOZ_WARN_UNUSED_RESULT
     331                 : inline uint32_t
     332                 : HashString(const wchar_t* str, size_t length)
     333                 : {
     334                 :   return detail::HashKnownLength(str, length);
     335                 : }
     336                 : #endif
     337                 : 
     338                 : /**
     339                 :  * Hash some number of bytes.
     340                 :  * Only declared in this header (extern); no definition appears here.
     341                 :  * This hash walks word-by-word, rather than byte-by-byte, so you won't get the
     342                 :  * same result out of HashBytes as you would out of HashString.
     343                 :  */
     344                 : MOZ_WARN_UNUSED_RESULT
     345                 : extern MFBT_API(uint32_t)
     346                 : HashBytes(const void* bytes, size_t length);
     347                 : 
     348                 : } /* namespace mozilla */
     349                 : #endif /* __cplusplus */
     350                 : #endif /* mozilla_HashFunctions_h_ */

Generated by: LCOV version 1.7