Re: Good hash for pointers

Liste des GroupesRevenir à cl c  
Sujet : Re: Good hash for pointers
De : already5chosen (at) *nospam* yahoo.com (Michael S)
Groupes : comp.lang.c
Date : 04. Jun 2024, 22:59:16
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <20240605005916.00001b33@yahoo.com>
References : 1 2 3 4 5 6 7 8 9 10 11
User-Agent : Claws Mail 4.1.1 (GTK 3.24.34; x86_64-w64-mingw32)
On Sun, 26 May 2024 10:20:55 -0700
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:

Bonita Montero <Bonita.Montero@gmail.com> writes:
 
Am 26.05.2024 um 18:24 schrieb Tim Rentsch:
 
Bonita Montero <Bonita.Montero@gmail.com> writes:
 
Am 25.05.2024 um 19:40 schrieb Tim Rentsch:
 
Bonita Montero <Bonita.Montero@gmail.com> writes:
 
Am 25.05.2024 um 11:12 schrieb Tim Rentsch:
 
Your hash function is expensive to compute, moreso even
than the "FNV" function shown earlier.  In a case like
this one where the compares are cheap, it's better to
have a dumb-but-fast hash function that might need a
few more looks to find an open slot, because the cost
of looking is so cheap compared to computing the hash
function. 
>
A (size_t)pointer * LARGE_PRIME can be sufficient,
ignoring the overflow. 
>
Plenty fast but the output quality is poor. ... 
>
If the prime is large enough there'e no regular distribution. 
>
Your conjecture is contradicted by empirical facts. 
>
There are no empirival facts for that since two times the
taken prime is beyond the 64 bit address space. 
 
You don't listen very well do you?
 
I say the output quality is poor because I have run tests that
show the poor output quality.  I've done that with a prime of my
own choosing and also with 18446744073709551557, the value you
suggested.  In both cases the test runs show results that are
clearly worse than all the other hash functions tested, including
bart's and malcolm's.  Furthermore the results for your suggested
calculation are worse across the board, on every variety of
dynamic workload in my test suite.
 
Your proposed hash function is too weak to be taken as a serious
candidate.

18446744073709551557 is indeed very bad (too close to 2**64).
But I tried other prime and at least in my simple tests it performs
rather well.
May be my tests are to simplistic, I don't pretend to understand much
about it.

// hash_aes4.c
// Reference. Hopefully behaves similarly to crypto hash
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>

uint64_t hash_ref(void* key)
{
  uint64_t ukey = (uint64_t)key;
  __m128i xkey = _mm_set_epi64x(ukey, 42);
  static const uint64_t bar[8] = {
    0xBB09BBCC90B24BF2, 0x825C622FF2792A01,
    0x94F0535CB06D4060, 0x939C756246DBFD1D,
    0x5B835E01A7E14CA1, 0xAC2BDAFC023CDD06,
    0xE0B6A4735B774AEC, 0x9CAFB43E7DDE494C,
  };
  for (int i = 0; i < 4; ++i) {
    __m128i rkey;
    memcpy(&rkey, &bar[i*2], sizeof(rkey));
    xkey = _mm_aesenc_si128(xkey, rkey);
  }
  memcpy(&ukey, &xkey, sizeof(ukey));
  return ukey;
}

// hash_bonita.c
// Bonita's with different prime
#include <stdint.h>

uint64_t hash_foo(void* key)
{
  uint64_t ukey = (uint64_t)key;
  static const uint64_t LARGE_PRIME = 0xbb09bbcc90b24bcdull;
  return ukey * LARGE_PRIME;
}


// ptr_hash.c
// test bench
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

uint64_t hash_foo(void*);
uint64_t hash_ref(void*);

static void test(
 void** pointers, size_t n_pointers,
 size_t* table,   size_t tab_sz,
 uint64_t (*hash_function)(void*));

static const char UsageStr[] =
"ptr_hash - examine performance of pointer hash function\n"
"Usage\n"
"ptr_hash n m sz1 [sz2]\n"
"where\n"
" n   - the size of hash table\n"
" m   - # of pointers to put in the table\n"
" sz1 - minimal size of allocated blocks\n"
" sz2 - [optional] maximal size of allocated blocks. Default = sz1\n"
;
int main(int argz, char** argv)
{
  if (argz < 4) {
    fprintf(stderr, "%s", UsageStr);
    return 1;
  }

  size_t params[4];
  for (int arg_i = 1; arg_i < 5 && arg_i < argz; ++arg_i) {
    long val = strtol(argv[arg_i], NULL, 0);
    if (val <= 0) {
      fprintf(stderr
        ,"Bad parameter %s. Please specify positive number\n"
        ,argv[arg_i]);
      return 1;
    }
    params[arg_i-1] = val;
  }
  size_t n = params[0];
  size_t m = params[1];
  size_t sz1 = params[2];
  size_t sz2 = params[3];
  if (argz == 4)
    sz2 = sz1;
  if (sz1 > sz2) {
    size_t tmp = sz1; sz1 = sz2; sz2 = tmp;
  }

  int ret = 1;
  void **pointers = malloc(m * sizeof(*pointers));
  if (pointers) {
    size_t* table = malloc(n * sizeof(*table));
    if (table) {
      ret = 0;
      srand(1);
      const unsigned __int128 RAND_SCALE = (unsigned __int128)RAND_MAX
  + 1;
      for (size_t i = 0; i < m; ++i) {
        size_t r = rand();
        size_t sz = (unsigned __int128)(sz2-sz1+1)*r / RAND_SCALE + sz1;
        void* block = malloc(sz);
        if (!block) {
          ret = 1;
          perror("malloc()");
          for (size_t k = 0; k < i; ++k)
            free(pointers[k]);
          break;
        }
        pointers[i] = block;
      }
      if (ret == 0) {
        for (size_t i = 0; i < m; ++i)
          free(pointers[i]); // we don't need allocated blocks for a
  test
        printf("ref: ");
        test(pointers, m, table, n, hash_ref);
        printf("uut: ");
        test(pointers, m, table, n, hash_foo);
      }
      free(table);
    } else {
      perror("malloc()");
    }
    free(pointers);
  } else {
    perror("malloc()");
  }

  return ret;
}

static void test(
 void** pointers, size_t n_pointers,
 size_t* table,   size_t tab_sz,
 uint64_t (*hash_function)(void*))
{
  for (size_t i = 0; i < tab_sz; ++i)
    table[i] = 0;
  // simulate hash operation
  for (size_t i = 0; i < n_pointers; ++i) {
    uint64_t h = hash_function(pointers[i]);
    table[(size_t)(((unsigned __int128)tab_sz * h)>>64)] += 1;
  }
  // statistics
  size_t occupied = 0;
  size_t collisions = 0;
  size_t worst = 0;
  for (size_t i = 0; i < tab_sz; ++i) {
    size_t c = table[i];
    occupied += c != 0;
    collisions += c > 1;
    if (worst < c)
      worst = c;
  }
  printf(
  "%zu keys, %zu slots, %zu occupied %zu collisions. Worst collision:
%zu\n"
  ,n_pointers
  , tab_sz
  , occupied
  , collisions
  , worst
  );
}


Compiled as:
gcc -s -Wall -O2 -march=native ptr_hash.c hash_bonita.c hash_aes4.c


Run as:
./a 100000 75000 80 1000

Result:
ref: 75000 keys, 100000 slots, 52799 occupied 17431 collisions. Worst
collision: 7
uut: 75000 keys, 100000 slots, 53201 occupied 17139 collisions. Worst
collision: 6







Date Sujet#  Auteur
23 May 24 * Good hash for pointers111Malcolm McLean
23 May 24 +* Re: Good hash for pointers3Richard Harnden
24 May 24 i`* Re: Good hash for pointers2Keith Thompson
24 May 24 i `- Re: Good hash for pointers1Malcolm McLean
23 May 24 +- Re: Good hash for pointers1Kaz Kylheku
24 May 24 +* Re: Good hash for pointers16Tim Rentsch
24 May 24 i`* Re: Good hash for pointers15Malcolm McLean
24 May 24 i `* Re: Good hash for pointers14Tim Rentsch
24 May 24 i  `* Re: Good hash for pointers13Malcolm McLean
24 May 24 i   +* Re: Good hash for pointers11Tim Rentsch
24 May 24 i   i`* Re: Good hash for pointers10bart
24 May 24 i   i +* Re: Good hash for pointers3Malcolm McLean
24 May 24 i   i i`* Re: Good hash for pointers2Chris M. Thomasson
24 May 24 i   i i `- Re: Good hash for pointers1Chris M. Thomasson
24 May 24 i   i +* Re: Good hash for pointers3Tim Rentsch
24 May 24 i   i i`* Re: Good hash for pointers2Malcolm McLean
25 May 24 i   i i `- Re: Good hash for pointers1Tim Rentsch
24 May 24 i   i `* Re: Good hash for pointers3David Brown
24 May 24 i   i  `* Re: Good hash for pointers2Malcolm McLean
24 May 24 i   i   `- Re: Good hash for pointers1Michael S
24 May 24 i   `- Re: Good hash for pointers1David Brown
24 May 24 +* Re: Good hash for pointers3Chris M. Thomasson
24 May 24 i`* Re: Good hash for pointers2Chris M. Thomasson
24 May 24 i `- Re: Good hash for pointers1jak
24 May 24 +* Re: Good hash for pointers84Bonita Montero
24 May 24 i`* Re: Good hash for pointers83Malcolm McLean
25 May 24 i `* Re: Good hash for pointers82bart
25 May 24 i  `* Re: Good hash for pointers81Tim Rentsch
25 May 24 i   +* Re: Good hash for pointers4bart
25 May 24 i   i`* Re: Good hash for pointers3Tim Rentsch
25 May 24 i   i `* Re: Good hash for pointers2bart
26 May 24 i   i  `- Re: Good hash for pointers1Tim Rentsch
25 May 24 i   `* Re: Good hash for pointers76Bonita Montero
25 May 24 i    `* Re: Good hash for pointers75Tim Rentsch
25 May 24 i     +* Re: Good hash for pointers9Malcolm McLean
25 May 24 i     i+- Re: Good hash for pointers1Tim Rentsch
25 May 24 i     i+* Re: Good hash for pointers5Janis Papanagnou
26 May 24 i     ii`* Re: Good hash for pointers4Malcolm McLean
26 May 24 i     ii `* Re: Good hash for pointers3bart
26 May 24 i     ii  +- Re: Good hash for pointers1Richard Harnden
27 May 24 i     ii  `- Re: Good hash for pointers1Kaz Kylheku
26 May 24 i     i`* Re: Good hash for pointers2Bonita Montero
26 May 24 i     i `- Re: Good hash for pointers1Bonita Montero
26 May 24 i     `* Re: Good hash for pointers65Bonita Montero
26 May 24 i      `* Re: Good hash for pointers64Tim Rentsch
26 May 24 i       `* Re: Good hash for pointers63Bonita Montero
26 May 24 i        `* Re: Good hash for pointers62Tim Rentsch
26 May 24 i         +* Re: Good hash for pointers29Bonita Montero
26 May 24 i         i+- Re: Good hash for pointers1Bonita Montero
27 May 24 i         i+* Re: Good hash for pointers5Bonita Montero
28 May 24 i         ii`* Re: Good hash for pointers4Ben Bacarisse
30 May 24 i         ii `* Re: Good hash for pointers3Bonita Montero
30 May 24 i         ii  +- Re: Good hash for pointers1Bonita Montero
31 May 24 i         ii  `- Re: Good hash for pointers1Tim Rentsch
31 May 24 i         i`* Re: Good hash for pointers22Tim Rentsch
2 Jun 24 i         i `* Re: Good hash for pointers21Michael S
2 Jun 24 i         i  +* Re: Good hash for pointers2Chris M. Thomasson
3 Jun 24 i         i  i`- Re: Good hash for pointers1Chris M. Thomasson
3 Jun 24 i         i  +* Re: Good hash for pointers5Tim Rentsch
3 Jun 24 i         i  i`* Re: Good hash for pointers4Michael S
4 Jun 24 i         i  i `* Re: Good hash for pointers3Tim Rentsch
4 Jun 24 i         i  i  `* Re: Good hash for pointers2Michael S
5 Jun 24 i         i  i   `- Re: Good hash for pointers1Tim Rentsch
3 Jun 24 i         i  `* Re: Good hash for pointers13Bonita Montero
3 Jun 24 i         i   `* Re: Good hash for pointers12Michael S
3 Jun 24 i         i    `* Re: Good hash for pointers11Bonita Montero
3 Jun 24 i         i     +* Re: Good hash for pointers8bart
3 Jun 24 i         i     i+* Re: Good hash for pointers6Michael S
3 Jun 24 i         i     ii+* Re: Good hash for pointers4bart
3 Jun 24 i         i     iii+- Re: Good hash for pointers1Michael S
3 Jun 24 i         i     iii+- Re: Good hash for pointers1Michael S
4 Jun 24 i         i     iii`- Re: Good hash for pointers1Tim Rentsch
4 Jun 24 i         i     ii`- Re: Good hash for pointers1Tim Rentsch
3 Jun 24 i         i     i`- Re: Good hash for pointers1Bonita Montero
3 Jun 24 i         i     `* Re: Good hash for pointers2Michael S
3 Jun 24 i         i      `- Re: Good hash for pointers1Bonita Montero
4 Jun 24 i         `* Re: Good hash for pointers32Michael S
5 Jun 24 i          +* Re: Good hash for pointers4Bonita Montero
5 Jun 24 i          i`* Re: Good hash for pointers3Michael S
5 Jun 24 i          i `* Re: Good hash for pointers2Bonita Montero
5 Jun 24 i          i  `- Re: Good hash for pointers1Michael S
5 Jun 24 i          +* Re: Good hash for pointers17Tim Rentsch
5 Jun 24 i          i+* AES problem (was: Good hash for pointers)2Michael S
6 Jun 24 i          ii`- Re: AES problem (was: Good hash for pointers)1Tim Rentsch
5 Jun 24 i          i+* Re: Good hash for pointers11Michael S
6 Jun 24 i          ii`* Re: Good hash for pointers10Tim Rentsch
6 Jun 24 i          ii `* Re: Good hash for pointers9Michael S
17 Jun 24 i          ii  `* Re: Good hash for pointers8Tim Rentsch
17 Jun 24 i          ii   `* Re: Good hash for pointers7Michael S
18 Jun 24 i          ii    `* Re: Good hash for pointers6Tim Rentsch
19 Jun 24 i          ii     +* Re: Good hash for pointers2Keith Thompson
19 Jun 24 i          ii     i`- Re: Good hash for pointers1Tim Rentsch
19 Jun 24 i          ii     `* Re: Good hash for pointers3James Kuyper
19 Jun 24 i          ii      +- Re: Good hash for pointers1Keith Thompson
23 Jun 24 i          ii      `- Re: Good hash for pointers1Tim Rentsch
6 Jun 24 i          i`* Re: Good hash for pointers3Michael S
16 Jun 24 i          i `* Re: Good hash for pointers2Tim Rentsch
16 Jun 24 i          i  `- Re: Good hash for pointers1Chris M. Thomasson
7 Jun 24 i          `* Re: Good hash for pointers10Bonita Montero
9 Jun 24 i           `* Re: Good hash for pointers9Bonita Montero
9 Jun 24 i            +* Re: Good hash for pointers2Richard Harnden
10 Jun 24 i            `* Re: Good hash for pointers6Malcolm McLean
26 May 24 +* Re: Good hash for pointers2Bonita Montero
26 May 24 `- Re: Good hash for pointers1Bonita Montero

Haut de la page

Les messages affichés proviennent d'usenet.

NewsPortal