/*
  Generic hash table routines.

  These functions create hash tables that grow automatically and can
  store multiple entries for each key. However, they are meant for
  relatively small numbers of entries per key (see hash_nget()
  below). The table grows by rehashing all entries into a new table,
  which can take some time for a large table. Both the keys and the
  data can be of arbitrary type and size. (The hash table treats them
  as arrays of bytes). A hash table can either use a built-in hash
  function and a built-in key comparison function, or functions
  provided by the caller.

  Each hash table stores copies of the keys and data in memory that it
  manages itself. Use hash_destroy() to free all memory used by a hash
  table.

  hash_create() creates a new hash table with a given hash function
  and a given key equality function.

  The hash function may be specified as NULL, in which case a built-in
  hash function will be used. The built-in function is the XOR variant
  of Dan Bernstein's djb2, which works well for keys that are text
  strings.

  The key equality function may also be specified as NULL, in which
  case a built-in function will be used, which simply checks that two
  keys have the same number of bytes and are byte for byte identical.

  The hash function must take a pointer (void*) to a key and the
  length of the key in bytes and must return an unsigned long int. The
  hash table will use this value (modulo the actual table size) as
  index into the table. The key equality function takes two pointers
  to keys and two lengths and must return true if the keys are equal,
  false if not.

  hash_create() returns a hashtable (an opaque pointer) or NULL in
  case of error. It may fail for the same reasons as malloc(3) and
  sets errno accordingly.

  hash_destroy() frees all memory used by the hash table. The hash
  table becomes invalid and should not be passed to any hash_*
  function afterwards. The routine doesn't know what is in each entry,
  so if the entries contain pointers to allocated memory, it is the
  caller's responsibility to free those.

  hash_get() retrieves an entry given a key. It returns a pointer to
  the data and the data's length in bytes, or NULL if the key wasn't
  found in the table. If there are multiple entries for the key,
  hash_get() returns only one.

  hash_nget() retrieves the n-th entry for a given key. It returns a
  pointer to the data and the data's length in bytes, or NULL if there
  are fewer than n entries for the key. The first entry is entry
  1. The order of the entries is undefined, i.e., it is not
  necessarily the same as the order they were added with hash_add().

  The value n is meant to be small. While hash_get() is efficient,
  hash_nget() additionally has to traverse a list of n entries, and
  thus takes longer the larger the value of n is.

  hash_put() creates or updates an entry for a given key. All existing
  entries for the key are deleted and a new one is created. hash_put()
  returns true for success and false if an error occurred. In case of
  an error, it sets errno. It may fail for the same reasons as
  malloc(3).

  hash_add() adds an entry for a given key, without removing existing
  entries for that key. It returns true for success and false if an
  error occurred. In case of an error, it sets errno. It may fail for
  the same reasons as malloc(3).

  hash_del() removes all entries for a given key.

  hash_cursor() creates a "cursor", a pointer to an opaque structure
  that can be passed to hash_next(). It returns the cursor, or NULL in
  case of an error. It may fail and set errno for the same reasons as
  malloc(3).

  hash_next() is passed a cursor created by hash_cursor() and allows
  to iterate over the keys of the hash table for which there is at
  least one entry. Every call to hash_next() returns a different key,
  along with the size of that key in bytes. After all keys have been
  returned, the next call to hash_next() returns NULL. When it returns
  NULL, the cursor is no longer valid and should not be passed to
  hash_next() again.

  The order of the entries returned by hash_next() is undefined. If
  entries are added with hash_put() or hash_add() while a cursor is in
  use, hash_next() may fail to return all entries. (This can happen if
  the new item causes the table to be enlarged and there were deleted
  items in the table.) However, it is safe to remove entries with
  hash_del() while a cursor is in use: hash_next() will return all
  (non-deleted) entries that it hasn't returned yet.

  To iterate over all entries, in case there are multiple entries for
  the same key, combine hash_next() and hash_nget().
*/

#include "stdincls.h"
#include "s-hashfn.e"

/* Number of slots hash_create() asks for when it creates a new table */
#define INITIAL_SIZE 2048

/*
  Public types:
  hashtable    -- opaque pointer to a hash table
  hashfunction -- (key, key length in bytes) -> hash value
  hashcompare  -- (key a, length of a, key b, length of b) -> true if equal
  hashcursor   -- opaque pointer to an iterator, see hash_cursor()
*/
EXPORT typedef struct hashtable *hashtable;
EXPORT typedef unsigned long (*hashfunction)(const void*, size_t);
EXPORT typedef bool (*hashcompare)(const void*, size_t, const void*, size_t);
EXPORT typedef struct hashcursor *hashcursor;

/*
  To avoid having to initialize all slots in the table to NULL on
  creation of the table, nentries and x1 are used to keep track of
  which slots have been initialized so far. At creation, memory is
  allocated, but only nentries is initialized, by setting it to 0.

  After that, at any time, the first nentries values of x1 hold the
  indexes of the entries in keys, data, keylen, datalen and x2 that
  have been initialized.

  x2 holds the reverse mapping: if i < nentries and x1[i] = h (i.e.,
  slot h has been initialized), then x2[h] = i. So, to know if slot h
  in the table has been initialized or not, check if x2[h] < nentries
  and x1[x2[h]] = h.

  Note that x2[h] may not have been initialized yet and may contain
  any random number, and so tools like valgrind will complain about
  this check.
*/
/* One stored value. All values for the same key form a singly linked
   list whose head is kept in the data array of the hash table. */
struct data {
  size_t datalen;		/* Size of the data in bytes */
  void *data;			/* Pointer to a copy of the data on the heap */
  struct data *next;		/* Next value for the same key, or NULL */
};
/* The hash table: a set of parallel arrays, all indexed by slot number,
   except x1, which is indexed by order of initialization. */
struct hashtable {
  hashfunction hash;		/* Function (key, len) -> unsigned long */
  hashcompare equal;		/* Function (key, len, key, len) -> bool */
  void **keys;			/* keys[h] = copy of the key in slot h */
  size_t *keylen;		/* keylen[h] = size of keys[h] in bytes */
  struct data **data;		/* data[h] = list of values, NULL if deleted */
  unsigned long tablesize;	/* Number of slots allocated */
  unsigned long nentries;	/* Number of slots initialized so far */
  unsigned long *x1;		/* x1[i] = the i-th slot to be initialized */
  unsigned long *x2;		/* x2[h] = i, if slot h is x1[i] */
};

/* Iterator state for hash_next(): it walks the x1 array of the table,
   i.e., it visits the slots in the order in which they were initialized. */
struct hashcursor {
  hashtable table;		/* The table being iterated over */
  unsigned long i;		/* Entry x1[i] is the next to try */
};


/* Forward declaration: hash_put() and hash_add() call enlarge_table(),
   which is defined further below because it in turn relies on
   hash_cursor(), hash_next(), hash_nget() and hash_add(). */
static bool enlarge_table(hashtable table);


/* hash1 -- default hash function: hash the len bytes starting at s.
   This is the XOR form of Dan Bernstein's djb2: starting from HASHSEED,
   the running value is multiplied by 33 and XORed with each byte. */
static unsigned long hash1(const void *s, const size_t len)
{
  const unsigned char *bytes = s;
  unsigned long h = HASHSEED;
  size_t k;

  for (k = 0; k < len; k++)
    h = (h * 33) ^ (unsigned long)bytes[k];
  return h;
}


/* equal -- default key comparison: true if both keys have the same
   length and are byte for byte identical */
static bool equal(const void *a, const size_t alen,
		  const void *b, const size_t blen)
{
  if (alen != blen) return false;
  return memcmp(a, b, alen) == 0;
}


/* hash_create_with_size -- create a hash table with room for size slots.
   fn and eq may be NULL, in which case the built-in hash1() and equal()
   are used. Only nentries is initialized (to 0); the contents of the
   arrays are left uninitialized on purpose. Returns NULL if any of the
   allocations fails. */
static hashtable hash_create_with_size(unsigned long size, hashfunction fn,
				       hashcompare eq)
{
  hashtable t = malloc(sizeof(*t));

  if (!t) return NULL;

  t->hash = fn ? fn : hash1;
  t->equal = eq ? eq : equal;
  t->nentries = 0;
  t->tablesize = size;

  /* Allocate all arrays first and check them together afterwards */
  t->keys = malloc(size * sizeof(*t->keys));
  t->data = malloc(size * sizeof(*t->data));
  t->keylen = malloc(size * sizeof(*t->keylen));
  t->x1 = malloc(size * sizeof(*t->x1));
  t->x2 = malloc(size * sizeof(*t->x2));

  if (t->keys && t->data && t->keylen && t->x1 && t->x2) return t;

  /* At least one allocation failed: release whatever we did get */
  free(t->keys);
  free(t->data);
  free(t->keylen);
  free(t->x1);
  free(t->x2);
  free(t);
  return NULL;
}


/* hash_create -- create an empty table of INITIAL_SIZE slots.
   fn is the hash function and eq the key comparison function; either
   may be NULL to select the built-in one. Returns NULL on failure. */
EXPORT hashtable hash_create(hashfunction fn, hashcompare eq)
{
  hashtable table = hash_create_with_size(INITIAL_SIZE, fn, eq);

  return table;
}


/* hash_destroy1 -- free everything a table owns, but not the table
   struct itself. Only the slots listed in x1[0..nentries-1] have been
   initialized, so only those keys and value lists are freed. */
static void hash_destroy1(hashtable table)
{
  struct data *entry, *next;
  unsigned long i, slot;

  for (i = 0; i < table->nentries; i++) {
    slot = table->x1[i];
    free(table->keys[slot]);
    for (entry = table->data[slot]; entry; entry = next) {
      next = entry->next;
      free(entry->data);
      free(entry);
    }
  }

  free(table->keys);
  free(table->data);
  free(table->keylen);
  free(table->x1);
  free(table->x2);
}


/* hash_destroy -- free the memory used by a hash table.
   Frees the copies of all keys and data as well as the table itself;
   the handle must not be used afterwards. Passing NULL is allowed and
   does nothing (like free(NULL)), so that the result of a failed
   hash_create() can be passed here unconditionally. */
EXPORT void hash_destroy(hashtable table)
{
  if (!table) return;
  hash_destroy1(table);
  free(table);
}


/* hash_get -- retrieve an entry from a hash table, NULL if not found.
   On success, returns a pointer to the table's own copy of the data and
   stores its size in *datalen. *datalen is not touched when NULL is
   returned. If the key has several entries, the head of its list is
   returned. */
EXPORT void *hash_get(hashtable const table, const void * const key,
		      const size_t keylen, size_t *datalen)
{
  const unsigned long start = table->hash(key, keylen) % table->tablesize;
  unsigned long slot = start, idx;
  struct data *first;

  /* Probe successive slots, wrapping around at the end of the table */
  do {

    /* A slot that was never initialized ends the probe sequence */
    idx = table->x2[slot];
    if (idx >= table->nentries || table->x1[idx] != slot) return NULL;

    /* An initialized slot with our key: return its first entry, if any
       (the list is empty if the key was deleted) */
    if (table->equal(table->keys[slot], table->keylen[slot], key, keylen)) {
      first = table->data[slot];
      if (!first) return NULL;
      *datalen = first->datalen;
      return first->data;
    }

    if (++slot == table->tablesize) slot = 0;
  } while (slot != start);

  /* Came full circle without finding the key */
  return NULL;
}


/* hash_nget -- retrieve the n-th entry for a key from a hash table, or NULL.
   Entries are counted from 1 along the key's list. On success, returns
   a pointer to the table's own copy of the data and stores its size in
   *datalen. Returns NULL (without touching *datalen) if the key is not
   in the table or has fewer than n entries. A value of n below 1 is
   treated like 1. */
EXPORT void *hash_nget(hashtable const table, const void * const key,
		       const size_t keylen, int n, size_t *datalen)
{
  const unsigned long start = table->hash(key, keylen) % table->tablesize;
  unsigned long slot = start, idx;
  struct data *entry;
  int count;

  /* Probe successive slots, wrapping around at the end of the table */
  do {

    /* A slot that was never initialized ends the probe sequence */
    idx = table->x2[slot];
    if (idx >= table->nentries || table->x1[idx] != slot) return NULL;

    /* An initialized slot with our key: walk to the n-th entry */
    if (table->equal(table->keys[slot], table->keylen[slot], key, keylen)) {
      entry = table->data[slot];
      for (count = 1; entry && count < n; count++) entry = entry->next;
      if (!entry) return NULL;
      *datalen = entry->datalen;
      return entry->data;
    }

    if (++slot == table->tablesize) slot = 0;
  } while (slot != start);

  /* Came full circle without finding the key */
  return NULL;
}


/* hash_put -- create/update entry in hash table, return false if error.
   Copies of the klen bytes at key and the dlen bytes at data are stored
   in the table. If the key already exists, all its existing entries are
   replaced by the new one. Returns false if the table is full and could
   not be enlarged, or if memory for the copies could not be allocated;
   in that case the table, including any existing entries for the key,
   is left unchanged. */
EXPORT bool hash_put(hashtable const table,
		     void* const key, const size_t klen,
		     void* const data, const size_t dlen)
{
  unsigned long h1, h2;
  struct data *p, *q, *old;
  void *k, *d;

  /* Try to enlarge the table if it is more than 75% full. */
  if (table->nentries > table->tablesize / 4 * 3)
    if (!enlarge_table(table)) return false;

  h1 = h2 = table->hash(key, klen) % table->tablesize;

  /* Loop until we find the key, or an empty slot */
  while (true) {

    /* Use slot h1 if it has not yet been used. Note: table->x2[h1]
       may be uninitialized, that's on purpose. */
    if (table->x2[h1] >= table->nentries || table->x1[table->x2[h1]] != h1) {
      if (!(k = malloc(klen))) return false;
      else if (!(d = malloc(dlen))) {free(k); return false;}
      else if (!(p = malloc(sizeof(*p)))) {free(d); free(k); return false;}
      p->datalen = dlen;
      p->data = memcpy(d, data, dlen);
      p->next = NULL;
      table->data[h1] = p;
      table->keys[h1] = memcpy(k, key, klen);
      table->keylen[h1] = klen;
      /* Register the slot as initialized: x1 and x2 point at each other */
      table->x2[h1] = table->nentries;
      table->x1[table->nentries] = h1;
      table->nentries++;
      return true;
    }

    /* Slot h1 is in use. Update it if it has the same key. */
    if (table->equal(table->keys[h1], table->keylen[h1], key, klen)) {
      /* Create the new entry first. If we freed the old entries first
	 and an allocation then failed, the slot would be left pointing
	 at freed memory. Copying before freeing also keeps this safe
	 when data points into one of the old entries. */
      if (!(d = malloc(dlen))) return false;
      else if (!(p = malloc(sizeof(*p)))) {free(d); return false;}
      p->datalen = dlen;
      p->data = memcpy(d, data, dlen);
      p->next = NULL;
      /* Install the new entry, then remove the old ones */
      old = table->data[h1];
      table->data[h1] = p;
      while (old) {q = old; old = old->next; free(q->data); free(q);}
      return true;
    }

    /* Advance to next slot */
    if (++h1 == table->tablesize) h1 = 0;

    /* We cannot come full circle, because enlarge_table() would have failed */
    assert(h1 != h2);
  }
}


/* hash_add -- add an entry for key in a hash table, return false if error.
   Copies of the klen bytes at key and the dlen bytes at data are stored
   in the table. Existing entries for the key are kept; the new entry
   becomes the head of the key's list. Returns false if the table is
   full and could not be enlarged, or if an allocation failed. */
EXPORT bool hash_add(hashtable const table,
		     void* const key, const size_t klen,
		     void* const data, const size_t dlen)
{
  unsigned long slot, start, idx;
  void *keycopy, *datacopy;
  struct data *entry;

  /* Try to enlarge the table if it is more than 75% full. */
  if (table->nentries > table->tablesize / 4 * 3 && !enlarge_table(table))
    return false;

  start = table->hash(key, klen) % table->tablesize;

  /* Probe until we find the key, or a slot that was never used */
  for (slot = start;;) {

    idx = table->x2[slot];	/* May be uninitialized, that's on purpose */

    if (idx >= table->nentries || table->x1[idx] != slot) {

      /* Unused slot: store copies of the key and the data here */
      keycopy = malloc(klen);
      if (!keycopy) return false;
      datacopy = malloc(dlen);
      if (!datacopy) {free(keycopy); return false;}
      entry = malloc(sizeof(*entry));
      if (!entry) {free(datacopy); free(keycopy); return false;}

      memcpy(datacopy, data, dlen);
      entry->datalen = dlen;
      entry->data = datacopy;
      entry->next = NULL;
      table->data[slot] = entry;

      memcpy(keycopy, key, klen);
      table->keys[slot] = keycopy;
      table->keylen[slot] = klen;

      /* Register the slot as initialized: x1 and x2 point at each other */
      table->x2[slot] = table->nentries;
      table->x1[table->nentries] = slot;
      table->nentries++;
      return true;
    }

    if (table->equal(table->keys[slot], table->keylen[slot], key, klen)) {

      /* Same key: push a new entry onto the front of its list */
      datacopy = malloc(dlen);
      if (!datacopy) return false;
      entry = malloc(sizeof(*entry));
      if (!entry) {free(datacopy); return false;}

      memcpy(datacopy, data, dlen);
      entry->datalen = dlen;
      entry->data = datacopy;
      entry->next = table->data[slot];
      table->data[slot] = entry;
      return true;
    }

    /* Some other key lives here, try the next slot */
    if (++slot == table->tablesize) slot = 0;

    /* We cannot come full circle, because enlarge_table() would have failed */
    assert(slot != start);
  }
}


/* hash_del -- delete all entries for a key from a hash table.
   The data of all entries is freed, but the key itself stays in its
   slot with an empty list, so that probe sequences passing through the
   slot remain intact. Does nothing if the key is not in the table. */
EXPORT void hash_del(hashtable const table, const void * const key,
		     const size_t keylen)
{
  const unsigned long start = table->hash(key, keylen) % table->tablesize;
  unsigned long slot = start, idx;
  struct data *entry, *next;

  /* Probe successive slots, wrapping around at the end of the table */
  do {

    /* A slot that was never initialized means the key isn't there */
    idx = table->x2[slot];
    if (idx >= table->nentries || table->x1[idx] != slot) return;

    /* Found the key: free its list of entries and mark it as deleted */
    if (table->equal(table->keys[slot], table->keylen[slot], key, keylen)) {
      for (entry = table->data[slot]; entry; entry = next) {
	next = entry->next;
	free(entry->data);
	free(entry);
      }
      table->data[slot] = NULL;
      return;
    }

    if (++slot == table->tablesize) slot = 0;
  } while (slot != start);
}


/* hash_cursor -- create a cursor to iterate over a hash table.
   Returns a newly allocated cursor positioned at the start of the
   table, or NULL if the allocation failed. */
EXPORT hashcursor hash_cursor(hashtable table)
{
  hashcursor c = malloc(sizeof(*c));

  if (!c) return NULL;

  c->table = table;
  c->i = 0;			/* The next bucket in x1 to try */
  return c;
}


/* hash_next -- retrieve next key from a hash table, using a cursor.
   Returns a pointer to the table's own copy of the next key that has
   at least one entry and stores the key's size in *keylen. When there
   are no more keys, it frees the cursor and returns NULL; the cursor
   must not be used after that. */
EXPORT void *hash_next(hashcursor cursor, size_t *keylen)
{
  hashtable t = cursor->table;
  unsigned long slot;

  for (; cursor->i < t->nentries; cursor->i++) {
    slot = t->x1[cursor->i];

    /* A slot without data holds a deleted key: skip it */
    if (!t->data[slot]) continue;

    /* Found a live key. Step past it, so the next call continues after it */
    *keylen = t->keylen[slot];
    cursor->i++;
    return t->keys[slot];
  }

  /* We're at the end. Free the cursor and return NULL. */
  free(cursor);
  return NULL;
}


/* enlarge_table -- try to make the table bigger.
   Allocates a table of twice the size, copies all non-deleted entries
   into it and replaces the contents of table by the new arrays. Keys
   that were deleted are not copied, so the new table may have fewer
   slots in use than the old one. If the bigger table cannot be
   allocated, or if copying an entry fails, the original table is left
   untouched. Returns true if, after all that, the table still has at
   least one free slot, false if it is completely full. */
static bool enlarge_table(hashtable table)
{
  size_t keylen, datalen;
  hashtable newtable;
  hashcursor cursor;
  void *data, *key;
  bool ok = true;
  int n;

  /* We'll need a cursor to copy entries to the enlarged table */
  if (!(cursor = hash_cursor(table))) return false;

  /* Try to allocate a new table with double the size. It may be that
     the table only appears full, because it has lots of empty slots
     where entries were deleted. But we don't have a separate function
     to compact a table without doubling its size...
  */
  newtable = hash_create_with_size(2 * table->tablesize, table->hash,
				   table->equal);

  if (!newtable) {

    /* No bigger table. hash_next() will not be called, so it is up to
       us to free the cursor. */
    free(cursor);

  } else {

    /* We succeeded in allocating, copy the entries over. Stop at the
       first entry that cannot be copied. */
    while (ok && (key = hash_next(cursor, &keylen)))
      for (n = 1; ok && (data = hash_nget(table, key, keylen, n, &datalen));
	   n++)
	ok = hash_add(newtable, key, keylen, data, datalen);

    if (!ok) {

      /* Copying failed half-way. Keep the old table, which is still
	 complete, and throw away the partial copy. hash_next() did not
	 reach the end, so the cursor has not been freed yet. */
      free(cursor);
      hash_destroy1(newtable);
      free(newtable);

    } else {

      /* Deleted keys were skipped by hash_next(), so the new table can
	 have fewer slots in use, but never more. */
      assert(newtable->nentries <= table->nentries);

      /* Destroy the old table and put the new data in its place. The
	 number of slots in use must be taken from the new table as
	 well: x1 and x2 of the new table are only valid up to that
	 number. */
      hash_destroy1(table);
      table->keys = newtable->keys;
      table->data = newtable->data;
      table->keylen = newtable->keylen;
      table->x1 = newtable->x1;
      table->x2 = newtable->x2;
      table->nentries = newtable->nentries;
      table->tablesize = newtable->tablesize;
      free(newtable);
    }
  }

  /* Whether we created a bigger table or not, if the table isn't 100%
     full, return success. We may succeed in making a bigger table
     next time. */
  return table->nentries < table->tablesize;
}
