123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480 |
- /* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "fspr_private.h"
- #include "fspr_general.h"
- #include "fspr_pools.h"
- #include "fspr_hash.h"
- #if APR_HAVE_STDLIB_H
- #include <stdlib.h>
- #endif
- #if APR_HAVE_STRING_H
- #include <string.h>
- #endif
- #include <assert.h> /* assert() */
- #if APR_POOL_DEBUG && APR_HAVE_STDIO_H
- #include <stdio.h>
- #endif
- /*
- * The internal form of a hash table.
- *
- * The table is an array indexed by the hash of the key; collisions
- * are resolved by hanging a linked list of hash entries off each
- * element of the array. Although this is a really simple design it
- * isn't too bad given that pools have a low allocation overhead.
- */
- typedef struct fspr_hash_entry_t fspr_hash_entry_t;
- struct fspr_hash_entry_t {
- fspr_hash_entry_t *next;
- unsigned int hash;
- const void *key;
- fspr_ssize_t klen;
- const void *val;
- };
- /*
- * Data structure for iterating through a hash table.
- *
- * We keep a pointer to the next hash entry here to allow the current
- * hash entry to be freed or otherwise mangled between calls to
- * fspr_hash_next().
- */
- struct fspr_hash_index_t {
- fspr_hash_t *ht;
- fspr_hash_entry_t *this, *next;
- unsigned int index;
- };
- /*
- * The size of the array is always a power of two. We use the maximum
- * index rather than the size so that we can use bitwise-AND for
- * modular arithmetic.
- * The count of hash entries may be greater depending on the chosen
- * collision rate.
- */
- struct fspr_hash_t {
- fspr_pool_t *pool;
- fspr_hash_entry_t **array;
- fspr_hash_index_t iterator; /* For fspr_hash_first(NULL, ...) */
- unsigned int count, max;
- fspr_hashfunc_t hash_func;
- fspr_hash_entry_t *free; /* List of recycled entries */
- };
- #define INITIAL_MAX 15 /* tunable == 2^n - 1; initial value of fspr_hash_t.max, so a new table starts with INITIAL_MAX + 1 buckets */
- /*
- * Hash creation functions.
- */
- static fspr_hash_entry_t **alloc_array(fspr_hash_t *ht, unsigned int max)
- {
- return fspr_pcalloc(ht->pool, sizeof(*ht->array) * (max + 1));
- }
- APR_DECLARE(fspr_hash_t *) fspr_hash_make(fspr_pool_t *pool)
- {
- fspr_hash_t *ht;
- ht = fspr_palloc(pool, sizeof(fspr_hash_t));
- ht->pool = pool;
- ht->free = NULL;
- ht->count = 0;
- ht->max = INITIAL_MAX;
- ht->array = alloc_array(ht, ht->max);
- ht->hash_func = fspr_hashfunc_default;
- return ht;
- }
- APR_DECLARE(fspr_hash_t *) fspr_hash_make_custom(fspr_pool_t *pool,
- fspr_hashfunc_t hash_func)
- {
- fspr_hash_t *ht = fspr_hash_make(pool);
- ht->hash_func = hash_func;
- return ht;
- }
- /*
- * Hash iteration functions.
- */
- APR_DECLARE(fspr_hash_index_t *) fspr_hash_next(fspr_hash_index_t *hi)
- {
- hi->this = hi->next;
- while (!hi->this) {
- if (hi->index > hi->ht->max)
- return NULL;
- hi->this = hi->ht->array[hi->index++];
- }
- hi->next = hi->this->next;
- return hi;
- }
- APR_DECLARE(fspr_hash_index_t *) fspr_hash_first(fspr_pool_t *p, fspr_hash_t *ht)
- {
- fspr_hash_index_t *hi;
- if (p)
- hi = fspr_palloc(p, sizeof(*hi));
- else
- hi = &ht->iterator;
- hi->ht = ht;
- hi->index = 0;
- hi->this = NULL;
- hi->next = NULL;
- return fspr_hash_next(hi);
- }
- APR_DECLARE(void) fspr_hash_this(fspr_hash_index_t *hi,
- const void **key,
- fspr_ssize_t *klen,
- void **val)
- {
- if (key) *key = hi->this->key;
- if (klen) *klen = hi->this->klen;
- if (val) *val = (void *)hi->this->val;
- }
- /*
- * Expanding a hash table
- */
- static void expand_array(fspr_hash_t *ht)
- {
- fspr_hash_index_t *hi;
- fspr_hash_entry_t **new_array;
- unsigned int new_max;
- new_max = ht->max * 2 + 1;
- new_array = alloc_array(ht, new_max);
- for (hi = fspr_hash_first(NULL, ht); hi; hi = fspr_hash_next(hi)) {
- unsigned int i = hi->this->hash & new_max;
- hi->this->next = new_array[i];
- new_array[i] = hi->this;
- }
- ht->array = new_array;
- ht->max = new_max;
- }
- APR_DECLARE_NONSTD(unsigned int) fspr_hashfunc_default(const char *char_key,
- fspr_ssize_t *klen)
- {
- unsigned int hash = 0;
- const unsigned char *key = (const unsigned char *)char_key;
- const unsigned char *p;
- fspr_ssize_t i;
-
- /*
- * This is the popular `times 33' hash algorithm which is used by
- * perl and also appears in Berkeley DB. This is one of the best
- * known hash functions for strings because it is both computed
- * very fast and distributes very well.
- *
- * The originator may be Dan Bernstein but the code in Berkeley DB
- * cites Chris Torek as the source. The best citation I have found
- * is "Chris Torek, Hash function for text in C, Usenet message
- * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
- * Salz's USENIX 1992 paper about INN which can be found at
- * <http://citeseer.nj.nec.com/salz92internetnews.html>.
- *
- * The magic of number 33, i.e. why it works better than many other
- * constants, prime or not, has never been adequately explained by
- * anyone. So I try an explanation: if one experimentally tests all
- * multipliers between 1 and 256 (as I did while writing a low-level
- * data structure library some time ago) one detects that even
- * numbers are not useable at all. The remaining 128 odd numbers
- * (except for the number 1) work more or less all equally well.
- * They all distribute in an acceptable way and this way fill a hash
- * table with an average percent of approx. 86%.
- *
- * If one compares the chi^2 values of the variants (see
- * Bob Jenkins ``Hashing Frequently Asked Questions'' at
- * http://burtleburtle.net/bob/hash/hashfaq.html for a description
- * of chi^2), the number 33 not even has the best value. But the
- * number 33 and a few other equally good numbers like 17, 31, 63,
- * 127 and 129 have nevertheless a great advantage to the remaining
- * numbers in the large set of possible multipliers: their multiply
- * operation can be replaced by a faster operation based on just one
- * shift plus either a single addition or subtraction operation. And
- * because a hash function has to both distribute good _and_ has to
- * be very fast to compute, those few numbers should be preferred.
- *
- * -- Ralf S. Engelschall <rse@engelschall.com>
- */
-
- if (*klen == APR_HASH_KEY_STRING) {
- for (p = key; *p; p++) {
- hash = hash * 33 + *p;
- }
- *klen = p - key;
- }
- else {
- for (p = key, i = *klen; i; i--, p++) {
- hash = hash * 33 + *p;
- }
- }
- return hash;
- }
- /*
- * This is where we keep the details of the hash function and control
- * the maximum collision rate.
- *
- * If val is non-NULL it creates and initializes a new hash entry if
- * there isn't already one there; it returns an updatable pointer so
- * that hash entries can be removed.
- */
- static fspr_hash_entry_t **find_entry(fspr_hash_t *ht,
- const void *key,
- fspr_ssize_t klen,
- const void *val)
- {
- fspr_hash_entry_t **hep, *he;
- unsigned int hash;
- hash = ht->hash_func(key, &klen);
- /* scan linked list */
- for (hep = &ht->array[hash & ht->max], he = *hep;
- he; hep = &he->next, he = *hep) {
- if (he->hash == hash
- && he->klen == klen
- && memcmp(he->key, key, klen) == 0)
- break;
- }
- if (he || !val)
- return hep;
- /* add a new entry for non-NULL values */
- if ((he = ht->free) != NULL)
- ht->free = he->next;
- else
- he = fspr_palloc(ht->pool, sizeof(*he));
- he->next = NULL;
- he->hash = hash;
- he->key = key;
- he->klen = klen;
- he->val = val;
- *hep = he;
- ht->count++;
- return hep;
- }
- APR_DECLARE(fspr_hash_t *) fspr_hash_copy(fspr_pool_t *pool,
- const fspr_hash_t *orig)
- {
- fspr_hash_t *ht;
- fspr_hash_entry_t *new_vals;
- unsigned int i, j;
- ht = fspr_palloc(pool, sizeof(fspr_hash_t) +
- sizeof(*ht->array) * (orig->max + 1) +
- sizeof(fspr_hash_entry_t) * orig->count);
- ht->pool = pool;
- ht->free = NULL;
- ht->count = orig->count;
- ht->max = orig->max;
- ht->hash_func = orig->hash_func;
- ht->array = (fspr_hash_entry_t **)((char *)ht + sizeof(fspr_hash_t));
- new_vals = (fspr_hash_entry_t *)((char *)(ht) + sizeof(fspr_hash_t) +
- sizeof(*ht->array) * (orig->max + 1));
- j = 0;
- for (i = 0; i <= ht->max; i++) {
- fspr_hash_entry_t **new_entry = &(ht->array[i]);
- fspr_hash_entry_t *orig_entry = orig->array[i];
- while (orig_entry) {
- *new_entry = &new_vals[j++];
- (*new_entry)->hash = orig_entry->hash;
- (*new_entry)->key = orig_entry->key;
- (*new_entry)->klen = orig_entry->klen;
- (*new_entry)->val = orig_entry->val;
- new_entry = &((*new_entry)->next);
- orig_entry = orig_entry->next;
- }
- *new_entry = NULL;
- }
- return ht;
- }
- APR_DECLARE(void *) fspr_hash_get(fspr_hash_t *ht,
- const void *key,
- fspr_ssize_t klen)
- {
- fspr_hash_entry_t *he;
- he = *find_entry(ht, key, klen, NULL);
- if (he)
- return (void *)he->val;
- else
- return NULL;
- }
- APR_DECLARE(void) fspr_hash_set(fspr_hash_t *ht,
- const void *key,
- fspr_ssize_t klen,
- const void *val)
- {
- fspr_hash_entry_t **hep;
- hep = find_entry(ht, key, klen, val);
- if (*hep) {
- if (!val) {
- /* delete entry */
- fspr_hash_entry_t *old = *hep;
- *hep = (*hep)->next;
- old->next = ht->free;
- ht->free = old;
- --ht->count;
- }
- else {
- /* replace entry */
- (*hep)->val = val;
- /* check that the collision rate isn't too high */
- if (ht->count > ht->max) {
- expand_array(ht);
- }
- }
- }
- /* else key not present and val==NULL */
- }
- APR_DECLARE(unsigned int) fspr_hash_count(fspr_hash_t *ht)
- {
- return ht->count;
- }
- APR_DECLARE(void) fspr_hash_clear(fspr_hash_t *ht)
- {
- fspr_hash_index_t *hi;
- for (hi = fspr_hash_first(NULL, ht); hi; hi = fspr_hash_next(hi))
- fspr_hash_set(ht, hi->this->key, hi->this->klen, NULL);
- }
- APR_DECLARE(fspr_hash_t*) fspr_hash_overlay(fspr_pool_t *p,
- const fspr_hash_t *overlay,
- const fspr_hash_t *base)
- {
- return fspr_hash_merge(p, overlay, base, NULL, NULL);
- }
- APR_DECLARE(fspr_hash_t *) fspr_hash_merge(fspr_pool_t *p,
- const fspr_hash_t *overlay,
- const fspr_hash_t *base,
- void * (*merger)(fspr_pool_t *p,
- const void *key,
- fspr_ssize_t klen,
- const void *h1_val,
- const void *h2_val,
- const void *data),
- const void *data)
- {
- fspr_hash_t *res;
- fspr_hash_entry_t *new_vals = NULL;
- fspr_hash_entry_t *iter;
- fspr_hash_entry_t *ent;
- unsigned int i,j,k;
- #if APR_POOL_DEBUG
- /* we don't copy keys and values, so it's necessary that
- * overlay->a.pool and base->a.pool have a life span at least
- * as long as p
- */
- if (!fspr_pool_is_ancestor(overlay->pool, p)) {
- fprintf(stderr,
- "fspr_hash_merge: overlay's pool is not an ancestor of p\n");
- abort();
- }
- if (!fspr_pool_is_ancestor(base->pool, p)) {
- fprintf(stderr,
- "fspr_hash_merge: base's pool is not an ancestor of p\n");
- abort();
- }
- #endif
- res = fspr_palloc(p, sizeof(fspr_hash_t));
- res->pool = p;
- res->free = NULL;
- res->hash_func = base->hash_func;
- res->count = base->count;
- res->max = (overlay->max > base->max) ? overlay->max : base->max;
- if (base->count + overlay->count > res->max) {
- res->max = res->max * 2 + 1;
- }
- res->array = alloc_array(res, res->max);
- if (base->count + overlay->count) {
- new_vals = fspr_palloc(p, sizeof(fspr_hash_entry_t) *
- (base->count + overlay->count));
- }
- j = 0;
- for (k = 0; k <= base->max; k++) {
- for (iter = base->array[k]; iter; iter = iter->next) {
- i = iter->hash & res->max;
- assert(new_vals);
- new_vals[j].klen = iter->klen;
- new_vals[j].key = iter->key;
- new_vals[j].val = iter->val;
- new_vals[j].hash = iter->hash;
- new_vals[j].next = res->array[i];
- res->array[i] = &new_vals[j];
- j++;
- }
- }
- for (k = 0; k <= overlay->max; k++) {
- for (iter = overlay->array[k]; iter; iter = iter->next) {
- i = iter->hash & res->max;
- for (ent = res->array[i]; ent; ent = ent->next) {
- if ((ent->klen == iter->klen) &&
- (memcmp(ent->key, iter->key, iter->klen) == 0)) {
- if (merger) {
- ent->val = (*merger)(p, iter->key, iter->klen,
- iter->val, ent->val, data);
- }
- else {
- ent->val = iter->val;
- }
- break;
- }
- }
- if (new_vals && !ent) {
- new_vals[j].klen = iter->klen;
- new_vals[j].key = iter->key;
- new_vals[j].val = iter->val;
- new_vals[j].hash = iter->hash;
- new_vals[j].next = res->array[i];
- res->array[i] = &new_vals[j];
- res->count++;
- j++;
- }
- }
- }
- return res;
- }
- APR_POOL_IMPLEMENT_ACCESSOR(hash) /* NOTE(review): macro is defined outside this file; presumably it emits the pool accessor for fspr_hash_t (returning ht->pool) -- confirm against the pools header */
|