Patch: Dynamic hashtable sizing

Mon Aug 7 20:59:36 UTC 2006

Here's a patch that squeezes another few percent of CPU time improvement 
out of our large caches. It adds auto-resize capability to the memcached 
hash table code.

For small caches, this is a memory win because the initial size is now 
substantially lower, so the constant-factor memory overhead of memcached 
will be less. For large caches, this is a CPU time win because the old 
size (1 million buckets) was pretty easy to exceed substantially if you 
cached a lot of small items. One of our sets of servers has over 40 
million cache entries per server, which means that on average, the old 
code was having to do 40 string compares for each key in a "get" request.

The gotcha: the hashtable has to resize itself periodically. That is not 
really noticeable with cache sizes up to a few hundred thousand entries, 
but beyond that the server will pause while it re-buckets all the 
existing cache entries. The hashtable never shrinks, only grows, so once 
your cache has filled up and you've started expiring old entries (and 
thus the number of entries reaches its maximum) you won't see any more 
pauses. For a 40-million-entry hashtable, you will see a total of 9 
pauses on the way to equilibrium, and only the last couple of them will 
be noticeable on modern hardware.

This patch doubles the size of the hashtable when there are enough 
entries to fill the double-sized table -- that is, when the number of 
entries exceeds 2x the current hashtable size. The performance 
difference between doing it that way and expanding at 1x the current 
size was not really measurable in my tests, and this way eats less 
memory (a 32-million-bucket hashtable on a 64-bit system is not exactly 
tiny.)

-Steve
-------------- next part --------------
Index: assoc.c
===================================================================

--- assoc.c	(revision 19008)
+++ assoc.c	(revision 19009)
@@ -34,8 +34,8 @@
 typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
 typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
 
-/* hard-code one million buckets, for now (2**20 == 4MB hash) */
-#define HASHPOWER  20
+/* how many powers of 2's worth of buckets we use */
+int hashpower = 16;
 
 #define hashsize(n) ((ub4)1<<(n))
 #define hashmask(n) (hashsize(n)-1)
@@ -127,9 +127,10 @@
 }
 
 static item** hashtable = 0;
+static int hash_items = 0;
 
 void assoc_init(void) {
-    unsigned int hash_size = hashsize(HASHPOWER) * sizeof(void*);
+    unsigned int hash_size = hashsize(hashpower) * sizeof(void*);
     hashtable = malloc(hash_size);
     if (! hashtable) {
         fprintf(stderr, "Failed to init hashtable.\n");
@@ -139,7 +140,7 @@
 }
 
 item *assoc_find(char *key) {
-    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
+    ub4 hv = hash(key, strlen(key), 0) & hashmask(hashpower);
     item *it = hashtable[hv];
     
     while (it) {
@@ -154,7 +155,7 @@
    the item wasn't found */
 
 static item** _hashitem_before (char *key) {
-    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
+    ub4 hv = hash(key, strlen(key), 0) & hashmask(hashpower);
     item **pos = &hashtable[hv];
 
     while (*pos && strcmp(key, ITEM_key(*pos))) {
@@ -163,11 +164,38 @@
     return pos;
 }
 
+/* grows the hashtable to the next power of 2, rehashing all the items. */
+static void assoc_expand(void) {
+    item **old_hashtable = hashtable;
+    item **new_hashtable;
+    int i;
+
+    new_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
+    if (new_hashtable) {
+        hashpower++;
+        hashtable = new_hashtable;
+        hash_items = 0;
+        for (i = hashsize(hashpower - 1) - 1; i >= 0; i--) {
+            item *it = old_hashtable[i];
+            while (it) {
+                item *next = it->h_next;
+                assoc_insert(ITEM_key(it), it);
+                it = next;
+            }
+        }
+
+        free(old_hashtable);
+    }
+}
+
 /* Note: this isn't an assoc_update.  The key must not already exist to call this */
 int assoc_insert(char *key, item *it) {
-    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
+    ub4 hv = hash(key, strlen(key), 0) & hashmask(hashpower);
     it->h_next = hashtable[hv];
     hashtable[hv] = it;
+    if (++hash_items > hashsize(hashpower) * 2) {
+        assoc_expand();
+    }
     return 1;
 }
 
@@ -177,6 +205,7 @@
         item *nxt = (*before)->h_next;
         (*before)->h_next = 0;   /* probably pointless, but whatever. */
         *before = nxt;
+        hash_items--;
         return;
     }
     /* Note:  we never actually get here.  the callers don't delete things