Patch: Hash table autoexpansion (now with no pauses)

Steven Grimm sgrimm at facebook.com
Fri Oct 13 23:28:53 UTC 2006


As Brad requested, here's a reworked version of our earlier patch that 
does automatic expansion of the hashtable. Now the expansion happens 
incrementally rather than all at once, so there are no pauses in 
operation regardless of how big the hashtable gets.

This is a patch against the current svn source (which includes the new 
hash function I sent out to the list earlier this week.)

-Steve

-------------- next part --------------
Index: memcached.c
===================================================================
--- memcached.c	(revision 411)
+++ memcached.c	(working copy)
@@ -430,6 +430,7 @@
     if (state != c->state) {
         if (state == conn_read) {
             conn_shrink(c);
+            assoc_move_next_bucket();
         }
         c->state = state;
     }
Index: assoc.c
===================================================================
--- assoc.c	(revision 412)
+++ assoc.c	(working copy)
@@ -31,15 +31,6 @@
 
 #include "memcached.h"
 
-typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
-typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
-
-/* hard-code one million buckets, for now (2**20 == 4MB hash) */
-#define HASHPOWER  20
-
-#define hashsize(n) ((ub4)1<<(n))
-#define hashmask(n) (hashsize(n)-1)
-
 /*
  * Since the hash function does bit manipulation, it needs to know
  * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG
@@ -459,22 +450,60 @@
 #error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN 
 #endif // hash_XXX_ENDIAN == 1
 
-static item** hashtable = 0;
+typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
+typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
 
+/* how many powers of 2's worth of buckets we use */
+int hashpower = 16;
+
+#define hashsize(n) ((ub4)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+/* Main hash table. This is where we look except during expansion. */
+static item** primary_hashtable = 0;
+
+/*
+ * Previous hash table. During expansion, we look here for keys that haven't
+ * been moved over to the primary yet.
+ */
+static item** old_hashtable = 0;
+
+/* Number of items in the hash table. */
+static int hash_items = 0;
+
+/* Flag: Are we in the middle of expanding now? */
+static int expanding = 0;
+
+/*
+ * During expansion we migrate values with bucket granularity; this is how
+ * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1.
+ */
+static int expand_bucket = 0;
+
 void assoc_init(void) {
-    unsigned int hash_size = hashsize(HASHPOWER) * sizeof(void*);
-    hashtable = malloc(hash_size);
-    if (! hashtable) {
+    unsigned int hash_size = hashsize(hashpower) * sizeof(void*);
+    primary_hashtable = malloc(hash_size);
+    if (! primary_hashtable) {
         fprintf(stderr, "Failed to init hashtable.\n");
         exit(1);
     }
-    memset(hashtable, 0, hash_size);
+    memset(primary_hashtable, 0, hash_size);
 }
 
-item *assoc_find(char *key) {
-    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
-    item *it = hashtable[hv];
+item *assoc_find(const char *key) {
+    int nkey = strlen(key);
+    uint32_t hv = hash(key, nkey, 0);
+    item *it;
+    int oldbucket;
 
+    if (expanding &&
+        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
+    {
+        it = old_hashtable[oldbucket];
+    } else {
+        it = primary_hashtable[hv & hashmask(hashpower)];
+    }
+
     while (it) {
         if (strcmp(key, ITEM_key(it)) == 0)
             return it;
@@ -486,35 +515,104 @@
 /* returns the address of the item pointer before the key.  if *item == 0,
    the item wasn't found */
 
-static item** _hashitem_before (char *key) {
-    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
-    item **pos = &hashtable[hv];
+static item** _hashitem_before (const char *key) {
+    uint32_t hv = hash(key, strlen(key), 0);
+    item **pos;
+    int oldbucket;
 
+    if (expanding &&
+        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
+    {
+        pos = &old_hashtable[oldbucket];
+    } else {
+        pos = &primary_hashtable[hv & hashmask(hashpower)];
+    }
+
     while (*pos && strcmp(key, ITEM_key(*pos))) {
         pos = &(*pos)->h_next;
     }
     return pos;
 }
 
+/* grows the hashtable to the next power of 2. */
+static void assoc_expand(void) {
+    old_hashtable = primary_hashtable;
+
+    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
+    if (primary_hashtable) {
+	if (settings.verbose > 1)
+	    fprintf(stderr, "Hash table expansion starting\n");
+        hashpower++;
+        expanding = 1;
+        expand_bucket = 0;
+	assoc_move_next_bucket();
+    } else {
+        primary_hashtable = old_hashtable;
+	/* Bad news, but we can keep running. */
+    }
+}
+
+/* migrates the next bucket to the primary hashtable if we're expanding. */
+void assoc_move_next_bucket(void) {
+    item *it, *next;
+    int bucket;
+
+    if (expanding) {
+        for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
+	    next = it->h_next;
+
+            bucket = hash(ITEM_key(it), strlen(ITEM_key(it)), 0) & hashmask(hashpower);
+            it->h_next = primary_hashtable[bucket];
+            primary_hashtable[bucket] = it;
+	}
+
+	expand_bucket++;
+	if (expand_bucket == hashsize(hashpower - 1)) {
+	    expanding = 0;
+	    free(old_hashtable);
+	    if (settings.verbose > 1)
+	        fprintf(stderr, "Hash table expansion done\n");
+	}
+    }
+}
+
 /* Note: this isn't an assoc_update.  The key must not already exist to call this */
-int assoc_insert(char *key, item *it) {
-    ub4 hv;
+int assoc_insert(item *it) {
+    uint32_t hv;
+    int oldbucket;
+
     assert(assoc_find(key) == 0);  /* shouldn't have duplicately named things defined */
-    hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
-    it->h_next = hashtable[hv];
-    hashtable[hv] = it;
+
+    hv = hash(ITEM_key(it), strlen(ITEM_key(it)), 0);
+    if (expanding &&
+        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
+    {
+        it->h_next = old_hashtable[oldbucket];
+        old_hashtable[oldbucket] = it;
+    } else {
+        it->h_next = primary_hashtable[hv & hashmask(hashpower)];
+        primary_hashtable[hv & hashmask(hashpower)] = it;
+    }
+
+    hash_items++;
+    if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
+        assoc_expand();
+    }
+
     return 1;
 }
 
-void assoc_delete(char *key) {
+void assoc_delete(const char *key) {
     item **before = _hashitem_before(key);
+
     if (*before) {
         item *nxt = (*before)->h_next;
         (*before)->h_next = 0;   /* probably pointless, but whatever. */
         *before = nxt;
+        hash_items--;
         return;
     }
-    /* Note:  we never actually get here.  the callers don't delete things
+    /* Note:  we never actually get here.  the callers don't delete things 
        they can't find. */
     assert(*before != 0);
 }
Index: memcached.h
===================================================================
--- memcached.h	(revision 411)
+++ memcached.h	(working copy)
@@ -256,9 +256,10 @@
 void settings_init(void);
 /* associative array */
 void assoc_init(void);
-item *assoc_find(char *key);
-int assoc_insert(char *key, item *item);
-void assoc_delete(char *key);
+item *assoc_find(const char *key);
+int assoc_insert(item *item);
+void assoc_delete(const char *key);
+void assoc_move_next_bucket(void);
 void item_init(void);
 item *item_alloc(char *key, int flags, rel_time_t exptime, int nbytes);
 void item_free(item *it);
Index: items.c
===================================================================
--- items.c	(revision 411)
+++ items.c	(working copy)
@@ -184,7 +184,7 @@
     assert(it->nbytes < 1048576);
     it->it_flags |= ITEM_LINKED;
     it->time = current_time;
-    assoc_insert(ITEM_key(it), it);
+    assoc_insert(it);
 
     stats.curr_bytes += ITEM_ntotal(it);
     stats.curr_items += 1;


More information about the memcached mailing list