Patch: Increased memory efficiency

Steven Grimm sgrimm at facebook.com
Wed May 3 00:23:14 UTC 2006


This patch to memcached 1.1.12 (extracted from a larger set of changes, 
patch to follow) increases the number of items that can be stored in a 
given amount of memory. In our configuration we see about a 40% increase 
in item count on machines with a mix of item sizes. Memcached instances 
with fixed-size items can benefit even more than that with a little 
tuning of command-line options.

Details:

The slab allocator's powers-of-2 size strategy is now a powers-of-N 
strategy, where N may be specified on the command line using the new 
"-f" option. The default is 1.25. For a large memcached instance, where 
there are enough items of enough different sizes that the increased 
number of slab classes isn't itself a waste of memory, this is a 
significant win: items are placed in chunks whose sizes are much closer 
to the item size, wasting less memory.

One consequence of this is that slabs are no longer fixed-size; by 
default they are no bigger than 1MB each, but are only as big as they 
need to be to hold a whole number of chunks. That causes the "slabs 
reassign" command to be unavailable; it can be reenabled by compiling 
with -DALLOW_SLABS_REASSIGN at the expense of some wasted memory (all 
slabs will be 1MB).

The minimum amount of space for data in chunks of the smallest slab 
class may be adjusted on the command line using the new "-s" option. 
Each chunk is that many bytes plus the size of the fixed-size item 
header. If you have a lot of items with small fixed-size keys and 
values, you can use this option to maximize the number of items per slab 
in the smallest slab class; experiment with your particular data to find 
the optimal value.

-Steven Grimm
 Facebook
-------------- next part --------------
--- ../1.1.12-dist/doc/memcached.1	2006-05-02 14:53:48.000000000 -0700
+++ ./doc/memcached.1	2006-05-02 16:59:00.000000000 -0700
@@ -58,6 +58,19 @@
 .TP
 .B \-r
 Raise the core file size limit to the maximum allowable.
+.B \-f <factor>
+Use <factor> as the multiplier for computing the sizes of memory chunks that
+items are stored in. A lower value may result in less wasted memory depending
+on the total amount of memory available and the distribution of item sizes.
+The default is 1.25.
+.TP
+.B \-s <size>
+Allocate a minimum of <size> bytes for the item key, value, and flags. The
+default is 48. If you have a lot of small keys and values, you can get a
+significant memory efficiency gain with a lower value. If you use a high
+chunk growth factor (-f option), on the other hand, you may want to increase
+the size to allow a bigger percentage of your items to fit in the most densely
+packed (smallest) chunks.
 .TP
 .B \-h
 Show the version of memcached and a summary of options.
--- ../1.1.12-dist/items.c	2006-05-02 14:53:49.000000000 -0700
+++ ./items.c	2006-05-02 16:34:11.000000000 -0700
@@ -21,12 +21,7 @@
 #include "memcached.h"
 
 
-/* 
- * NOTE: we assume here for simplicity that slab ids are <=32. That's true in 
- * the powers-of-2 implementation, but if that changes this should be changed too
- */
-
-#define LARGEST_ID 32
+#define LARGEST_ID 255
 static item *heads[LARGEST_ID];
 static item *tails[LARGEST_ID];
 unsigned int sizes[LARGEST_ID];
--- ../1.1.12-dist/memcached.c	2006-05-02 14:53:49.000000000 -0700
+++ ./memcached.c	2006-05-02 17:02:13.000000000 -0700
@@ -93,6 +93,8 @@
     settings.verbose = 0;
     settings.oldest_live = 0;
     settings.evict_to_free = 1;       /* push old items out of cache when memory runs out */
+    settings.factor = 1.25;
+    settings.chunk_size = 48;         /* space for a modest key and value */
 }
 
 conn **freeconns;
@@ -724,6 +726,7 @@
     }
 
     if (strncmp(command, "slabs reassign ", 15) == 0) {
+#ifdef ALLOW_SLABS_REASSIGN
         int src, dst;
         char *start = command+15;
         if (sscanf(start, "%u %u\r\n", &src, &dst) == 2) {
@@ -742,6 +745,9 @@
             }
         }
         out_string(c, "CLIENT_ERROR bogus command");
+#else
+        out_string(c, "CLIENT_ERROR Slab reassignment not supported");
+#endif
         return;
     }
     
@@ -1234,6 +1240,8 @@
     printf("-h            print this help and exit\n");
     printf("-i            print memcached and libevent license\n");
     printf("-P <file>     save PID in <file>, only used with -d option\n");
+    printf("-f <factor>   chunk size growth factor, default 1.25\n");
+    printf("-s <size>     minimum space allocated for key+value+flags, default 48\n");
     return;
 }
 
@@ -1354,7 +1362,7 @@
     settings_init();
 
     /* process arguments */
-    while ((c = getopt(argc, argv, "p:m:Mc:khirvdl:u:P:")) != -1) {
+    while ((c = getopt(argc, argv, "p:m:Mc:khirvdl:u:P:f:s:")) != -1) {
         switch (c) {
         case 'p':
             settings.port = atoi(optarg);
@@ -1400,6 +1408,20 @@
         case 'P':
             pid_file = optarg;
             break;
+	case 'f':
+            settings.factor = atof(optarg);
+            if (settings.factor <= 1.0) {
+                fprintf(stderr, "Factor must be greater than 1\n");
+                return 1;
+            }
+            break;
+        case 's':
+            settings.chunk_size = atoi(optarg);
+            if (settings.chunk_size == 0) {
+                fprintf(stderr, "Chunk size must be greater than 0\n");
+                return 1;
+            }
+            break;
         default:
             fprintf(stderr, "Illegal argument \"%c\"\n", c);
             return 1;
@@ -1501,7 +1523,7 @@
     stats_init();
     assoc_init();
     conn_init();
-    slabs_init(settings.maxbytes);
+    slabs_init(settings.maxbytes, settings.factor);
 
     /* lock paged memory if needed */
     if (lock_memory) {
--- ../1.1.12-dist/memcached.h	2006-05-02 14:53:49.000000000 -0700
+++ ./memcached.h	2006-05-02 17:03:04.000000000 -0700
@@ -31,6 +31,8 @@
     int verbose;
     time_t oldest_live;   /* ignore existing items older than this */
     int evict_to_free;
+    double factor;        /* chunk size growth factor */
+    int chunk_size;
 };
 
 extern struct stats stats;
@@ -147,8 +149,10 @@
 
 /* slabs memory allocation */
 
-/* Init the subsystem. The argument is the limit on no. of bytes to allocate, 0 if no limit */
-void slabs_init(unsigned int limit);
+/* Init the subsystem. 1st argument is the limit on no. of bytes to allocate,
+   0 if no limit. 2nd argument is the growth factor; each slab will use a chunk
+   size equal to the previous slab's chunk size times this factor. */
+void slabs_init(unsigned int limit, double factor);
 
 /* Given object size, return id to use when allocating/freeing memory for object */
 /* 0 means error: can't store such a large object */
--- ../1.1.12-dist/slabs.c	2006-05-02 14:53:49.000000000 -0700
+++ ./slabs.c	2006-05-02 17:12:24.000000000 -0700
@@ -1,6 +1,11 @@
 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
 /*
- * Slabs memory allocation, based on powers-of-2
+ * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
+ * and are divided into chunks. The chunk sizes start off at the size of the
+ * "item" structure plus space for a small key and value. They increase by
+ * a multiplier factor from there, up to half the maximum slab size. The last
+ * slab size is always 1MB, since that's the maximum item size allowed by the
+ * memcached protocol.
  *
  * $Id: slabs.c,v 1.15 2003/09/05 22:37:36 bradfitz Exp $
  */
@@ -23,11 +28,12 @@
 
 #include "memcached.h"
 
-#define POWER_SMALLEST 3
-#define POWER_LARGEST  20
+#define POWER_SMALLEST 1
+#define POWER_LARGEST  200
 #define POWER_BLOCK 1048576
+#define CHUNK_ALIGN_BYTES (sizeof(void *))
 
-/* powers-of-2 allocation structures */
+/* powers-of-N allocation structures */
 
 typedef struct {
     unsigned int size;      /* sizes of items */
@@ -51,38 +57,56 @@
 static slabclass_t slabclass[POWER_LARGEST+1];
 static unsigned int mem_limit = 0;
 static unsigned int mem_malloced = 0;
+static int power_largest;
 
+/*
+ * Figures out which slab class (chunk size) is required to store an item of
+ * a given size.
+ */
 unsigned int slabs_clsid(unsigned int size) {
-    int res = 1;
+    int res = POWER_SMALLEST;
 
     if(size==0)
         return 0;
-    size--;
-    while(size >>= 1)
-        res++;
-    if (res < POWER_SMALLEST) 
-        res = POWER_SMALLEST;
-    if (res > POWER_LARGEST)
-        res = 0;
+    while (size > slabclass[res].size)
+        if (res++ == power_largest)     /* won't fit in the biggest slab */
+            return 0;
+
     return res;
 }
 
-void slabs_init(unsigned int limit) {
-    int i;
-    int size=1;
+/*
+ * Determines the chunk sizes and initializes the slab class descriptors
+ * accordingly.
+ */
+void slabs_init(unsigned int limit, double factor) {
+    int i = POWER_SMALLEST - 1;
+    unsigned int size = sizeof(item) + settings.chunk_size;
+
+    /* Factor of 2.0 means use the default memcached behavior */
+    if (factor == 2.0 && size < 128)
+        size = 128;
 
     mem_limit = limit;
-    for(i=0; i<=POWER_LARGEST; i++, size*=2) {
+    memset(slabclass, 0, sizeof(slabclass));
+
+    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
+        /* Make sure items are always n-byte aligned */
+        if (size % CHUNK_ALIGN_BYTES)
+            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
+
         slabclass[i].size = size;
-        slabclass[i].perslab = POWER_BLOCK / size;
-        slabclass[i].slots = 0;
-        slabclass[i].sl_curr = slabclass[i].sl_total = slabclass[i].slabs = 0;
-        slabclass[i].end_page_ptr = 0;
-        slabclass[i].end_page_free = 0;
-        slabclass[i].slab_list = 0;
-        slabclass[i].list_size = 0;
-        slabclass[i].killing = 0;
+        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
+        size *= factor;
+        if (settings.verbose > 1) {
+            fprintf(stderr, "slab class %3d: chunk size %6d perslab %5d\n",
+                    i, slabclass[i].size, slabclass[i].perslab);
+        }
     }
+
+    power_largest = i;
+    slabclass[power_largest].size = POWER_BLOCK;
+    slabclass[power_largest].perslab = 1;
 }
 
 static int grow_slab_list (unsigned int id) { 
@@ -99,8 +123,11 @@
 
 int slabs_newslab(unsigned int id) {
     slabclass_t *p = &slabclass[id];
-    int num = p->perslab;
+#ifdef ALLOW_SLABS_REASSIGN
     int len = POWER_BLOCK;
+#else
+    int len = p->size * p->perslab;
+#endif
     char *ptr;
 
     if (mem_limit && mem_malloced + len > mem_limit)
@@ -113,7 +140,7 @@
 
     memset(ptr, 0, len);
     p->end_page_ptr = ptr;
-    p->end_page_free = num;
+    p->end_page_free = p->perslab;
 
     p->slab_list[p->slabs++] = ptr;
     mem_malloced += len;
@@ -124,7 +151,7 @@
     slabclass_t *p;
 
     unsigned char id = slabs_clsid(size);
-    if (id < POWER_SMALLEST || id > POWER_LARGEST)
+    if (id < POWER_SMALLEST || id > power_largest)
         return 0;
 
     p = &slabclass[id];
@@ -165,8 +192,8 @@
     slabclass_t *p;
 
     assert(((item *)ptr)->slabs_clsid==0);
-    assert(id >= POWER_SMALLEST && id <= POWER_LARGEST);
-    if (id < POWER_SMALLEST || id > POWER_LARGEST)
+    assert(id >= POWER_SMALLEST && id <= power_largest);
+    if (id < POWER_SMALLEST || id > power_largest)
         return;
 
     p = &slabclass[id];
@@ -191,14 +218,14 @@
 
 char* slabs_stats(int *buflen) {
     int i, total;
-    char *buf = (char*) malloc(8192);
+    char *buf = (char*) malloc(power_largest * 200 + 100);
     char *bufcurr = buf;
 
     *buflen = 0;
     if (!buf) return 0;
 
     total = 0;
-    for(i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
+    for(i = POWER_SMALLEST; i <= power_largest; i++) {
         slabclass_t *p = &slabclass[i];
         if (p->slabs) {
             unsigned int perslab, slabs;
@@ -222,7 +249,13 @@
     return buf;
 }
 
-/* 1 = success
+#ifdef ALLOW_SLABS_REASSIGN
+/* Blows away all the items in a slab class and moves its slabs to another
+   class. This is only used by the "slabs reassign" command, for manual tweaking
+   of memory allocation. It's disabled by default since it requires that all
+   slabs be the same size (which can waste space for chunk size mantissas of
+   other than 2.0).
+   1 = success
    0 = fail
    -1 = tried. busy. send again shortly. */
 int slabs_reassign(unsigned char srcid, unsigned char dstid) {
@@ -231,8 +264,8 @@
     void *iter;
     int was_busy = 0;
 
-    if (srcid < POWER_SMALLEST || srcid > POWER_LARGEST ||
-        dstid < POWER_SMALLEST || dstid > POWER_LARGEST)
+    if (srcid < POWER_SMALLEST || srcid > power_largest ||
+        dstid < POWER_SMALLEST || dstid > power_largest)
         return 0;
    
     p = &slabclass[srcid];
@@ -288,3 +321,4 @@
 
     return 1;
 }
+#endif


More information about the memcached mailing list