network performance measurements

Brad Fitzpatrick brad@danga.com
Tue, 12 Aug 2003 12:58:36 -0700 (PDT)


I benchmarked 3 different methods of doing network I/O:

   1) the default way, with the Nagle algorithm.
   2) using TCP_CORK (Linux, same as TCP_NOPUSH on BSD)
   3) using TCP_NODELAY

I measured both real time and number of packets on the wire.
The test was doing 2,500 deletes, sets, and gets, then a 2,500 get_multi.

           Seconds     Packets
DEFAULT     102.48      22638
TCP_CORK      3.88      15105
TCP_CORK      3.87      15108
TCP_CORK      3.86      15105
TCP_NODELAY   3.99      20169
TCP_NODELAY   4.04      20170
TCP_NODELAY   4.00      20170

Clearly, TCP_CORK is the winner.

I can't believe LiveJournal's running on the DEFAULT above.  We'll be
switching it to TCP_CORK today and will make a release once we find it
stable.

Patch is below.  Avva, please review.


--- memcached.c 11 Aug 2003 16:41:28 -0000      1.34
+++ memcached.c 12 Aug 2003 19:56:14 -0000
@@ -96,6 +96,15 @@
 int freetotal;
 int freecurr;

+void set_cork (conn *c, int val) {
+    if (val) val = 1;
+    if (c->is_corked == val) return;
+    c->is_corked = val;
+#ifdef TCP_NOPUSH
+    setsockopt(c->sfd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
+#endif
+}
+
 void conn_init(void) {
     freetotal = 200;
     freecurr = 0;
@@ -156,6 +165,9 @@
     c->write_and_free = 0;
     c->item = 0;

+    c->write_and_uncork = 0;
+    c->is_corked = 0;
+
     event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
     c->ev_flags = event_flags;

@@ -234,6 +246,7 @@
     c->wbytes = len + 2;
     c->wcurr = c->wbuf;

+    c->write_and_uncork = 1;
     c->state = conn_write;
     c->write_and_go = conn_read;
     return;
@@ -475,6 +488,10 @@
     if (settings.verbose > 1)
         fprintf(stderr, "<%d %s\n", c->sfd, command);
+    /* All incoming commands will require a response, so we cork at the beginning,
+       and uncork at the very end (usually by means of out_string)  */
+    set_cork(c, 1);
+
     if ((strncmp(command, "add ", 4) == 0 && (comm = NREAD_ADD)) ||
         (strncmp(command, "set ", 4) == 0 && (comm = NREAD_SET)) ||
         (strncmp(command, "replace ", 8) == 0 && (comm = NREAD_REPLACE))) {
@@ -926,6 +943,8 @@
                 stats.bytes_written += res;
                 c->wcurr  += res;
                 c->wbytes -= res;
+                if (c->wbytes == 0 && c->write_and_uncork)
+                    set_cork(c, 0);
                 break;
             }
             if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {

Index: memcached.h
===================================================================
RCS file: /home/cvspub/wcmtools/memcached/memcached.h,v
retrieving revision 1.17
diff -u -r1.17 memcached.h
--- memcached.h 12 Aug 2003 18:10:55 -0000      1.17
+++ memcached.h 12 Aug 2003 19:56:14 -0000
@@ -3,6 +3,10 @@

 #define DATA_BUFFER_SIZE 2048

+#if defined(TCP_CORK) && !defined(TCP_NOPUSH)
+#define TCP_NOPUSH TCP_CORK
+#endif
+
 struct stats {
     unsigned int  curr_items;
     unsigned int  total_items;
@@ -89,6 +93,8 @@
     int    wbytes;
     int    write_and_go; /* which state to go into after finishing current write */
     void   *write_and_free; /* free this memory after finishing writing */
+    char    write_and_uncork;  /* boolean, uncork when done writing */
+    char    is_corked;         /* boolean, connection is corked */

     char   *rcurr;
     int    rlbytes;