[memcached] bradfitz, r425: consistent hashing playground

commits at code.sixapart.com commits at code.sixapart.com
Fri Nov 3 22:09:24 UTC 2006


consistent hashing playground



A   trunk/api/perl/dev/cons-hash.pl


Added: trunk/api/perl/dev/cons-hash.pl
===================================================================
--- trunk/api/perl/dev/cons-hash.pl	2006-11-03 22:09:17 UTC (rev 424)
+++ trunk/api/perl/dev/cons-hash.pl	2006-11-03 22:09:24 UTC (rev 425)
@@ -0,0 +1,213 @@
+#!/usr/bin/perl
+
+use strict;
+use Digest::SHA1 qw(sha1);
+use String::CRC32 qw(crc32);;
+use Data::Dumper;
+
+my $set = Set::ConsistentHash->new;
+$set->modify_targets(
+                     A => 1,
+                     B => 1,
+                     C => 2,
+                     );
+
+my $set2 = Set::ConsistentHash->new;
+$set2->modify_targets(
+                      A => 1,
+                      B => 1,
+                      C => 1,
+                      );
+
+#print Dumper($set->bucket_counts);
+#print Dumper($set2->bucket_counts);
+
+
+if (0) {
+    my %matched;
+    my $total_trials = 100_000;
+    for my $n (1..$total_trials) {
+        my $rand = crc32("trial$n");
+        my $server = $set->target_of_point($rand);
+        #print "matched $rand = $server\n";
+        $matched{$server}++;
+    }
+
+    foreach my $s ($set->targets) {
+        printf("$s: expected=%0.02f%%  actual=%0.02f%%\n", #  space=%0.02f%%\n",
+               $set->weight_percentage($s),
+               100 * $matched{$s} / $total_trials,
+               #($space{$s} / 2**32) * 100,
+               );
+    }
+}
+
+if (1) {
+    my $total_trials = 100_000;
+    my %tran;
+    for my $n (1..$total_trials) {
+        my $rand = crc32("trial$n");
+        #my $s1 = $set->target_of_point($rand);
+        #my $s2 = $set2->target_of_point($rand);
+
+        my $s1 = $set->target_of_bucket($rand);
+        my $s2 = $set2->target_of_bucket($rand);
+        $tran{"$s1-$s2"}++;
+        $tran{"was-$s1"}++;
+        $tran{"now-$s2"}++;
+    }
+
+    print Dumper(\%tran);
+}
+
+
+############################################################################
+
+package Set::ConsistentHash;
+use strict;
+use Digest::SHA1 qw(sha1);
+
+# creates a new consistent hashing set with no targets.  you'll need to add targets.
+sub new {
+    my ($class) = @_;
+    return bless {
+        weights => {},  # $target => integer $weight
+        points  => {},  # 32-bit value points on 'circle' => \$target
+        order   => [],  # 32-bit points, sorted
+        buckets      => undef, # when requested, arrayref of 1024 buckets mapping to targets
+        total_weight => undef, # when requested, total weight of all targets
+    }, $class;
+}
+
+# returns sorted list of all configured $targets
+sub targets {
+    my $self = shift;
+    return sort keys %{$self->{weights}};
+}
+
+
+# returns sum of all target's weight
+sub total_weight {
+    my $self = shift;
+    return $self->{total_weight} if defined $self->{total_weight};
+    my $sum = 0;
+    foreach my $val (values %{$self->{weights}}) {
+        $sum += $val;
+    }
+    return $self->{total_weight} = $sum;
+}
+
+# returns the configured weight percentage [0,100] of a target.
+sub weight_percentage {
+    my ($self, $target) = @_;
+    return 0 unless $self->{weights}{$target};
+    return 100 * $self->{weights}{$target} / $self->total_weight;
+}
+
+# remove all targets
+sub reset_targets {
+    my $self = shift;
+    $self->modify_targets(map { $_ => 0 } $self->targets);
+}
+
+# add/modify targets.  parameters are %weights:  $target -> $weight
+sub modify_targets {
+    my ($self, %weights) = @_;
+
+    # uncache stuff:
+    $self->{total_weight} = undef;
+    $self->{buckets}      = undef;
+
+    while (my ($target, $weight) = each %weights) {
+        if ($weight) {
+            $self->{weights}{$target} = $weight;
+        } else {
+            delete $self->{weight}{$target};
+        }
+    }
+    $self->_redo_circle;
+}
+*modify_target = \&modify_targets;
+
+sub _redo_circle {
+    my $self = shift;
+
+    my $pts = $self->{points} = {};
+    while (my ($target, $weight) = each %{$self->{weights}}) {
+        my $num_pts = $weight * 100;
+        foreach my $ptn (1..$num_pts) {
+            my $key = "$target-$ptn";
+            my $val = unpack("L", substr(sha1($key), 0, 4));
+            $pts->{$val} = \$target;
+        }
+    }
+
+    $self->{order} = [ sort { $a <=> $b } keys %$pts ];
+}
+
+# returns arrayref of 1024 buckets.  each array element is the $target for that bucket index.
+sub buckets {
+    my $self = shift;
+    return $self->{buckets} if $self->{buckets};
+    my $buckets = $self->{buckets} = [];
+    my $by = 2**22;  # 2**32 / 2**10 (1024)
+    for my $n (0..1023) {
+        my $pt = $n * $by;
+        $buckets->[$n] = $self->target_of_point($pt);
+    }
+
+    return $buckets;
+}
+
+# returns hashref of $target -> $number of occurences in 1024 buckets
+sub bucket_counts {
+    my $self = shift;
+    my $ct = {};
+    foreach my $t (@{ $self->buckets }) {
+        $ct->{$t}++;
+    }
+    return $ct;
+}
+
+# given an integer, returns $target (after modding on 1024 buckets)
+sub target_of_bucket {
+    my ($self, $bucketpos) = @_;
+    return ($self->{buckets} || $self->buckets)->[$bucketpos % 1024];
+}
+
+# given a $point [0,2**32), returns the $target that's next going around the circle
+sub target_of_point {
+    my ($self, $pt) = @_;  # $pt is 32-bit unsigned integer
+
+    my $order = $self->{order};
+    my $circle_pt = $self->{points};
+
+    my ($lo, $hi) = (0, scalar(@$order)-1);  # inclusive candidates
+
+    while (1) {
+        my $mid           = int(($lo + $hi) / 2);
+        my $val_at_mid    = $order->[$mid];
+        my $val_one_below = $mid ? $order->[$mid-1] : 0;
+
+        # match
+        return ${ $circle_pt->{$order->[$mid]} } if
+            $pt <= $val_at_mid && $pt > $val_one_below;
+
+        # wrap-around match
+        return ${ $circle_pt->{$order->[0]} } if
+            $lo == $hi;
+
+        # too low, go up.
+        if ($val_at_mid < $pt) {
+            $lo = $mid + 1;
+            $lo = $hi if $lo > $hi;
+        }
+        # too high
+        else {
+            $hi = $mid - 1;
+            $hi = $lo if $hi < $lo;
+        }
+
+        next;
+    }
+};


Property changes on: trunk/api/perl/dev/cons-hash.pl
___________________________________________________________________
Name: svn:executable
   + *




More information about the memcached-commits mailing list