[PATCH 3/3] Open files only once
Michael Hanselmann
public at hansmi.ch
Wed Aug 13 21:18:14 UTC 2008
- Reduced I/O load by only reading its content only once
- Easier error handling (before a file could disappear between two
calls to open())
---
lib/Brackup/File.pm | 92 +++++++++++++++++++++-------------------
lib/Brackup/PositionedChunk.pm | 47 ++-------------------
2 files changed, 52 insertions(+), 87 deletions(-)
diff --git a/lib/Brackup/File.pm b/lib/Brackup/File.pm
index 827eff1..1996cbe 100644
--- a/lib/Brackup/File.pm
+++ b/lib/Brackup/File.pm
@@ -30,6 +30,8 @@ sub new {
die "No path provided." unless $self->{path};
$self->{path} =~ s!^\./!!;
+ $self->_read_file;
+
return $self;
}
@@ -119,18 +121,22 @@ sub find_file_chunker {
sub chunks {
my $self = shift;
- # memoized:
- return @{ $self->{chunks} } if $self->{chunks};
+ return @{$self->{chunks}};
+}
+
+sub _read_file {
+ my ($self) = @_;
# non-files don't have chunks
- if (!$self->is_file) {
+ if (not $self->is_file) {
$self->{chunks} = [];
- return ();
+ return;
}
- # Get the appropriate FileChunker for this file type,
- # then pass ourselves to it to get our chunks.
- my @chunk_list;
+ my $fullpath = $self->fullpath;
+ open(my $fh, '<', $fullpath)
+ or die "Failed to open $fullpath: $!\n";
+ binmode($fh);
# Get the appropriate FileChunker for this file type
my $chunker = $self->find_file_chunker();
@@ -139,6 +145,11 @@ sub chunks {
my $last_offset = undef;
my $last_len = undef;
+ my $sha1_file = Digest::SHA1->new;
+ my $sha1_chunk = Digest::SHA1->new;
+
+ my @chunks;
+
$chunker->get_offsets($self, sub {
my ($offset, $len) = @_;
@@ -164,10 +175,31 @@ sub chunks {
die "Chunker didn't start at offset 0";
}
- push @chunk_list, new Brackup::PositionedChunk(
+ # Optimize for continous reading
+ unless (tell($fh) == $offset) {
+ seek($fh, $offset, 0) or die "Couldn't seek: $!\n";
+ }
+
+ my $data;
+ my $rv = read($fh, $data, $len) or die "Failed to read: $!\n";
+ unless ($rv == $len) {
+ Carp::confess("Read $rv bytes, not $len");
+ }
+
+ # Update digest for complete file
+ $sha1_file->add($data);
+
+ # Generate chunk digest
+ $sha1_chunk->reset;
+ $sha1_chunk->add($data);
+ my $chunk_digest = 'sha1:' . $sha1_chunk->hexdigest;
+
+ push @chunks, Brackup::PositionedChunk->new(
file => $self,
offset => $offset,
length => $len,
+ chunkref => \$data,
+ digest => $chunk_digest,
);
$last_offset = $offset;
@@ -178,46 +210,18 @@ sub chunks {
die "Chunker didn't return complete file";
}
- $self->{chunks} = \@chunk_list;
- return @chunk_list;
-}
+ my $file_digest = 'sha1:' . $sha1_file->hexdigest();
-sub full_digest {
- my $self = shift;
- return $self->{_full_digest} ||= $self->_calc_full_digest;
-}
-
-sub _calc_full_digest {
- my $self = shift;
- return "" unless $self->is_file;
-
- my $cache = $self->{root}->digest_cache;
- my $key = $self->cachekey;
-
- my $dig = $cache->get($key);
- return $dig if $dig;
+ $self->{_full_digest} = $file_digest;
- # legacy migration thing... we used to more often store
- # the chunk digests, not the file digests. so try that
- # first...
- if ($self->chunks == 1) {
- my ($chunk) = $self->chunks;
- $dig = $cache->get($chunk->cachekey);
- }
-
- unless ($dig) {
- my $sha1 = Digest::SHA1->new;
- my $path = $self->fullpath;
- open (my $fh, $path) or die "Couldn't open $path: $!\n";
- binmode($fh);
- $sha1->addfile($fh);
- close($fh);
+ close($fh);
- $dig = "sha1:" . $sha1->hexdigest;
- }
+ $self->{chunks} = \@chunks;
+}
- $cache->set($key => $dig);
- return $dig;
+sub full_digest {
+ my $self = shift;
+ return $self->{_full_digest};
}
sub link_target {
diff --git a/lib/Brackup/PositionedChunk.pm b/lib/Brackup/PositionedChunk.pm
index b5fabc1..8ed806c 100644
--- a/lib/Brackup/PositionedChunk.pm
+++ b/lib/Brackup/PositionedChunk.pm
@@ -20,6 +20,8 @@ sub new {
$self->{file} = delete $opts{'file'}; # Brackup::File object
$self->{offset} = delete $opts{'offset'};
$self->{length} = delete $opts{'length'};
+ $self->{_raw_chunkref} = delete $opts{'chunkref'};
+ $self->{_raw_digest} = delete $opts{'digest'};
croak("Unknown options: " . join(', ', keys %opts)) if %opts;
croak("offset not numeric") unless $self->{offset} =~ /^\d+$/;
@@ -55,53 +57,12 @@ sub root {
sub raw_digest {
my $self = shift;
- return $self->{_raw_digest} ||= $self->_calc_raw_digest;
-}
-
-sub _calc_raw_digest {
- my $self = shift;
-
- my $n_chunks = $self->{file}->chunks
- or die "zero chunks?";
- if ($n_chunks == 1) {
- # don't calculate this chunk's digest.. it's the same as our
- # file's digest, since this chunk spans the entire file.
- die "ASSERT" unless $self->length == $self->{file}->size;
- return $self->{file}->full_digest;
- }
-
- my $cache = $self->root->digest_cache;
- my $key = $self->cachekey;
- my $dig;
-
- if ($dig = $cache->get($key)) {
- return $self->{_raw_digest} = $dig;
- }
-
- my $rchunk = $self->raw_chunkref;
- $dig = "sha1:" . sha1_hex($$rchunk);
-
- $cache->set($key => $dig);
-
- return $self->{_raw_digest} = $dig;
+ return $self->{_raw_digest};
}
sub raw_chunkref {
my $self = shift;
- return $self->{_raw_chunkref} if $self->{_raw_chunkref};
-
- my $data;
- my $fullpath = $self->{file}->fullpath;
- open(my $fh, $fullpath) or die "Failed to open $fullpath: $!\n";
- binmode($fh);
- seek($fh, $self->{offset}, 0) or die "Couldn't seek: $!\n";
- my $rv = read($fh, $data, $self->{length})
- or die "Failed to read: $!\n";
- unless ($rv == $self->{length}) {
- Carp::confess("Read $rv bytes, not $self->{length}");
- }
-
- return $self->{_raw_chunkref} = \$data;
+ return $self->{_raw_chunkref};
}
# useful string for targets to key on. of one of the forms:
--
1.5.5.3
More information about the brackup
mailing list