[PATCH 2/7] Move main backup routine into classes

Michael Hanselmann public at hansmi.ch
Wed Aug 13 21:15:57 UTC 2008

From: Michael Hanselmann <hansmi at hansmi-mac-zrh.local>

Encryption and status information have been left out of this patch.
They'll be reimplemented in subsequent patches.
 lib/Brackup/Backup.pm |  518 +++++++++++++++++++++++++------------------------
 1 files changed, 264 insertions(+), 254 deletions(-)

diff --git a/lib/Brackup/Backup.pm b/lib/Brackup/Backup.pm
index 7211439..dfc4818 100644
--- a/lib/Brackup/Backup.pm
+++ b/lib/Brackup/Backup.pm
@@ -17,10 +17,6 @@ sub new {
     $self->{verbose} = delete $opts{verbose};  # bool
     $self->{zenityprogress} = delete $opts{zenityprogress};  # bool
-    $self->{modecounts} = {}; # type -> mode(octal) -> count
-    $self->{saved_files} = [];   # list of Brackup::File objects backed up
     croak("Unknown options: " . join(', ', keys %opts)) if %opts;
     return $self;
@@ -29,251 +25,259 @@ sub new {
 # returns true (a Brackup::BackupStats object) on success, or dies with error
 sub backup {
     my ($self, $backup_file) = @_;
+    my $backup = Brackup::Backup::_Sync->new($self, $backup_file);
-    my $root   = $self->{root};
-    my $target = $self->{target};
+    $backup->run();
-    my $stats  = Brackup::BackupStats->new;
+    return $backup->stats;
-    my $gpg_rcpt = $self->{root}->gpg_rcpt;
+sub backup_time {
+    my $self = shift;
+    return $self->{backup_time} ||= time();
-    my $n_kb         = 0.0; # num:  kb of all files in root
-    my $n_files      = 0;   # int:  # of files in root
-    my $n_kb_done    = 0.0; # num:  kb of files already done with (uploaded or skipped)
-    # if we're pre-calculating the amount of data we'll
-    # actually need to upload, store it here.
-    my $n_kb_up      = 0.0;
-    my $n_kb_up_need = 0.0; # by default, not calculated/used.
+package Brackup::Backup::_Base;
+use strict;
+use warnings;
+use Brackup::Util qw/tempfile/;
-    my $n_files_done = 0;   # int
-    my @files;         # Brackup::File objs
+sub new {
+    my ($class, $parent, $backup_file) = @_;
+    my $self = bless {}, $class;
-    $self->debug("Discovering files in ", $root->path, "...\n");
-    $self->report_progress(0, "Discovering files in " . $root->path . "...");
-    $root->foreach_file(sub {
-        my ($file) = @_;  # a Brackup::File
-        push @files, $file;
-        $n_files++;
-        $n_kb += $file->size / 1024;
-    });
+    $self->{parent} = $parent;
+    $self->{backup_file} = $backup_file;
-    $self->debug("Number of files: $n_files\n");
-    # calc needed chunks
-    if ($ENV{CALC_NEEDED}) {
-        my $fn = 0;
-        foreach my $f (@files) {
-            $fn++;
-            if ($fn % 100 == 0) { warn "$fn / $n_files ...\n"; }
-            foreach my $pc ($f->chunks) {
-                if ($target->stored_chunk_from_inventory($pc)) {
-                    $pc->forget_chunkref;
-                    next;
-                }
-                $n_kb_up_need += $pc->length / 1024;
-                $pc->forget_chunkref;
-            }
+    $self->{stats} = Brackup::BackupStats->new;
+    $self->{comp_chunk} = undef;
+    $self->{cur_file} = undef;
+    $self->{metafh} = undef;
+    $self->{file_has_shown_status} = 0;
+    $self->{modecounts} = {}; # type -> mode(octal) -> count
+    $self->{stored_files} = ();
+    $self->{stored_chunks} = ();
+    return $self;
+sub debug {
+    my ($self, @m) = @_;
+    return unless $self->{parent}->{verbose};
+    my $line = join("", @m);
+    chomp $line;
+    print $line, "\n";
+sub report_progress {
+    my ($self, $percent, $message) = @_;
+    if ($self->{zenityprogress}) {
+        if (defined($message) && length($message) > 100) {
+            $message = substr($message, 0, 100)."...";
-        warn "kb need to upload = $n_kb_up_need\n";
+        print STDOUT "#", $message, "\n" if defined $message;
+        print STDOUT $percent, "\n" if defined $percent;
+sub show_status {
+    my ($self) = @_;
-    my $chunk_iterator = Brackup::ChunkIterator->new(@files);
+sub run {
+    my ($self) = @_;
-    my $gpg_iter;
-    my $gpg_pm;   # gpg ProcessManager
-    if ($gpg_rcpt) {
-        ($chunk_iterator, $gpg_iter) = $chunk_iterator->mux_into(2);
-        $gpg_pm = Brackup::GPGProcManager->new($gpg_iter, $target);
-    }
+    $self->_begin();
+    $self->_gather_chunks();
+    $self->_end();
-    my $cur_file; # current (last seen) file
-    my @stored_chunks;
-    my $file_has_shown_status = 0;
-    my $end_file = sub {
-        return unless $cur_file;
-        $self->add_file($cur_file, [ @stored_chunks ]);
-        $n_files_done++;
-        $n_kb_done += $cur_file->size / 1024;
-        $cur_file = undef;
-    };
-    my $show_status = sub {
-        # use either size of files in normal case, or if we pre-calculated
-        # the size-to-upload (by looking in inventory, then we'll show the
-        # more accurate percentage)
-        my $percdone = 100 * ($n_kb_up_need ?
-                              ($n_kb_up / $n_kb_up_need) :
-                              ($n_kb_done / $n_kb));
-        my $mb_remain = ($n_kb_up_need ?
-                         ($n_kb_up_need - $n_kb_up) :
-                         ($n_kb - $n_kb_done)) / 1024;
-        $self->debug(sprintf("* %-60s %d/%d (%0.02f%%; remain: %0.01f MB)",
-                             $cur_file->path, $n_files_done, $n_files, $percdone,
-                             $mb_remain));
-        $self->report_progress($percdone);
-    };
-    my $start_file = sub {
-        $end_file->();
-        $cur_file = shift;
-        @stored_chunks = ();
-        $show_status->() if $cur_file->is_dir;
-        if ($gpg_iter) {
-            # catch our gpg iterator up.  we want it to be ahead of us,
-            # nothing iteresting is behind us.
-            $gpg_iter->next while $gpg_iter->behind_by > 1;
-        }
-        $file_has_shown_status = 0;
-    };
-    my $merge_under = $root->merge_files_under;
-    my $comp_chunk  = undef;
-    # records are either Brackup::File (for symlinks, directories, etc), or
-    # PositionedChunks, in which case the file can asked of the chunk
-    while (my $rec = $chunk_iterator->next) {
-        if ($rec->isa("Brackup::File")) {
-            $start_file->($rec);
-            next;
-        }
-        my $pchunk = $rec;
-        if ($pchunk->file != $cur_file) {
-            $start_file->($pchunk->file);
-        }
+sub stats {
+    my ($self) = @_;
-        # have we already stored this chunk before?  (iterative backup)
-        my $schunk;
-        if ($schunk = $target->stored_chunk_from_inventory($pchunk)) {
-            $pchunk->forget_chunkref;
-            push @stored_chunks, $schunk;
-            next;
-        }
+    return $self->{stats};
+sub _begin {
+    my ($self) = @_;
+    $self->{metafh} = tempfile();
-        # weird case... have we stored this same pchunk digest in the
-        # current comp_chunk we're building?  these aren't caught by
-        # the above inventory check, because chunks in a composite
-        # chunk aren't added to the inventory until after the the composite
-        # chunk has fully grown (because it's not until it's fully grown
-        # that we know the handle for it, its digest)
-        if ($comp_chunk && ($schunk = $comp_chunk->stored_chunk_from_dup_internal_raw($pchunk))) {
-            $pchunk->forget_chunkref;
-            push @stored_chunks, $schunk;
-            next;
+sub _emit_file {
+    my ($self, $file) = @_;
+    return undef unless $file;
+    my ($first, @chunks) = $file->chunks;
+    if ($first) {
+        $self->_emit_chunk($first);
+        foreach (@chunks) {
+            $self->_emit_chunk($_);
+    } else {
+        # File without chunks
+        $self->_emit_chunk($file);
+    }
-        $show_status->() unless $file_has_shown_status++;
-        $self->debug("  * storing chunk: ", $pchunk->as_string, "\n");
-        $self->report_progress(undef, $pchunk->file->path . " (" . $pchunk->offset . "," . $pchunk->length . ")");
+sub _emit_chunk {
+    my ($self, $rec) = @_;
+    my $merge_under = $self->{parent}->{root}->merge_files_under;
-        unless ($self->{dryrun}) {
-            $schunk = Brackup::StoredChunk->new($pchunk);
+    if ($rec->isa("Brackup::File")) {
+        $self->start_file($rec);
+        return;
+    }
-            # encrypt it
-            if ($gpg_rcpt) {
-                $schunk->set_encrypted_chunkref($gpg_pm->enc_chunkref_of($pchunk));
-            }
+    my $pchunk = $rec;
+    if ($pchunk->file != $self->{cur_file}) {
+        $self->start_file($pchunk->file);
+    }
-            # see if we should pack it into a bigger blob
-            my $chunk_size = $schunk->backup_length;
-            # see if we should merge this chunk (in this case, file) together with
-            # other small files we encountered earlier, into a "composite chunk",
-            # to be stored on the target in one go.
-            # Note: no technical reason for only merging small files (is_entire_file),
-            # and not the tails of larger files.  just don't like the idea of files being
-            # both split up (for big head) and also merged together (for little end).
-            # would rather just have 1 type of magic per file.  (split it or join it)
-            if ($merge_under && $chunk_size < $merge_under && $pchunk->is_entire_file) {
-                if ($comp_chunk && ! $comp_chunk->can_fit($chunk_size)) {
-                    $self->debug("Finalizing composite chunk $comp_chunk...");
-                    $comp_chunk->finalize;
-                    $comp_chunk = undef;
-                }
-                $comp_chunk ||= Brackup::CompositeChunk->new($root, $target);
-                $comp_chunk->append_little_chunk($schunk);
-            } else {
-                # store it regularly, as its own chunk on the target
-                $target->store_chunk($schunk)
-                    or die "Chunk storage failed.\n";
-                $target->add_to_inventory($pchunk => $schunk);
-            }
+    # have we already stored this chunk before?  (iterative backup)
+    my $schunk;
+    if ($schunk = $self->{parent}->{target}->stored_chunk_from_inventory($pchunk)) {
+        $pchunk->forget_chunkref;
+        push @{$self->{stored_chunks}}, $schunk;
+        return;
+    }
-            # if only this worked... (LWP protocol handler seems to
-            # get confused by its syscalls getting interrupted?)
-            #local $SIG{CHLD} = sub {
-            #    print "some child finished!\n";
-            #    $gpg_pm->start_some_processes;
-            #};
+    # weird case... have we stored this same pchunk digest in the
+    # current comp_chunk we're building?  these aren't caught by
+    # the above inventory check, because chunks in a composite
+    # chunk aren't added to the inventory until after the composite
+    # chunk has fully grown (because it's not until it's fully grown
+    # that we know the handle for it, its digest)
+    if ($self->{comp_chunk} &&
+        ($schunk = $self->{comp_chunk}->stored_chunk_from_dup_internal_raw($pchunk))) {
+        $pchunk->forget_chunkref;
+        push @{$self->{stored_chunks}}, $schunk;
+        return;
+    }
+    $self->show_status() unless $self->{file_has_shown_status}++;
+    $self->debug("  * storing chunk: ", $pchunk->as_string, "\n");
+    $self->report_progress(undef, $pchunk->file->path . " (" .
+        $pchunk->offset . "," . $pchunk->length . ")");
-            $n_kb_up += $pchunk->length / 1024;
-            push @stored_chunks, $schunk;
+    unless ($self->{dryrun}) {
+        $schunk = Brackup::StoredChunk->new($pchunk);
+        # see if we should pack it into a bigger blob
+        my $chunk_size = $schunk->backup_length;
+        # see if we should merge this chunk (in this case, file) together with
+        # other small files we encountered earlier, into a "composite chunk",
+        # to be stored on the target in one go.
+        # Note: no technical reason for only merging small files (is_entire_file),
+        # and not the tails of larger files.  just don't like the idea of files being
+        # both split up (for big head) and also merged together (for little end).
+        # would rather just have 1 type of magic per file.  (split it or join it)
+        if ($merge_under && $chunk_size < $merge_under && $pchunk->is_entire_file) {
+            if ($self->{comp_chunk} && ! $self->{comp_chunk}->can_fit($chunk_size)) {
+                $self->debug('Finalizing composite chunk ', $self->{comp_chunk}, '...');
+                $self->{comp_chunk}->finalize;
+                $self->{comp_chunk} = undef;
+            }
+            $self->{comp_chunk} ||= Brackup::CompositeChunk->new($self->{parent}->{root},
+                                                                 $self->{parent}->{target});
+            $self->{comp_chunk}->append_little_chunk($schunk);
+        } else {
+            # store it regularly, as its own chunk on the target
+            $self->{parent}->{target}->store_chunk($schunk)
+                or die "Chunk storage failed.\n";
+            $self->{parent}->{target}->add_to_inventory($pchunk => $schunk);
-        #$stats->note_stored_chunk($schunk);
-        # DEBUG: verify it got written correctly
-        if ($ENV{BRACKUP_PARANOID}) {
-            die "FIX UP TO NEW API";
-            #my $saved_ref = $target->load_chunk($handle);
-            #my $saved_len = length $$saved_ref;
-            #unless ($saved_len == $chunk->backup_length) {
-            #    warn "Saved length of $saved_len doesn't match our length of " . $chunk->backup_length . "\n";
-            #    die;
-            #}
+        push @{$self->{stored_chunks}}, $schunk;
+    }
+    $pchunk->forget_chunkref;
+    $schunk->forget_chunkref if $schunk;
+sub start_file {
+    my ($self, $file) = @_;
+    $self->end_file();
+    $self->{cur_file} = $file;
+    $self->{stored_chunks} = ();
+    $self->show_status() if $self->{cur_file}->is_dir;
+    $self->{file_has_shown_status} = 0;
+sub end_file {
+    my ($self) = @_;
+    my $file = $self->{cur_file};
+    return unless $file;
+    push @{$self->{stored_files}}, [$file, $self->{stored_chunks}];
+    $self->{modecounts}{$file->type}{$file->mode}++;
+    $self->{cur_file} = undef;
+sub _write_meta {
+    my ($self) = @_;
+    my $metafh = $self->{metafh};
+    # Don't write anything while we're building a composite chunk
+    unless ($self->{comp_chunk}) {
+        foreach (@{$self->{stored_files}}) {
+            my ($file, @stored_chunks) = @{$_};
+            print $metafh $file->as_rfc822(@stored_chunks, $self);
-        $pchunk->forget_chunkref;
-        $schunk->forget_chunkref if $schunk;
+        $self->{stored_files} = ();
+    }
+sub _end {
+    my ($self) = @_;
+    my $metafh = $self->{metafh};
+    $self->end_file();
+    if ($self->{comp_chunk}) {
+        $self->{comp_chunk}->finalize;
+        $self->{comp_chunk} = undef;
-    $end_file->();
-    $comp_chunk->finalize if $comp_chunk;
+    $self->_write_meta();
     unless ($self->{dryrun}) {
-        # write the metafile
+        my $backup_file = $self->{backup_file};
         $self->debug("Writing metafile ($backup_file)");
-        $self->report_progress(100, "Saving metafile " . $backup_file);
-        open (my $metafh, ">$backup_file") or die "Failed to open $backup_file for writing: $!\n";
-        print $metafh $self->backup_header;
-        $self->foreach_saved_file(sub {
-            my ($file, $schunk_list) = @_;
-            print $metafh $file->as_rfc822($schunk_list, $self);  # arrayref of StoredChunks
-        });
-        close $metafh or die;
-        my $contents;
-        # store the metafile, encrypted, on the target
-        if ($gpg_rcpt) {
-            my $encfile = $backup_file . ".enc";
-            system($self->{root}->gpg_path, $self->{root}->gpg_args,
-                   "--trust-model=always",
-                   "--recipient", $gpg_rcpt, "--encrypt", "--output=$encfile", "--yes", $backup_file)
-                and die "Failed to run gpg while encryping metafile: $!\n";
-            $contents = _contents_of($encfile);
-            unlink $encfile;
-        } else {
-            $contents = _contents_of($backup_file);
-        }
+        $self->report_progress(100, "Saving metafile $backup_file");
+        # Go to start of temporary file
+        seek $metafh, 0, 0;
+        # Concatenate metadata
+        my $contents = $self->backup_header;
+        $contents .= do { local $/; <$metafh>; };
+        open (my $out, '>', $backup_file)
+            or die "Failed to open $backup_file for writing: $!\n";
+        print $out $contents;
+        close $out;
         # store it on the target
-        $self->debug("Storing metafile to " . ref($target));
-        my $name = $self->{root}->publicname . "-" . $self->backup_time;
-        $target->store_backup_meta($name, $contents);
+        $self->debug("Storing metafile to ", ref($self->{parent}->{target}));
+        my $name = $self->{parent}->{root}->publicname . "-" . $self->{parent}->backup_time;
+        $self->{parent}->{target}->store_backup_meta($name, $contents);
-    $self->report_progress(100, "Backup complete.");
-    return $stats;
+    $self->report_progress(100, "Backup complete.");
-sub _contents_of {
-    my $file = shift;
-    open (my $fh, $file) or die "Failed to read contents of $file: $!\n";
-    return do { local $/; <$fh>; };
+    close $metafh or die $!;
 sub default_file_mode {
@@ -292,18 +296,16 @@ sub _default_mode {
     return (sort { $map->{$b} <=> $map->{$a} } keys %$map)[0];
-sub backup_time {
-    my $self = shift;
-    return $self->{backup_time} ||= time();
 sub backup_header {
-    my $self = shift;
+    my ($self) = @_;
+    my $root = $self->{parent}->{root};
+    my $now = $self->{parent}->backup_time;
     my $ret = "";
-    my $now = $self->backup_time;
     $ret .= "BackupTime: " . $now . " (" . localtime($now) . ")\n";
-    $ret .= "BackupDriver: " . ref($self->{target}) . "\n";
-    if (my $fields = $self->{target}->backup_header) {
+    $ret .= "BackupDriver: " . ref($self->{parent}->{target}) . "\n";
+    if (my $fields = $self->{parent}->{target}->backup_header) {
         foreach my $k (keys %$fields) {
             die "Bogus header field from driver" unless $k =~ /^\w+$/;
             my $val = $fields->{$k};
@@ -311,49 +313,57 @@ sub backup_header {
             $ret .= "Driver-$k: $val\n";
-    $ret .= "RootName: " . $self->{root}->name . "\n";
-    $ret .= "RootPath: " . $self->{root}->path . "\n";
-    $ret .= "DefaultFileMode: " . $self->default_file_mode . "\n";
-    $ret .= "DefaultDirMode: " . $self->default_directory_mode . "\n";
-    if (my $rcpt = $self->{root}->gpg_rcpt) {
-        $ret .= "GPG-Recipient: $rcpt\n";
+    $ret .= "RootName: " . $root->name . "\n";
+    $ret .= "RootPath: " . $root->path . "\n";
+    if (defined($self->default_file_mode)) {
+        $ret .= "DefaultFileMode: " . $self->default_file_mode . "\n";
+    }
+    if (defined($self->default_directory_mode)) {
+        $ret .= "DefaultDirMode: " . $self->default_directory_mode . "\n";
-    $ret .= "\n";
-    return $ret;
-sub add_file {
-    my ($self, $file, $handlelist) = @_;
-    $self->{modecounts}{$file->type}{$file->mode}++;
-    push @{ $self->{saved_files} }, [ $file, $handlelist ];
+    return "$ret\n";
-sub foreach_saved_file {
-    my ($self, $cb) = @_;
-    foreach my $rec (@{ $self->{saved_files} }) {
-        $cb->(@$rec);  # Brackup::File, arrayref of Brackup::StoredChunk
-    }
+package Brackup::Backup::_Sync;
+use strict;
+use warnings;
+use base 'Brackup::Backup::_Base';
+sub new {
+    my ($class, @params) = @_;
+    my $self = $class->SUPER::new(@params);
+    $self->{files} = [];
+    return $self;
-sub debug {
-    my ($self, @m) = @_;
-    return unless $self->{verbose};
-    my $line = join("", @m);
-    chomp $line;
-    print $line, "\n";
+sub _begin {
+    my ($self) = @_;
+    my $root = $self->{parent}->{root};
+    $self->report_progress(0, "Discovering files in " . $root->path . "...");
+    $self->debug("Discovering files in ", $root->path, " ...\n");
+    $root->foreach_file(sub {
+        my ($file) = @_;  # a Brackup::File
+        push @{$self->{files}}, $file;
+    });
+    $self->debug("Number of files: ", scalar(@{$self->{files}}), "\n");
+    $self->SUPER::_begin();
-sub report_progress {
-    my ($self, $percent, $message) = @_;
+sub _gather_chunks {
+    my ($self) = @_;
-    if ($self->{zenityprogress}) {
-        if (defined($message) && length($message) > 100) {
-            $message = substr($message, 0, 100)."...";
-        }
-        print STDOUT "#", $message, "\n" if defined $message;
-        print STDOUT $percent, "\n" if defined $percent;
+    foreach (@{$self->{files}}) {
+        $self->_emit_file($_);

