Index: t/06-concurrent-changes.t
===================================================================
--- t/06-concurrent-changes.t (revision 0)
+++ t/06-concurrent-changes.t (revision 0)
@@ -0,0 +1,118 @@
+# -*-perl-*-
+
+# This test backs up a file that is being modified by a separate process.
+# We do this by forking: the parent backs up while the child modifies the file.
+
+use strict;
+use warnings;
+use Test::More tests => 27;
+
+use Brackup::Test;
+use FindBin qw($Bin);
+use Brackup::Util qw(tempfile tempdir);
+use POSIX ();
+
+my $root_dir = tempdir();
+my $pid;
+
+END {
+    # kill child when we're done
+    kill 'KILL', $pid if $pid;
+
+    # do_backup chdirs to root_dir, which is a tempdir and this causes
+    # problems with cleanup. So we chdir back.
+    chdir $Bin;
+};
+
+# fork returns undef on failure; without this check the parent itself
+# would fall into the child's endless write loop below
+$pid = fork;
+defined $pid or die "fork failed: $!";
+
+if (!$pid) {
+    # child: write 1K to a 'small' file (will fit in one chunk)
+    # and 100K to a 'large' file (will need many chunks) until killed
+    # stop after 1 minute as a precaution
+    #
+    # on failure we _exit (instead of die) so that the child never runs END blocks
+    open my $small, '>', "$root_dir/small"
+        or do { warn "cannot write $root_dir/small: $!"; POSIX::_exit(2) };
+    binmode $small;
+    open my $large, '>', "$root_dir/large"
+        or do { warn "cannot write $root_dir/large: $!"; POSIX::_exit(2) };
+    binmode $large;
+
+    my $start = time;
+    while(1) {
+        seek $small, 0, 0;
+        print {$small} chr(int(rand(256))) for 1..1024;
+
+        seek $large, 0, 0;
+        print {$large} chr(int(rand(256))) for 1..1024*100;
+
+        POSIX::_exit(1) if time - $start > 60;
+    }
+}
+
+# Parent
+#
+############### Backup with no encryption
+
+my ($digdb_fh, $digdb_fn) = tempfile();
+close($digdb_fh);
+
+ok(-d $root_dir, "test data to backup exists");
+my $backup_file = do_backup(
+                            with_confsec => sub {
+                                my $csec = shift;
+                                $csec->add("path", $root_dir);
+                                $csec->add("chunk_size", "2k");
+                                $csec->add("digestdb_file", $digdb_fn);
+                            },
+                            );
+
+############### Restore
+
+my $restore_dir = do_restore($backup_file);
+ok(-f "$restore_dir/small", "file 'small' restored");
+ok(-f "$restore_dir/large", "file 'large' restored");
+
+
+############### Backup with encryption
+
+SKIP: {
+    `gpg --version`
+        or skip 'gpg binary not found, skipping encrypted tests', 13;
+
+    my $gpg_args = ["--no-default-keyring",
+                    "--keyring=$Bin/data/pubring-test.gpg",
+                    "--secret-keyring=$Bin/data/secring-test.gpg"];
+
+    ($digdb_fh, $digdb_fn) = tempfile();
+    close($digdb_fh);
+
+    my $backup_file = do_backup(
+                                with_confsec => sub {
+                                    my $csec = shift;
+                                    $csec->add("path", $root_dir);
+                                    $csec->add("chunk_size", "2k");
+                                    $csec->add("digestdb_file", $digdb_fn);
+                                    $csec->add("gpg_recipient", "2149C469");
+                                },
+                                with_root => sub {
+                                    my $root = shift;
+                                    $root->{gpg_args} = $gpg_args;
+                                },
+                                );
+
+    ############### Restore
+
+    my $gpg_restore_dir = do {
+        local @Brackup::GPG_ARGS = @$gpg_args;
+        do_restore($backup_file);
+    };
+    ok(-f "$gpg_restore_dir/small", "file 'small' restored");
+    ok(-f "$gpg_restore_dir/large", "file 'large' restored");
+
+}; # skip
+
Index: lib/Brackup/Backup.pm
===================================================================
--- lib/Brackup/Backup.pm (revision 23)
+++ lib/Brackup/Backup.pm (working copy)
@@ -205,9 +205,13 @@
             unless ($self->{dryrun}) {
                 $schunk = Brackup::StoredChunk->new($pchunk);
 
-                # encrypt it
+                # set schunk's data. after this schunk's data are supposed to not change.
+                # also the raw digest is updated to match the actual read data
                 if ($gpg_rcpt) {
                     $schunk->set_encrypted_chunkref($gpg_pm->enc_chunkref_of($pchunk));
+                } else {
+                    # no encryption: copy raw data to freeze them and update the raw digest
+                    $schunk->copy_raw_data;
                 }
 
                 # see if we should pack it into a bigger blob
Index: lib/Brackup/PositionedChunk.pm
===================================================================
--- lib/Brackup/PositionedChunk.pm (revision 23)
+++ lib/Brackup/PositionedChunk.pm (working copy)
@@ -61,6 +61,20 @@
     return $self->{_raw_digest} ||= $self->_calc_raw_digest;
 }
 
+# called when the raw data are consumed (eg sent to gpg) to update the
+# digest with that of the actual read data (file might have changed in the meanwhile)
+#
+sub set_raw_digest {
+    my ($self, $dig) = @_;
+    $self->{_raw_digest} = $dig;
+
+    # if the file has a single chunk, we also update its full_digest
+    my $n_chunks = $self->{file}->chunks
+        or die "zero chunks?";
+    $self->{file}->set_full_digest($dig)
+        if $n_chunks == 1;
+}
+
 sub _calc_raw_digest {
     my $self = shift;
 
Index: lib/Brackup/GPGProcess.pm
===================================================================
--- lib/Brackup/GPGProcess.pm (revision 23)
+++ lib/Brackup/GPGProcess.pm (working copy)
@@ -8,9 +8,15 @@
 sub new {
     my ($class, $pchunk) = @_;
 
+    # encrypted data will be written there
     my $destfh = tempfile_obj();
     my $destfn = $destfh->filename;
 
+    # pipe to get the raw digest from the child
+    my $pipe_r = Symbol::gensym();
+    my $pipe_w = Symbol::gensym();
+    pipe ($pipe_r, $pipe_w) or die "cannot open pipe: $!";
+
     my $no_fork = $ENV{BRACKUP_NOFORK} || 0; # if true (perhaps on Windows?), then don't fork... do all inline.
 
     my $pid = $no_fork ? 0 : fork;
@@ -20,16 +26,23 @@
 
     # caller (parent)
     if ($pid) {
+        close $pipe_w; # only the child writes to the pipe
+
         return bless {
             destfh => $destfh,
+            pipe_r => $pipe_r,
             pid => $pid,
             running => 1,
         }, $class;
     }
 
     # child: encrypt and exit(0)...
-    $pchunk->root->encrypt($pchunk->raw_chunkref, $destfn);
+    my $dig = $pchunk->root->encrypt($pchunk->raw_chunkref, $destfn);
+    print $pipe_w $dig;
+    close $pipe_w;
+    close $pipe_r unless $no_fork;
 
+
     unless (-e $destfn) {
         # if the file's gone, that likely means the parent process
         # already terminated and unlinked our temp file, in
@@ -44,6 +57,12 @@
     if ($no_fork) {
+        # inline mode: the object is created already stopped (no 'running' flag), so
+        # don't rely on note_stopped; collect the digest and release the pipe here
+        my $raw_digest = do { local $/; <$pipe_r> };
+        close $pipe_r;
+
         return bless {
             destfh => $destfh,
+            raw_digest => $raw_digest,
             pid => 0,
         }, $class;
     }
@@ -57,14 +76,27 @@
 sub pid { $_[0]{pid} }
 
 sub running { $_[0]{running} }
-sub note_stopped { $_[0]{running} = 0; }
+sub note_stopped {
+    my ($self) = @_;
+    $self->{running} = 0;
+
+    # read raw_digest from the pipe so that we can close it. Otherwise, if there are many
+    # uncollected processes we might reach the limit of open pipes.
+    # Objects created inline (no fork) carry no pipe_r, so there is nothing to read.
+    my $h = delete $self->{pipe_r}
+        or return;
+    local $/;    # the digest is sent without a trailing newline: read up to EOF
+    $self->{raw_digest} = <$h>;
+    close $h;
+}
+
 
 sub chunkref {
     my ($self) = @_;
     die "Still running!" if $self->{running};
     die "No data in file" unless $self->size_on_disk;
 
-    return $self->{destfh};
+    return ($self->{destfh}, $self->{raw_digest});
 }
 
 sub size_on_disk {
Index: lib/Brackup/File.pm
===================================================================
--- lib/Brackup/File.pm (revision 23)
+++ lib/Brackup/File.pm (working copy)
@@ -132,6 +132,15 @@
     return $self->{_full_digest} ||= $self->_calc_full_digest;
 }
 
+# called only for files with a single chunk (so chunk digest = full digest),
+# when the raw data are consumed (eg sent to gpg), to update the
+# digest with that of the actual read data (file might have changed in the meanwhile)
+#
+sub set_full_digest {
+    my ($self, $dig) = @_;
+    $self->{_full_digest} = $dig;
+}
+
 sub _calc_full_digest {
     my $self = shift;
     return "" unless $self->is_file;
Index: lib/Brackup/Test.pm
===================================================================
--- lib/Brackup/Test.pm (revision 23)
+++ lib/Brackup/Test.pm (working copy)
@@ -216,7 +216,7 @@
 
                 # no encryption, copy raw data and store schunk
                 my $schunk = Brackup::StoredChunk->new($pchunk);
-#                $schunk->copy_raw_data;
+                $schunk->copy_raw_data;
                 $target->store_chunk($schunk);
             }
         }
Index: lib/Brackup/Root.pm
===================================================================
--- lib/Brackup/Root.pm (revision 23)
+++ lib/Brackup/Root.pm (working copy)
@@ -7,6 +7,7 @@
 use Brackup::Util qw(tempfile io_print_to_fh);
 use IPC::Open2;
 use Symbol;
+use Digest::SHA1;
 
 sub new {
     my ($class, $conf) = @_;
@@ -200,7 +201,8 @@
     $pop_dir->() while @dir_stack;
 }
 
-# given filehandle to data, returns encrypted data
+# given filehandle to data and output filename, encrypts data and returns
+# the digest of the raw data that were actually sent to gpg
 sub encrypt {
     my ($self, $data_fh, $outfn) = @_;
     my $gpg_rcpt = $self->gpg_rcpt
@@ -221,8 +223,9 @@
     );
 
     # send data to gpg
+    my $sha1 = Digest::SHA1->new;
     binmode $cin;
-    my $bytes = io_print_to_fh($data_fh, $cin)
+    my $bytes = io_print_to_fh($data_fh, $cin, $sha1)
       or die "Sending data to gpg failed: $!";
 
     close $cin;
@@ -230,6 +233,8 @@
 
     waitpid($pid, 0);
     die "GPG failed: $!" if $? != 0; # If gpg return status is non-zero
+
+    return "sha1:" . $sha1->hexdigest;
 }
 
 1;
Index: lib/Brackup/StoredChunk.pm
===================================================================
--- lib/Brackup/StoredChunk.pm (revision 23)
+++ lib/Brackup/StoredChunk.pm (working copy)
@@ -3,8 +3,9 @@
 use strict;
 use warnings;
 use Carp qw(croak);
-use Brackup::Util qw(io_sha1);
+use Brackup::Util qw(tempfile_obj io_sha1 io_print_to_fh);
 use Fcntl qw(SEEK_SET);
+use Digest::SHA1;
 
 # fields:
 # pchunk - always
@@ -149,33 +150,49 @@
 sub chunkref {
     my $self = shift;
     if ($self->{_chunkref}) {
-        $self->{_chunkref}->seek(0, SEEK_SET);
+        seek($self->{_chunkref}, 0, SEEK_SET);
         return $self->{_chunkref};
     }
 
-    # encrypting case: chunkref gets set via set_encrypted_chunkref in Backup::backup
-    croak "ASSERT: encrypted but no chunkref set" if $self->encrypted;
-
     # caller/consistency check:
     Carp::confess("Can't access chunkref on lite StoredChunk instance (handle only)")
         if $self->{lite};
 
-    # non-encrypting case
-    return $self->{_chunkref} = $self->{pchunk}->raw_chunkref;
+    # chunkref gets set via set_encrypted_chunkref (encryption) or copy_raw_data (no encryption) in Backup::backup
+    croak "ASSERT: no chunkref set";
 }
 
 # set encrypted chunk filehandle and digest/length
 sub set_encrypted_chunkref {
-    my ($self, $fh, $enc_length) = @_;
+    my ($self, $fh, $enc_length, $raw_digest) = @_;
     die "ASSERT: not enc" unless $self->encrypted;
     die "ASSERT: already set?" if $self->{backlength} || $self->{backdigest};
 
     $self->{backdigest} = "sha1:" . io_sha1($fh);
     $self->{backlength} = $enc_length;
 
+    # we update pchunk's digest with the one of the actual read data
+    $self->{pchunk}->set_raw_digest($raw_digest);
+
     return $self->{_chunkref} = $fh;
 }
 
+# called in case of no encryption, we copy the raw data to freeze them
+# and to update the raw digest with the actual read data
+sub copy_raw_data {
+    my ($self) = @_;
+    die "ASSERT: enc" if $self->encrypted;
+
+    # we create new file dumping the data from the io handle
+    my $sha1 = Digest::SHA1->new;
+    my $fh = tempfile_obj();
+    my $bytes = io_print_to_fh($self->{pchunk}->raw_chunkref, $fh, $sha1);
+
+    $self->{pchunk}->set_raw_digest("sha1:".$sha1->hexdigest);
+
+    return $self->{_chunkref} = $fh;
+}
+
 # lose the chunkref data
 sub forget_chunkref {
     my $self = shift;
Index: lib/Brackup/GPGProcManager.pm
===================================================================
--- lib/Brackup/GPGProcManager.pm (revision 23)
+++ lib/Brackup/GPGProcManager.pm (working copy)
@@ -42,11 +42,11 @@
     }
 
     $self->_proc_summary_dump;
-    my ($cref, $enc_length) = $self->get_proc_chunkref($proc);
+    my ($cref, $enc_length, $dig) = $self->get_proc_chunkref($proc);
     $self->_proc_summary_dump;
     $self->start_some_processes;
 
-    return ($cref, $enc_length);
+    return ($cref, $enc_length, $dig);
 }
 
 sub start_some_processes {
@@ -87,10 +87,10 @@
 
 sub get_proc_chunkref {
     my ($self, $proc) = @_;
-    my $cref = $proc->chunkref;
+    my ($cref, $dig) = $proc->chunkref;
     delete $self->{procs}{$proc};
     $self->{uncollected_bytes} -= $proc->size_on_disk;
-    return ($cref, $proc->size_on_disk);
+    return ($cref, $proc->size_on_disk, $dig);
 }
 
 # returns PID of a process that finished