📄 file.pm
字号:
$nrecs = $oldsize - $pos; } } $self->_fixrecs(@data); my $data = join '', @data; my $datalen = length $data; my $oldlen = 0; # compute length of data being removed for ($pos .. $pos+$nrecs-1) { last unless defined $self->_fill_offsets_to($_); my $rec = $self->_fetch($_); last unless defined $rec; push @result, $rec; # Why don't we just use length($rec) here? # Because that record might have come from the cache. _splice # might have been called to flush out the deferred-write records, # and in this case length($rec) is the length of the record to be # *written*, not the length of the actual record in the file. But # the offsets are still true. 20020322 $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_] if defined $self->{offsets}[$_+1]; } $self->_fill_offsets_to($pos+$nrecs); # Modify the file $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen); # Adjust the offsets table $self->_oadjust([$pos, $nrecs, @data]); { # Take this read cache stuff out into a separate function # You made a half-attempt to put it into _oadjust. # Finish something like that up eventually. # STORE also needs to do something similarish # update the read cache, part 1 # modified records for ($pos .. $pos+$nrecs-1) { my $new = $data[$_-$pos]; if (defined $new) { $self->{cache}->update($_, $new); } else { $self->{cache}->remove($_); } } # update the read cache, part 2 # moved records - records past the site of the change # need to be renumbered # Maybe merge this with the previous block? { my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys; my @newkeys = map $_-$nrecs+@data, @oldkeys; $self->{cache}->rekey(\@oldkeys, \@newkeys); } # Now there might be too much data in the cache, if we spliced out # some short records and spliced in some long ones. If so, flush # the cache. $self->_cache_flush; } # Yes, the return value of 'splice' *is* actually this complicated wantarray ? @result : @result ? $result[-1] : undef;}# write data into the file# $data is the data to be written.# it should be written at position $pos, and should overwrite# exactly $len of the following bytes. # Note that if length($data) > $len, the subsequent bytes will have to # be moved up, and if length($data) < $len, they will have to# be moved downsub _twrite { my ($self, $data, $pos, $len) = @_; unless (defined $pos) { die "\$pos was undefined in _twrite"; } my $len_diff = length($data) - $len; if ($len_diff == 0) { # Woo-hoo! my $fh = $self->{fh}; $self->_seekb($pos); $self->_write_record($data); return; # well, that was easy. } # the two records are of different lengths # our strategy here: rewrite the tail of the file, # reading ahead one buffer at a time # $bufsize is required to be at least as large as the data we're overwriting my $bufsize = _bufsize($len_diff); my ($writepos, $readpos) = ($pos, $pos+$len); my $next_block; my $more_data; # Seems like there ought to be a way to avoid the repeated code # and the special case here. The read(1) is also a little weird. # Think about this. do { $self->_seekb($readpos); my $br = read $self->{fh}, $next_block, $bufsize; $more_data = read $self->{fh}, my($dummy), 1; $self->_seekb($writepos); $self->_write_record($data); $readpos += $br; $writepos += length $data; $data = $next_block; } while $more_data; $self->_seekb($writepos); $self->_write_record($next_block); # There might be leftover data at the end of the file $self->_chop_file if $len_diff < 0;}# _iwrite(D, S, E)# Insert text D at position S.# Let C = E-S-|D|. If C < 0; die. # Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E).# Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched.## In a later version, don't read the entire intervening area into# memory at once; do the copying block by block.sub _iwrite { my $self = shift; my ($D, $s, $e) = @_; my $d = length $D; my $c = $e-$s-$d; local *FH = $self->{fh}; confess "Not enough space to insert $d bytes between $s and $e" if $c < 0; confess "[$s,$e) is an invalid insertion range" if $e < $s; $self->_seekb($s); read FH, my $buf, $e-$s; $D .= substr($buf, 0, $c, ""); $self->_seekb($s); $self->_write_record($D); return $buf;}# Like _twrite, but the data-pos-len triple may be repeated; you may# write several chunks. All the writing will be done in# one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.sub _mtwrite { my $self = shift; my $unwritten = ""; my $delta = 0; @_ % 3 == 0 or die "Arguments to _mtwrite did not come in groups of three"; while (@_) { my ($data, $pos, $len) = splice @_, 0, 3; my $end = $pos + $len; # The OLD end of the segment to be replaced $data = $unwritten . $data; $delta -= length($unwritten); $unwritten = ""; $pos += $delta; # This is where the data goes now my $dlen = length $data; $self->_seekb($pos); if ($len >= $dlen) { # the data will fit $self->_write_record($data); $delta += ($dlen - $len); # everything following moves down by this much $data = ""; # All the data in the buffer has been written } else { # won't fit my $writable = substr($data, 0, $len - $delta, ""); $self->_write_record($writable); $delta += ($dlen - $len); # everything following moves down by this much } # At this point we've written some but maybe not all of the data. # There might be a gap to close up, or $data might still contain a # bunch of unwritten data that didn't fit. my $ndlen = length $data; if ($delta == 0) { $self->_write_record($data); } elsif ($delta < 0) { # upcopy (close up gap) if (@_) { $self->_upcopy($end, $end + $delta, $_[1] - $end); } else { $self->_upcopy($end, $end + $delta); } } else { # downcopy (insert data that didn't fit; replace this data in memory # with _later_ data that doesn't fit) if (@_) { $unwritten = $self->_downcopy($data, $end, $_[1] - $end); } else { # Make the file longer to accommodate the last segment that doesn' $unwritten = $self->_downcopy($data, $end); } } }}# Copy block of data of length $len from position $spos to position $dpos# $dpos must be <= $spos## If $len is undefined, go all the way to the end of the file# and then truncate it ($spos - $dpos bytes will be removed)sub _upcopy { my $blocksize = 8192; my ($self, $spos, $dpos, $len) = @_; if ($dpos > $spos) { die "source ($spos) was upstream of destination ($dpos) in _upcopy"; } elsif ($dpos == $spos) { return; } while (! defined ($len) || $len > 0) { my $readsize = ! defined($len) ? $blocksize : $len > $blocksize ? $blocksize : $len; my $fh = $self->{fh}; $self->_seekb($spos); my $bytes_read = read $fh, my($data), $readsize; $self->_seekb($dpos); if ($data eq "") { $self->_chop_file; last; } $self->_write_record($data); $spos += $bytes_read; $dpos += $bytes_read; $len -= $bytes_read if defined $len; }}# Write $data into a block of length $len at position $pos,# moving everything in the block forwards to make room.# Instead of writing the last length($data) bytes from the block# (because there isn't room for them any longer) return them.## Undefined $len means 'until the end of the file'sub _downcopy { my $blocksize = 8192; my ($self, $data, $pos, $len) = @_; my $fh = $self->{fh}; while (! defined $len || $len > 0) { my $readsize = ! defined($len) ? $blocksize : $len > $blocksize? $blocksize : $len; $self->_seekb($pos); read $fh, my($old), $readsize; my $last_read_was_short = length($old) < $readsize; $data .= $old; my $writable; if ($last_read_was_short) { # If last read was short, then $data now contains the entire rest # of the file, so there's no need to write only one block of it $writable = $data; $data = ""; } else { $writable = substr($data, 0, $readsize, ""); } last if $writable eq ""; $self->_seekb($pos); $self->_write_record($writable); last if $last_read_was_short && $data eq ""; $len -= $readsize if defined $len; $pos += $readsize; } return $data;}# Adjust the object data structures following an '_mtwrite'# Arguments are# [$pos, $nrecs, @length] items# indicating that $nrecs records were removed at $recpos (a record offset)# and replaced with records of length @length...# Arguments guarantee that $recpos is strictly increasing.# No return valuesub _oadjust { my $self = shift; my $delta = 0; my $delta_recs = 0; my $prev_end = -1; my %newkeys; for (@_) { my ($pos, $nrecs, @data) = @$_; $pos += $delta_recs; # Adjust the offsets of the records after the previous batch up # to the first new one of this batch for my $i ($prev_end+2 .. $pos - 1) { $self->{offsets}[$i] += $delta; $newkey{$i} = $i + $delta_recs; } $prev_end = $pos + @data - 1; # last record moved on this pass # Remove the offsets for the removed records; # replace with the offsets for the inserted records my @newoff = ($self->{offsets}[$pos] + $delta); for my $i (0 .. $#data) { my $newlen = length $data[$i]; push @newoff, $newoff[$i] + $newlen; $delta += $newlen; } for my $i ($pos .. $pos+$nrecs-1) { last if $i+1 > $#{$self->{offsets}}; my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i]; $delta -= $oldlen; }# # also this data has changed, so update it in the cache# for (0 .. $#data) {# $self->{cache}->update($pos + $_, $data[$_]);# }# if ($delta_recs) {# my @oldkeys = grep $_ >= $pos + @data, $self->{cache}->ckeys;# my @newkeys = map $_ + $delta_recs, @oldkeys;# $self->{cache}->rekey(\@oldkeys, \@newkeys);# } # replace old offsets with new splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff; # What if we just spliced out the end of the offsets table? # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO $delta_recs += @data - $nrecs; # net change in total number of records } # The trailing records at the very end of the file if ($delta) { for my $i ($prev_end+2 .. $#{$self->{offsets}}) { $self->{offsets}[$i] += $delta; } } # If we scrubbed out all known offsets, regenerate the trivial table # that knows that the file does indeed start at 0. $self->{offsets}[0] = 0 unless @{$self->{offsets}}; # If the file got longer, the offsets table is no longer complete # $self->{eof} = 0 if $delta_recs > 0; # Now there might be too much data in the cache, if we spliced out # some short records and spliced in some long ones. If so, flush # the cache. $self->_cache_flush;}# If a record does not already end with the appropriate terminator# string, append one.sub _fixrecs { my $self = shift; for (@_) { $_ = "" unless defined $_; $_ .= $self->{recsep} unless substr($_, - $self->{recseplen}) eq $self->{recsep}; }}################################################################## Basic read, write, and seek## seek to the beginning of record #$n# Assumes that the offsets table is already correctly populated## Note that $n=-1 has a special meaning here: It means the start of# the last known record; this may or may not be the very last record# in the file, depending on whether the offsets table is fully populated.#sub _seek { my ($self, $n) = @_; my $o = $self->{offsets}[$n]; defined($o) or confess("logic error: undefined offset for record $n"); seek $self->{fh}, $o, SEEK_SET or confess "Couldn't seek filehandle: $!"; # "Should never happen."}# seek to byte $b in the filesub _seekb { my ($self, $b) = @_; seek $self->{fh}, $b, SEEK_SET or die "Couldn't seek filehandle: $!"; # "Should never happen."}# populate the offsets table up to the beginning of record $n# return the offset of record $nsub _fill_offsets_to { my ($self, $n) = @_; return $self->{offsets}[$n] if $self->{eof}; my $fh = $self->{fh}; local *OFF = $self->{offsets}; my $rec; until ($#OFF >= $n) { $self->_seek(-1); # tricky -- see comment at _seek $rec = $self->_read_record; if (defined $rec) { push @OFF, int(tell $fh); # Tels says that int() saves memory here } else { $self->{eof} = 1; return; # It turns out there is no such record } } # we have now read all the records up to record n-1, # so we can return the offset of record n $OFF[$n];}sub _fill_offsets { my ($self) = @_; my $fh = $self->{fh}; local *OFF = $self->{offsets}; $self->_seek(-1); # tricky -- see comment at _seek # Tels says that inlining read_record() would make this loop # five times faster. 20030508 while ( defined $self->_read_record()) { # int() saves us memory here push @OFF, int(tell $fh); } $self->{eof} = 1; $#OFF;}# assumes that $rec is already suitably terminatedsub _write_record { my ($self, $rec) = @_; my $fh = $self->{fh}; local $\ = ""; print $fh $rec or die "Couldn't write record: $!"; # "Should never happen."# $self->{_written} += length($rec);}sub _read_record { my $self = shift; my $rec; { local $/ = $self->{recsep}; my $fh = $self->{fh}; $rec = <$fh>; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -