File: big_records.pm

package info (click to toggle)
libvcp-perl 0.9-20050110-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 1,608 kB
  • ctags: 827
  • sloc: perl: 18,194; makefile: 42; sh: 11
file content (299 lines) | stat: -rw-r--r-- 7,046 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
package VCP::DB_File::big_records;

=head1 NAME

VCP::DB_File::big_records - VCP::DB_File::sdbml subclass for large records

=head1 SYNOPSIS

    use VCP::DB_File;
    VCP::DB_File->new;

=head1 DESCRIPTION

sdbm files are limited to 1008 bytes per record, including key.  That's
just not enough for storing revisions in (although it suffices for most
other VCP needs).

This subclass assumes your disk is large enough and that you won't be
altering records in place, but allows for unlimited record sizes.  No
attempt is made to reclaim free space; so far our application doesn't
need that.

Records are retrievable in the order they were added in (so this "file"
may be a queue) or in random order (like a DB file).

=head1 INTERNALS

There are three related data sets:

    - an sdbm database "db", which is an index to the location and size
      of each record for the key of the record
    - a file "records.mdb", which is a set of ( key, record size, record data)
      entries for each record
    - a file "order.txt", which is a flat file with one key per record
      in the order that they were added to the file

Generally, you should use this module only to read or write a dataset as an
error in reading could cause corruption if you write (the orders.txt is not
flushed or seek() properly on a read error).  This is sufficient for our
purposes, because these files are only used by but could be a problem if read/write mode is needed.

=over

=for test_script t/01db_file_big_records.t

=cut

$VERSION = 1 ;

@EXPORT_OK = qw( escape_nl deescape_nl );

@ISA = qw( VCP::DB_File::sdbm Exporter );

use strict ;

use VCP::Debug qw( :debug );
use Fcntl;
use File::Spec;
use VCP::Debug qw( :debug );
use VCP::Logger qw( lg pr BUG );
use VCP::Utils qw( empty );
use VCP::DB_File::sdbm;

#use base qw( VCP::DB_File::sdbm Exporter );

#use fields (
#   'RecordsFileName',  ## Where we store the data
#   'RecordsFH',        ## Our handle to it when its open
#   'OrderFileName',    ## The order the records were added in
#   'OrderFH',
#);


sub db_file {
   my $self = shift;
   return File::Spec->catfile(
      $self->store_loc,
      "db"
   );
}


sub records_file {
   my $self = shift;
   return File::Spec->catfile(
       $self->store_loc,
       "records.mdb"
    );
}


sub order_file {
   my $self = shift;
   return File::Spec->catfile(
       $self->store_loc,
       "order.txt"
    );
}


sub close_db {
   my $self = shift;

   close $self->{RecordsFH} if $self->{RecordsFH};
   close $self->{OrderFH}   if $self->{OrderFH};
   $self->{RecordsFH} = undef;
   $self->{OrderFH} = undef;

   $self->SUPER::close_db;
}


sub _open {
   my ( $fn, $mode ) = @_;

   local *DATAFILE;
   sysopen DATAFILE, $fn, $mode or die "$! opening '$fn'";
   binmode DATAFILE;
   return *DATAFILE{IO};
}


sub open_db {
   my $self = shift;

   $self->SUPER::open_db;
   $self->{RecordsFH} = _open $self->records_file, O_RDWR | O_CREAT;
   $self->{OrderFH}   = _open $self->order_file,   O_RDWR | O_CREAT;
}


sub open_existing_db {
   my $self = shift;

   $self->SUPER::open_db;

   my $fn = $self->records_file;

   $self->{RecordsFH} = _open $self->records_file, O_RDWR;
   $self->{OrderFH}   = _open $self->order_file,   O_RDWR;
}


sub escape_nl {
   my $k = shift;
   $k =~ s/\\/\\\\/g;
   $k =~ s/\n/\\n/g;
   return $k;
}


sub deescape_nl {
   my $k = shift;
   1 while chomp $k;
   $k =~ s{\\n}{\n}g;
   $k =~ s{\\\\}{\\}g;
   return $k;
}


sub set {
   my $self = shift;
   my $key_parts = shift;

   my $key = $self->pack_values( @$key_parts );

   my $pointer = $self->SUPER::raw_get( $key );
   my ( $location, $old_encoded_size ) =
      defined $pointer
         ? $self->unpack_values( $pointer )
         : ( undef, 0 );

   if ( !defined $location ) {
      my $fh = $self->{OrderFH};
      print $fh escape_nl( $key ), "\n";
   }

   BUG "corrupt pointer '$pointer' for '$key'\n"
      if defined $pointer and empty $location or empty $old_encoded_size;

   ## The dual \n is for easy reading in an editor
   my $packed = escape_nl( $self->pack_values( @_ ) ) . "\n====\n";
   my $data_size = length( $packed ) - 6;  ## the \n====\n is not data.

   my $header = "$key;$data_size\n";  # not packed, but safe-ish
       ## We include the key in case we ever need to rebuild the sdbm
       ## file.

   my $encoded_size = $data_size + length $header;

   if ( $encoded_size > $old_encoded_size && defined $location ) {
      sysseek  $self->{RecordsFH}, $location, 0;
      syswrite $self->{RecordsFH}, ( "x" x ( $old_encoded_size - 1 ) ) . "\n";
      $location = undef;
   }

   if ( empty $location ) {
      lg "growing $key from $old_encoded_size to $encoded_size"
         if $old_encoded_size;
      $location = sysseek( $self->{RecordsFH}, 0, 2 );
      $self->raw_set( $key, $self->pack_values( $location, $encoded_size ) );
   }

   sysseek  $self->{RecordsFH}, $location, 0;
   syswrite $self->{RecordsFH}, $header;
   syswrite $self->{RecordsFH}, $packed;
}


sub get_data {
   my $self = shift;
   my ( $location, $encoded_size ) = @_;

   return if empty $location;

   sysseek $self->{RecordsFH}, $location, 0;
   sysread $self->{RecordsFH}, my( $v ), $encoded_size;
   my ( $header, $value ) = split /\n/, $v, 2;
   my ( $key, $length ) = split /;/, $header;

   BUG "corrupt header '$header'"
      if empty $key || empty $length;
   substr( $value, $length ) = "";  ## Knock off extra data
   BUG "length trim failed: $length != ", length $value
      if length $value != $length;

   return $self->unpack_values( deescape_nl $value );
}


sub get {
   my $self = shift;
   return $self->get_data( $self->SUPER::get( @_ ) )
}


=item foreach_record_do

    $db->foreach_record_do( sub { ... } );

Iterate over the contents in as-stored order, executing sub { ... }
for each one found.

=cut

sub foreach_record_do {
   my $self = shift;
   my ( $sub ) = @_;

   my $fh = $self->{OrderFH};

   seek $fh, 0, 0;
   while ( <$fh> ) {
      $sub->(
         $self->get_data(
            $self->unpack_values(
               $self->raw_get( deescape_nl $_ )
            )
         )
      );
   }
}


=item dump

BROKEN FOR NOW.  Reports the pointers, not the pointed-to data

TODO: fix.

=cut

=back

=head1 LIMITATIONS

There is no way (yet) of telling the mapper to continue processing the
rules list.  We could implement labels like C< <<I<label>>> > to be
allowed before pattern expressions (but not between pattern and result),
and we could then impelement C< <<goto I<label>>> >.  And a C< <<next>>
> could be used to fall through to the next label.  All of which is
wonderful, but I want to gain some real world experience with the
current system and find a use case for gotos and fallthroughs before I
implement them.  This comment is here to solicit feedback :).

=head1 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=head1 COPYRIGHT

Copyright (c) 2000, 2001, 2002 Perforce Software, Inc.
All rights reserved.

See L<VCP::License|VCP::License> (C<vcp help license>) for the terms of use.

=cut

1