File: BulkMerge.pm

package info (click to toggle)
libdemeter-perl 0.9.27%2Bds6-9
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 74,028 kB
  • sloc: perl: 73,233; python: 2,196; makefile: 1,999; ansic: 1,368; lisp: 454; sh: 74
file content (318 lines) | stat: -rw-r--r-- 9,653 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
package Demeter::Data::BulkMerge;

use Moose;
extends 'Demeter';
with 'Demeter::Data::XDI';

use List::MoreUtils qw(any);

# Scalar options and bookkeeping values used by the merge method.

# when true, merge asks the master to align each file before accumulating it
has 'align' => (
  is      => 'rw',
  isa     => 'Bool',
  default => 0,
);

# copied into the plot object's e_smooth setting for the duration of merge
has 'smooth' => (
  is      => 'rw',
  isa     => 'Int',
  default => 0,
);

# name of a plugin; merge prepends 'Demeter::Plugins::' to get the class.
# An empty string means files are imported directly, without a plugin.
has 'plugin' => (
  is      => 'rw',
  isa     => 'Str',
  default => q{},
);

# merge stops consuming files once its running spectrum count equals this
has 'max' => (
  is      => 'rw',
  isa     => 'Int',
  default => 1e9,
);

# reference file size in bytes; when 0, merge uses the size on disk of the
# master's source file instead
has 'size' => (
  is      => 'rw',
  isa     => 'Int',
  default => 0,
);

# files smaller than margin * (reference size) are skipped by merge
has 'margin' => (
  is      => 'rw',
  isa     => 'LaxNum',
  default => 0.997,
);

# set by merge to its final spectrum count
has 'count' => (
  is      => 'rw',
  isa     => 'Int',
  default => 0,
);

# List-valued attributes.  Each uses Moose's native Array trait and
# delegates push/pop/shift/unshift/clear under an attribute-specific name.

# the files to be merged; merge walks this list in order
has 'data' => (
  traits  => ['Array'],
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
  handles => {
    'push_data'    => 'push',
    'pop_data'     => 'pop',
    'shift_data'   => 'shift',
    'unshift_data' => 'unshift',
    'clear_data'   => 'clear',
  },
);

# spectrum counts at which merge saves an intermediate merged group
has 'subsample' => (
  traits  => ['Array'],
  is      => 'rw',
  isa     => 'ArrayRef[Int]',
  default => sub { [] },
  handles => {
    'push_subsample'    => 'push',
    'pop_subsample'     => 'pop',
    'shift_subsample'   => 'shift',
    'unshift_subsample' => 'unshift',
    'clear_subsample'   => 'clear',
  },
);

# the intermediate groups saved by merge for the counts listed in subsample
has 'sequence' => (
  traits  => ['Array'],
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
  handles => {
    'push_sequence'    => 'push',
    'pop_sequence'     => 'pop',
    'shift_sequence'   => 'shift',
    'unshift_sequence' => 'unshift',
    'clear_sequence'   => 'clear',
  },
);

# files that merge left out because they do not exist, are not readable,
# or are smaller than margin * (reference size)
has 'skipped' => (
  traits  => ['Array'],
  is      => 'rw',
  isa     => 'ArrayRef',
  default => sub { [] },
  handles => {
    'push_skipped'    => 'push',
    'pop_skipped'     => 'pop',
    'shift_skipped'   => 'shift',
    'unshift_skipped' => 'unshift',
    'clear_skipped'   => 'clear',
  },
);

# The master is the Demeter::Data object that the merge is built around.
# Whenever a true value is assigned to it, the trigger clones it into the
# sum attribute, calls the clone's standard method, and resets the clone's
# column-related attributes (is_col, the i0/signal strings and scales).
has 'master' => (
  is      => 'rw',
  isa     => 'Demeter::Data',
  trigger => sub {
    my ($self, $new) = @_;
    return if not $new;
    $self->sum($new->Clone);
    $self->sum->standard;
    $self->sum->set(is_col=>0, i0_string=>q{}, signal_string=>q{}, i0_scale=>1, signal_scale=>1);
  },
);

# the group returned by merge; created by the master trigger above
has 'sum' => (
  is  => 'rw',
  isa => 'Demeter::Data',
);

# Moose construction hook: hand each newly built object to the
# push_BulkMerge method of the object returned by mo.  Any further
# constructor arguments are not used.
sub BUILD {
  my $self = shift;
  my $mode = $self->mo;
  $mode->push_BulkMerge($self);
};


# Merge the files listed in the data attribute and return the sum group.
#
# Each file is checked first: files that do not exist, are not readable, or
# are smaller than margin * (reference size) are pushed onto the skipped
# list.  The reference size is the size attribute or, when that is 0, the
# size on disk of the master's source file.  Every remaining file is
# imported into a throw-away Demeter::Data object (through the named plugin
# when the plugin attribute is set, otherwise using the master's column
# selection), optionally aligned to the master, and processed with the
# 'musum' template.  Whenever the running count matches an entry of the
# subsample attribute, a clone of the sum group is processed with the
# 'muave' template and pushed onto the sequence list.
#
# Returns: the Demeter::Data object held in the sum attribute.
# Dies:    if no master has been set, or if any step of the import fails.
#          The plot object's e_smooth value is restored in both cases.
sub merge {
  my ($self) = @_;

  # Without a master there is no sum group and no reference file to take a
  # size from.  Say so, rather than failing later with "Can't call method
  # ... on an undefined value".
  die "Demeter::Data::BulkMerge: the master attribute must be set before calling merge\n"
    if not defined $self->master;

  # The plot object's e_smooth setting is overridden while merging, so
  # remember the current value in order to put it back afterwards.
  my $save = $self->po->e_smooth;
  $self->po->set(e_smooth=>$self->smooth);

  # Run the merge inside an eval so that e_smooth is restored even when
  # something dies part way through; the error is re-thrown below.
  my $finished = eval {
    my $size  = $self->size || -s $self->master->source;
    my $group = 'mega';		# every file is imported into this one group name
    my ($plug, $thisdata);
    # the count starts at 1, presumably because the master (already cloned
    # into the sum group) is the first spectrum -- TODO confirm
    my $count = 1;
    $self->sum -> start_counter("Merging data", $#{$self->data}) if $self->mo->ui eq 'screen';

    # NOTE(review): the screen mode is switched off here and never switched
    # back on -- confirm that this is intended
    Demeter->set_mode(screen=>0);
    foreach my $file (@{$self->data}) {
      if (!-e $file or !-r $file or (-s $file) < $self->margin*$size) {
        $self->push_skipped($file);
        next;
      };
      last if ($count == $self->max);
      ++$count;
      $self->sum -> count if $self->mo->ui eq 'screen';

      if ($self->plugin) {
        my $which = 'Demeter::Plugins::' . $self->plugin;
        $plug     = $which->new(file=>$file);
        eval {$plug->fix};
        die $@ if $@;
        $thisdata = Demeter::Data->new(group  => $group, quickmerge=>1, file=>$plug->fixed, $plug->suggest('fluorescence'),
                                       bkg_e0 => $self->master->bkg_e0,
                                      );
      } else {
        $thisdata = Demeter::Data->new(group=>$group, quickmerge=>1, file=>$file,
                                       energy      => $self->master->energy,
                                       numerator   => $self->master->numerator,
                                       denominator => $self->master->denominator,
                                       ln          => $self->master->ln,
                                       bkg_e0      => $self->master->bkg_e0,
                                      );
      };
      $thisdata -> _update('data');
      $self->master -> align($thisdata) if $self->align;
      $thisdata -> dispense('process', 'musum');
      if (any {$count == $_} @{$self->subsample}) {
        $self -> dispense('process', 'comment', {comment=>"Quick merge subsample of $count spectra"});
        my $sample = $self->sum->Clone;
        $sample -> set(name=>"Merge of $count scans", is_col=>0, i0_string=>q{}, signal_string=>q{}, i0_scale=>1, signal_scale=>1);
        $sample -> update_norm(1);
        $sample -> dispense('process', 'muave', {count=>$count});
        $self->push_sequence($sample);
      };
      $thisdata->DEMOLISH;
      # the plugin's fixed file is only needed for the import above
      unlink $plug->fixed if $self->plugin;
    };
    $self->sum -> stop_counter if $self->mo->ui eq 'screen';
    $self->count($count);

    $self->sum -> dispense('process', 'muave', {count=>$self->count});
    $self->sum -> update_norm(1);
    $self->sum -> name("Merge of $count scans");

    $self->sum->xdi_make_clone($self->master, sprintf("BulkMerge of %d scans", $#{$self->data}+1), 1) if (Demeter->xdi_exists);
    1;
  };
  my $error = $@;		# capture before any other call can clobber $@

  $self->po->set(e_smooth=>$save);
  die($error || "Demeter::Data::BulkMerge: merge failed\n") if not $finished;
  return $self->sum;
};





__PACKAGE__->meta->make_immutable;
1;


=head1 NAME

Demeter::Data::BulkMerge - Efficiently merge many files into a single spectrum

=head1 VERSION

This documentation refers to Demeter version 0.9.26.

=head1 DESCRIPTION

This object provides an efficient way to merge a large number of files
into a single spectrum.  The assumption is that the user is not
interested in having each individual file processed.  This would be
the case for measuring many repetitions for the sake of improving the
statistical quality of the data.

  my $data = Demeter::Data->new(file=>$file, ...);
  my $bulk = Demeter::Data::BulkMerge->new(master => $data,
                                           data => \@list_of_files);
  my $merged = $bulk->merge;
  $_->plot('E') foreach ($data, $merged);

The trick is that each file is only imported to the point of having
arrays for energy and xmu.  Each file in the list is imported to the
same group.  The merge is computed by accumulation and divided by the
total number of scans.

This requires that one data file be considered carefully.  This is the
C<master>.  All other data are interpolated to the energy grid of the
C<master>.

Care is taken not to include files which are less than 99.7%
(configurable with the C<margin> attribute) of the size of the master
data file.

Note that preprocessing the data takes time.  A run with the C<plugin>
and C<align> attributes set takes about twice as long as a straight
merge of the raw data.

=head1 ATTRIBUTES

=over 4

=item C<master> [Demeter::Data object]

This contains the L<Demeter::Data> object for the processed data group
to which all subsequent data files are merged.  This acts as the
interpolation standard and as the alignment standard (if the C<align>
attribute is true).  The merged data group will inherit attributes
from this group.  So, if the master has sensible parameters for
normalization and background removal, the merged group will have the
same sensible parameters.

=item C<data> [list of strings]

This is a list of paths to the data files to be merged.  These can be
relative or absolute paths, but they B<must> resolve correctly to
actual files.  Files that don't exist, aren't readable, or are too
small (see C<margin>) are left out of the merge and recorded in the
C<skipped> attribute.

=item C<align> [boolean]

When true, this says to align each file in the C<data> list to the
C<master>.

=item C<smooth> [integer]

When non-zero, the alignment will be done using the smoothed
derivative spectrum.  The value of this parameter indicates the number
of smoothings.

=item C<plugin> [string]

The name of the plugin to use to interpret the data.  For example, to
use the L<Demeter::Plugins::X23A2MED> plugin, this attribute would be
set to C<X23A2MED>.

=item C<margin> [number between 0 and 1]

This number defines the margin in filesize outside of which a data
file is excluded from the merge.  The default is 0.997, thus any file
in the C<data> list which is smaller than 99.7% of the size of the
C<master> file (or of the C<size> attribute, when that is non-zero)
will be excluded.

=item C<subsample>  [array of integers]

This is used to specify sub-samplings of the data ensemble, presumably
to test convergence to the mean.  If this is set to C<[4, 16, 64]>
then Data groups will be saved which sum 4, 16, and 64 of the files
included in the merge.  The sub-sampled Data groups are saved to the
C<sequence> attribute.

=item C<sequence>  [array of Data objects]

Data objects from a sub-sampling sequence.

=back

=head1 METHODS

=over 4

=item C<merge>

Performs the merge using some special optimizations that minimize the
interaction with the data processing backend (Ifeffit/Larch).  This
returns a Data object containing the merged spectrum, divided by the
number of spectra included in the merge.

=back

=head1 CONFIGURATION

There are no configuration options for this class.

See L<Demeter::Config> for a description of Demeter's
configuration system.

=head1 DEPENDENCIES

Demeter's dependencies are in the F<Build.PL> file.

=head1 BUGS AND LIMITATIONS

=over 4

=item *

A file that exists and is readable, but is not data will make for a
confusing error

=item *

Standard deviation not computed

=back

Please report problems to the Ifeffit Mailing List
(L<http://cars9.uchicago.edu/mailman/listinfo/ifeffit/>)

Patches are welcome.

=head1 AUTHOR

Bruce Ravel, L<http://bruceravel.github.io/home>

L<http://bruceravel.github.io/demeter/>

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2006-2019 Bruce Ravel (L<http://bruceravel.github.io/home>). All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlgpl>.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut