1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
|
package Demeter::Data::BulkMerge;
use Moose;
extends 'Demeter';
with 'Demeter::Data::XDI';
use List::MoreUtils qw(any);
# Scalar settings for a bulk merge.  Every one is a read-write attribute.

# When true, merge() aligns each imported file to the master.
has align => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# Copied into the plot object's e_smooth setting while merge() runs.
has smooth => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);

# Name appended to 'Demeter::Plugins::' to find the plugin class used to
# pre-process each file.  The empty string means no plugin is used.
has plugin => (
    is      => 'rw',
    isa     => 'Str',
    default => q{},
);

# merge() stops reading files once its running count equals this value.
has max => (
    is      => 'rw',
    isa     => 'Int',
    default => 1e9,
);

# Reference file size in bytes.  When 0, merge() falls back to the size of
# the master's source file.
has size => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);

# Files smaller than margin * (reference size) are skipped by merge().
has margin => (
    is      => 'rw',
    isa     => 'LaxNum',
    default => 0.997,
);

# Set by merge() to its final running count, which starts at 1.
has count => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);
# The files to be merged.  merge() walks this list in order.
has data => (
    is      => 'rw',
    isa     => 'ArrayRef',
    traits  => [qw(Array)],
    default => sub { return [] },
    handles => {
        clear_data   => 'clear',
        pop_data     => 'pop',
        push_data    => 'push',
        shift_data   => 'shift',
        unshift_data => 'unshift',
    },
);
# Running counts at which merge() takes a snapshot of the accumulated sum
# and stores it in the sequence attribute.
has subsample => (
    is      => 'rw',
    isa     => 'ArrayRef[Int]',
    traits  => [qw(Array)],
    default => sub { return [] },
    handles => {
        clear_subsample   => 'clear',
        pop_subsample     => 'pop',
        push_subsample    => 'push',
        shift_subsample   => 'shift',
        unshift_subsample => 'unshift',
    },
);
# Snapshots of the partial merge, pushed by merge() each time its running
# count matches an entry of the subsample attribute.
has sequence => (
    is      => 'rw',
    isa     => 'ArrayRef',
    traits  => [qw(Array)],
    default => sub { return [] },
    handles => {
        clear_sequence   => 'clear',
        pop_sequence     => 'pop',
        push_sequence    => 'push',
        shift_sequence   => 'shift',
        unshift_sequence => 'unshift',
    },
);
# Files that merge() left out because they were missing, unreadable, or
# smaller than the size margin allows.
has skipped => (
    is      => 'rw',
    isa     => 'ArrayRef',
    traits  => [qw(Array)],
    default => sub { return [] },
    handles => {
        clear_skipped   => 'clear',
        pop_skipped     => 'pop',
        push_skipped    => 'push',
        shift_skipped   => 'shift',
        unshift_skipped => 'unshift',
    },
);
# The Data object that every other file is merged against.  Assigning a
# master also seeds the sum attribute with a clone of it.
has master => (
    is      => 'rw',
    isa     => 'Demeter::Data',
    trigger => sub {
        my ($self, $master) = @_;
        return if not $master;

        # Start the accumulator from a copy so the master itself is untouched.
        my $accumulator = $master->Clone;
        $self->sum($accumulator);
        $accumulator->standard;

        # Clear the column bookkeeping on the clone: is_col, the i0/signal
        # strings, and the i0/signal scales.
        $accumulator->set(
            is_col        => 0,
            i0_string     => q{},
            signal_string => q{},
            i0_scale      => 1,
            signal_scale  => 1,
        );
    },
);
# Accumulator for the merge.  Seeded by the master trigger and returned by
# merge().
has sum => (
    is  => 'rw',
    isa => 'Demeter::Data',
);
# Moose construction hook: register the new object with the mode object by
# calling its push_BulkMerge method.
sub BUILD {
    my $self = shift;
    return $self->mo->push_BulkMerge($self);
}
# Merge every usable file from the data attribute into the sum attribute.
#
# Each file is imported into one scratch group, optionally pre-processed by
# a plugin and aligned to the master, and then added to the sum via the
# 'musum' template.  When the loop finishes the sum is averaged via the
# 'muave' template, renormalized, and renamed.
#
# Files that do not exist, are not readable, or are smaller than
# margin * (reference size) are pushed onto the skipped attribute instead.
# Whenever the running count matches an entry of the subsample attribute, a
# snapshot of the partial merge is pushed onto the sequence attribute.
#
# Returns: the Data object held in the sum attribute.
# Dies:    rethrows any exception raised by the plugin's fix method, after
#          restoring the plot object's e_smooth setting.
sub merge {
    my ($self) = @_;

    # e_smooth lives on the shared plot object, so remember the caller's
    # value and put it back on every exit path handled below.
    my $save = $self->po->e_smooth;
    $self->po->set(e_smooth => $self->smooth);

    # Reference size for the margin test: the explicit size attribute, or
    # else the size on disk of the master's source file.
    my $size  = $self->size || -s $self->master->source;
    my $group = 'mega';    # every file is imported into this one group name
    my ($plug, $thisdata);

    # The sum starts out as a clone of the master, so the running count
    # presumably starts at 1 to include it -- TODO confirm.
    my $count = 1;

    $self->sum->start_counter("Merging data", $#{$self->data}) if $self->mo->ui eq 'screen';

    # NOTE(review): the screen mode is switched off here and never switched
    # back on in this method -- confirm that is intended.
    Demeter->set_mode(screen => 0);

    FILE:
    foreach my $file (@{$self->data}) {
        if (   (not -e $file)
            or (not -r $file)
            or ((-s $file) < $self->margin * $size)) {
            $self->push_skipped($file);
            next FILE;
        }
        last FILE if ($count == $self->max);
        ++$count;
        $self->sum->count if $self->mo->ui eq 'screen';

        if ($self->plugin) {
            my $which = 'Demeter::Plugins::' . $self->plugin;
            $plug = $which->new(file => $file);
            eval { $plug->fix };
            if (my $error = $@) {
                # Do not leave the shared plot object altered when the
                # plugin fails; restore it, then propagate the error.
                $self->po->set(e_smooth => $save);
                die $error;
            }
            $thisdata = Demeter::Data->new(
                group      => $group,
                quickmerge => 1,
                file       => $plug->fixed,
                $plug->suggest('fluorescence'),
                bkg_e0     => $self->master->bkg_e0,
            );
        }
        else {
            # No plugin: read the same columns that the master was built from.
            $thisdata = Demeter::Data->new(
                group       => $group,
                quickmerge  => 1,
                file        => $file,
                energy      => $self->master->energy,
                numerator   => $self->master->numerator,
                denominator => $self->master->denominator,
                ln          => $self->master->ln,
                bkg_e0      => $self->master->bkg_e0,
            );
        }

        $thisdata->_update('data');
        $self->master->align($thisdata) if $self->align;
        $thisdata->dispense('process', 'musum');

        if (any { $count == $_ } @{$self->subsample}) {
            $self->dispense('process', 'comment', {comment => "Quick merge subsample of $count spectra"});
            my $sample = $self->sum->Clone;
            $sample->set(
                name          => "Merge of $count scans",
                is_col        => 0,
                i0_string     => q{},
                signal_string => q{},
                i0_scale      => 1,
                signal_scale  => 1,
            );
            $sample->update_norm(1);
            $sample->dispense('process', 'muave', {count => $count});
            $self->push_sequence($sample);
        }

        # Tear the scratch object down explicitly and remove the temporary
        # file written by the plugin before moving on to the next file.
        $thisdata->DEMOLISH;
        unlink $plug->fixed if $self->plugin;
    }

    $self->sum->stop_counter if $self->mo->ui eq 'screen';
    $self->count($count);
    $self->sum->dispense('process', 'muave', {count => $self->count});
    $self->sum->update_norm(1);
    $self->sum->name("Merge of $count scans");

    # NOTE(review): this label reports the length of the data list rather
    # than $count, so it can disagree with the group name when files were
    # skipped or max was reached -- confirm which number is intended.
    $self->sum->xdi_make_clone($self->master, sprintf("BulkMerge of %d scans", $#{$self->data}+1), 1) if (Demeter->xdi_exists);

    $self->po->set(e_smooth => $save);
    return $self->sum;
}
# Finalize the class definition through its Moose metaclass.
__PACKAGE__->meta->make_immutable;
# A Perl module file must end by returning a true value.
1;
=head1 NAME
Demeter::Data::BulkMerge - Efficiently merge many files into a single spectrum
=head1 VERSION
This documentation refers to Demeter version 0.9.26.
=head1 DESCRIPTION
This object provides an efficient way to merge a large number of files
into a single spectrum. The assumption is that the user is not
interested in having each individual file processed. This would be
the case when measuring many repetitions for the sake of improving the
statistical quality of the data.
my $data = Demeter::Data->new(file=>$file, ...);
my $bulk = Demeter::Data::BulkMerge->new(master => $data,
data => \@list_of_files);
my $merged = $bulk->merge;
$_->plot('E') foreach ($data, $merged);
The trick is that each file is only imported to the point of having
arrays for energy and xmu. Each file in the list is imported to the
same group. The merge is computed by accumulation and divided by the
total number of scans.
This requires that one data file be considered carefully. This is the
C<master>. All other data are interpolated to the energy grid of the
C<master>.
Care is taken not to include files which are smaller than 99.7% (the
default, configurable with the C<margin> attribute) of the size of the
master data file.
Note that preprocessing the data takes time. A run with the C<plugin>
and C<align> attributes set takes about twice as long as a straight
merge of the raw data.
=head1 ATTRIBUTES
=over 4
=item C<master> [Demeter::Data object]
This contains the L<Demeter::Data> object for the processed data group
to which all subsequent data files are merged. This acts as the
interpolation standard and as the alignment standard (if the C<align>
attribute is true). The merged data group will inherit attributes
from this group. So, if the master has sensible parameters for
normalization and background removal, the merged group will have the
same sensible parameters.
=item C<data> [list of strings]
This is a list of paths to the data files to be merged.  These can be
relative or absolute paths, but they B<must> resolve correctly to
actual files.  Files that don't exist, aren't readable, or are too
small (see C<margin>) are left out of the merge and recorded in the
C<skipped> attribute.
=item C<align> [boolean]
When true, this says to align each file in the C<data> list to the
C<master>.
=item C<smooth> [integer]
When non-zero, the alignment will be done using the smoothed
derivative spectrum. The value of this parameter indicates the number
of smoothings.
=item C<plugin> [string]
The name of the plugin to use to interpret the data. The name is
appended to C<Demeter::Plugins::> to find the plugin class. For
example, to use the L<Demeter::Plugins::X23A2MED> plugin, this
attribute would be set to C<X23A2MED>.
=item C<margin> [number between 0 and 1]
This number defines the margin in filesize outside of which a data
file is excluded from the merge. The default is 0.997, thus any file
in the C<data> list which is smaller than 99.7% the size of the
C<master> file will be excluded.
=item C<subsample> [array of integers]
This is used to specify sub-samplings of the data ensemble, presumably
to test convergence to the mean. If this is set to C<[4, 16, 64]>
then Data groups will be saved which sum 4, 16, and 64 of the files
included in the merge. The sub-sampled Data groups are saved to the
C<sequence> attribute.
=item C<sequence> [array of Data objects]
Data objects from a sub-sampling sequence.
=back
=head1 METHODS
=over 4
=item C<merge>
Performs the merge using some special optimizations that minimize the
interaction with the data processing backend (Ifeffit/Larch). This
returns a Data object containing the merged spectrum, divided by the
number of spectra included in the merge.
=back
=head1 CONFIGURATION
There are no configuration options for this class.
See L<Demeter::Config> for a description of Demeter's
configuration system.
=head1 DEPENDENCIES
Demeter's dependencies are in the F<Build.PL> file.
=head1 BUGS AND LIMITATIONS
=over 4
=item *
A file that exists and is readable, but does not contain data, will
produce a confusing error.
=item *
Standard deviation not computed
=back
Please report problems to the Ifeffit Mailing List
(L<http://cars9.uchicago.edu/mailman/listinfo/ifeffit/>)
Patches are welcome.
=head1 AUTHOR
Bruce Ravel, L<http://bruceravel.github.io/home>
L<http://bruceravel.github.io/demeter/>
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2006-2019 Bruce Ravel (L<http://bruceravel.github.io/home>). All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlgpl>.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
|