1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
package Catmandu::Fix::trim;
use Catmandu::Sane;
our $VERSION = '1.2024';
use Moo;
use Catmandu::Util::Path qw(as_path);
use Catmandu::Util qw(trim);
use Unicode::Normalize;
use namespace::clean;
use Catmandu::Fix::Has;
with 'Catmandu::Fix::Builder';
# Path of the field to trim; handed to as_path() in _build_fixer.
# Declared as a fix argument (fix_arg) through Catmandu::Fix::Has.
has path => (fix_arg => 1);
# Trim mode, also a fix argument; defaults to 'whitespace' when omitted.
# _build_fixer handles the values 'whitespace', 'nonword' and 'diacritics'.
has mode => (fix_arg => 1, default => sub {'whitespace'});
# Build the fixer that rewrites every string value found at the configured
# path according to the configured mode.
#
# Supported modes:
#   whitespace - delegate to Catmandu::Util::trim
#   nonword    - strip the leading and the trailing run of \W characters
#   diacritics - NFKD-normalize the value, then delete every nonspacing mark
#                (applies to the whole value, not only its ends)
#
# Returns whatever as_path(...)->updater(...) returns.
# Croaks if the mode is not one of the supported names; previously an unknown
# mode silently handed an undefined callback to the updater.
sub _build_fixer {
    my ($self) = @_;

    # One callback per mode; each receives the current value as $_[0] and
    # returns the replacement value.
    my %trimmer_for = (
        whitespace => sub {
            trim($_[0]);
        },
        nonword => sub {
            my $val = $_[0];
            $val =~ s/^\W+//;
            $val =~ s/\W+$//;
            $val;
        },
        diacritics => sub {
            my $val = $_[0];
            $val = Unicode::Normalize::NFKD($val);
            $val =~ s/\p{NonspacingMark}//g;
            $val;
        },
    );

    my $mode = $self->mode;
    my $cb   = defined $mode ? $trimmer_for{$mode} : undef;

    # Fail fast with a readable message instead of passing undef downstream.
    unless ($cb) {
        require Carp;
        Carp::croak(
            sprintf "trim: unknown mode %s (expected one of: %s)",
            (defined $mode ? "'$mode'" : 'undef'),
            join(', ', sort keys %trimmer_for)
        );
    }

    return as_path($self->path)->updater(if_string => $cb);
}
1;
__END__
=pod
=encoding utf-8
=head1 NAME
Catmandu::Fix::trim - trim leading and trailing junk from the value of a field
=head1 SYNOPSIS
# the default mode trims whitespace
# e.g. foo => ' abc ';
trim(foo) # foo => 'abc';
trim(foo, whitespace) # foo => 'abc';
# trim non-word characters
# e.g. foo => ' abc / : .';
trim(foo, nonword) # foo => 'abc';
# strip diacritics (accents) from the whole value
# e.g. foo => 'français' ;
trim(foo,diacritics) # foo => 'francais'
=head1 SEE ALSO
L<Catmandu::Fix>
=cut
|