1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
|
package Catmandu::Store::Solr;
use Catmandu::Sane;
use Catmandu::Util qw(:is :array);
use Moo;
use MooX::Aliases;
use WebService::Solr;
use Catmandu::Store::Solr::Bag;
use Catmandu::Error;
use LWP::UserAgent;
with 'Catmandu::Store';
with 'Catmandu::Transactional';
=head1 NAME
Catmandu::Store::Solr - A searchable store backed by Solr
=cut
# Package version string.
our $VERSION = '0.0304';
=head1 SYNOPSIS
# From the command line
# Import data into Solr
$ catmandu import JSON to Solr < data.json
# Export data from Solr
$ catmandu export Solr to JSON > data.json
# Export only one record
$ catmandu export Solr --id 1234
# Export using a Solr query
$ catmandu export Solr --query "name:Recruitment OR name:college"
# Export using a CQL query (needs a CQL mapping)
$ catmandu export Solr --q "name any college"
# From Perl
use Catmandu::Store::Solr;
my $store = Catmandu::Store::Solr->new(url => 'http://localhost:8983/solr' );
my $obj1 = $store->bag->add({ name => 'Patrick' });
printf "obj1 stored as %s\n" , $obj1->{_id};
# Force an id in the store
my $obj2 = $store->bag->add({ _id => 'test123' , name => 'Nicolas' });
# send all changes to solr (committed automatically)
$store->bag->commit;
#transaction: rollback issued after 'die'
$store->transaction(sub{
$store->bag->delete_all();
die("oops, didn't want to do that!");
});
my $obj3 = $store->bag->get('test123');
$store->bag->delete('test123');
$store->bag->delete_all;
# All bags are iterators
$store->bag->each(sub { ... });
$store->bag->take(10)->each(sub { ... });
# Search
# Any extra arguments will be passed on as is to Solr
my $hits = $store->bag->search(query => 'name:Patrick');
=cut
# Base URL of the Solr core this store talks to.
has url => (
    is      => 'ro',
    default => sub { 'http://localhost:8983/solr' },
);

# Passed as 'keep_alive' to the LWP::UserAgent created in _build_solr.
has keep_alive => (
    is      => 'ro',
    default => sub { 0 },
);

# WebService::Solr client, created on first use by _build_solr.
has solr => (is => 'lazy');

# Name of the bag field, created on first use by _build_bag_key.
# Also reachable under the alias 'bag_field'.
has bag_key => (
    is    => 'lazy',
    alias => 'bag_field',
);

# Error policy; only the values 'throw' and 'ignore' are accepted.
has on_error => (
    is   => 'ro',
    lazy => 1,
    isa  => sub {
        my ($value) = @_;
        die("on_error must be 'throw' or 'ignore'")
            unless array_includes([qw(throw ignore)], $value);
    },
    default => sub { 'throw' },
);

# Names of all bags requested through bag(); consulted by transaction().
has _bags_used => (
    is      => 'ro',
    lazy    => 1,
    default => sub { [] },
);
# Wrap bag() so the store records every bag name that is requested.
# transaction() walks this list to flush or clear the buffers of all bags.
around 'bag' => sub {
    my ($next, $store, @args) = @_;

    # Only the first argument (the bag name) is tracked, and only once.
    my $name  = $args[0];
    my $known = $store->_bags_used;
    push @{$known}, $name unless array_includes($known, $name);

    return $next->($store, @args);
};
# Builder for the lazy 'solr' attribute.
#
# Returns a WebService::Solr client for $self->url with autocommit switched
# off, JSON as the default response format, and an LWP::UserAgent configured
# with the store's keep_alive setting.
sub _build_solr {
    my ($self) = @_;

    my $url     = $self->url;
    my %options = (
        autocommit     => 0,
        default_params => {wt => 'json'},
        agent => LWP::UserAgent->new(keep_alive => $self->keep_alive),
    );

    return WebService::Solr->new($url, \%options);
}
# Builder for the lazy 'bag_key' attribute: delegates to key_for('bag').
sub _build_bag_key {
    my ($self) = @_;
    return $self->key_for('bag');
}
# Run $sub as a store-level transaction.
#
# Parameters:
#   $sub - code reference holding the work to perform.
#
# Returns the list returned by $sub. A nested call (made while a transaction
# is already running) just invokes $sub and returns its result.
#
# Dies: on any error the buffers of all known bags are cleared, a rollback is
# sent to Solr and the error is rethrown. Plain string errors are wrapped in
# a Catmandu::Error; exception objects and other references are rethrown
# unchanged so that their class and payload are not lost.
sub transaction {
    my ($self, $sub) = @_;

    # Nested call: the outer transaction owns the commit/rollback.
    if ($self->{_tx}) {
        return $sub->();
    }

    my $solr = $self->solr;
    my @res;

    eval {
        # flush buffers of all known bags (with commit=true), to ensure
        # correct state
        for my $bag_name (@{$self->_bags_used}) {
            $self->bag($bag_name)->commit;
        }

        # mark store as 'in transaction'. All subsequent calls to commit
        # only flush buffers without setting 'commit' to 'true' in solr
        $self->{_tx} = 1;

        # transaction
        @res = $sub->();

        # flush buffers of all known bags (with commit=false)
        for my $bag_name (@{$self->_bags_used}) {
            $self->bag($bag_name)->commit;
        }

        # commit in solr
        $solr->commit;

        # remove mark 'in transaction'
        $self->{_tx} = 0;

        1;
    } or do {
        # Save the error first: the nested eval below resets $@.
        my $err = $@;

        # remove remaining documents from all buffers, because they were
        # added during the transaction
        for my $bag_name (@{$self->_bags_used}) {
            $self->bag($bag_name)->clear_buffer;
        }

        # rollback in solr; a failing rollback must not hide the real error
        eval {$solr->rollback};

        # remove mark 'in transaction'
        $self->{_tx} = 0;

        # Keep exception objects / references intact; only plain string
        # errors are wrapped in a Catmandu::Error.
        die $err if ref $err;
        Catmandu::Error->throw($err);
    };

    @res;
}
=head1 SOLR SCHEMA
The Solr schema needs to support at least the identifier field (C<_id> by default) and a bag
field (C<_bag> by default) to be able to store Catmandu items:
# In schema.xml
<field name="_id" type="string" indexed="true" stored="true" required="true" />
<field name="_bag" type="string" indexed="true" stored="true" required="true" />
The names of these fields can optionally be changed using the C<id_field> and C<bag_field>
configuration parameters of L<Catmandu::Store::Solr>.
The C<_id> will contain the record identifier. The C<_bag> field will contain a string
to support L<Catmandu::Bag>-s in Solr.
=head1 CONFIGURATION
=over
=item url
URL of Solr core
Default: C<http://localhost:8983/solr>
=item id_field
Name of unique field in Solr core.
Default: C<_id>
This Solr field is mapped to C<_id> when retrieved
=item bag_field
Name of field in Solr we can use to split the core into 'bags'.
Default: C<_bag>
This Solr field is mapped to C<_bag> when retrieved
=item on_error
Action to take when records cannot be saved to Solr. One of C<throw> or C<ignore>. Default: C<throw>.
=back
=head1 METHODS
=head2 new( url => $url )
=head2 new( url => $url, id_field => '_id', bag_field => '_bag' )
=head2 new( url => $url, bags => { data => { cql_mapping => \%mapping } } )
Creates a new Catmandu::Store::Solr store connected to a Solr core, specified by $url.
The store supports CQL searches when a cql_mapping is provided. This hash
contains a translation of CQL fields into Solr searchable fields.
# Example mapping
$cql_mapping = {
title => {
op => {
'any' => 1 ,
'all' => 1 ,
'=' => 1 ,
'<>' => 1 ,
'exact' => {field => 'mytitle.exact' }
} ,
sort => 1,
field => 'mytitle',
cb => ['Biblio::Search', 'normalize_title']
}
}
The CQL mapping above supports the CQL operators any, all, =, <> and exact for the 'title' field.
For all operators the 'title' field is mapped to the Solr field 'mytitle', except
for the 'exact' operator. In case of 'exact' the field 'mytitle.exact' is searched.
The CQL mapping has an optional callback field 'cb' which names a package and a subroutine used to rewrite or
augment the search query. In this case, in the Biblio::Search package there is a normalize_title
subroutine which returns a string or an ARRAY of strings with augmented title(s). E.g.
package Biblio::Search;
sub normalize_title {
my ($self,$title) = @_;
(my $new_title = $title) =~ s{[^A-Z0-9]+}{}g;
$new_title;
}
1;
=head2 transaction
When you issue $bag->commit, all changes made in the buffer are sent to solr, along with a commit.
So committing in Catmandu merely means flushing changes;-).
When you wrap your subroutine within 'transaction', this behaviour is disabled temporarily.
When you call 'die' within the subroutine, a rollback is sent to solr.
Remember that transactions happen at store level: after the transaction, all buffers of all bags are flushed to solr,
and a commit is issued in solr.
# Record 'test' added
$bag->add({ _id => "test" });
# Buffer flushed, and 'commit' sent to solr
$bag->commit();
$bag->store->transaction(sub{
$bag->add({ _id => "test",title => "test" });
# Call to die: rollback sent to solr
die("oops, didn't want to do that!");
});
# Record is still { _id => "test" }
=head1 INHERITED METHODS
This Catmandu::Store implements:
=over 3
=item L<Catmandu::Store>
=item L<Catmandu::Transactional>
=back
Each Catmandu::Bag in this Catmandu::Store implements:
=over 3
=item L<Catmandu::Bag>
=item L<Catmandu::Searchable>
=item L<Catmandu::CQLSearchable>
=back
=head1 SEE ALSO
L<Catmandu::Store>, L<WebService::Solr>
=head1 AUTHOR
Nicolas Steenlant, C<< nicolas.steenlant at ugent.be >>
Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >>
Nicolas Franck, C<< nicolas.franck at ugent.be >>
Pieter De Praetere
=head1 LICENSE AND COPYRIGHT
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.
=cut
1;
|