File: 532-sort_collector.t

package info (click to toggle)
liblucy-perl 0.3.3-4
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 9,328 kB
  • ctags: 8,492
  • sloc: ansic: 80,468; perl: 7,080; yacc: 681; java: 174; lex: 96; makefile: 20
file content (111 lines) | stat: -rw-r--r-- 3,563 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use strict;
use warnings;

use Test::More tests => 32;
use Lucy::Test;
use List::Util qw( shuffle );
use LucyX::Search::MockMatcher;

# Schema: three fields, all sharing one sortable StringType.
my $schema = Lucy::Plan::Schema->new;
my $type   = Lucy::Plan::StringType->new( sortable => 1 );
for my $field_name (qw( letter number id )) {
    $schema->spec_field( name => $field_name, type => $type );
}

# Index lives in a RAMFolder.
my $folder  = Lucy::Store::RAMFolder->new;
my $indexer = Lucy::Index::Indexer->new(
    schema => $schema,
    index  => $folder,
);

# Fixture docs: three hand-picked docs first, then ids 4 .. 100 with a
# randomly chosen letter and number each.
my @letters = 'a' .. 'z';
my @numbers = 1 .. 5;
my @docs    = (
    { letter => 'c', number => '4', id => 1 },
    { letter => 'b', number => '2', id => 2 },
    { letter => 'a', number => '5', id => 3 },
);
push @docs,
    {
    letter => $letters[ rand @letters ],
    number => $numbers[ rand @numbers ],
    id     => $_,
    }
    for 4 .. 100;

# Add every doc in array order, then commit.
for my $doc (@docs) {
    $indexer->add_doc($doc);
}
$indexer->commit;

# Open the index and take its first segment reader.
my $polyreader = Lucy::Index::IndexReader->open( index => $folder );
my ($seg_reader) = @{ $polyreader->get_seg_readers };

# Sort by the 'letter' field first, then by doc id.
my @letter_rules = (
    Lucy::Search::SortRule->new( field => 'letter' ),
    Lucy::Search::SortRule->new( type  => 'doc_id' ),
);
my $by_letter = Lucy::Search::SortSpec->new( rules => \@letter_rules );

# Collector that keeps only the single best hit.
my $collector = Lucy::Search::Collector::SortCollector->new(
    wanted    => 1,
    schema    => $schema,
    sort_spec => $by_letter,
);
$collector->set_reader($seg_reader);

# Feed doc ids 1 .. 100 straight into the collector.
for my $doc_id ( 1 .. 100 ) {
    $collector->collect($doc_id);
}

# The third fixture doc has letter 'a'; presumably doc ids follow insertion
# order, so any randomly generated 'a' doc has a higher doc id and loses
# the doc_id tie-break.
my $match_docs = $collector->pop_match_docs;
my $top_doc_id = $match_docs->[0]->get_doc_id;
is( $top_doc_id, 3, "Early doc numbers preferred by collector" );

# Build 30 [ doc_id, score ] pairs.  Doc ids are unique random integers in
# 1 .. 10000; scores are random floats in [0, 10).
my @docs_and_scores;
my %uniq_doc_ids;
for ( 1 .. 30 ) {
    my $doc_num;

    # Redraw until we hit a doc id that has not been used yet.
    do {
        $doc_num = int( rand(10000) ) + 1;
    } while ( $uniq_doc_ids{$doc_num} );
    $uniq_doc_ids{$doc_num} = 1;
    push @docs_and_scores, [ $doc_num, rand(10) ];
}

# Order the pairs by ascending doc id; @doc_ids and @scores below are
# parallel arrays extracted in this order.
@docs_and_scores = sort { $a->[0] <=> $b->[0] } @docs_and_scores;

# Expected ranking: highest score first, ties broken by ascending doc id.
# The tie-breaker must compare doc ids (index 0); comparing the scores a
# second time would always yield 0 and leave tie order up to sort().
my @ranked
    = sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @docs_and_scores;
my @doc_ids = map { $_->[0] } @docs_and_scores;
my @scores  = map { $_->[1] } @docs_and_scores;

# For every possible 'wanted' value from 0 up to the number of docs, run a
# fresh MockMatcher through a fresh SortCollector and check that the
# collected doc ids equal the first $size entries of @ranked.
for my $size ( 0 .. scalar @doc_ids ) {
    my $mock_matcher = LucyX::Search::MockMatcher->new(
        scores  => \@scores,
        doc_ids => \@doc_ids,
    );
    my $sort_collector
        = Lucy::Search::Collector::SortCollector->new( wanted => $size );
    $sort_collector->set_matcher($mock_matcher);
    $mock_matcher->collect( collector => $sort_collector );

    # When $size is 0 the range 0 .. -1 is empty, so @wanted is empty too.
    my @wanted = map { $_->[0] } @ranked[ 0 .. $size - 1 ];

    my $popped = $sort_collector->pop_match_docs;
    my @got    = map { $_->get_doc_id } @$popped;
    is_deeply( \@got, \@wanted, "random docs and scores, wanted = $size" );
}