File: test.xml

package info (click to toggle)
sphinxsearch 2.2.11-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 25,720 kB
  • sloc: cpp: 102,259; xml: 85,608; sh: 9,259; php: 3,790; ansic: 3,158; yacc: 1,969; java: 1,336; ruby: 1,289; python: 1,062; pascal: 912; perl: 381; lex: 275; makefile: 150; sql: 77; cs: 35
file content (171 lines) | stat: -rw-r--r-- 6,738 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
<?xml version="1.0" encoding="utf-8"?>

<test>
<name>snippets vs exact_phrase + snippets vs passage duplication</name>

<config>
searchd
{
	<searchd_settings/>
}

source src
{
	type = mysql
	<sql_settings/>
	sql_query = SELECT 1, 'text';
}

index idx
{
	source			= src
	path			= <data_path/>/test
}

index exact
{
	source			= src
	path			= <data_path/>/exact
	morphology = stem_en
	index_exact_words = 1
	blend_chars = (, ), -
	blend_mode = trim_none, trim_head, trim_tail, trim_both, skip_pure
}

index idx_ru
{
	source			= src
	path			= <data_path/>/idx_ru
	min_word_len = 1
    morphology = stem_enru
}
</config>

<db_insert>select 1;</db_insert>

<custom_test><![CDATA[

// Fixture: three paragraphs of filler words in which CAJUPUT and INVALID
// occur adjacent, in reverse order, and separated by another word; the run
// "CAJUPUT INVALID EXPOSITOR" appears in the third paragraph.
$text = <<<END
	one perfusate chubby profound chubby multiplicity promontory
	chubby pilotfish pilotfish chubby bisect pilotfish perfusate
	multiplicity whichsoever pilotfish INVALID CAJUPUT reedbuck
	whichsoever disposition felspar pilotfish reedbuck pilotfish
	detestable

	two bisect reedbuck whichsoever profound comforter detestable
	bisect comforter CAJUPUT felspar INVALID humanism profound multiplicity
	promontory profound reedbuck perfusate promontory felspar
	multiplicity reedbuck profound profound whirligig detestable
	whirligig INVALID

	three pilotfish promontory CAJUPUT INVALID CAJUPUT INVALID EXPOSITOR
	whirligig felspar disposition pilotfish chubby promontory
	pilotfish INVALID CAJUPUT detestable INVALID felspar whichsoever profound
	whichsoever comforter humanism detestable promontory comforter
	chubby CAJUPUT INVALID humanism CAJUPUT
END;

// Every phrase below is excerpted from the fixture against the 'idx' index
// with the exact_phrase option switched on.
$phrases = array ( 'cajuput invalid', 'cajuput invalid expositor' );
$phrase_opts = array ( 'exact_phrase' => true );

// $results accumulates, per call, the query followed by whatever
// BuildExcerpts returned for it.
$results = array();
foreach ( $phrases as $phrase )
{
	$excerpts = $client->BuildExcerpts ( array ( $text ), 'idx', $phrase, $phrase_opts );
	array_push ( $results, $phrase, $excerpts );
}

// query_mode case: a quoted phrase combined with a bare keyword, excerpted
// from a single document against 'idx' with 'around' set to 2.
$query = '"University and Harvard Law School" obama';
$text = 'A native of Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he waas the president of the Harvard Law Review. He was a community organizer in Chicago before earning his law degree. He worrked as a civil rights attorney in Chicago and taught constitutional law at the University of Chicago Law School from 19992 to 2004.Obama served three terms in the Illinois Senate from 1997 to 2004.';
$excerpt_opts = array ( 'query_mode' => true, 'around' => 2 );
$reply = $client->BuildExcerpts ( array ( $text ), 'idx', $query, $excerpt_opts );

// record the query first, then the reply
array_push ( $results, $query, $reply );

// Single document shared by every case in this group.
$text = 'This is a large house. Its a doggy house. The doggy house is most doggy here. There is no any doggy house around.';

// Ordered list of ( query, BuildExcerpts options ) pairs. The order matters:
// each case appends its query and then its reply to $results.
$cases = array
(
	// quoted phrase in query mode with weight_order on
	array ( '"the doggy house"', array ( 'query_mode'=>true, 'around'=> 2, 'weight_order'=>true, 'limit_words'=>20 ) ),

	// the same three words unquoted: without exact_phrase, then with it
	// (once under 'limit'/'limit_passages', once under 'limit_words')
	array ( 'the doggy house', array ( 'around'=> 3, 'limit'=>100, 'limit_passages'=>1 ) ),
	array ( 'the doggy house', array ( 'around'=> 3, 'exact_phrase'=>true, 'limit'=>100, 'limit_passages'=>1 ) ),
	array ( 'the doggy house', array ( 'around'=> 3, 'limit_words'=>6, 'exact_phrase'=>true ) ),

	// two-word query under a limit_words of 4
	array ( 'is most', array ( 'around'=> 2, 'limit_words'=>4 ) ),

	// %PASSAGE_ID% placed in before_match and/or after_match; the last three
	// cases also pass start_passage_id = 1000
	array ( 'is house', array ( 'around'=> 2, 'before_match'=>'<start of %PASSAGE_ID%>', 'after_match'=>'<%PASSAGE_ID% ends>' ) ),
	array ( 'is house', array ( 'around'=> 2, 'limit_words'=>10, 'before_match'=>'%PASSAGE_ID% !-! ', 'after_match'=>' !-! %PASSAGE_ID%', 'start_passage_id'=>1000 ) ),
	array ( 'is house', array ( 'around'=> 2, 'limit_words'=>10, 'before_match'=>'<b>', 'after_match'=>'</b %PASSAGE_ID%>', 'start_passage_id'=>1000 ) ),
	array ( 'is house', array ( 'around'=> 2, 'limit_words'=>10, 'before_match'=>'<b %PASSAGE_ID%>', 'after_match'=>'</b>', 'start_passage_id'=>1000 ) )
);

foreach ( $cases as $case )
{
	list ( $query, $opts ) = $case;
	$reply = $client->BuildExcerpts ( array ( $text ), 'idx', $query, $opts );
	array_push ( $results, $query, $reply );
}

// Two identical documents that start with '=', excerpted in query mode
// against the 'exact' index (configured with index_exact_words = 1).
// The label stored alongside the reply describes this as a crash case.
$query = '=welcome';
$docs = array ( $query, $query );
$reply = $client->BuildExcerpts ( $docs, 'exact', $query, array ( 'query_mode' => 1 ) );

// a descriptive label is recorded here instead of the raw query
array_push ( $results, "crash on exact words and 2 documents with '=' symbol", $reply );

// Exact-form query for a token made of blended characters: the 'exact' index
// declares '(', ')' and '-' as blend_chars. The query escapes them with
// backslashes and is padded with a space on each side.
$query = ' =\(12b\-1\) ';
$docs = array ( 'Distribution and Service (12b-1) fees' );

// The same document/query pair is run twice with different limit/around
// settings; each run is stored under its label (labels are kept verbatim).
// NOTE(review): the option key is spelled 'allows_empty', while the Sphinx API
// documents 'allow_empty' - presumably the key as written has no effect.
// Confirm against the stored reference output before renaming it.
$runs = array
(
	'exact-blened: plain path' => array ( 'query_mode'=>1, 'limit'=>15, 'around'=>2, 'allows_empty'=>1 ),
	'exact-blened: fast path' => array ( 'query_mode'=>1, 'limit'=>0, 'around'=>0, 'allows_empty'=>1 )
);

foreach ( $runs as $label => $opts )
{
	$reply = $client->BuildExcerpts ( $docs, 'exact', $query, $opts );
	array_push ( $results, $label, $reply );
}

// Two-keyword query over a document where each keyword is repeated many times
// on its own and both appear close together only near the end. The reply is
// stored under a descriptive label rather than the query text.
$docs = array ( 'Here are some word1 and word1 or word1, word1. More samples: word1, word1, word1. At the same time, there are more words like word2, word2, word2 and word2. But what should be highlighted is this: word1 or word2. Thats it.' );
$query = 'word1 word2';
$opts = array ( 'around' => 3, 'limit' => 60 );

$reply = $client->BuildExcerpts ( $docs, 'exact', $query, $opts );
array_push ( $results, 'explicit uniq qword weighting', $reply );

// One document starting with '=' is excerpted against the 'exact' index three
// times: unescaped '=' in query mode, backslash-escaped '=' in query mode, and
// backslash-escaped '=' with query mode off. Only the replies are recorded.
$welcome_doc = array ( '=welcome home' );
$variants = array
(
	array ( '=welcome', 1 ),
	array ( '\=welcome', 1 ),
	array ( '\=welcome', 0 )
);

foreach ( $variants as $variant )
{
	list ( $keyword, $mode ) = $variant;
	$results [] = $client->BuildExcerpts ( $welcome_doc, 'exact', $keyword, array ( 'query_mode' => $mode ) );
}

// Regression case for passage generation with weight_order enabled.
// A Russian document containing the keyword three times is excerpted against
// 'idx_ru' (stem_enru morphology, min_word_len = 1); only the reply is stored.
$keyword = 'масляков';
$passage_src = 'ждународного сeоюза КВН Александр Масляков и веедущий программы  Премьл Лига КВН  Александр Масляков младший   ши Масляков никогда не принимал';
$results [] = $client->BuildExcerpts (
	array ( $passage_src ),
	'idx_ru',
	$keyword,
	array ( 'around' => 4, 'weight_order' => true )
);

]]></custom_test>

</test>