File: test.xml

package info (click to toggle)
sphinxsearch 2.2.11-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 25,720 kB
  • sloc: cpp: 102,259; xml: 85,608; sh: 9,259; php: 3,790; ansic: 3,158; yacc: 1,969; java: 1,336; ruby: 1,289; python: 1,062; pascal: 912; perl: 381; lex: 275; makefile: 150; sql: 77; cs: 35
file content (573 lines) | stat: -rw-r--r-- 15,419 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
<?xml version="1.0" encoding="utf-8"?>
<test>

<name>hitbuffer edge cases</name>

<config>
indexer
{
	mem_limit		= 16M
}

searchd
{
	<searchd_settings/>
	workers = threads
	binlog_path = #
    subtree_docs_cache  = 128k
    subtree_hits_cache  = 128k
}

source test1
{
	type			= mysql
	<sql_settings/>
	sql_query		= select * from test_table1
	sql_attr_uint	= gid
}

index test1
{
	source			= test1
	path			= <data_path/>/test1
}

source test2
{
	type			= mysql
	<sql_settings/>
	sql_query		= select * from test_table2
	sql_attr_uint	= gid
}

index test2
{
	source			= test2
	path			= <data_path/>/test2
	docinfo		= extern
	html_strip		= 1
	index_sp		= 1
}

source test3
{
	type			= mysql
	<sql_settings/>
	sql_query		= select * from test_table3
	sql_attr_uint	= gid
}
index test3
{
	source			= test3
	path			= <data_path/>/test3
	docinfo		= extern
}

index rt
{
	type = rt
	path = <data_path/>/rt

	rt_field = content
	rt_attr_uint = dummy
	morphology = none
}

index optimized_filter
{
	type = rt
	rt_mem_limit = 128k
	path = <data_path/>/optimized_filter

	rt_field = content
	rt_attr_uint = idd0
	rt_attr_uint = idd1
	rt_attr_uint = idd2
	rt_attr_uint = idd3
	rt_attr_uint = idd4
	rt_attr_uint = idd5
	rt_attr_uint = idd6
	rt_attr_uint = idd7
	rt_attr_uint = idd8
	rt_attr_uint = idd9
}

source src_hits
{
	type			= mysql
	<sql_settings/>
	sql_query		= SELECT id, title, 11 as idd FROM table_hits
	sql_attr_uint	= idd
}

index hits_plain
{
	source			= src_hits
	path			= <data_path/>/plain
	docinfo			= extern
	hit_format = plain
}

index hits_inline
{
	source			= src_hits
	path			= <data_path/>/inline
	docinfo			= extern
}

<!-- flushing hits+docinfo in inline mode (bug #1310) -->
source src_inline
{
	type = mysql
	<sql_settings/>
	sql_query = SELECT 1 id, REPEAT('a ', 1024*1024-32768), 32 gid UNION SELECT 2 id, REPEAT('a ', 32768), 32 gid
	sql_attr_bigint = gid
}

index idx_inline
{
	source			= src_inline
	path			= <data_path/>/idx_inline
	docinfo = inline
}

source src_inline2
{
	type = mysql
	<sql_settings/>
	sql_query = SELECT id, title, CRC32(id+11) + 31 gid1, CRC32(id+13) + 32 gid2, CRC32(id+17) + 33 gid3, CRC32(id+19) + 34 gid4, CRC32(id+23) + 35 gid5, CRC32(id+27) + 36 gid6, CRC32(id+29) + 37 gid7, CRC32(id+31) + 38 gid8, CRC32(id+37) + 39 gid9 from table_inline2
	sql_attr_bigint = gid1
	sql_attr_bigint = gid2
	sql_attr_bigint = gid3
	sql_attr_bigint = gid4
	sql_attr_bigint = gid5
	sql_attr_bigint = gid6
	sql_attr_bigint = gid7
	sql_attr_bigint = gid8
	sql_attr_bigint = gid9
}

index idx_inline2
{
	source			= src_inline2
	path			= <data_path/>/idx_inline2
	docinfo = inline
	dict = keywords
}

<!-- quorum tail vs quorum duplicates -->
source src_quorum_tail
{
	type = mysql
	<sql_settings/>
	sql_query = SELECT *, 100 + id as idd from table_quorum_tail
	sql_attr_bigint = idd
}
index idx_qt
{
	source			= src_quorum_tail
	path			= <data_path/>/idx_quorum_tail
	docinfo = extern
}

source src_quorum_tail_2
{
	type = mysql
	<sql_settings/>
	sql_query = SELECT *, 100 + id as idd from table_quorum_tail_2
	sql_attr_bigint = idd
}
index idx_qt_2
{
	source			= src_quorum_tail_2
	path			= <data_path/>/idx_quorum_tail_2
	docinfo = extern
}

index rt_auto_expand
{
	type = rt
	charset_type = utf-8
	rt_mem_limit = 128k
	path = <data_path/>/rt_auto_expand

	dict = keywords
	rt_attr_uint = idd1
	rt_attr_uint = idd2
	rt_attr_uint = idd3
	rt_attr_uint = idd4
	rt_attr_uint = idd5
	rt_field = content
	min_infix_len = 3
	enable_star = 1
	expand_keywords = 1
}

source some1
{
	type			= mysql
	<sql_settings/>
	sql_query		= select * from test_table1
	sql_attr_uint	= gid
	sql_field_string = title
}


index some1
{
	source = some1
	path = <data_path/>/some1
	hitless_words = all
}

source dup_end1
{
	type			= mysql
	<sql_settings/>
	sql_query		= SELECT 1 id, 11 gid, '1127206,819,100' title UNION SELECT 1 id, 11 gid, '1,2,3,4,5,1127206,819,100' title
	sql_attr_uint	= gid
}

source dup_end2
{
	type			= mysql
	<sql_settings/>
	sql_query		= SELECT 1 id, 11 gid, '1127206,819,100' title UNION SELECT 1 id, 11 gid, '1,2,3,4,5,6,7,8,1127206,819,100,113,114,115,100' title
	sql_attr_uint	= gid
}

index dup_end1
{
	source			= dup_end1
	path			= <data_path/>/dup_end1
}

index dup_end2
{
	source			= dup_end2
	path			= <data_path/>/dup_end2
}

source src_xml1
{
	type = xmlpipe2
	xmlpipe_command = cat <this_test/>/hits1.xml
}

index idx_xml1
{
	source = src_xml1
	path = <data_path/>/idx_xml1
	docinfo = extern
}

</config>

<db_create>
create table test_table1
(
	id int not null,
	gid int not null,
	title varchar(16384) not null
);
</db_create>
<db_create>
create table test_table2
(
	id int not null,
	gid int not null,
	title varchar(256) not null
);
</db_create>
<db_create>
create table test_table3
(
	id int not null,
	gid int not null,
	title varchar(64) not null
);
</db_create>
<db_create>
create table table_hits
(
	id int not null,
	title varchar(256) not null
);
</db_create>
<db_create>
create table table_inline2
(
	id int not null,
	title varchar(21000) not null
) ENGINE=MYISAM;
</db_create>
<db_create>
create table table_quorum_tail
(
	id int not null,
	title varchar(4096) not null
);
</db_create>
<db_create>
create table table_quorum_tail_2
(
	id int not null,
	title varchar(64) not null
);
</db_create>
<db_drop>drop table if exists test_table1;</db_drop>
<db_drop>drop table if exists test_table2;</db_drop>
<db_drop>drop table if exists test_table3;</db_drop>
<db_drop>drop table if exists table_hits;</db_drop>
<db_drop>drop table if exists table_inline2;</db_drop>
<db_drop>drop table if exists table_quorum_tail;</db_drop>
<db_drop>drop table if exists table_quorum_tail_2;</db_drop>
<custom_insert><![CDATA[

// Fixture loader: fills the MySQL tables that the plain indexes in this test
// are built from. Every insert aborts the run on failure so that a partially
// loaded fixture can never be indexed silently.

// OR boundary
// weight on 11 must be 2xxx; used to be 1xxx (because of positions mismatch)
$t = str_repeat ( "aaa ", 509 );
mysql_query ( "INSERT INTO test_table1 VALUES ( 10, 1, 'bbb bbb bbb bbb $t' )" ) or die("q1:".mysql_error());
mysql_query ( "INSERT INTO test_table1 VALUES ( 11, 1, 'aaa aaa aaa aaa bbb bbb aaa aaa' )" ) or die("q2:".mysql_error());

// documents 1..599 hold two short sentences; document 701 is the only one
// that also contains "good" (queried later with "box SENTENCE good")
for ( $i=1; $i < 600; $i++ )
	mysql_query ( "INSERT INTO test_table2 VALUES ( $i, 1, 'The box. A dummy.' )" )  or die("q3:".mysql_error()."\n$i");

mysql_query ( "INSERT INTO test_table2 VALUES ( 701, 1, 'The good box. A dummy.' )" )  or die("q4:".mysql_error());

// common subtree case
// ids 1..400 contain both keywords, ids 402..601 contain only "aaa"
// (id 401 is not inserted)
for ( $i=1; $i < 401; $i++ )
	mysql_query ( "INSERT INTO test_table3 VALUES ( $i, $i, 'aaa bbb' )" )  or die("q5:".mysql_error()."\n$i");

for ( $i=402; $i < 602; $i++ )
	mysql_query ( "INSERT INTO test_table3 VALUES ( $i, $i, 'aaa' )" )  or die("q6:".mysql_error()."\n$i");

// regression skiplist vs hit_format
// insert failures used to be ignored here; they now abort like the rest of the loader
for ($i=1; $i<145; $i++)
{
	mysql_query ("INSERT INTO table_hits VALUES ( $i, 'I like as I' ) " ) or die("q-hits-1:".mysql_error()."\n$i");
}
mysql_query ("INSERT INTO table_hits VALUES ( 9200, 'And I promised myself  and her  that I was going to do something with my life instead of sitting around.  I promised her I would follow my dreams of the NHL.' ) " ) or die("q-hits-2:".mysql_error());
mysql_query ("INSERT INTO table_hits VALUES ( 9250, 'promised like as I' ) " ) or die("q-hits-3:".mysql_error());
for ($i=9300; $i<9500; $i++)
{
	mysql_query ("INSERT INTO table_hits VALUES ( $i, 'i like as ' ) " ) or die("q-hits-4:".mysql_error()."\n$i");
}

// regression docinfo=inline tail hits
// each of the 349 documents is a long text built from 550 repeated phrases;
// after every append, three characters at a position derived from the document
// and phrase numbers are overwritten with "cos", so the documents differ.
// " good game " is appended last and is therefore always at the very end.
for ($i=1; $i<350; $i++)
{
	$text = ' his from other planet ';
	for ( $j=0; $j<550; $j++ )
	{
		$text .= ' there are also good people located ';
		$pos = ( $i + 1 ) * ( $j + 1 ) * 13;
		$pos = $pos % ( strlen ( $text ) - 3 );
		$text[$pos + 0] = 'c';
		$text[$pos + 1] = 'o';
		$text[$pos + 2] = 's';
	}
	$text .= ' good game ';
	mysql_query ("INSERT INTO table_inline2 VALUES ( $i, ' $text ' ) " ) or die("q-inline-2: at doc=$i ".mysql_error()."\n$i");
}

// quorum tail vs quorum duplicates
for ($i=1; $i<4; $i++)
{
	mysql_query ( "INSERT INTO table_quorum_tail VALUES ( $i, 'I like as I we as I let me as I' ) " ) or die("qd:".mysql_error()."\n$i");
}
// double tail from child
for ($i=100; $i<103; $i++)
{
	mysql_query ( "INSERT INTO table_quorum_tail VALUES ( $i, 'walk away from me' ) " ) or die("qd:".mysql_error()."\n$i");
}
// document 104 starts with 1099 repetitions of "me "
$text = '';
for ($i=1; $i<1100; $i++)
{
	$text .= 'me ';
}
// the failure messages name the document id directly; the loop counter is
// stale (1100) at this point and used to be reported instead
mysql_query ("INSERT INTO table_quorum_tail VALUES ( 104, '$text walk as we or other like.' ) " ) or die("qd:".mysql_error()."\n104");
mysql_query ("INSERT INTO table_quorum_tail VALUES ( 105, 'me me me walk as we or other like.' ) " ) or die("qd:".mysql_error()."\n105");
for ($i=1; $i<514; $i++)
{
	mysql_query ( "INSERT INTO table_quorum_tail_2 VALUES ( $i, 'match that dude' ) " ) or die("qd-2:".mysql_error()."\n$i");
}
]]></custom_insert>

<custom_test>
<![CDATA[
// Collected output of the whole custom test. Entries are appended in a fixed
// order: result sets, label strings, and error messages.
$results = array ();

$client->SetMatchMode ( SPH_MATCH_EXTENDED2 );

// legacy
// "time" is removed from every result set before it is stored
// (presumably because it varies between runs - confirm against the harness)
$res = $client->Query ( "aaa | bbb", "test1" );
unset ( $res["time"] );
$results[] = $res;

$res  = $client->Query ( "box SENTENCE good", "test2" );
unset ( $res["time"] );
$results[] = $res;

// label entry marking the start of the next group of results
$results[] = 'common subtree crash';

// the same query is batched twice: first restricted to document id 999999
// (an id the fixture loader never inserts into test_table3), then unfiltered
$client->SetFilter ( "@id", array ( 999999 ) );
$client->AddQuery ( "aaa -bbb", "test3" );

$client->ResetFilters ();
$client->AddQuery ( "aaa -bbb", "test3" );

$res = $client->RunQueries();

// on failure, record the client error and stop; nothing below this point runs
if ( !$res )
{
	$results[] = $client->GetLastError();
	return;
}

foreach ( $res as $r )
{
	unset ( $r["time"] );
	$results[] = $r;
}

// regression hitless vs subtree cache
// three identical queries are sent in one batch against the index that is
// configured with hitless_words = all
$results[] = 'hitless vs subtree cache';
$client->SetSortMode ( SPH_SORT_ATTR_ASC, 'title' );
$client->AddQuery ( "(\"aaa bbb\") (\"aaa cc\")", "some1" );
$client->AddQuery ( "(\"aaa bbb\") (\"aaa cc\")", "some1" );
$client->AddQuery ( "(\"aaa bbb\") (\"aaa cc\")", "some1" );
$res = $client->RunQueries();
// unlike the batch above, a failure here is recorded and the test continues
if ( !$res )
{
	$results[] = $client->GetLastError();
} else
{
	foreach ( $res as $r )
	{
		unset ( $r["time"] );
		$results[] = $r;
	}
}

// regression RT dictionary lost words on merge with ID64

// $query ( $q, $sock ): helper that runs statement $q on connection $sock
// through mysql_query and returns an array of strings describing the outcome.
//   - the first element is always the statement text itself;
//   - if the statement fails, one element "<errno>; <error message>" follows;
//   - otherwise one element is added per returned row:
//       * rows whose Variable_name column equals "time" are skipped;
//       * rows that have both Variable_name and Value columns are rendered
//         as "<Variable_name> = <Value>";
//       * any other row is rendered as its "column = value" pairs, each
//         followed by a tab.
// The function body is a string handed to create_function, so it must not be
// reformatted or commented inline: that would change the generated code text.
$query = create_function ( '$q, $sock','
	
	$results = array( $q );
	$res = @mysql_query ( $q, $sock );
	if ( $res===false )
	{
		$results[]  = mysql_errno( $sock ) . "; " . mysql_error ( $sock );
	} else
	{	
		while ($row = @mysql_fetch_array($res, MYSQL_ASSOC))
		{
			if ( array_key_exists ( "Variable_name", $row ) && $row["Variable_name"]=="time" )
			{
				continue;
			}
			if ( !array_key_exists ( "Variable_name", $row ) || !array_key_exists ( "Value", $row ) )
			{
				$line = "";
				foreach ( $row as $k=>$v )
				{
					$line .= $k . " = " . $v . "\t";
				}
				$results[] = $line;
			} else
			{
				$results[] = $row["Variable_name"] . " = " . $row["Value"];
			}
		}	
		@mysql_free_result ( $res );
	}
	return $results;
');

// Open a MySQL-protocol connection to searchd's SphinxQL listener.
// "localhost" is replaced by 127.0.0.1 (presumably to force a TCP connection
// instead of a local socket - confirm). The connection result is not checked;
// if it failed, every $query call below records an error entry instead of rows.
global $sd_address, $sd_sphinxql_port;
$sockStr = "$sd_address:$sd_sphinxql_port";
if ($sd_address == "localhost")
	$sockStr = "127.0.0.1:$sd_sphinxql_port";
$sock = @mysql_connect ($sockStr,"","",	true );

// fill the "rt" index with documents 1..120; documents 31 and 100 get their
// own text, all others share the same filler. The insert output is discarded.
for ( $doc=1; $doc<121; $doc++ )
{
	$text = 'dummy text is going here';
	if ( $doc==31 )
		$text = 'ACT I.';
	else if ( $doc==100 )
		$text = 'And let us once again assail your ears, basketball';
	
	$insert = " REPLACE INTO rt ( id, content ) VALUES ( $doc, '$text' ) ";
	$query ( $insert, $sock );
}	

// "basketball" occurs only in document 100
$results = array_merge ( $results, $query ( "select id from rt limit 0,2", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );	
$results = array_merge ( $results, $query ( "select id from rt where match ( 'basketball' )", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );	

// regression - optimized filter broken with extra-filter
// 1699 REPLACE statements with id = doc % 1500: ids 1..199 are written twice
// and id 0 is attempted once (doc 1500). Every row stores idd5 = 5.
$text = 'dummy text is going here';
for ( $doc=1; $doc<1700; $doc++ )
{
	$id = $doc % 1500;
	$insert = " REPLACE INTO optimized_filter VALUES ( $id, '$text', 0, 1, 2, 3, 4, 5, 6, 7,8, 9 ) ";
	$query ( $insert, $sock );
}	
$results = array_merge ( $results, $query ( "select * from optimized_filter where idd5>1 and idd5<20 limit 2", $sock ) );

// regression skiplist vs hit_format
// the same phrase query is run against both indexes built from src_hits
$results = array_merge ( $results, $query ( "select * from hits_plain where match (' \" i promised \" ' )", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );	
$results = array_merge ( $results, $query ( "select * from hits_inline where match (' \" i promised \" ' )", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );	

// regression docinfo=inline tail hits
$results = array_merge ( $results, $query ( "select * from idx_inline2 where match (' good game ' ) and id>340", $sock ) );

// quorum tail vs quorum duplicates
// quorum queries with repeated keywords; the NEAR variants differ only in distance (5, 6, 7)
$results = array_merge ( $results, $query ( "select * from idx_qt where match (' \" I I I I do my best \"/4 ' )", $sock ) );
$results = array_merge ( $results, $query ( "select * from idx_qt where match (' \" me me me notfound \"/2 NEAR/5 like ' )", $sock ) );
$results = array_merge ( $results, $query ( "select * from idx_qt where match (' \" me me me notfound \"/2 NEAR/6 like ' )", $sock ) );
$results = array_merge ( $results, $query ( "select * from idx_qt where match (' \" me me me notfound \"/2 NEAR/7 like ' )", $sock ) );

$results = array_merge ( $results, $query ( "select *, weight() from idx_qt_2 where match (' \" match match that pal \"/2 ' ) order by id desc limit 5 option ranker=wordcount", $sock ) );

// regression - expand_keywords got lost for disk chunks of RT index
// document 1 is inserted first, followed by 1698 filler documents and then
// document 2000 (with rt_mem_limit = 128k the filler presumably pushes the
// first document into a disk chunk - confirm)
$text = 'dummy text is going here dummy text is going here dummy text is going here dummy text is going here dummy text is going here dummy text is going here aborted';
$insert = "REPLACE INTO rt_auto_expand VALUES ( 1, 'uniq got placed here avi', 1, 2, 3, 4, 5 ) ";
$query ( $insert, $sock );

for ( $id=2; $id<1700; $id++ )
{
	$insert = "REPLACE INTO rt_auto_expand VALUES ( $id, '$text', 1, 2, 3, 4, 5 ) ";
	$query ( $insert, $sock );
}
$insert = "REPLACE INTO rt_auto_expand VALUES ( 2000, 'uniq got placed here', 1, 2, 3, 4, 5 ) ";
$query ( $insert, $sock );
// "ace" is not a whole word in any inserted text; it is an infix of "placed"
$results = array_merge ( $results, $query ( "select * from rt_auto_expand where match ( 'uniq' ) ", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );
$results = array_merge ( $results, $query ( "select * from rt_auto_expand where match ( 'ace' ) ", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );

// regression call keywords fails at disk chunk term with skiplist
$results = array_merge ( $results, $query ( " CALL KEYWORDS ('got aborted avi', 'rt_auto_expand', 1) ", $sock ) );

// regression field end hits vs duplicated doc
// both sources return two rows with the same document id 1
$results = array_merge ( $results, $query ( "select * from dup_end1 where match (' 100 ' )", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );
$results = array_merge ( $results, $query ( "select * from dup_end2 where match (' 100 ' )", $sock ) );
$results = array_merge ( $results, $query ( "show meta", $sock ) );


// regression: phrase OR'ed with a two-keyword AND group, ranked by the
// expression ranker top(lcs), against the xmlpipe2 index
// NOTE(review): the original comment does not name the issue being guarded - confirm
$results = array_merge ( $results, $query ( "select id, weight() w FROM idx_xml1 WHERE MATCH('\"missed go3\" | (go3 go4)') OPTION ranker=expr('top(lcs)')", $sock ) );

@mysql_close ( $sock );

]]>
</custom_test>

</test>