File: update_stream_file.php

package info (click to toggle)
php-solr 2.8.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,620 kB
  • sloc: ansic: 14,274; xml: 1,313; php: 1,239; pascal: 11; makefile: 3
file content (34 lines) | stat: -rw-r--r-- 1,038 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
<?php

// Collect the extraction options in a modifiable parameter set.
$extractParams = new SolrModifiableParams();

$extractParams
    // Index the document under the unique ID "doc1".
    ->set(SolrExtractRequest::LITERALS_PREFIX . 'id', 'doc1')
    // Capture the content found inside paragraph (p) tags.
    ->set(SolrExtractRequest::CAPTURE_ELEMENTS, 'p')
    // Index the attributes of the Tika XHTML elements into separate fields.
    ->set(SolrExtractRequest::CAPTURE_ATTRIBUTES, 'true')
    // Map the captured p content onto a Solr field.
    ->set(SolrExtractRequest::FIELD_MAPPING_PREFIX . 'p', 'an_indexed_field_name_that_holds_paragraphs')
    // Content that is not mapped elsewhere is captured in this field.
    ->set(SolrExtractRequest::DEFAULT_FIELD, '__text__')
    // Restrict capturing to the nodes matching this XPath expression.
    ->set(SolrExtractRequest::XPATH_EXPRESSION, '/xhtml:html/xhtml:body/xhtml:div//node()');

// Please refer to docs/documentation.php for the remaining parameters.

// Build the extract request from a file on disk, then hand it to the client.
// NOTE(review): $client is not assigned anywhere in this file; presumably a
// configured SolrClient is expected to be in scope already -- confirm.
$extractRequest = SolrExtractRequest::createFromFile('path/to/file.html', $extractParams);
$response = $client->sendUpdateStream($extractRequest);