File: update_stream_file.php

package info (click to toggle)

php-solr 2.8.1-1

links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 2,620 kB
sloc: ansic: 14,274; xml: 1,313; php: 1,239; pascal: 11; makefile: 3

file content (34 lines) | stat: -rw-r--r-- 1,038 bytes

parent folder | download | duplicates (3)

<?php

// Example: send a local HTML file to Solr through an extract request,
// configuring the extraction one parameter at a time.
$extractParams = new SolrModifiableParams();

// Index the document under the unique ID "doc1".
$extractParams->set(SolrExtractRequest::LITERALS_PREFIX . 'id', 'doc1');

// Capture what is inside paragraph tags.
$extractParams->set(SolrExtractRequest::CAPTURE_ELEMENTS, 'p');

// Index attributes of the Tika XHTML elements into separate fields.
$extractParams->set(SolrExtractRequest::CAPTURE_ATTRIBUTES, 'true');

// Map the captured "p" content to a Solr field.
$extractParams->set(
    SolrExtractRequest::FIELD_MAPPING_PREFIX . 'p',
    'an_indexed_field_name_that_holds_paragraphs'
);

// Capture unmapped content in this field.
$extractParams->set(SolrExtractRequest::DEFAULT_FIELD, '__text__');

// Restrict capturing to nodes matching this XPath expression.
$extractParams->set(
    SolrExtractRequest::XPATH_EXPRESSION,
    '/xhtml:html/xhtml:body/xhtml:div//node()'
);

// See docs/documentation.php for the rest of the parameters.

// Build the extract request from the file on disk plus the parameters above.
$extractRequest = SolrExtractRequest::createFromFile(
    'path/to/file.html',
    $extractParams
);

// NOTE(review): $client is not defined in this file; presumably a SolrClient
// configured elsewhere before this point — confirm.
$response = $client->sendUpdateStream($extractRequest);