File: SimpleHighlight.pm

package info (click to toggle)
swish-e 2.4.3-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 7,248 kB
  • ctags: 7,642
  • sloc: ansic: 47,385; sh: 8,502; perl: 5,101; makefile: 719; xml: 9
file content (113 lines) | stat: -rw-r--r-- 2,705 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#=======================================================================
#  Simple Highlighting Code
#    $Id: SimpleHighlight.pm,v 1.3 2003/12/18 05:00:37 whmoseley Exp $
# This one is not accurate and is more for speed
#=======================================================================
package SWISH::SimpleHighlight;
use strict;

sub new {
    my ( $class, $settings, $headers ) = @_;


    return bless {
        settings=> $settings,
	headers => $headers,
    }, $class;
}

sub header {
    my $self = shift;
    return '' unless ref $self->{headers} eq 'HASH';
    return $self->{headers}{$_[0]} || '';
}


#==========================================================================
#

sub highlight {

    my ( $self, $text_ref, $phrase_array, $prop_name ) = @_;


    my $settings = $self->{settings};

    my $Max_Words = $settings->{max_words} || 100;
    my $max_chars = 8 * $Max_Words;


    # first trim down the property - would likely be faster to use substr()
    # limits what is searched, but also means some man not show highlighting
    # is also not limited to the description property

    my $text = length( $$text_ref ) > $max_chars
	? substr( $$text_ref, 0, $max_chars ) . " ..."
        : substr( $$text_ref, 0, $max_chars );

    my $start = "\007";  # Unlikely chars
    my $end   = "\010";


    my @matches = $self->set_match_regexp( $phrase_array, $prop_name );
    $text =~ s/($_)/${start}$1${end}/gi for @matches;

    # Replace entities

    my %entities = (
        '&' => '&',
        '>' => '>',
        '<' => '&lt;',
        '"' => '&quot;',
    );

    $text =~ s/([&"<>])/$entities{$1}/ge;  # " fix emacs

    my $On = $settings->{highlight_on} || '<b>';
    my $Off = $settings->{highlight_off} || '</b>';

    $text =~ s/$start/$On/g;
    $text =~ s/$end/$Off/g;
    my $wc = quotemeta $self->header('wordcharacters');

    $$text_ref = $text;


    return 1;  # return true because the property was trimmed and escaped.
}


#============================================
# Returns compiled regular expressions for matching
#
# This builds a list of expressions to match against the text.

sub set_match_regexp {
    my ( $self, $phrases, $prop_name ) = @_;

    my $wc = quotemeta $self->header('wordcharacters');

    my @matches;

    # convert each phrase for this meta into a regular expression

    for ( @$phrases ) {

        # Fix up wildcards
        my $exp = join "[^$wc]+",
            map { '\b' . $_ . '\b' }
            map { substr( $_, -1, 1 ) eq '*'
                ? quotemeta( substr( $_, 0, -1) ) . "[$wc]*?"
                : quotemeta
            } @$_;


        push @matches, qr/$exp/i;
    }

    return @matches;


}
1;