File: example9.pl

package info (click to toggle)
swish-e 2.4.3-7
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 7,308 kB
  • ctags: 7,642
  • sloc: ansic: 47,402; sh: 8,508; perl: 5,281; makefile: 723; xml: 9
file content (68 lines) | stat: -rwxr-xr-x 1,629 bytes parent folder | download | duplicates (11)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/perl -w
use strict;

# This is a short example that basically does the same
# thing as the default file system access method by
# recursing directories, but also shows how to process different
# file types -- in this example pdf is converted to xml for indexing.

# in this example, only .pdf and .config files are indexed.

# the pdf2xml module is in the prog-bin directory of the swish-e distribution
use lib '../prog-bin';

use File::Find;  # for recursing a directory tree
use pdf2xml;     # example module for pdf to xml conversion
                 # Note that you need "IndexContents XML .pdf" in the
                 # swish-e config file

# See perldoc File::Find for information on following symbolic links

use constant DEBUG => 0;

# See if a directory was passed in via the SwishProgParameters swish
# directive

# Starting directory: first command-line argument, falling back to the
# current directory when none (or a false value) is supplied.
my $dir = shift(@ARGV) || '.';

# Walk the tree below $dir, handing every entry to wanted().  no_chdir keeps
# the process in its original working directory for the whole traversal.
find( { no_chdir => 1, wanted => \&wanted }, $dir );

# wanted()
#
# File::Find callback, invoked once per directory entry.  The entry is read
# from $_ and $File::Find::name; nothing is passed in @_.
#
#   *.pdf     - printed as whatever pdf2xml() produces (it is expected to
#               return a scalar reference, which is dereferenced here)
#   *.config  - printed with the headers built by get_content()
#   otherwise - skipped
#
# Directories produce no output.  Progress messages go to STDERR only when
# the DEBUG constant is true.
sub wanted {
    return if -d $_;

    my $file = $File::Find::name;

    if ( $_ =~ /\.pdf$/ ) {
        print STDERR "Indexing pdf $file\n" if DEBUG;
        my $doc = pdf2xml($file);
        print ${$doc};
        return;
    }

    if ( $_ =~ /\.config$/ ) {
        print STDERR "Indexing $file\n" if DEBUG;
        my $doc = get_content($file);
        print ${$doc};
        return;
    }

    # Any other file type is ignored.
    print STDERR "Skipping $file\n" if DEBUG;
    return;
}


# get_content( $path )
#
# Reads the file at $path and returns a reference to a string made of the
# header lines printed ahead of each document (Content-Length, Last-Mtime,
# Path-Name), one blank line, and then the file's contents.
#
# Dies with "$path: <OS error>" if the file cannot be opened or stat'ed.
sub get_content {
    my $path = shift;

    # Three-argument open with a lexical handle: the path is used verbatim,
    # so leading/trailing whitespace or characters such as '<', '>' and '|'
    # in a file name are never taken as an open mode or a command, and no
    # package-global handle is left behind.
    open my $fh, '<', $path or die "$path: $!";

    # stat the open handle rather than the path, so the size and mtime
    # describe the very file that is about to be read.
    my ( $size, $mtime ) = ( stat $fh )[ 7, 9 ];
    die "$path: $!" unless defined $size;

    my $content = <<EOF;
Content-Length: $size
Last-Mtime: $mtime
Path-Name: $path

EOF

    # Slurp the whole file; $/ is changed only inside this do-block.
    my $body = do { local $/ = undef; <$fh> };
    close $fh;

    $content .= $body if defined $body;
    return \$content;
}