File: con2.pl

package info (click to toggle)
idzebra 2.2.10-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,644 kB
  • sloc: ansic: 54,389; xml: 27,054; sh: 6,214; makefile: 1,099; perl: 210; tcl: 64
file content (41 lines) | stat: -rwxr-xr-x 799 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/perl -w
#
# Reads an RDF/XML stream on STDIN line by line and rewrites every
# <ExternalPage> entry as a compact <meta> record.  Records are written to
# dmoz.0.xml, dmoz.1.xml, ... in the current directory, with at most
# $RECORDS_PER_FILE records per file.
# (Presumably the input is the DMOZ / Open Directory content dump -- the
# output file names suggest so; confirm against the caller.)
#
# Matching is done per input line, so each recognised element must sit on a
# single line, and only the first recognised element on a line is used.
# Captured values are copied to the output verbatim: no XML escaping or
# unescaping is performed.

use strict;
use warnings;

# Maximum number of <meta> records per output file.
my $RECORDS_PER_FILE = 30000;

# The topic comes from the most recent <Topic> element and is deliberately
# carried over to every following page until the next <Topic> is seen.
my $topic = '';

# Fields of the page currently being read.
my $url         = '';
my $title       = '';
my $description = '';

my $no = 0;      # number of records written so far
my $out;         # handle of the output file currently being filled
my $out_name;    # its name, kept for error messages

while (my $line = <STDIN>) {
    if ($line =~ /<Topic r:id=\"(.*?)\">/) {
        $topic = $1;
    }
    elsif ($line =~ /<ExternalPage about=\"(.*?)\">/) {
        $url = $1;
        # Start a fresh record so that a page lacking a title or a
        # description does not inherit the previous page's values.
        $title       = '';
        $description = '';
    }
    elsif ($line =~ /<d:Title>(.*?)<\/d:Title>/) {
        $title = $1;
    }
    elsif ($line =~ /<d:Description>(.*?)<\/d:Description>/) {
        $description = $1;
    }
    elsif ($line =~ /<\/ExternalPage>/) {
        # Roll over to a new output file every $RECORDS_PER_FILE records
        # (this also opens the very first file when $no is 0).
        if (($no % $RECORDS_PER_FILE) == 0) {
            if (defined $out) {
                close($out)
                    or die "Cannot close $out_name: $!\n";
            }
            # $no is a multiple of $RECORDS_PER_FILE here, so the quotient
            # is an integer file index.
            $out_name = 'dmoz.' . ($no / $RECORDS_PER_FILE) . '.xml';
            open(my $fh, '>', $out_name)
                or die "Cannot open $out_name for writing: $!\n";
            $out = $fh;
        }
        print {$out} "<meta>\n";
        print {$out} " <title>$title</title>\n";
        print {$out} " <description>$description</description>\n";
        print {$out} " <url>$url</url>\n";
        print {$out} " <topic>$topic</topic>\n";
        print {$out} "</meta>\n";
        $no++;
    }
}

# Buffered write errors surface at close time, so check the final close too.
if (defined $out) {
    close($out)
        or die "Cannot close $out_name: $!\n";
}