File: CategoryLinks.pm

package info (click to toggle)
libparse-mediawikidump-perl 1.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, wheezy
  • size: 212 kB
  • ctags: 107
  • sloc: perl: 1,156; xml: 205; sql: 16; makefile: 2
file content (115 lines) | stat: -rw-r--r-- 1,741 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package Parse::MediaWikiDump::CategoryLinks;

our $VERSION = '1.0.3';

use strict;
use warnings;

sub new {
	my ($class, $source) = @_;
	my $self = {};

	$$self{BUFFER} = [];
	$$self{BYTE} = 0;

	bless($self, $class);

	$self->open($source);
	$self->init;

	return $self;
}

sub next {
	my ($self) = @_;
	my $buffer = $$self{BUFFER};
	my $link;

	while(1) {
		if (defined($link = pop(@$buffer))) {
			last;
		}

		#signals end of input
		return undef unless $self->parse_more;
	}

	return Parse::MediaWikiDump::category_link->new($link);
}

#private functions with OO interface
sub parse_more {
	my ($self) = @_;
	my $source = $$self{SOURCE};
	my $need_data = 1;
	
	while($need_data) {
		my $line = <$source>;

		last unless defined($line);

		$$self{BYTE} += length($line);

		while($line =~ m/\((\d+),'(.*?)','(.*?)',(\d+)\)[;,]/g) {
			push(@{$$self{BUFFER}}, [$1, $2, $3, $4]);
			$need_data = 0;
		}
	}

	#if we still need data and we are here it means we ran out of input
	if ($need_data) {
		return 0;
	}
	
	return 1;
}

sub open {
	my ($self, $source) = @_;

	if (ref($source) ne 'GLOB') {
		die "could not open $source: $!" unless
			open($$self{SOURCE}, $source);

		$$self{SOURCE_FILE} = $source;
	} else {
		$$self{SOURCE} = $source;
	}

	binmode($$self{SOURCE}, ':utf8');

	return 1;
}

sub init {
	my ($self) = @_;
	my $source = $$self{SOURCE};
	my $found = 0;
	
	while(<$source>) {
		if (m/^LOCK TABLES `categorylinks` WRITE;/) {
			$found = 1;
			last;
		}
	}

	die "not a MediaWiki link dump file" unless $found;
}

sub current_byte {
	my ($self) = @_;

	return $$self{BYTE};
}

sub size {
	my ($self) = @_;
	
	return undef unless defined $$self{SOURCE_FILE};

	my @stat = stat($$self{SOURCE_FILE});

	return $stat[7];
}

1;