File: Stats.pm

package info (click to toggle)
librdf-generator-void-perl 0.16-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 376 kB
  • sloc: perl: 382; makefile: 2; sh: 1
file content (155 lines) | stat: -rw-r--r-- 4,347 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package RDF::Generator::Void::Stats;

use 5.006;
use strict;
use warnings;
use Moose;

=head1 NAME

RDF::Generator::Void::Stats - Generate statistics needed for good VoID descriptions

=head1 SYNOPSIS

Typically called for you by L<RDF::Generator::Void> as:

  my $stats = RDF::Generator::Void::Stats->new(generator => $self);


=head2 METHODS

=head3 C<< BUILD >>

Called by Moose to initialize an object.

=head3 C<generator>

Parameter to the constructor, to pass a L<RDF::Generator::Void> object.

=head3 C<vocabularies>

A hashref used to find common vocabularies in the data.

=head3 C<entities>

The number of distinct entities, as defined in the specification.

=head3 C<properties>

The number of distinct properties, as defined in the specification.

=head3 C<subjects>

The number of distinct subjects, as defined in the specification.

=head3 C<objects>

The number of distinct objects, as defined in the specification.

=head3 C<propertyPartitions>

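A hashref keyed by property URI. Each value is itself a hashref whose
C<triples> entry holds the number of triples using that property. When
no level is set, or the level is above 2, it also has C<countsubjects>
and C<countobjects> entries, which are hashrefs whose keys are the
distinct subjects and objects seen with that property.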

=head3 C<classPartitions>

A hashref keyed by class URI, giving for each class the number of
C<rdf:type> triples that have that class as their object.


=cut

# Statistics attributes. They are declared read-write, so the accessor
# of the same name both fetches and sets the value.
has vocabularies => (
	is  => 'rw',
	isa => 'HashRef',
);

has [qw(entities properties subjects objects)] => (
	is  => 'rw',
	isa => 'Int',
);

has [qw(propertyPartitions classPartitions)] => (
	is  => 'rw',
	isa => 'HashRef',
);

# The generator must be passed to the constructor (it is required) and
# is read-only afterwards.
has generator => (
	is       => 'ro',
	isa      => 'RDF::Generator::Void',
	required => 1,
);

# BUILD is the post-construction hook: it runs when the user calls the
# constructor. It makes a single pass over the statements of the
# generator's input model and stores the resulting statistics in this
# object's attributes.
#
# Parameters:
#   $self - the object being built. Its generator attribute supplies the
#           model (inmodel) and the optional urispace and level settings.
#
# Side effects:
#   Sets vocabularies, entities, properties, subjects and objects. When
#   no level is set, or the level is at least 1, it also sets
#   propertyPartitions and classPartitions.
sub BUILD {
	my ($self) = @_;

	# Local hashes used as counters and as sets (unique keys).
	my (%vocab_counter, %entities, %properties, %subjects, %objects, %classes);

	my $gen = $self->generator;

	# None of these settings change while we iterate, so look them up
	# once instead of once per statement.
	my $has_urispace    = $gen->has_urispace;
	my $urispace        = $has_urispace ? $gen->urispace : undef;
	my $level_unset     = !$gen->has_level;
	my $want_partitions = $level_unset || $gen->level >= 1;
	my $want_distinct   = $level_unset || $gen->level > 2;

	my $rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

	# Iterate over the model once; all statistics are gathered in this
	# single pass.
	$gen->inmodel->get_statements->each(sub {
		my $st = shift;

		# To allow for non-RDF data models (e.g. N3). This is a callback,
		# not a loop body, so the statement is skipped with return.
		return unless $st->rdf_compatible;

		my $subject       = $st->subject;
		my $object        = $st->object;
		my $predicate     = $st->predicate;
		my $predicate_uri = $predicate->uri_value;
		my $subject_sse   = $subject->sse;
		my $object_sse    = $object->sse;

		# Best effort: qname can throw an exception, in which case the
		# predicate is simply not counted for any vocabulary.
		eval {
			my ($vocab_uri) = $predicate->qname;
			# Count how many triples use each vocabulary.
			$vocab_counter{$vocab_uri}++;
		};

		if ($has_urispace && $subject->is_resource) {
			# Compute entities. We assume that all entities are subjects
			# whose URI starts with the uriSpace. The uriSpace is compared
			# as a literal prefix, not as a regular expression, so that
			# characters such as "?" or "+" in it are matched verbatim.
			my $subject_uri = $subject->uri_value;
			$entities{$subject_uri} = 1 if index($subject_uri, $urispace) == 0;
		}

		$subjects{$subject_sse} = 1;
		$properties{$predicate_uri}{'triples'}++;
		$objects{$object_sse} = 1;

		# Class partitions: count rdf:type triples per class resource.
		if ($want_partitions && $predicate_uri eq $rdf_type && $object->is_resource) {
			$classes{$object->uri_value}++;
		}

		# Record the distinct subjects and objects seen per property.
		if ($want_distinct) {
			$properties{$predicate_uri}{'countsubjects'}{$subject_sse} = 1;
			$properties{$predicate_uri}{'countobjects'}{$object_sse} = 1;
		}

		return;
	});

	# Finally, publish the results through the attributes.
	$self->vocabularies(\%vocab_counter);
	$self->entities(scalar keys %entities);
	$self->properties(scalar keys %properties);
	$self->subjects(scalar keys %subjects);
	$self->objects(scalar keys %objects);
	if ($want_partitions) {
		$self->propertyPartitions(\%properties);
		$self->classPartitions(\%classes);
	}
}

=head1 FURTHER DOCUMENTATION

Please see L<RDF::Generator::Void> for further documentation.

=head1 AUTHORS AND COPYRIGHT


Please see L<RDF::Generator::Void> for information about authors and copyright for this module.


=cut

1;