File: ParseXSDoc.pm

package info (click to toggle)
libglib-perl 2%3A1.223-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 1,508 kB
  • ctags: 292
  • sloc: perl: 3,797; ansic: 595; makefile: 6
file content (870 lines) | stat: -rw-r--r-- 23,066 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
package Glib::ParseXSDoc;

# vim: set ts=4 :

use strict;
use Data::Dumper;
use Storable qw(store_fd);
use Exporter;
use Carp;

our @ISA = qw(Exporter);
our @EXPORT = qw(
	xsdocparse
);

our $VERSION = '1.003';

our $NOISY = $ENV{NOISYDOC};

=head1 NAME

Glib::ParseXSDoc - Parse POD and XSub declarations from XS files.

=head1 DESCRIPTION

This is the heart of an automatic API reference documentation system for
XS-based Perl modules.  FIXME more info here!!

FIXME document recognized POD directives and the output data structures

=head1 FUNCTIONS

=over

=item xsdocparse (@filenames)

Parse xs files for xsub signatures and pod.  Writes to standard output a
data structure suitable for eval'ing in another Perl script, describing
all the stuff found.  The output contains three variables:

=over

=item $xspods = ARRAYREF

array of pods found in the verbatim C portion of the XS file, listed in the
order found.  These are assumed to pertain to the XS/C api, not the Perl api.
Any C<=for apidoc> paragraphs following an C<=object> paragraphs in the
verbatim sections are stripped (as are the C<=object> paragraphs), and will
appear instead in C<< $data->{$package}{pods} >>.

=item $data = HASHREF

big hash keyed by package name (as found in the MODULE line), containing under
each key a hash with all the xsubs and pods in that package, in the order
found.  Packages are consolidated across multiple files.

=back

FYI, this creates a new parser and calls C<parse_file> on it for each
input filename; then calls C<swizzle_pods> to ensure that any
C<=for apidoc name> pods are matched up with their target xsubs; and
finally calls Data::Dumper to write the data to stdout.  So, if you want
to get finer control over how the output is created, or keep all the data
in-process, now you know how.  :-)

=cut

sub xsdocparse {
	my @filenames = @_;

	my $parser = Glib::ParseXSDoc->new;
	foreach my $filename (@filenames) {
		$parser->parse_file ($filename);
	}
	$parser->canonicalize_xsubs;
	$parser->swizzle_pods;
	$parser->preprocess_pods;
	$parser->clean_out_empty_pods;

	print "# THIS FILE IS AUTOMATICALLY GENERATED - ANY CHANGES WILL BE LOST\n";
	print "# generated by $0 ".scalar (localtime)."\n";
	print "# input files:\n";
	map { print "#   $_\n" } @filenames;
	print "#\n\n";

	# Data::Dumper converts the whole output to a string, and consequently
	# uses an obscene amount of ram on Gtk2's nearly 200 xs files.  Use
	# Storable unless the user really really wants to force us to fall back
	# to Data::Dumper.  Storable doesn't seem to work well on win32, so
	# always use Data::Dumper there.
	my $use_dd = $ENV{FORCE_DATA_DUMPER} || $^O eq 'MSWin32';
	if ($use_dd) {
		$Data::Dumper::Purity = 1;
		print Data::Dumper->Dump([$parser->{xspods}, $parser->{data}],
		                       [qw($xspods            $data)]);
		print "\n1;\n";
	} else {
		print "use Storable qw(fd_retrieve);\n";
		print "\$xspods = fd_retrieve \\*DATA;\n";
		print "\$data = fd_retrieve \\*DATA;\n";

		print "\n1;\n";
		print "__DATA__\n";

		# NOTE: don't assume STDOUT, because other code may have select'd
		# a different file handle.
		store_fd $parser->{xspods}, select;
		store_fd $parser->{data}, select;
	}

	return [ keys %{$parser->{data}} ];
}


=back

=cut

# =========================================================================

=head1 METHODS

=over

=item $Glib::ParseXSDoc::verbose

If true, this causes the parser to be verbose.

=cut

our $verbose = undef;


=item $parser = Glib::ParseXSDoc->new

Create a new xsub parser.

=cut

sub new {
	my $class = shift;
	return bless {
		# state
		module => undef,
		package => undef,
		prefix => undef,
		# data
		xspods => [],	#pods for the exported xs interface, e.g. the C stuff
		data => {},	# all the shizzle, by package name
	}, $class;
}

=item string = $parser->package

Get the current package name.  Falls back to the module name.  Will be undef
if the parser hasn't reached the first MODULE line.

=cut

sub package {
		my $self = shift;
		return ($self->{package} || $self->{module})
}

=item HASHREF = $parser->pkgdata

The data hash corresponding to the current package, honoring the most recently
encountered C<=for object> directive.  Ensures that it exists.
Returns a reference to the member of the main data structure, so modifications
are permanent and useful.

=cut

sub pkgdata {
		my $self = shift;
		my $pkg = $self->{object} || $self->package;
		my $pkgdata = $self->{data}{$pkg};
		if (not defined $pkgdata) {
				$pkgdata = {};
				$self->{data}{$pkg} = $pkgdata;
		}
		return $pkgdata;
}


=item $parser->parse_file (filename)

Parse one xs file.  Stores all the collected data in I<$parser>'s internal
data structures.

=cut

sub parse_file {
	my $self = shift;
	my $filename = shift;

	local *IN;
	open IN, $filename or die "can't open $filename: $!\n";
	print STDERR "scanning $filename\n" if $verbose;
	$self->{filehandle} = \*IN;
	$self->{filename} = $filename;

	# there was once a single state machine to parse an entire
	# file, but it turned into a bi-level state machine because
	# of the two-part nature of XS files.  that's silly, so i've
	# broken it into two loops: the part that scans up to the
	# first MODULE line, and the part that scans the rest of the
	# file.

	my $lastpod = undef;	# most recently-read pod (for next xsub)
	my @thesepackages = ();	# packages seen in this file

	# In the verbatim C portion of the file:
	# seek the first MODULE line *outside* comments.
	# collect any pod we encounter; only certain ones are 
	# precious to us...  my... preciousssss... ahem.
	$self->{module}  = undef;
	$self->{package} = undef;
	$self->{prefix}  = undef;
	$self->{object}  = undef;
	while (<IN>) {
		chomp;
		# in the verbatim C section before the first MODULE line,
		# we need to be on the lookout for a few things...
		# we need the first MODULE line, of course...
		if ($self->is_module_line ($_)) {
			last; # go to the next state machine.

		# mostly we want pods.
		} elsif (/^=/) {
			my $thispod = $self->slurp_pod_paragraph ($_);
			# we're only interested in certain pod directives here.
			if (/^=for\s+(apidoc|object)\b/) {
				my $which = $1;
				warn "$filename:".($.-@{$thispod->{lines}}+1).":"
				   . " =for $which found before "
				   . "MODULE directive\n";
			}
			push @{ $self->{xspods} }, $thispod;

##		# we also need to track whether we're in a C comment, because
##		# MODULE directives are ignore in multiline comments.
##		} elsif (m{/\*}) {
##			# there was an open comment marker on this line.
##			# see if it's alone.
##			s{/\*.*\*/}{}g;
##			if (m{/\*}) {
##				# look for the end...
##				while (<IN>) {
##				}
##			}
		}
	}

	# preprocessor conditionals
	my @cond;

	$lastpod = undef;
	while (<IN>) {
		#
		# we're seeking xsubs and pods to document the Perl interface.
		#
		if ($self->is_module_line ($_)) {
			# xsubs cannot steal pods across MODULE lines.
			$lastpod = undef;

		} elsif (/^\s*$/) {
			# ignore blank lines; but a blank line after a pod
			# means it can't be associated with an xsub.
			$lastpod = undef;

		} elsif (/^\s*#\s*(if|ifdef|ifndef)\s*(\s.*)$/) {
			#warn "conditional $1 $2\n";
			push @cond, $2;
			#print Dumper(\@cond);
		} elsif (/^\s*#\s*else\s*(\s.*)?$/) {
			#warn "else $cond[-1]\n";
			if (exists $cond[$#cond]) {
				$cond[$#cond] = '!' . $cond[$#cond];
			}
		} elsif (/^\s*#\s*endif\s*(\s.*)?$/) {
			#warn "endif $cond[-1]\n";
			pop @cond;
		} elsif (/^\s*#/) {
			# ignore comments.  we've already determined that 
			# this isn't a preprocessor directive (or at least
			# not one in which we're interested).

		} elsif (/^(BOOT|PROTOTYPES)/) {
			# ignore keyword lines in which we aren't interested

		} elsif (/^=/) {
			# slurp in pod, up to and including the next =cut.
			# put it in $lastpod so that the next-discovered
			# xsub can claim it.
			$lastpod = $self->slurp_pod_paragraph ($_);

			# we're interested in certain pod directives at
			# this point...
			if (/^=for\s+object(?:\s+([\w\:]*))?(.*)/) {
				$self->{object} = $1;
				if ($2) {
					$self->pkgdata->{blurb} = $2;
					$self->pkgdata->{blurb} =~ s/^\s*-\s*//;

					# If the line has the special form
					# "=for object Foo (Bar)", we take this
					# to mean: document the object Bar in
					# the file Foo.
					if ($self->pkgdata->{blurb} =~ s/\s*\((.*)\)//)
					{
						print STDERR "Documenting object $1 in file "
									.$self->{object}."\n";
						$self->pkgdata->{object} = $1;
						if ('' eq $self->pkgdata->{blurb})
						{
							delete $self->pkgdata->{blurb};
						}
					}
				}
			} elsif (/^=for\s+(enum|flags)\s+([\w:]+)/) {
				push @{ $self->pkgdata->{enums} }, {
					type => $1,
					name => $2,
					pod => $lastpod,
				};
				# claim this pod now!
				$lastpod = undef;
			} elsif (/^=for\s+see_also\s+(.+)$/) {
				push @{ $self->pkgdata->{see_alsos} }, $1;
				# claim this pod now!
				$lastpod = undef;
			} elsif (/^=for\s+deprecated_by\s+([\w:]+)$/) {
				push @{ $self->pkgdata->{deprecated_bys} }, $1;
				$lastpod = undef;
			}
			push @{ $self->pkgdata->{pods} }, $lastpod
				if defined $lastpod;

		} elsif (/^\w+/) {
			# there's something at the beginning of the line!
			# we've ruled out everything else, so this must be
			# an xsub.  slurp in everything up to the next
			# blank line (or end of file).   i know that's not
			# *really* an entire XSUB body, but we don't care
			# -- we only need the return value, name, arg types,
			# and body type, and there aren't supposed to be 
			# blank lines in all of that.
			my @thisxsub = ($_);
			while (<IN>) {
				chomp;
				last if /^\s*$/;
				push @thisxsub, $_;
			}
			my $xsub = $self->parse_xsub (\@thisxsub);
			if ($lastpod) {
				# aha! we'll lay claim to that...
				pop @{ $self->pkgdata->{pods} };
				$xsub->{pod} = $lastpod;
				$lastpod = undef;
			}
			$xsub->{preprocessor_conditionals} = [ @cond ];
			push @{ $self->pkgdata->{xsubs} }, $xsub;

		} else {
			# this is probably xsub function body, comment, or
			# some other stuff we don't care about.
		}
	}

	# that's it for this file...
	close IN;
	delete $self->{filehandle};
	delete $self->{filename};
}


=item $parser->swizzle_pods

Match C<=for apidoc> pods to xsubs.

=cut

sub swizzle_pods {
	my $self = shift;
	foreach my $package (keys %{$self->{data}}) {
		my $pkgdata = $self->{data}{$package};
		next unless $pkgdata->{pods};
		next unless $pkgdata->{xsubs};
		my $pods = $pkgdata->{pods};
		for (my $i = @$pods-1 ; $i >= 0 ; $i--) {
			my $firstline = $pods->[$i]{lines}[0];
			next unless $firstline =~ /=for\s+apidoc\s+([:\w]+)\s*/;
			my $name = $1;
			foreach my $xsub (@{ $pkgdata->{xsubs} }) {
				if ($name eq $xsub->{symname}) {
					$xsub->{pod} = $pods->[$i];
					splice @$pods, $i, 1;
					last;
				}
			}
		}
	}
}


=item $parser->preprocess_pods

Honor the C<__hide__> and C<__function__> directives in C<=for apidoc> lines.

We look for the strings anywhere, but you'll typically have it at the end of
the line, e.g.:

  =for apidoc symname __hide__        for detached blocks
  =for apidoc __hide__                for attached blocks

  =for apidoc symname __function__    for functions rather than methods
  =for apidoc __function__            for functions rather than methods

=cut

sub preprocess_pods {
	my $self = shift;
	foreach my $package (keys %{$self->{data}}) {
		my $pkgdata = $self->{data}{$package};

		foreach (@{$pkgdata->{pods}})
		{
			my $firstline = $_->{lines}[0];
			if ($firstline) {
				$_->{position} = $1 if ($firstline =~ /=for\s+position\s+(\w+)/);
			}
		}

		next unless $pkgdata->{xsubs};

		# look for magic keywords in the =for apidoc
		foreach (@{$pkgdata->{xsubs}})
		{
			my $firstline = $_->{pod}{lines}[0];
			if ($firstline) {
				$_->{function} = ($firstline =~ /__function__/);
				$_->{hidden} = ($firstline =~ /__hide__/);
				$_->{deprecated} = ($firstline =~ /__deprecated__/);
				$_->{gerror} = ($firstline =~ /__gerror__/);
			}
		}
	}
}


# ===============================================================

=item bool = $parser->is_module_line ($line)

Analyze I<$line> to see if it contains an XS MODULE directive.  If so, returns
true after setting the I<$parser>'s I<module>, I<package>, and I<prefix>
accordingly.

=cut

sub is_module_line {
	my $self = shift;
	my $l = shift;
	if ($l =~ /^MODULE\s*=\s*([:\w]+)
	            (?:\s+PACKAGE\s*=\s*([:\w]+)
	            (?:\s+PREFIX\s*=\s*([:\w]+))?)?
	            /x) {
		$self->{module}  = $1;
		$self->{package} = $2 || $self->{module};
		$self->{prefix}  = $3;
		$self->{object}  = undef;
		return 1;
	} else {
		return 0;
	}
}


=item $pod = $parser->slurp_pod_paragraph ($firstline, $term_regex=/^=cut\s*/)

Slurp up POD lines from I<$filehandle> from here to the next
I<$term_regex> or EOF.  Since you probably already read a
line to determine that we needed to start a pod, you can pass
that first line to be included.

=cut

sub slurp_pod_paragraph {
	my $parser     = shift;
	my $firstline  = shift;
	my $term_regex = shift || qr/^=cut\s*/o;
	my $filehandle = $parser->{filehandle};

	# just in case.
	chomp $firstline;

	my @lines = $firstline ? ($firstline) : ();
	while (my $line = <$filehandle>) {
		chomp $line;
		push @lines, $line;
		last if $line =~ m/$term_regex/;
	}

	return {
		filename => $parser->{filename},
		line => $. - @lines,
		lines => \@lines,
	};
}


=item $xsub = $parser->parse_xsub (\@lines)

=item $xsub = $parser->parse_xsub (@lines)

Parse an xsub header, in the form of a list of lines,
into a data structure describing the xsub.  That includes
pulling out the argument types, aliases, and code type.

Without artificial intelligence, we cannot reliably 
determine anything about the types or number of parameters
returned from xsubs with PPCODE bodies.

OUTLIST parameters are pulled from the args list and put
into an "outlist" key.  IN_OUTLIST parameters are put into
both.

Data type names are not mangled at all.

Note that the method can take either a list of lines or a reference to a
list of lines.  The flat list form is provided for compatibility; the
reference form is preferred, to avoid duplicating a potentially large list
of strings.

=cut

sub parse_xsub {
	my ($self, @thisxsub) = @_;

	# allow for pass-by-reference.
	@thisxsub = @{ $thisxsub[0] }
	    if @thisxsub == 1 && 'ARRAY' eq ref $thisxsub[0];

	map { s/#.*$// } @thisxsub;

	my $filename = $self->{filename};
	my $oldwarn = $SIG{__WARN__};
#$SIG{__WARN__} = sub {
#		warn "$self->{filename}:$.:  "
#		   . join(" / ", $self->{module}||"", $self->{package}||"")
#		   . "\n    $_[0]\n   ".Dumper(\@thisxsub)
#};

	my $lineno = $. - @thisxsub;
	my %xsub = (
		'filename' => $filename,
		'line' => ($.-@thisxsub),
		'module' => $self->{module},
		'package' => $self->package, # to be overwritten as needed
	);
	my $args;

	#warn Dumper(\@thisxsub);

	# merge continuation lines.  xsubpp allows continuation lines in the
	# xsub arguments list and barfs on them in other spots, but with xsubpp
	# providing such validation, we'll just cheat and merge any that we find.
	# this will bork the line counting logic we have below, but i don't see
	# a fix for it without major tearup of the code here.
	my @foo = @thisxsub;
	@thisxsub = shift @foo;
	while (my $s = shift @foo) {
		if ($thisxsub[$#thisxsub] =~ s/\\$//) {
			chomp $thisxsub[$#thisxsub];
			$thisxsub[$#thisxsub] .= $s;
		} else {
			push @thisxsub, $s;
		}
	}

	if ($thisxsub[0] =~ /^([^(]+\s+\*?)   # return type, possibly with a *
						  \b([:\w]+)\s*   # symbol name
						  \(              # open paren
						    (.*)          # whatever's inside, if anything
						  \)              # close paren, maybe with space
						  \s*;?\s*$/x) {  # and maybe other junk at the end
		# all on one line
		$xsub{symname} = $2;
		$args = $3;
		my $r = $1;
		$xsub{return_type} = [$r]
			unless $r =~ /^void\s*$/;
		shift @thisxsub; $lineno++;

	} elsif ($thisxsub[1] =~ /^(\S+)\s*\((.*)\);?\s*$/) {
		# multiple lines
		$xsub{symname} = $1;
		$args = $2;
		# return type is on line 0
		$thisxsub[0] =~ s/\s*$//;
		$xsub{return_type} = [$thisxsub[0]]
			unless $thisxsub[0] =~ /^void\s*$/;
		shift @thisxsub; $lineno++;
		shift @thisxsub; $lineno++;
	}

	# eat padding spaces from the arg string.  i tried several ways of
	# building this into the regexen above, but found nothing that still
	# allowed the arg string to be empty, which we'll have for functions
	# (not methods) without resorting to extremely arcane negatory
	# lookbeside assertiveness operators.
	$args =~ s/^\s*//;
	$args =~ s/\s*$//;

	# we can get empty arg strings on non-methods.
	#warn "$filename:$lineno: WTF : args string is empty\n"
	#	if not defined $args;

	my %args = ();
	my @argstr = split /\s*,\s*/, $args;
	#warn Dumper([$args, \%args, \@argstr]);
	for (my $i = 0 ; $i < @argstr ; $i++) {
		# the last one can be an ellipsis, let's handle that specially
		if ($i == $#argstr and $argstr[$i] eq '...') {
			$args{'...'} = { name => '...', };
			push @{ $xsub{args} }, $args{'...'};
			last;
		}
		if ($argstr[$i] =~
		               /^(?:(IN_OUTLIST|OUTLIST)\s+)? # OUTLIST would be 1st
		                 ([^=]+(?:\b|\s))?  # arg type is optional, too
		                 (\w+)              # arg name
		                 (?:\s*=\s*(.+))?   # possibly a default value
		                 $/x) {
			if (defined $1) {
				push @{ $xsub{outlist} }, {
					type => $2,
					name => $3,
				};
				if ($1 eq 'IN_OUTLIST') {
					# also an arg
					$args{$3} = {
						type => $2,
						name => $3,
					};
					$args{$3}{default} = $4 if defined $4;
					push @{ $xsub{args} }, $args{$3};
				}
			
			} else {
				$args{$3} = {
					type => $2,
					name => $3,
				};
				$args{$3}{default} = $4 if defined $4;
				push @{ $xsub{args} }, $args{$3};
			}
		} elsif ($argstr[$i] =~ /^g?int\s+length\((\w+)\)$/) {
			#warn " ******* $i is string length of $1 *****\n";
		} else {
			warn "$filename:$lineno: ($xsub{symname}) don't know how to"
			   . " parse arg $i, '$argstr[$i]'\n";
		}
	}

	

	my $xstate = 'args';
	while ($_ = shift @thisxsub) {
		if (/^\s*ALIAS:/) {
			$xstate = 'alias';
		} elsif (/\s*(PREINIT|CLEANUP|OUTPUT|C_ARGS):/) {
			$xstate = 'code';
		} elsif (/\s*(PPCODE|CODE):/) {
			$xsub{codetype} = $1;
			last;
		} elsif ($xstate eq 'alias') {
			/^\s*([:\w]+)\s*=\s*(\d+)\s*$/;
			if (defined $2) {
				$xsub{alias}{$1} = $2;
			} else {
				warn "$filename:$lineno: WTF : seeking alias on line $_\n";
			}
		} elsif ($xstate eq 'args') {
			if (/^\s*
			      (.+(?:\b|\s))      # datatype
			      (\w+)              # arg name
			      ;?                 # optional trailing semicolon
			      \s*$/x)
			{
				if (exists $args{$2}) {
					$args{$2}{type} = $1
				} else {
					warn "$filename:$lineno: unused arg $2\n";
					warn "  line was '$_'\n";
				}
			} elsif (/^\s*/) {
				# must've stripped a comment.
			} else {
				warn "$filename:$lineno: WTF : seeking args on line $_\n";
			}
		}
		$lineno++;
	}

	# mangle the symbol name from an xsub into its actual perl name.
	$xsub{original_name} = $xsub{symname};
	if (defined $self->{prefix}) {
		my $pkg = $self->package;
		$xsub{symname} =~ s/^($self->{prefix})?/$pkg\::/;
	} else {
		$xsub{symname} = ($self->package)."::".$xsub{symname};
	}

	# sanitize all the C type declarations, which we have 
	# collected in the arguments, outlist, and return types.
	if ($xsub{args}) {
		foreach my $a (@{ $xsub{args} }) {
			$a->{type} = sanitize_type ($a->{type})
				if defined $a->{type};
		}
	}
	if ($xsub{outlist}) {
		foreach my $a (@{ $xsub{outlist} }) {
			$a->{type} = sanitize_type ($a->{type})
				if defined $a->{type};
		}
	}
	if ($xsub{return_type}) {
		for (my $i = 0 ; $i < @{ $xsub{return_type} } ; $i++) {
			$xsub{return_type}[$i] =
				sanitize_type ($xsub{return_type}[$i]);
		}
	}

	$SIG{__WARN__} = $oldwarn;

	return \%xsub;
}



sub sanitize_type {
		local $_ = shift;
		s/\s+/ /g;        # squash all whitespace
		s/^\s//;          # zap leading space
		s/\s$//;          # zap trailing space
		s/(?<=\S)\*$/ */; # stars may not be glued to the name
		return $_;
}


sub canonicalize_xsubs {
	my $self = shift;

	return undef unless 'HASH' eq ref $self->{data};

	# make sure that each package contains an xsub hash for each
	# xsub, whether an alias or not.
	foreach my $package (keys %{$self->{data}}) {
		my $pkgdata = $self->{data}{$package};
		next unless $pkgdata or $pkgdata->{xsubs};
		my $xsubs = $pkgdata->{xsubs};
		@$xsubs = map { split_aliases ($_) } @$xsubs;
	}
}

sub split_aliases {
	my $xsub = shift;
	return $xsub unless exists $xsub->{alias};
	return $xsub unless 'HASH' eq ref $xsub->{alias};
	my %aliases = %{ $xsub->{alias} };
	my @xsubs = ();
	my %seen = ();
	foreach my $a (sort { $aliases{$a} <=> $aliases{$b} } keys %aliases) {
		push @xsubs, {
			%$xsub,
			symname => $a,
			pod => undef,
			# we do a deep copy on the args, so that changes to one do not
			# affect another.  in particular, adding docs or hiding an arg
			# in one xsub shouldn't affect another.
			args => deep_copy_ref ($xsub->{args}),
		};
		$seen{ $aliases{$a} }++;
	}
	if (! $seen{0}) {
		unshift @xsubs, $xsub;
	}

	return @xsubs;
}


sub deep_copy_ref {
		my $ref = shift;
		return undef if not $ref;
		my $reftype = ref $ref;
		if ('ARRAY' eq $reftype) {
				my @newary = map { deep_copy_ref ($_) } @$ref;
				return \@newary;
		} elsif ('HASH' eq $reftype) {
				my %newhash = map { $_, deep_copy_ref ($ref->{$_}) } keys %$ref;
				return \%newhash;
		} else {
				return $ref;
		}
}

=item $parser->clean_out_empty_pods

Looks through the data member of the parser and removes any keys (and
associated values) when no pod, enums, and xsubs exist for the package.

=cut

sub clean_out_empty_pods
{
	my $data = shift;
	return unless (exists ($data->{data}));
	$data = $data->{data};

	my $pod;
	my $xsub;
	foreach (keys %$data)	
	{
		$pod = $data->{$_};
		next if ((exists $pod->{pods} and scalar @{$pod->{pods}}) or
				 exists $pod->{enums} or 
				 scalar (grep (!/DESTROY/, 
								 map { $_->{hidden} 
								       ? ()
									   : $_->{symname} }
								 	@{$pod->{xsubs}})));
		#print STDERR "Deleting $_ from doc.pl's \$data\n";
		delete $data->{$_}; 
	}
}


1;

__END__

=back

=head1 AUTHOR

muppet E<lt>scott at asofyet dot orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2003, 2004 by muppet

This library is free software; you can redistribute it and/or modify it under
the terms of the GNU Library General Public License as published by the Free
Software Foundation; either version 2.1 of the License, or (at your option) any
later version.

This library is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU Library General Public License for more
details.

You should have received a copy of the GNU Library General Public License along
with this library; if not, write to the Free Software Foundation, Inc., 59
Temple Place - Suite 330, Boston, MA  02111-1307  USA.

=cut