File: Uncruft.pm

package info (click to toggle)
libstring-license-perl 0.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 2,892 kB
  • sloc: perl: 1,519; cpp: 360; ansic: 210; python: 27; ruby: 21; xml: 16; sh: 8; makefile: 3
file content (61 lines) | stat: -rw-r--r-- 1,221 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package Uncruft;

my $CLASS = __PACKAGE__;

use strict;
use warnings;

use base qw(Exporter);
our @EXPORT = qw(uncruft);

my $html_xml_tags_re = qr/<\/?(?:p|br|ref)(?:\s[^>]*)?>/i;

sub uncruft
{
	($_) = @_;

	# Remove generic comments: look for 4 or more lines beginning with
	# regular comment pattern and trim it. Fall back to old algorithm
	# if no such pattern found.
	my @matches = m/^[ \t]*([^a-zA-Z0-9\s]{1,3})[ \t]+\S/mg;
	if ( @matches >= 4 ) {
		my $comment_re = qr/^[ \t]*[\Q$matches[0]\E]{1,3}[ \t]*/m;
		s/$comment_re//g;
	}

	my @wordmatches = m/^[ \t]*(dnl|REM|COMMENT)[ \t]+\S/mg;
	if ( @wordmatches >= 4 ) {
		my $comment_re = qr/^[ \t]*\Q$wordmatches[0]\E[ \t]*/m;
		s/$comment_re//g;
	}

	# Remove other side of "boxed" comments
	s/[ \t]*[*#][ \t]*$//gm;

	# Remove Fortran comments
	s/^[cC]$//gm;
	s/^[cC] //gm;

	# Remove C / C++ comments
	s#(\*/|/\*|(?<!:)//)##g;

	# Strip escaped newline
	s/\s*\\n\s*/ /g;

	# strip trailing dash, assuming it is soft-wrap
	# (example: disclaimers in GNU autotools file "install-sh")
	s/-\r?\n//g;

	# strip common html and xml tags
	s/$html_xml_tags_re//g;

	tr/\t\r\n/ /;

	# this also removes quotes
	tr% A-Za-z.,:@;0-9\(\)/-%%cd;
	tr/ //s;

	return $_;
}

1;