1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
#!/usr/bin/perl -w
# Break long quoted strings in perl code into smaller pieces
# This version only breaks at blanks. See sub break_at_blanks to
# customize.
#
# usage:
# break_long_quotes.pl -ln myfile.pl >myfile.new
#
# where n specifies the maximum quote length.
# NOTES:
# 1. Use with caution - has not been extensively tested
#
# 2. The output is not beautified so that you can use diff to see what
# changed. If all is ok, run the output through perltidy to clean it up.
#
# 3. This version only breaks single-line quotes contained within
# either single or double quotes.
# Steve Hancock, Sept 28, 2006
#
use strict;
use Getopt::Std;
$| = 1;
use vars qw($opt_l $opt_h);
my $usage = <<EOM;
usage: break_long_quotes.pl [ -ln ] filename >outfile
where n=line length (default 72)
EOM
getopts('hl:') or die "$usage";
if ($opt_h) { die $usage }
if ( !defined $opt_l ) {
$opt_l = 70;
}
else {
$opt_l =~ /^\d+$/ or die "$usage";
}
unless ( @ARGV == 1 ) { die $usage }
my $file = $ARGV[0];
scan_file( $file, $opt_l );
sub scan_file {
my ( $file, $line_length ) = @_;
use Perl::Tidy;
use IO::File;
my $fh = IO::File->new( $file, 'r' );
unless ($fh) { die "cannot open '$file': $!\n" }
my $formatter = MyWriter->new($line_length);
my $err=perltidy(
'formatter' => $formatter, # callback object
'source' => $fh,
'argv' => "-npro -se", # don't need .perltidyrc
# errors to STDOUT
);
if ($err){
die "Error calling perltidy\n";
}
$fh->close();
} ## end sub scan_file
#####################################################################
#
# This is a class with a write_line() method which receives
# tokenized lines from perltidy
#
#####################################################################
package MyWriter;
sub new {
my ( $class, $line_length ) = @_;
my $comment_block = "";
bless {
_rcomment_block => \$comment_block,
_maximum_comment_length => 0,
_max_quote_length => $line_length,
_in_hanging_side_comment => 0,
}, $class;
} ## end sub new
sub write_line {
# This is called from perltidy line-by-line
# We will look for quotes and fix them up if necessary
my $self = shift;
my $line_of_tokens = shift;
my $line_type = $line_of_tokens->{_line_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $input_line = $line_of_tokens->{_line_text}; # the original line
my $rtoken_type = $line_of_tokens->{_rtoken_type}; # type of tokens
my $rtokens = $line_of_tokens->{_rtokens}; # text of tokens
my $starting_in_quote =
$line_of_tokens->{_starting_in_quote}; # text of tokens
my $ending_in_quote = $line_of_tokens->{_ending_in_quote}; # text of tokens
my $max_quote_length = $self->{_max_quote_length};
chomp $input_line;
# look in lines of CODE (and not POD for example)
if ( $line_type eq 'CODE' && @$rtoken_type ) {
my $jmax = @$rtoken_type - 1;
# find leading whitespace
my $leading_whitespace = ( $input_line =~ /^(\s*)/ ) ? $1 : "";
if ($starting_in_quote) {$leading_whitespace=""};
my $new_line = $leading_whitespace;
# loop over tokens looking for quotes (token type Q)
for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
# pull out the actual token text
my $token = $$rtokens[$j];
# look for long quoted strings on a single line
# (multiple line quotes not currently handled)
if ( $$rtoken_type[$j] eq 'Q'
&& !( $j == 0 && $starting_in_quote )
&& !( $j == $jmax && $ending_in_quote )
&& ( length($token) > $max_quote_length ) )
{
my $quote_char = substr( $token, 0, 1 );
if ( $quote_char eq '"' || $quote_char eq '\'' ) {
# safety check - shouldn't happen
my $check_char = substr( $token, -1, 1 );
if ( $check_char ne $quote_char ) {
die <<EOM;
programming error at line $input_line
starting quote character is <<$quote_char>> but ending quote character is <<$check_char>>
quoted string is:
$token
EOM
} ## end if ( $check_char ne $quote_char)
$token =
break_at_blanks( $token, $quote_char, $max_quote_length );
} ## end if ( $quote_char eq '"'...
} ## end if ( $$rtoken_type[$j]...
$new_line .= $token;
} ## end for ( my $j = 0 ; $j <=...
# substitute the modified line for the original line
$input_line = $new_line;
} ## end if ( $line_type eq 'CODE')
# print the line
$self->print($input_line."\n");
return;
} ## end sub write_line
sub break_at_blanks {
# break a string at one or more spaces so that the longest substring is
# less than the desired length (if possible).
my ( $str, $quote_char, $max_length ) = @_;
my $blank = ' ';
my $prev_char = "";
my @break_after_pos;
my $quote_pos = -1;
while ( ( $quote_pos = index( $str, $blank, 1 + $quote_pos ) ) >= 0 ) {
# as a precaution, do not break if preceded by a backslash
if ( $quote_pos > 0 ) {
next if ( substr( $str, $quote_pos - 1, 1 ) eq '\\' );
}
push @break_after_pos, $quote_pos;
} ## end while ( ( $quote_pos = index...
push @break_after_pos, length($str);
my $starting_pos = 0;
my $new_str = "";
for ( my $i = 1 ; $i < @break_after_pos ; $i++ ) {
my $pos = $break_after_pos[$i];
my $length = $pos - $starting_pos;
if ( $length > $max_length - 1 ) {
$pos = $break_after_pos[ $i - 1 ];
$new_str .= substr( $str, $starting_pos, $pos - $starting_pos + 1 )
. "$quote_char . $quote_char";
$starting_pos = $pos + 1;
} ## end if ( $length > $max_length...
} ## end for ( my $i = 1 ; $i < ...
my $pos = length($str);
$new_str .= substr( $str, $starting_pos, $pos );
return $new_str;
} ## end sub break_at_blanks
sub print {
my ( $self, $input_line ) = @_;
print $input_line;
}
# called once after the last line of a file
sub finish_formatting {
my $self = shift;
$self->flush_comments();
}
|