1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
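# Reweigh the words of a word list.
# Sample: "word 8671269" becomes "word 200".
# Source: the file named by the first command-line argument.
# Output: written to the terminal (standard output).
# The biggest value equals the number of lines in the file.
# Divide this by the width of the target range (<max> - <min>; about 240 when
# the lowest values of a 0-255 scale are to be avoided) and round up.
# Divide every other value (the number of lines left in the list) by that
# number, round down, and add <min>.
# All values should then lie between <min> and <max> (e.g. between 15 and 254).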
use strict;
use warnings;
use utf8;

# Exactly three arguments are required: <file> <min> <max>.
# The usage message is raised through die so that it goes to STDERR and never
# ends up in the (possibly redirected) word-list output on STDOUT.
if (@ARGV != 3) {
    die "Need 3 arguments: <file> <min> <max>\n";
}

my $count = 0;
my $min   = $ARGV[1];
my $max   = $ARGV[2];

# Both bounds must be plain integers and <max> must exceed <min>; otherwise
# the divider below would be derived from a zero or negative range (division
# by zero, or weights outside the requested interval).
for my $bound ($min, $max) {
    die "Bounds must be integers, got '$bound'\n"
        unless $bound =~ /\A[+-]?[0-9]+\z/;
}
die "<max> ($max) must be greater than <min> ($min)\n" unless $max > $min;

# Open original file.
# Three-argument open with a lexical handle: the file name is never parsed
# for mode or pipe characters, and the handle is released once counting ends.
open my $count_fh, '<', $ARGV[0]
    or die "Cannot open '$ARGV[0]': $!";

# Count the # of lines
while (<$count_fh>) {
    $count++;
}
close $count_fh;

# Calculate the divider to ensure results between min and max.
# int(...) + 1 is always strictly greater than $count / ($max - $min), so
# int($n / $divider) stays below ($max - $min) for every $n up to $count.
my $divider = int($count / ($max - $min)) + 1;
# Tell whether the given string is an optionally signed run of digits.
# Returns the result of the pattern match (true on match, false otherwise).
sub is_integer {
    my ($candidate) = @_;
    return $candidate =~ /^[+-]?\d+$/;
}
# Second pass: re-open the source file, decoding it as UTF-8, and print every
# kept line with its numbers replaced by the newly computed weight.
# A lexical handle is used instead of a package-global bareword handle, and
# the error message names the file that could not be opened.
open my $words_fh, '<:encoding(utf8)', $ARGV[0]
    or die "Cannot open '$ARGV[0]': $!";

# NOTE(review): the original comment listed these characters for removal, but
# no code in this loop strips them: ’, “, ।, —, ‘, ·, −, °, ”, ॥
LINE: while (my $line = <$words_fh>) {
    # $count runs downwards from the total line count, so earlier lines
    # receive larger weights.
    $count--;

    # Only lines containing whitespace are treated as word lines; any other
    # line is skipped and NOT printed.
    # NOTE(review): the original comment said such lines are printed
    # unchanged -- confirm which behaviour is wanted.
    next LINE unless $line =~ /\s/;

    my $weighed = int($count / $divider) + $min;

    # Greedy capture: everything before the LAST whitespace character of the
    # line (for a newline-terminated line that is the whole line text).
    my ($name) = $line =~ m/(.*)\s/;

    # Skip captures of at most one character and captures that are a bare
    # integer.
    next LINE unless length($name) > 1 && !is_integer($name);

    # Every integer or decimal number on the line is replaced by the weight.
    # NOTE(review): this also rewrites digits inside the word itself --
    # confirm that word lines never contain other numbers.
    $line =~ s/(\d*[.])?\d+/$weighed/g;

    # The line was decoded on input; encode it back to UTF-8 bytes before
    # printing to the (layer-less) STDOUT.
    utf8::encode($line);
    print $line;
}
close $words_fh;
|