#!/usr/bin/perl
# Author: Mario Stanke
# Contact: mario.stanke@uni-greifswald.de
# Last modification: May 1st 2018
use Getopt::Long;
use strict;
# Command-line switches and their defaults.
my $complement = 0;     # --v: print the lines that do NOT match
my $whitespace = 0;     # --whitespace: split columns at whitespace, not tabs
my $patfrom    = "";    # --patfrom: search pattern of the optional substitution
my $patto      = "";    # --patto: replacement of the optional substitution

# GetOptions returns false for an unknown or malformed option (after warning
# on STDERR). Stop in that case: silently continuing could filter with the
# wrong semantics, e.g. when a mistyped --v leaves the complement switch off.
GetOptions(
    'v!'          => \$complement,
    'patfrom:s'   => \$patfrom,
    'patto:s'     => \$patto,
    'whitespace!' => \$whitespace
) or die("Invalid command-line options; run $0 without arguments for usage.\n");
# Exactly two positional arguments are required: the list file and the column
# number. Anything else prints the help text to STDOUT and exits with status 1.
if ( @ARGV != 2 ) {
    print
        "Efficient script for printing only those lines where the n-th ",
        " column is a word in the match.lst file\n\n",
        "Usage:\n\n",
        "cat input | $0 match.lst n > output\n\n",
        "Columns are based on tab-separation and are 1-based.\n",
        "Options:\n",
        " --v Use complement. Print all lines NOT matching.\n",
        " --patfrom --patto Apply query replace regular expression to the \n",
        " entry in the n-th column first before \n",
        " checking for membership in the list.\n",
        " Will use the pattern s/\$patfrom/\$patto/\n",
        " Useful for removing modifications, e.g.\n",
        " stripping a trailing -1 before performing the\n",
        " check.\n",
        " Both default to empty patterns.\n",
        " --whitespace Split columns at whitespace rather than tab.\n";
    exit(1);
}
# Positional arguments: the file holding the words to match, and the 1-based
# number of the column whose entry is looked up in that list.
my $matchfile = $ARGV[0];
my $n         = $ARGV[1];

# The column number must be a positive integer. Zero, negative or non-numeric
# values would otherwise silently index columns from the end of the line.
# tr/0-9//c only counts the non-digit characters (empty replacement list), so
# $n is left unchanged. It is used instead of a regex match on purpose: a
# successful match at file scope could become the "last successful pattern"
# that an empty --patfrom substitution may fall back on.
if ( $n eq "" || ( $n =~ tr/0-9//c ) || $n < 1 ) {
    die("Column number n must be a positive integer, got '$n'\n");
}

# Load the list: every line of the file (newline removed) is one word. The
# hash value counts occurrences, so every listed word maps to a true value.
# Three-argument open keeps special characters in the file name from being
# interpreted as an open mode.
open( my $match_fh, '<', $matchfile )
    or die("Could not open $matchfile: $!\n");
my %include = ();
while ( my $word = <$match_fh> ) {
    chomp $word;
    $include{$word}++;
}
close $match_fh;
# Filter STDIN: print each line whose n-th column is in %include, or, with
# --v, each line whose n-th column is not.

# Loop-invariant pieces, prepared once.
my $separator  = $whitespace ? qr/\s+/ : qr/\t/;
my $substitute = ( $patto ne "" || $patfrom ne "" );

# Wrapping the user pattern in (?:...) keeps the compiled pattern non-empty
# even when --patfrom is empty. A truly empty pattern in s/// makes Perl reuse
# the last successfully matched regex instead of matching the empty string.
my $from_pattern = qr/(?:$patfrom)/;

while ( my $line = <STDIN> ) {
    chomp $line;

    # A limit of n+1 leaves everything after the n-th column unsplit.
    my @columns = split( $separator, $line, $n + 1 );
    my $field   = $columns[ $n - 1 ];

    # A line with fewer than n columns has no entry to look up. Treat it as
    # "not in the list" rather than letting undef be coerced to the empty
    # string, which could match a blank line of the list file.
    my $listed = 0;
    if ( defined $field ) {

        # NOTE(review): /ee evaluates the --patto string as Perl code, so it
        # must be a valid Perl expression (e.g. '$1' or '"text"'). This is
        # only acceptable because the command line is trusted; never feed it
        # untrusted input.
        $field =~ s/$from_pattern/$patto/ee if $substitute;
        $listed = 1 if $include{$field};
    }

    print $line . "\n" if ( $complement xor $listed );
}