File: join.pl

package info (click to toggle)
snpeff 5.2.f%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 701,384 kB
  • sloc: java: 62,547; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (57 lines) | stat: -rwxr-xr-x 1,472 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/perl

#------------------------------------------------------------------------------
#
# Joins the lines of file_big and file_small by the common 
# values in column_big_file and column_file_small.
#
# Note: It is assumed that column_file_small is a primary key in file_small.
#------------------------------------------------------------------------------

use strict;
use warnings;

my(%linesByKey); # Lines from fileSmall, indexed by key

#---
# Command line arguments
#
# An optional mode flag may precede the positional arguments:
#   -all   print every line of file_big; the matching line of file_small
#          is appended when there is one, otherwise an empty field
#   -diff  print only the lines of file_big whose key is NOT in file_small
#   (none) print only the lines of file_big whose key IS in file_small
#
# If both flags are given, '-all' must come first and takes precedence.
#---
my($all) = 0;
if( @ARGV && $ARGV[0] eq '-all' ) { $all = 1; shift @ARGV; }

my($diff) = 0;
if( @ARGV && $ARGV[0] eq '-diff' ) { $diff = 1; shift @ARGV; }

my($fileBig, $colFileBig, $fileSmall, $colFileSmall) = @ARGV;
die "Usage: join.pl [-all|-diff] file_big.txt column_big_file file_small.txt column_file_small\n"
	if( !defined($colFileSmall) || $colFileSmall eq '' );
$colFileBig--; $colFileSmall--; # Transform to zero-based

#---
# Read small file: build the key -> line index.
# If a key occurs more than once, the last line seen wins.
#---
open(my $smallFh, '<', $fileSmall) or die "Cannot open file '$fileSmall': $!\n";
while( my $l = <$smallFh> ) {
	chomp $l;
	my @t = split /\t/, $l;
	my $key = $t[$colFileSmall];
	$key = '' if( !defined $key ); # Line has no such column: index it under the empty key
	$linesByKey{$key} = $l;
}
close $smallFh;

#---
# Read fileBig: stream it line by line and print according to the mode
#---
open(my $bigFh, '<', $fileBig) or die "Cannot open file '$fileBig': $!\n";
while( my $l = <$bigFh> ) {
	chomp $l;
	my @t = split /\t/, $l;
	my $key = $t[$colFileBig];
	$key = '' if( !defined $key ); # Line has no such column: look up the empty key

	my $found = exists $linesByKey{$key};
	my $joined = $found ? $linesByKey{$key} : ''; # Empty field when there is no match

	if( $all )	{ print "$l\t$joined\n"; }
	elsif( $diff ) {
		# No match by definition, so the appended field is always empty
		if( ! $found )	{ print "$l\t\n"; }
	} else {
		if( $found )	{ print "$l\t$joined\n"; }
	}
}
close $bigFh;