File: analyser.pl

package info (click to toggle)
sideretro 1.1.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,636 kB
  • sloc: ansic: 15,270; perl: 46; python: 44; makefile: 3
file content (75 lines) | stat: -rw-r--r-- 1,802 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env perl
################################################################################
#
#         FILE: analyser.pl
#
#        USAGE: perl analyser.pl
#
#  DESCRIPTION: Filter relevant insertions from a VCF file.
#
################################################################################

use strict;
use warnings;

# Some variables.
my $count = 0;
my $count_val = 0;
my $count_ins = 0;
my $count_homo_zyg = 0;
my $line_sz = 0;
my $shr = 0;

# Test for input arguments from command line.
if ( @ARGV ) {
	my $vcf = $ARGV[0];

	# Test input existence.
	die "ERROR: Non existent file \"$vcf\"" unless -f $vcf;

	open my $fh, '<', $vcf or die "Can't open file $vcf";
	while ( <$fh> ) {
		$count++;

		# Skip header lines.
		next if /^#/;
		chomp;
		my @line = split;
		$count_val++;

		# Skip Unknown chromosomes.
		next if (length($line[0]) < 4)
			or (length($line[0]) > 5)
			or ($line[6] ne "PASS");

		$count_ins++;
		$line_sz = @line - 9;
		my $ct = 0;

		# Test haplotype.
		foreach ( 9 .. $#line ) {
			$ct++ if (split(":", $line[$_]))[0] =~ /1/;
			$count_homo_zyg++ if $line[$_] =~ /^1\/1:/;
		}
		$shr++ if $ct > 1;
	}
	close $fh;

	# Print results.
	my $str = <<"	EOS";
	GENERAL STATISTICS: $vcf
	================================================================================
	Total number of lines in file:                  $count
	Total non header lines:                         $count_val
	Total valid insertions:                         $count_ins
	Number of shared insertions:                    $shr
	Ratio of homozygous insertions in all samples:  $count_homo_zyg / ($count_ins*$line_sz)
	================================================================================
	EOS

	print $str;
}
else {
	# Print an usage for help.
	die "ERROR: No input arguments";
}