File: SAM_set_transcribed_orient_info.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (103 lines) | stat: -rwxr-xr-x 2,150 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env perl

use strict;
use lib ("/usr/lib/trinityrnaseq/PerlLib");
use SAM_reader;
use SAM_entry;

my $usage = "usage: $0 file.sam SS_lib_type={F,R,FR,RF}\n\n";

my $sam_file = $ARGV[0] or die $usage;
my $SS_lib_type = $ARGV[1] or die $usage;

unless ($SS_lib_type =~ /^(F|R|FR|RF)$/) {
	die "Error, SS_lib_type must be F, R, FR, or RF";
}


main: {

	my $sam_reader = new SAM_reader($sam_file);


	my $num_verified = 0;
	my $num_conflicted = 0;

	my $counter = 0;

	while ($sam_reader->has_next()) {
		
		$counter++;
		print STDERR "\r[$counter]  " if $counter % 10000 == 0;

		#if ($counter % 300000 == 0) { last; } ## debugging
		
		my $sam_entry = $sam_reader->get_next();

		my $aligned_strand = $sam_entry->get_query_strand();
		my $opposite_strand = ($aligned_strand eq '+') ? '-' : '+';
		
		my $transcribed_strand = $aligned_strand;

		if ($sam_entry->is_paired()) {
			
			unless ($SS_lib_type =~ /^(FR|RF)$/) {
				die "Error, SS_lib_type: $SS_lib_type is not compatible with paired reads";
			}
			

			if ($sam_entry->is_first_in_pair()) {
				
				if ($SS_lib_type eq "RF") {
					$transcribed_strand = $opposite_strand;
				}
			}

			else {
				# second in pair
				if ($SS_lib_type eq "FR") {
					$transcribed_strand = $opposite_strand;
				}
			}
		}
		else {
			## Unpaired or Single Reads
			if ($SS_lib_type eq "R") {
				$transcribed_strand = $opposite_strand;
			}
		}

		my $sam_text = $sam_entry->toString();

		my @x = split(/\t/, $sam_text);

		my $strand_flag = "XS:A:$transcribed_strand";

		if (my ($XS_A) = grep { $_ =~ /^XS:A:/ } @x) {
			if ($XS_A eq $strand_flag) {
				$num_verified++;
			}
			else {
				$num_conflicted++;
				print STDERR "Conflicting strand info: existing = $XS_A, expected = $strand_flag\n";
				@x = grep { $_ !~ /^XS:A:/ } @x;
				push (@x, $strand_flag);
			}
			
		}
		else {
			push (@x, $strand_flag);
		}
		
		
		print join("\t", @x) . "\n";
	}

	if ($num_conflicted || $num_verified) {
		print STDERR "Percent entries with conflicted transcribed strand assignments = " 
			. sprintf("%.2f", $num_conflicted / ($num_conflicted + $num_verified) * 100) . "\n";
	}

	exit(0);

}