File: SAM_strand_separator.pl

package info (click to toggle)
trinityrnaseq 2.15.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 468,004 kB
  • sloc: perl: 49,905; cpp: 17,993; java: 12,489; python: 3,282; sh: 1,989; ansic: 985; makefile: 717; xml: 62
file content (150 lines) | stat: -rwxr-xr-x 3,770 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env perl

use strict;
use warnings;
use Carp;
use lib ("/usr/lib/trinityrnaseq/PerlLib");
use SAM_reader;
use SAM_entry;
use File::Basename;

my $usage = "usage: $0 alignments.sam SS_lib_type=F,R,FR,RF\n\n";

my $sam_file = $ARGV[0] or die $usage;
my $SS_lib_type = $ARGV[1] or die $usage;

unless ($SS_lib_type =~ /^(F|R|FR|RF)$/) {
	die $usage;
}
	


main: {
	
    my $sam_reader = new SAM_reader($sam_file);

    my $sam_basename = basename($sam_file);
    
    
    open (my $plus_ofh, ">$sam_basename.+.sam") or die "Error, cannot write to $sam_basename.+.sam";
	open (my $minus_ofh, ">$sam_basename.-.sam") or die "Error, cannot write to $sam_basename.-.sam";
	
	while (my $sam_entry = $sam_reader->get_next()) {

        if ($sam_entry->is_query_unmapped()) {
            next;
        }
        
		my $transcribed_strand = &get_transcribed_strand($sam_entry);
        
		my $ofh = ($transcribed_strand eq '+') ? $plus_ofh : $minus_ofh;
		
		print $ofh $sam_entry->toString() . "\n";
		
	}
	
	close $plus_ofh;
	close $minus_ofh;

	exit(0);
}




####
sub get_transcribed_strand {
    my ($sam_entry) = @_;
    
    unless ($SS_lib_type) {
        confess "Error, SS_lib_type required as a parameter, possible values: RF,FR,F,R " . $sam_entry->toString();
    }
        
    my $aligned_strand = $sam_entry->get_query_strand();
    my $opposite_strand = ($aligned_strand eq '+') ? '-' : '+';
    
    my $transcribed_strand;
    
    if (! $sam_entry->is_paired()) {
        
        my $is_long_read = &get_long_read_status($sam_entry);
        
        if ($is_long_read) {
            
            my $ts = &get_long_read_transcribed_strand($sam_entry);
            if (defined($ts)) {
                if ($ts ne $aligned_strand) {
                    print STDERR "-warning, long read " . $sam_entry->get_read_name() . " aligned as ($aligned_strand) but splicing indicates ($ts)\n";
                }
                return($ts);
            }
            else {
                return($aligned_strand); # should already be oriented in the forward orientation with respect to input sequence.
            }
        
        }
        
        ## UNPAIRED or SINGLE READS
        unless ($SS_lib_type =~ /^(F|R)$/) {
            confess "Error, cannot have $SS_lib_type library type with unpaired reads " . $sam_entry->toString();
        }
        
        $transcribed_strand = ($SS_lib_type eq "F") ? $aligned_strand : $opposite_strand;
    }
    else {
        
        
        ## paired RNA-Seq reads:  left fragment is on the 3' end revcomped, and right fragment is at the 5' end sense strand.
        
                    
        unless ($SS_lib_type =~ /^(FR|RF)$/) {
            confess "Error, cannot have $SS_lib_type library type with paired reads " . $sam_entry->toString();
        }
                
        if ($sam_entry->is_first_in_pair()) {
            $transcribed_strand = ($SS_lib_type eq "FR") ? $aligned_strand : $opposite_strand;
        }
        else {
            # second pair
            $transcribed_strand = ($SS_lib_type eq "FR") ? $opposite_strand : $aligned_strand;
        }
    }
    
    
    return($transcribed_strand);
    
}



####
sub get_long_read_status {
    my ($sam_entry) = @_;
    
    my @fields = $sam_entry->get_fields();
    @fields = @fields[11..$#fields];
    
    if (grep { /^RG:Z:PBLR$/ } @fields) {
        return(1); 
    }
    else {
        return(0);
    }
}

####
sub get_long_read_transcribed_strand {
    my ($sam_entry) = @_;

    my @fields = $sam_entry->get_fields();
    @fields = @fields[11..$#fields];
    
    if (my $TS_field = grep { /^ts:A:/ } @fields) {
        my @pts = split(":", $TS_field);
        return($pts[2]);
    }
    else {
        return(undef);
    }
}