File: add_LR_reads_to_iworm_bundle.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (55 lines) | stat: -rwxr-xr-x 1,314 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env perl

use strict;
use warnings;

my $usage = "\n\n\tusage: $0 iworm_bundles_fasta_file sorted_reads_to_components_file\n\n\n";

# Both positional arguments are required. Test for defined/non-empty rather
# than plain truth so that a file literally named "0" is still accepted.
my ($iworm_bundles_fasta_file, $reads_to_components_file) = @ARGV;
for my $required_arg ($iworm_bundles_fasta_file, $reads_to_components_file) {
    die $usage unless defined $required_arg && length $required_arg;
}


# Appends long-read sequences to the bundle record of the component they were
# assigned to, and writes the resulting records to STDOUT.
#
# Inputs:
#   $reads_to_components_file - tab-delimited; column 1 is the component id,
#       column 2 the read name, column 4 the read sequence. Only reads whose
#       name starts with the literal text '>LR$|' are used.
#   $iworm_bundles_fasta_file - read as consecutive (header, sequence) line
#       pairs; the header must start with '>s_<digits>', where <digits> is
#       the component id.
#
# Output: every bundle record, with "X<seq>" appended to its sequence line
# for each long read collected for that component.
main: {

    # component id => concatenated "X<seq>" chunks, one per long read.
    my %component_to_LRs;
    {
        # Three-arg open: the file name is never interpreted as a mode/pipe.
        open(my $reads_fh, '<', $reads_to_components_file)
            or die "Error, cannot open file: $reads_to_components_file: $!";

        while (my $line = <$reads_fh>) {
            chomp $line;
            my @fields = split(/\t/, $line);
            my $component_id = $fields[0];
            my $read_name    = $fields[1];

            # Lines without a read-name column cannot describe a long read.
            next unless defined $component_id && defined $read_name;

            if ($read_name =~ /^>LR\$\|/) {
                # got a long read!
                my $seq = $fields[3];

                # Do not append a bare "X" separator when the sequence
                # column is missing or empty.
                unless (defined $seq && length $seq) {
                    warn "Warning, long read $read_name has no sequence, skipping\n";
                    next;
                }

                $component_to_LRs{$component_id} .= "X$seq";
            }
        }
        close $reads_fh;
    }

    ## now add them to the components:
    open(my $bundles_fh, '<', $iworm_bundles_fasta_file)
        or die "Error, cannot open file: $iworm_bundles_fasta_file: $!";

    while (my $header = <$bundles_fh>) {

        chomp $header;

        $header =~ /^>s_(\d+)/
            or die "Error, cannot decipher component id from header: $header";
        my $component_id = $1;

        # Each record is expected to be a header line followed by exactly one
        # sequence line; a header at EOF has nothing to attach reads to.
        my $seq = <$bundles_fh>;
        defined $seq
            or die "Error, no sequence line found for header: $header";
        chomp $seq;

        if (defined(my $LRs = $component_to_LRs{$component_id})) {
            $seq .= $LRs;
        }

        print "$header\n$seq\n";
    }
    close $bundles_fh;

    exit(0);
}