File: BED_utils.pm

package info (click to toggle)
trinityrnaseq 2.15.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 468,004 kB
  • sloc: perl: 49,905; cpp: 17,993; java: 12,489; python: 3,282; sh: 1,989; ansic: 985; makefile: 717; xml: 62
file content (54 lines) | stat: -rw-r--r-- 1,110 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package BED_utils;

use strict;
use warnings;
use Carp;
use Gene_obj;

####
# index_BED_as_gene_objs($gff_filename, $gene_id_to_gene_obj_href)
#
# Reads a BED file line by line, turns each record into a gene object with
# Gene_obj::BED_line_to_gene_obj(), and indexes the objects by gene id and
# by contig.
#
# Parameters:
#   $gff_filename              - path of the file to read.  Despite the
#                                name, every data line is parsed as BED.
#   $gene_id_to_gene_obj_href  - hash ref, updated in place:
#                                gene id (the object's TU_feat_name) => gene object.
#                                If a gene id is already present, the newly
#                                parsed object is attached to the stored one
#                                via add_isoform() rather than replacing it.
#
# Returns:
#   hash ref: contig (the object's asmbl_id) => array ref of the gene ids
#   that this call newly inserted into $gene_id_to_gene_obj_href, in file
#   order.  Gene ids that were already present in the hash are not listed.
#
# Errors:
#   dies if the file cannot be opened.  Lines starting with '#' and lines
#   without any word character are skipped silently; lines that cannot be
#   converted into a gene object are reported on STDERR and skipped.
####
sub index_BED_as_gene_objs {
	my ($gff_filename, $gene_id_to_gene_obj_href) = @_;

	my %contig_to_gene_list;

	# Three-argument open: the filename is taken literally, so it can never be
	# interpreted as a mode or pipe, and surrounding whitespace is preserved.
	open (my $fh, '<', $gff_filename)
		or die "Error, cannot open file $gff_filename: $!";

	# Read into a lexical so the caller's $_ is left untouched.
	while (my $bed_line = <$fh>) {
		next if $bed_line =~ /^\#/;
		chomp $bed_line;
		next unless $bed_line =~ /\w/;

		my $gene_obj;

		# Rely on the eval's own return value; $@ alone is not a reliable
		# success/failure indicator.
		my $parsed_ok = eval {
			$gene_obj = Gene_obj::BED_line_to_gene_obj($bed_line);
			# this method breaks if all exons are single bases.  Ignore these weird things.
			# It is called (list context, result discarded) only to trigger that failure here.
			() = $gene_obj->get_intron_coordinates();
			1;
		};

		unless ($parsed_ok) {
			my $error = $@ || "unknown error";
			print STDERR "ERROR, cannot create gene for bed line:\n$bed_line\n$error\n";
			next;
		}

		my $gene_id = $gene_obj->{TU_feat_name};

		my $indexed_gene_obj = $gene_id_to_gene_obj_href->{$gene_id};
		if ($indexed_gene_obj) {
			# Gene already known: record this record as an additional isoform.
			$indexed_gene_obj->add_isoform($gene_obj);
		}
		else {
			# First sighting of this gene id: store it and list it under its contig.
			$gene_id_to_gene_obj_href->{$gene_id} = $gene_obj;
			my $contig = $gene_obj->{asmbl_id};
			push (@{$contig_to_gene_list{$contig}}, $gene_id);
		}
	}
	close $fh;

	return(\%contig_to_gene_list);
}



1; #EOM