File: mipe2pcroverview.pl

package info (click to toggle)
mipe 1.1-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 300 kB
  • ctags: 39
  • sloc: perl: 2,206; makefile: 54
file content (114 lines) | stat: -rwxr-xr-x 3,217 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/perl

#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with this library ('COPYING'); if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;
use XML::Twig;

=head1 NAME

mipe2pcroverview.pl - Generates an overview of the PCRs in a MIPE file
  included in output: PCR ID, projects, researchers, number of SNPs, length of PCR fragment, remarks
  based on MIPE version v1.1
  arguments: * mipe_file
             * (optional) list of PCR IDs; each one is matched against the PCR id
               as a case-insensitive pattern, so partial IDs match as well

=head1 SYNOPSIS

mipe2pcroverview.pl your_file.mipe <pcr_id1> <pcr_id2>

=head1 ADDITIONAL INFO

See http://mipe.sourceforge.net

=head1 AUTHOR

Jan Aerts (jan.aerts@bbsrc.ac.uk)

=cut


# Command line: first argument is the MIPE file to read; any further
# arguments are PCR ID filters consulted by the pcr() handler.
my $file    = $ARGV[0];
my @pcr_ids = @ARGV[ 1 .. $#ARGV ];

die "Please provide filename\n" unless defined $file;

# Every <pcr> element is handed to pcr() while the file is being parsed.
my $twig = XML::Twig->new(
  TwigHandlers => { pcr => \&pcr },
  pretty_print => 'indented',
);
$twig->parsefile($file);

exit;

# Twig handler, registered for <pcr> elements; prints one overview line per
# selected PCR to STDOUT.
#
# Output line (tab separated):
#   id, projects, researchers, "<n> SNPs", "<length>bp", remarks
#
# Arguments (as passed by XML::Twig to a handler):
#   $twig - the twig doing the parsing (not used here)
#   $pcr  - the <pcr> element
#
# Reads the file-level @pcr_ids. When that list is empty every PCR is
# reported; otherwise only PCRs whose id matches at least one entry. Each
# entry is used as an unanchored, case-insensitive regular expression, so a
# partial ID selects every PCR containing it.
#
# Always returns a true value.
sub pcr {
  my ( $twig, $pcr ) = @_;

  # A missing id attribute is treated as an empty id rather than undef so
  # that matching and printing do not emit "uninitialized" warnings.
  my $pcr_id = $pcr->att('id');
  $pcr_id = '' unless defined $pcr_id;

  if ( @pcr_ids ) {
    return 1 unless grep { $pcr_id =~ /$_/i } @pcr_ids;
  }

  my $researchers = _joined_text( ';',  'NONE',      $pcr->children('researcher') );
  my $projects    = _joined_text( ';',  'NONE',      $pcr->children('project') );
  my $remarks     = _joined_text( '; ', 'NO REMARK', $pcr->children('remark') );

  # Fragment length: taken from the design's <seq> text when present,
  # otherwise derived from a "start-end" <pos>. The search for <design> is
  # confined to this <pcr> subtree, and a PCR without a design (or without
  # usable seq/pos) is reported as UNKNOWN instead of aborting the run.
  my $length = 'UNKNOWN';
  my $design = $pcr->next_elt( $pcr, 'design' );
  if ( defined $design ) {
    my $seq = $design->first_child('seq');
    my $pos = $design->first_child('pos');
    if ( defined $seq ) {
      $length = length $seq->text;
    } elsif ( defined $pos ) {
      my @range = split /-/, $pos->text;
      # NOTE(review): if both bounds are inclusive this is one short of the
      # fragment length - confirm against the MIPE definition of <pos>.
      $length = $range[1] - $range[0] if @range >= 2;
    }
  }

  # SNPs are counted among the children of the first <use> element, if any.
  my $use       = $pcr->first_child('use');
  my @snps      = defined $use ? $use->children('snp') : ();
  my $snp_count = scalar @snps;

  print join( "\t", $pcr_id, $projects, $researchers,
              "$snp_count SNPs", "${length}bp", $remarks ), "\n";

  return 1;
}

# Joins the text of @elements with $separator; returns $default when there
# are no elements at all.
sub _joined_text {
  my ( $separator, $default, @elements ) = @_;

  return $default unless @elements;
  return join $separator, map { $_->text } @elements;
}