File: mipe2genotypes.pl

package info (click to toggle)
mipe 1.1-9
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 392 kB
  • sloc: perl: 2,806; sh: 20; makefile: 10
file content (103 lines) | stat: -rwxr-xr-x 3,050 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/perl

#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with this library ('COPYING'); if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;
use XML::Twig;
use Data::Dumper;

=head1 NAME

mipe2genotypes.pl - Generates list of genotypes from a MIPE file
  included in output: PCR ID, list of SNP positions, genotypes for each sample
  based on MIPE version v1.1
  arguments: * mipe_file
             * (optional) list of PCR IDs

=head1 SYNOPSIS

mipe2genotypes.pl your_file.mipe <pcr_id1> <pcr_id2>

=head1 ADDITIONAL INFO

See http://mipe.sourceforge.net

=head1 AUTHOR

Jan Aerts (jan.aerts@bbsrc.ac.uk)

=cut


# Command line: the MIPE file comes first, any remaining arguments are the
# PCR IDs to report (read by the pcr handler below).
my ( $file, @pcr_ids ) = @ARGV;
die "Please provide filename\n" unless defined $file;

# Register pcr() as the handler for <pcr> elements; all output is produced
# by that handler while the file is being parsed.
my $twig = XML::Twig->new(
  TwigHandlers => { pcr => \&pcr },
  pretty_print => 'indented',
);
$twig->parsefile($file);

exit;

# Twig handler for <pcr> elements: prints a genotype table for one PCR.
#
# Arguments (as passed by XML::Twig to a handler):
#   $twig - the twig object (not used here)
#   $pcr  - the <pcr> element
#
# Reads the file-level @pcr_ids list. When it is empty every PCR is
# reported; otherwise a PCR is reported only when at least one entry,
# used as a case-insensitive pattern, matches its id attribute.
#
# Output on STDOUT for a reported PCR:
#   * the PCR ID
#   * either "\tNo use part defined", or
#   * the SNP positions in ascending numeric order, joined by ';'
#   * one line per sample (sorted by sample ID): the ID, a tab, then the
#     'amb' value for each position in the same order (' ' when the sample
#     has no value for that position)
#
# Incomplete <snp>, <sample> or <genotype> elements are reported on STDERR
# and skipped instead of aborting the whole run.
sub pcr {
  my ( $twig, $pcr ) = @_;

  my $pcr_id = $pcr->{att}->{id};
  $pcr_id = '' unless defined $pcr_id;

  if ( @pcr_ids ) {
    my $is_selected = 0;
    foreach my $wanted ( @pcr_ids ) {
      # The (?:...) wrapper keeps an empty argument from turning into
      # Perl's special empty pattern (which reuses the last successful match).
      if ( $pcr_id =~ /(?:$wanted)/i ) {
        $is_selected = 1;
        last;
      }
    }
    return unless $is_selected;
  }

  print $pcr_id, "\n";

  my $use = $pcr->first_child('use');
  if ( not defined $use ) {
    print "\tNo use part defined\n";
    return;
  }

  # SNP ID -> position, taken from the <snp> children of <use>.
  my %position_of;
  foreach my $snp ( $use->children('snp') ) {
    my $snp_id  = $snp->{att}->{id};
    my $pos_elt = $snp->first_child('pos');
    if ( not defined $snp_id or not defined $pos_elt ) {
      warn "PCR '$pcr_id': skipping snp without id attribute or pos element\n";
      next;
    }
    $position_of{$snp_id} = $pos_elt->text;
  }

  # sample ID -> { SNP position -> amb value }.
  my %genotype_of;
  foreach my $sample ( $use->children('sample') ) {
    my $sample_id = $sample->{att}->{id};
    if ( not defined $sample_id ) {
      warn "PCR '$pcr_id': skipping sample without id attribute\n";
      next;
    }
    foreach my $genotype ( $sample->children('genotype') ) {
      my $snp_id_elt = $genotype->first_child('snp_id');
      my $amb_elt    = $genotype->first_child('amb');
      if ( not defined $snp_id_elt or not defined $amb_elt ) {
        warn "PCR '$pcr_id', sample '$sample_id': skipping genotype without snp_id or amb element\n";
        next;
      }
      my $snp_id = $snp_id_elt->text;
      if ( not exists $position_of{$snp_id} ) {
        # Without a known position the value has no column in the table.
        warn "PCR '$pcr_id', sample '$sample_id': genotype refers to unknown snp '$snp_id'\n";
        next;
      }
      $genotype_of{$sample_id}{ $position_of{$snp_id} } = $amb_elt->text;
    }
  }

  # Sort once; the same column order is used for the header and every row.
  my @positions = sort { $a <=> $b } values %position_of;

  print join( ';', @positions ), "\n";
  foreach my $sample_id ( sort keys %genotype_of ) {
    print $sample_id, "\t";
    foreach my $position ( @positions ) {
      print( $genotype_of{$sample_id}{$position} || ' ' );
    }
    print "\n";
  }

  return;
}