#!/usr/bin/perl

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library ('COPYING'); if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;
use XML::Twig;    # NOTE(review): not referenced by the code in this file; kept as declared.

=head1 NAME

csv2mipe.pl - Generates MIPE file based on 3 tab-delimited files

=head1 DESCRIPTION

Based on MIPE version v1.1

arguments:

  * tab-delimited file with PCR-level data
  * tab-delimited file with SNP-level data
  * tab-delimited file with assay-level data

Columns in file with PCR-level data:

  pcr_id
  pcr_modified (might be multiple, divided by semi-colon ";")
  pcr_project (might be multiple, divided by semi-colon ";")
  pcr_researcher (might be multiple, divided by semi-colon ";")
  pcr_species
  source_type
  source_id
  design_seq
  primer1_oligo
  primer1_seq
  primer1_tm
  primer2_oligo
  primer2_seq
  primer2_tm
  design_remark (might be multiple, divided by semi-colon ";")
  use_seq
  use_revcomp
  use_remark (might be multiple, divided by semi-colon ";")
  pcr_remark (might be multiple, divided by semi-colon ";")

Columns in file with SNP-level data:

  pcr_id
  snp_id
  snp_pos
  snp_amb
  snp_remark (might be multiple, divided by semi-colon ";")

Columns in file with assay-level data (in the order the script reads
them: the assay type comes before the assay id):

  pcr_id
  snp_id
  assay_type
  assay_id
  assay_enzyme
  assay_oligo
  assay_specific
  assay_tail
  assay_strand
  assay_remark (might be multiple, divided by semi-colon ";")

Every column must be present on every row; a column may be empty.
Completely empty lines are ignored.  The MIPE document is written to
standard output.

=head1 SYNOPSIS

csv2mipe.pl <pcr_file> <snp_file> <assay_file>

=head1 ADDITIONAL INFO

See http://mipe.sourceforge.net

=head1 AUTHOR

Jan Aerts (jan.aerts@bbsrc.ac.uk)

=cut

# Column layout of the three input files.  The number of names in each
# list is also the number of tab-separated fields every row must have.
my @PCR_COLUMNS = qw(
    pcr_id pcr_modified pcr_project pcr_researcher pcr_species
    source_type source_id design_seq
    primer1_oligo primer1_seq primer1_tm
    primer2_oligo primer2_seq primer2_tm
    design_remark use_seq use_revcomp use_remark pcr_remark
);
my @SNP_COLUMNS = qw( pcr_id snp_id snp_pos snp_amb snp_remark );

# NOTE(review): assay_type precedes assay_id here, exactly as the original
# code unpacked the row.  Earlier documentation listed them the other way
# round; confirm against real input files before changing this order.
my @ASSAY_COLUMNS = qw(
    pcr_id snp_id assay_type assay_id assay_enzyme
    assay_oligo assay_specific assay_tail assay_strand assay_remark
);

# Escape the characters that are not allowed verbatim in XML text content.
# NOTE(review): input that is already entity-encoded will be escaped again.
sub _xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Return "<name>text</name>\n" indented by $depth levels (two spaces each).
sub _element {
    my ( $depth, $name, $text ) = @_;
    return ( '  ' x $depth ) . "<$name>" . _xml_escape($text) . "</$name>\n";
}

# Return one <name> element per semi-colon separated value in $joined.
# An empty $joined yields no elements at all.
sub _repeated {
    my ( $depth, $name, $joined ) = @_;
    return join '', map { _element( $depth, $name, $_ ) } split /;/, $joined;
}

# Return an opening or closing tag on its own line, indented by $depth levels.
sub _open  { my ( $depth, $name ) = @_; return ( '  ' x $depth ) . "<$name>\n" }
sub _close { my ( $depth, $name ) = @_; return ( '  ' x $depth ) . "</$name>\n" }

# read_table( $file, \@columns )
#
# Reads a tab-delimited file and returns a list of hash references, one per
# non-empty line, keyed by the names in @columns.
# Dies if the file cannot be opened or a line does not have exactly
# scalar(@columns) fields.
sub read_table {
    my ( $file, $columns ) = @_;

    open my $fh, '<', $file or die "Cannot open $file: $!\n";

    my @rows;
    while ( my $line = <$fh> ) {
        $line =~ s/\r?\n\z//;    # tolerate DOS line endings as well
        next if $line eq '';

        # A negative limit keeps trailing empty fields, so a row whose last
        # (remark) column is empty still has the full number of fields.
        my @fields = split /\t/, $line, -1;
        if ( @fields != @{$columns} ) {
            die "Wrong number of fields in the following line of $file:\n$line\n";
        }

        my %row;
        @row{ @{$columns} } = @fields;
        push @rows, \%row;
    }
    close $fh;

    return @rows;
}

# mipe_xml( \%pcr_for, \%snps_for, \%assays_for )
#
# Builds the MIPE document and returns it as one string.
#   %pcr_for    : pcr_id => row hash (PCR columns)
#   %snps_for   : pcr_id => snp_id => row hash (SNP columns)
#   %assays_for : pcr_id => snp_id => assay_id => row hash (assay columns)
# PCRs, SNPs and assays are emitted in sorted id order.  SNP and assay rows
# whose pcr_id does not occur in %pcr_for are not emitted.
# Dies if a source_type cannot be used as an XML element name.
#
# NOTE(review): the element names were lost from this file (every "<...>"
# span had been stripped) and are reconstructed here from the field names;
# verify them against the MIPE schema.  The original literal "1.0" is kept
# for <version> although the documentation mentions MIPE v1.1.
sub mipe_xml {
    my ( $pcr_for, $snps_for, $assays_for ) = @_;

    my $xml = qq{<?xml version="1.0"?>\n};
    $xml .= _open( 0, 'mipe' );
    $xml .= _element( 1, 'version', '1.0' );

    for my $pcr_id ( sort keys %{$pcr_for} ) {
        my $pcr = $pcr_for->{$pcr_id};

        # source_type becomes an element name, so it cannot be escaped;
        # reject values that would make the document unparseable.
        my $source_type = $pcr->{source_type};
        $source_type =~ /\A[A-Za-z_][\w.\-]*\z/
            or die "Invalid source_type '$source_type' for PCR $pcr_id: "
            . "not usable as an XML element name\n";

        $xml .= _open( 1, 'pcr' );
        $xml .= _element( 2, 'id', $pcr_id );
        $xml .= _repeated( 2, 'modified',   $pcr->{pcr_modified} );
        $xml .= _repeated( 2, 'project',    $pcr->{pcr_project} );
        $xml .= _repeated( 2, 'researcher', $pcr->{pcr_researcher} );
        $xml .= _element( 2, 'species', $pcr->{pcr_species} );

        $xml .= _open( 2, 'design' );
        $xml .= _open( 3, 'source' );
        $xml .= _element( 4, $source_type, $pcr->{source_id} );
        $xml .= _close( 3, 'source' );
        $xml .= _element( 3, 'seq', $pcr->{design_seq} );
        for my $primer (qw( primer1 primer2 )) {
            $xml .= _open( 3, $primer );
            $xml .= _element( 4, 'oligo', $pcr->{"${primer}_oligo"} );
            $xml .= _element( 4, 'seq',   $pcr->{"${primer}_seq"} );
            $xml .= _element( 4, 'tm',    $pcr->{"${primer}_tm"} );
            $xml .= _close( 3, $primer );
        }
        $xml .= _repeated( 3, 'remark', $pcr->{design_remark} );
        $xml .= _close( 2, 'design' );

        $xml .= _open( 2, 'use' );
        $xml .= _element( 3, 'seq',     $pcr->{use_seq} );
        $xml .= _element( 3, 'revcomp', $pcr->{use_revcomp} );

        my $snp_for = $snps_for->{$pcr_id} || {};
        for my $snp_id ( sort keys %{$snp_for} ) {
            my $snp = $snp_for->{$snp_id};

            $xml .= _open( 3, 'snp' );
            $xml .= _element( 4, 'id',  $snp_id );
            $xml .= _element( 4, 'pos', $snp->{snp_pos} );
            $xml .= _element( 4, 'amb', $snp->{snp_amb} );
            $xml .= _repeated( 4, 'remark', $snp->{snp_remark} );

            my $assay_for = ( $assays_for->{$pcr_id} || {} )->{$snp_id} || {};
            for my $assay_id ( sort keys %{$assay_for} ) {
                my $assay = $assay_for->{$assay_id};
                my $type  = uc $assay->{assay_type};

                $xml .= _open( 4, 'assay' );
                $xml .= _element( 5, 'type', $type );
                $xml .= _element( 5, 'id',   $assay_id );
                if ( $type eq 'SBE' ) {
                    $xml .= _element( 5, 'oligo',    $assay->{assay_oligo} );
                    $xml .= _element( 5, 'specific', $assay->{assay_specific} );
                    $xml .= _element( 5, 'tail',     $assay->{assay_tail} );
                    $xml .= _element( 5, 'strand',   $assay->{assay_strand} );
                }
                else {
                    # Every assay type other than SBE only reports an enzyme.
                    $xml .= _element( 5, 'enzyme', $assay->{assay_enzyme} );
                }
                $xml .= _repeated( 5, 'remark', $assay->{assay_remark} );
                $xml .= _close( 4, 'assay' );
            }
            $xml .= _close( 3, 'snp' );
        }

        $xml .= _repeated( 3, 'remark', $pcr->{use_remark} );
        $xml .= _close( 2, 'use' );
        $xml .= _repeated( 2, 'remark', $pcr->{pcr_remark} );
        $xml .= _close( 1, 'pcr' );
    }

    $xml .= _close( 0, 'mipe' );
    return $xml;
}

# main( @ARGV )
#
# Reads the three input files named on the command line and prints the
# resulting MIPE document to standard output.  Dies with a usage message
# unless exactly three arguments are given.
sub main {
    my @args = @_;
    @args == 3
        or die "Usage: csv2mipe.pl <pcr_file> <snp_file> <assay_file>\n";
    my ( $pcr_file, $snp_file, $assay_file ) = @args;

    # When an id occurs on more than one row, the last row wins.
    my %pcr_for;
    for my $row ( read_table( $pcr_file, \@PCR_COLUMNS ) ) {
        $pcr_for{ $row->{pcr_id} } = $row;
    }

    my %snps_for;
    for my $row ( read_table( $snp_file, \@SNP_COLUMNS ) ) {
        $snps_for{ $row->{pcr_id} }{ $row->{snp_id} } = $row;
    }

    my %assays_for;
    for my $row ( read_table( $assay_file, \@ASSAY_COLUMNS ) ) {
        $assays_for{ $row->{pcr_id} }{ $row->{snp_id} }{ $row->{assay_id} } = $row;
    }

    print mipe_xml( \%pcr_for, \%snps_for, \%assays_for );
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by tests) without touching @ARGV or standard output.
main(@ARGV) unless caller;

1;