File: ucsc_snp2gff.pl

package info (click to toggle)
libchado-perl 1.31-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 44,716 kB
  • sloc: sql: 282,721; xml: 192,553; perl: 25,524; sh: 102; python: 73; makefile: 57
file content (26 lines) | stat: -rwxr-xr-x 730 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env perl

# convert UCSC SNP/variant table dumps (tab-separated) into GFF3 feature lines

use strict;
use File::Basename 'basename';
use Getopt::Long;
use URI::Escape;
use Text::Wrap;
$Text::Wrap::columns = 79;
use Bio::SeqIO;
use Bio::SeqFeature::Generic;
use Data::Dumper;

# Map from the UCSC variant class (7th input column) to the feature type
# written in GFF3 column 3.
my %gff_type_for = (
    SNP       => 'SNP',
    INDEL     => 'indel',
    SEGMENTAL => 'simple_sequence_length_polymorphism',
    unknown   => 'sequence_variant',
);

# Percent-encode the characters GFF3 reserves inside an attribute value
# (control characters, '%', ';', '=', '&' and ',') so that an unusual
# variant name cannot corrupt column 9.  Ordinary names pass through
# unchanged.
sub _gff3_escape_attribute {
    my ($value) = @_;
    $value =~ s/([\x00-\x1F\x7F%;=&,])/sprintf('%%%02X', ord $1)/ge;
    return $value;
}

# Convert one tab-separated input record with the columns
#   bin, chrom, start, end, name, source, type
# into a single GFF3 feature line (trailing newline included).
# Dies if the record has fewer than seven columns or if the type has no
# entry in %gff_type_for.
sub _ucsc_snp_to_gff3 {
    my ($record) = @_;

    # The leading bin column is not used in the output.
    my (undef, $chrom, $start, $end, $name, $source, $type)
        = split /\t/, $record;

    defined $type
        or die "expected 7 tab-separated columns "
             . "(bin, chrom, start, end, name, source, type)";

    my $gff_type = $gff_type_for{$type};
    defined $gff_type
        or die "don't know how to represent variant type $type";

    # UCSC tables use 0-based starts while GFF3 is 1-based, hence the +1;
    # the end coordinate is passed through unchanged.
    return join("\t",
        $chrom, $source, $gff_type, $start + 1, $end,
        '.', '.', '.',
        'ID=' . _gff3_escape_attribute($name),
    ) . "\n";
}

# Main loop: read records from the files named on the command line (or
# STDIN) and print one GFF3 line per record.
while (my $line = <>) {
    $line =~ s/\r?\n\z//;                 # accept LF and CRLF line endings
    next if $line =~ /\A\s*(?:#|\z)/;     # skip blank and '#' header lines
    print _ucsc_snp_to_gff3($line);
}