File: get_ec.pl

package info (click to toggle)
ncbi-tools6 6.1.20170106%2Bdfsg2-6
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 468,504 kB
  • sloc: ansic: 1,474,210; pascal: 6,740; cpp: 6,248; xml: 3,390; sh: 2,139; perl: 1,084; csh: 508; makefile: 437; ruby: 93; lisp: 81; javascript: 16
file content (104 lines) | stat: -rwxr-xr-x 3,244 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/perl

# ftp ftp://ftp.expasy.org/databases/enzyme/enzyme.dat
# ftp ftp://ftp.expasy.org/databases/enzyme/enzclass.txt

# ./get_ec.pl enzyme.dat enzclass.txt

# /am/ncbiapdata/scripts/misc/txt2inc.sh ecnum_ambiguous.txt 
# /am/ncbiapdata/scripts/misc/txt2inc.sh ecnum_deleted.txt 
# /am/ncbiapdata/scripts/misc/txt2inc.sh ecnum_replaced.txt 
# /am/ncbiapdata/scripts/misc/txt2inc.sh ecnum_specific.txt 

# Command-line arguments (both required, in this order):
#   1. path to enzyme.dat   (ENZYME flat file)
#   2. path to enzclass.txt (EC class listing)
# Use defined/length rather than plain truthiness so that a file literally
# named "0" is still accepted.
my $enzfile = shift;
die "Must supply enzyme.dat filename\n"
  unless defined $enzfile && length $enzfile;
my $clsfile = shift;
die "Must supply enzclass.txt filename\n"
  unless defined $clsfile && length $clsfile;

# Inputs. The three-argument form of open treats the filename purely as a
# path, so a name that starts with '>' or ends with '|' cannot change the
# open mode or launch a command. The bareword handle names are kept because
# the rest of the script reads from and writes to them by name.
open(ENZIN, '<', $enzfile) or die "Unable to open $enzfile: $!\n";
open(CLSIN, '<', $clsfile) or die "Unable to open $clsfile: $!\n";

# Outputs, created (or truncated) in the current working directory.
open(OUT_LIVE,  '>', 'ecnum_specific.txt')
  or die "Unable to open ecnum_specific.txt: $!\n";
open(OUT_DEL,   '>', 'ecnum_deleted.txt')
  or die "Unable to open ecnum_deleted.txt: $!\n";
open(OUT_TRANS, '>', 'ecnum_replaced.txt')
  or die "Unable to open ecnum_replaced.txt: $!\n";
open(OUT_AMB,   '>', 'ecnum_ambiguous.txt')
  or die "Unable to open ecnum_ambiguous.txt: $!\n";

# Pass 1: read enzyme.dat from ENZIN and sort every entry into one of three
# output files, based on the text of its DE (description) line(s):
#
#   entry type 1 (default)        -> OUT_LIVE   "<id>\t<description>\n"
#   entry type 2 "Deleted entry"  -> OUT_DEL    "<id>\n"
#   entry type 3 "Transferred entry: ..."
#                                 -> OUT_TRANS  "<id>\t<new id>\t<new id>...\n"
#
# An entry starts at an ID line and ends at a "//" line; CC lines are skipped.
# A description may span several DE lines, which are joined on output.
my $current_id = '';   # id taken from the most recent ID line
my $entry_type = 0;    # 0 until the first ID line has been seen
my $print_id   = 0;    # 1 while the id of a live entry is still to be written
my $add_space  = 0;    # 1 when the next DE fragment must be preceded by a space

while (my $thisline = <ENZIN>) {
  # strip the line terminator (copes with both LF and CRLF files)
  $thisline =~ s/\r//;
  $thisline =~ s/\n//;
  if ($thisline =~ /^CC/) {
    #ignore comment lines
  } elsif ($thisline =~ /^ID\s+(.*)/) {
    $current_id = $1;
    #by default, entry type is 1
    $entry_type = 1;
    $print_id = 1;
    $add_space = 0;
  } elsif ($thisline =~ /^DE\s+(.*)/) {
    my $disposition = $1;
    if ($disposition =~ /Deleted entry/) {
      print OUT_DEL "$current_id";
      $entry_type = 2;
    } elsif ($disposition =~ /Transferred entry: (.*)/) {
      print OUT_TRANS "$current_id";
      $entry_type = 3;
      #keep only the list of replacement ids that follows the colon
      $disposition = $1;
    }
    if ($entry_type == 1) {
      if ($print_id == 1) {
        print OUT_LIVE "$current_id\t";
        $print_id = 0;
      }
      if ($add_space == 1) {
        print OUT_LIVE " ";
      }
      #use substitution to remove trailing period
      $disposition =~ s/\.\s*$//;
      print OUT_LIVE "$disposition";
      #a fragment that ends in a hyphen was wrapped inside a hyphenated name,
      #so the next fragment is appended directly; otherwise a space separates
      #them. The flag is recomputed for every fragment so that a hyphen on a
      #second or later DE line is honoured as well.
      $add_space = ($disposition =~ /-\s*$/) ? 0 : 1;
    } elsif ($entry_type == 3) {
      #also reached for continuation DE lines of a transferred entry, which
      #carry further replacement ids
      my $next_id = $disposition;
      #use substitution to remove and
      $next_id =~ s/ and//;
      $next_id =~ s/and //;
      #use substitution to remove commas (note use of g for global)
      $next_id =~ s/,//g;
      #use substitution to remove trailing period
      $next_id =~ s/\.\s*$//;
      #use substitution to replace spaces with tabs (note use of g for global)
      $next_id =~ s/\s+/\t/g;
      print OUT_TRANS "\t$next_id";
    }
  } elsif ($thisline =~ /^\/\//) {
    #end of entry: terminate the record in whichever file received it
    if ($entry_type == 1) {
      print OUT_LIVE "\n";
    } elsif ($entry_type == 2) {
      print OUT_DEL "\n";
    } elsif ($entry_type == 3) {
      print OUT_TRANS "\n";
    }
  }
}

# Pass 2: read enzclass.txt from CLSIN. Every line that holds a class number
# ending in "-" followed by a name yields two records in OUT_AMB: one with the
# number as written (spaces removed) and one with each "-" replaced by "n".
while (my $row = <CLSIN>) {
  # drop the line terminator (LF or CRLF)
  $row =~ s/\r//;
  $row =~ s/\n//;
  # close up the first "period + whitespace" gap on the line
  $row =~ s/\.\s+/\./;

  # anything that is not "<number ending in '- '> <name>" is skipped
  next unless $row =~ /^(.+- )\s+(.*)/;
  my ($class_num, $class_name) = ($1, $2);

  # squeeze all whitespace out of the number
  $class_num =~ s/\s+//g;
  # the name is written without its trailing period
  $class_name =~ s/\.\s*$//;
  print OUT_AMB "$class_num\t$class_name\n";

  # same record again, with "n" in place of every "-"
  (my $n_form = $class_num) =~ s/-/n/g;
  print OUT_AMB "$n_form\t$class_name\n";
}

# Input handles: nothing is buffered for writing, so a plain close suffices.
close (ENZIN);
close (CLSIN);

# Output handles: buffered data is flushed at close, so this is where a write
# failure (for example a full disk) is finally reported. Check each one so
# that a truncated output file is not mistaken for a successful run.
close (OUT_LIVE)  || die "Error closing ecnum_specific.txt: $!\n";
close (OUT_DEL)   || die "Error closing ecnum_deleted.txt: $!\n";
close (OUT_TRANS) || die "Error closing ecnum_replaced.txt: $!\n";
close (OUT_AMB)   || die "Error closing ecnum_ambiguous.txt: $!\n";