File: chemcomp2config.pl

package info (click to toggle)
freesasa 2.1.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 13,740 kB
  • sloc: ansic: 11,281; cpp: 1,342; sh: 397; perl: 169; makefile: 119; yacc: 87; lex: 54
file content (67 lines) | stat: -rw-r--r-- 2,058 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Simple script to read an entry from the Chemical Component
# Dictionary (ftp://ftp.wwpdb.org/pub/pdb/data/monomers) and create
# lines for a classifier using the ProtOr classes in the paper by Tsai
# et al. (http://www.ncbi.nlm.nih.gov/pubmed/10388571).

## Example ##
# Using the entry for ALA
#
#  RESIDUE   ALA     13
#  CONECT      N      3 CA   H    H2  
#  CONECT      CA     4 N    C    CB   HA  
#  CONECT      C      3 CA   O    OXT 
#  CONECT      O      1 C   
#  CONECT      CB     4 CA   HB1  HB2  HB3 
#  CONECT      OXT    2 C    HXT 
#  CONECT      H      1 N   
#  CONECT      H2     1 N   
#  CONECT      HA     1 CA  
#  CONECT      HB1    1 CB  
#  CONECT      HB2    1 CB  
#  CONECT      HB3    1 CB  
#  CONECT      HXT    1 OXT 
#  END   
#  HET    ALA             13
#  HETNAM     ALA ALANINE
#  FORMUL      ALA    C3 H7 N1 O2
#
# We get the output
#
#  ALA N N3H2
#  ALA CA C4H1
#  ALA C C3H0
#  ALA O O1H0
#  ALA CB C4H3
#  ALA OXT O2H1
#
# These lines can be added to the atoms-section of a classifier
# config-file, and then the types N3H2, C4H1 need to be defined in the
# types-section. See share/protor.config for a template including
# all the standard amino acids, nucleic acids, plus some common
# non-standard entries. In the setting of a polypeptide chain the
# N-atom should be N3H1, since it is linked to the next residue, but
# since N3H1 and N3H2 have the same radius this doesn't necessarily
# have to be fixed.
#
# The script does not do anything clever, so if your entry contains
# some unusual elements the output should be checked (the atom SE in
# SEC becomes S2H1 for instance).
#

use strict;
my $res;
while (<>) {
    if (/RESIDUE\s+(\w+)/) { $res = $1; next }
    if (/CONECT/) {
        my @fields = split /\s+/, $_;
        shift @fields;
        my $atom = shift @fields;
        next if ($atom =~ m/^H/);
        my $val = shift @fields;
        my $nh = 0;
        foreach (@fields) {
            ++$nh if ($_ =~ m/^H/);
        }
        print "$res $atom ",substr($atom,0,1),$val,"H$nh\n";
    }
}