File: format-for-pdb.awk

package info (click to toggle)
macromoleculebuilder 4.0.0%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 122,552 kB
  • sloc: cpp: 23,631; python: 5,047; ansic: 2,101; awk: 145; perl: 144; makefile: 40; sh: 21
file content (94 lines) | stat: -rw-r--r-- 2,304 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
BEGIN {
    # Every rule appends its own "\n", so switch off the automatic
    # output record separator.
    ORS = "";

    # Sentinel values for the per-chain book-keeping globals.
    CHAIN        = "CHAIN-NOT-SET";
    FIRSTRESIDUE = -1111;

    # Polymer type assumed until an atom record says otherwise.
    BIOPOLYMER   = "protein";
}

# At the first record of each input file, emit two newline characters.
FNR == 1 {
    print "\n\n";
}

# Atom records (ATOM / HETATM): normalise a few atom names, refresh the
# chain/residue book-keeping globals, and echo the record unless the atom
# name marks it as a hydrogen.
#
# Globals written: CHAIN, FIRSTRESIDUE, LASTRESIDUE, FIRSTATOMLETTER,
# RESNAME, ATOMNAMENOWHITESPACE.
((substr($1,1,6) == "HETATM") || ($1 == "ATOM")) {

    # Sugar atoms written with a trailing "*" are renamed to the primed
    # form.  The replacement is a plain "'": "\'" is not a defined awk
    # escape, and an awk that keeps the backslash would widen the
    # fixed-column record by one character.
    sub(/C1\*/, "C1'", $0);
    sub(/C2\*/, "C2'", $0);
    sub(/C3\*/, "C3'", $0);
    sub(/C4\*/, "C4'", $0);
    sub(/C5\*/, "C5'", $0);

    sub(/O2\*/, "O2'", $0);
    sub(/O3\*/, "O3'", $0);
    sub(/O4\*/, "O4'", $0);
    sub(/O5\*/, "O5'", $0);

    # Book-keeping read from the fixed columns of the record: chain ID in
    # column 22, residue number in columns 23-26.
    CHAIN        = substr($0, 22, 1);
    FIRSTRESIDUE = substr($0, 23, 4);
    LASTRESIDUE  = FIRSTRESIDUE - 1;

    # Atom name lives in columns 13-16, residue name in columns 18-20.
    FIRSTATOMLETTER = substr($0, 13, 4);
    RESNAME         = substr($0, 18, 3);

    # Drop the padding blanks to get the bare atom name.
    gsub(/ /, "", FIRSTATOMLETTER);
    ATOMNAMENOWHITESPACE = FIRSTATOMLETTER;

    # An isoleucine atom named CD is renamed to CD1 (the original comment
    # here: "molmodel uses CD rather than the PDB-standard CD1").  /CD./
    # also consumes the character after "CD", so the record keeps its width.
    if ((ATOMNAMENOWHITESPACE == "CD") && (RESNAME == "ILE")) {
        sub(/CD./, "CD1", $0);
    }

    # Strip digits; the first remaining letter is taken as the element.
    gsub(/[0-9]/, "", FIRSTATOMLETTER);

    # awk strings are 1-indexed.  A start index of 0 with length 1 selects
    # nothing on gawk and mawk, which silently disabled the hydrogen filter.
    FIRSTATOMLETTER = substr(FIRSTATOMLETTER, 1, 1);

    # NOTE(review): every atom whose name starts with "H" is treated as a
    # hydrogen, which would also drop heavy atoms such as HG -- confirm
    # that this is acceptable for the inputs this script sees.
    if (FIRSTATOMLETTER != "H")
        print $0 "\n";
}

# An atom record whose third whitespace-separated field is "C": flag the
# input as protein and advance the LASTRESIDUE counter by one.
($3 == "C") && (($1 == "ATOM") || (substr($1,1,6) == "HETATM")) {
    LASTRESIDUE++;
    BIOPOLYMER = "protein";
}
# An atom record whose third field is C3' (or the older spelling C3*):
# flag the input as nucleic acid and keep the residue name from columns
# 18-20, with blanks removed, in TEMP.
(($3 == "C3*") || ($3 == "C3'")) && (($1 == "ATOM") || (substr($1,1,6) == "HETATM")) {
    TEMP = substr($0, 18, 3);
    gsub(/ */, "", TEMP);
    BIOPOLYMER = "Nucleic Acid";
}

# Every record that is not ATOM, HETATM or REMARK is copied through
# unchanged; REMARK records are dropped.
#
# The newline is concatenated rather than passed as a second print
# argument: with a comma, awk inserts OFS (a blank by default) and every
# copied record would gain a trailing space.
((substr($1,1,6) != "HETATM") && ($1 != "ATOM") && (substr($1,1,6) != "REMARK")) {
    print $0 "\n";
}

END {
    # Restore newline-terminated output for the trailer, then write two
    # newlines and the closing END record, each followed by ORS.
    # No TER record is emitted here.
    ORS = "\n";
    printf "%s%s", "\n\n", ORS;
    printf "%s%s", "END\n", ORS;
}