1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
|
# Initialise global state before any input is read.
BEGIN {
    # Every print in this script appends its own "\n", so the automatic
    # output record separator is switched off.
    ORS = ""
    # Placeholder values; the coordinate-record rule overwrites both for
    # every ATOM/HETATM record it sees.
    CHAIN = "CHAIN-NOT-SET"
    FIRSTRESIDUE = -1111
    # Default polymer type. (A former "NOT-SET" assignment was immediately
    # overwritten by this one and has been dropped.)
    BIOPOLYMER = "protein"
}
# At the first record of each input file, write the string "\n\n".
FNR == 1 {
    print "\n\n"
}
# Coordinate records: runs for every record whose first field is "ATOM" or
# begins with "HETATM".  It
#   1. rewrites starred sugar-atom names (C1* .. C5*, O2* .. O5*) to the
#      primed form (C1' ..),
#   2. records the chain identifier and residue number of the current atom in
#      the globals CHAIN, FIRSTRESIDUE and LASTRESIDUE,
#   3. renames an isoleucine atom called CD to CD1,
#   4. prints the (possibly modified) record unless the atom name, stripped of
#      blanks and digits, starts with "H".
((substr($1,1,6) == "HETATM") || ($1 == "ATOM")) {
    # Build the list of atom names once; awk arrays are global.
    if (NSTARRED == 0) {
        NSTARRED = split("C1 C2 C3 C4 C5 O2 O3 O4 O5", STARRED, " ")
    }
    # Replace the first "<name>*" by "<name>'" for each name, in list order.
    # "\047" is the octal escape for an apostrophe; the escape "\'" is not
    # defined by POSIX awk and draws a warning from gawk.
    for (ISTARRED = 1; ISTARRED <= NSTARRED; ISTARRED++) {
        sub(STARRED[ISTARRED] "\\*", STARRED[ISTARRED] "\047", $0)
    }

    # Fixed-column fields of the current record.
    CHAIN = substr($0, 22, 1)
    FIRSTRESIDUE = substr($0, 23, 4)
    LASTRESIDUE = FIRSTRESIDUE - 1
    # A former "if (substr($0,22,1) != CHAIN)" test stood here.  It could
    # never be true because CHAIN had just been assigned from the very same
    # expression, and its body was empty, so it has been removed.

    RESNAME = substr($0, 18, 3)

    # Atom name with all blanks removed.
    # NOTE(review): the PDB format puts the atom name in columns 13-16; this
    # reads columns 12-15 - confirm that the offset is intentional.
    FIRSTATOMLETTER = substr($0, 12, 4)
    gsub(/ /, "", FIRSTATOMLETTER)
    ATOMNAMENOWHITESPACE = FIRSTATOMLETTER

    # molmodel uses CD rather than the PDB-standard CD1
    if ((ATOMNAMENOWHITESPACE == "CD") && (RESNAME == "ILE")) {
        sub(/CD./, "CD1", $0)
    }

    # Drop digits; the first remaining letter should be the element type.
    gsub(/[0-9]/, "", FIRSTATOMLETTER)
    # awk strings are indexed from 1.  With a start index of 0 some awk
    # implementations return the empty string here, which silently disabled
    # the hydrogen filter below.
    FIRSTATOMLETTER = substr(FIRSTATOMLETTER, 1, 1)

    # Skip hydrogens.  ORS is empty, so the newline is appended explicitly.
    if (FIRSTATOMLETTER != "H") {
        print $0 "\n"
    }
}
# Coordinate record whose third field is exactly "C" (presumably the backbone
# carbonyl carbon, seen once per residue - verify): flag the input as protein
# and advance LASTRESIDUE by one.
(($1 == "ATOM") || (substr($1, 1, 6) == "HETATM")) && ($3 == "C") {
    BIOPOLYMER = "protein"
    LASTRESIDUE += 1
}
# Coordinate record whose third field is C3' (or the older C3*): flag the
# input as nucleic acid and keep the residue name (columns 18-20) with its
# blanks removed in TEMP.
(($1 == "ATOM") || (substr($1, 1, 6) == "HETATM")) && (($3 == "C3*") || ($3 == "C3'")) {
    BIOPOLYMER = "Nucleic Acid"
    TEMP = substr($0, 18, 3)
    gsub(/ */, "", TEMP)
}
# Pass-through: every record that is not ATOM, HETATM or REMARK is copied to
# the output unchanged.  No rule visible in this script prints REMARK
# records, so they are dropped.
((substr($1,1,6) != "HETATM") && ($1 != "ATOM") && (substr($1,1,6) != "REMARK")) {
    # Concatenate rather than use a comma: "print $0, "\n"" would insert OFS
    # (a blank by default) and leave a stray trailing space on every record.
    # ORS is empty, so the newline is appended explicitly.
    print $0 "\n"
}
# After all input: restore newline-terminated output, then write the string
# "\n\n" followed by the string "END\n", each terminated by ORS.
END {
    ORS = "\n"
    print "\n\n"
    print "END\n"
}
|