1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
use strict;
# Script to generate source code for a classifier based on a
# config-file. Useful for having default classifiers statically in
# memory. It generates a constant classifier that should be placed in
# its own source-file.
# This interpreter has stricter requirements on the input format than
# the one in the C-code, since it is only for internal use. It also
# doesn't do any syntax checks, so it will only work on valid config
# files.
# The script should be run from the same directory as it is stored,
# because it depends on the subdir 'rsa/' to gengerate reference SASA
# values for RSA output. The file classifier_protor.c in the
# source-directory was generated using the command
#
# perl config2c.pl protor ../share/protor.config
#
# where 'protor' will be the variable name prefix and the name used in
# output generated from this classifier. It is also referred to in
# freesasa.h.
# Ideally RSA values should be stored in the configuration file, in a
# separate section. This will be added in the future.
my %types;
my %atoms;
my %residues;
my %rsa;
my $name;
my $n_classes = 0;
my $n_residues = 0;
my $atom_flag = 0;
my $type_flag = 0;
(scalar @ARGV == 2) or die "usage : $0 <variable_prefix> <config-file>\n";
my $prefix = shift @ARGV;
my $config_file = shift @ARGV;
open(my $input, "<$config_file") or die "Can't open $config_file. $!";
while (<$input>) {
next if (/^#/);
chomp;
$_ =~ s/^(.*)#.*/$1/; # strip comments
next if (/^\s*$/);
if (/^types:/) {
$type_flag = 1;
$atom_flag = 0;
next;
}
if (/^atoms:/) {
$type_flag = 0;
$atom_flag = 1;
next;
}
if (/^name:/) {
$_ =~ s/name:\s+(\S+)/$1/;
$name = $_;
next;
}
if ($type_flag) {
my ($name,$radius,$class) = split /\s+/, $_;
my $class_code;
$types{$name}{radius} = $radius;
if ($class =~ /apolar/i) {$class_code = 'FREESASA_ATOM_APOLAR';}
elsif ($class =~ /polar/i) {$class_code = 'FREESASA_ATOM_POLAR';}
else {die "Only classes 'polar' and 'apolar' allowed";}
$types{$name}{class} = $class_code;
}
if ($atom_flag) {
my ($res,$atom,$type) = split /\s+/, $_;
$atoms{$res}{$atom} = $type;
if (! exists $residues{$res}) {
$residues{$res} = $n_residues;
++$n_residues;
}
}
}
close($input);
# generate RSA values
my @pdb = `ls rsa/*.pdb`;
foreach my $p (@pdb) {
chomp $p;
my $name = substr($p,4,3);
my @rsa_atoms = `cat $p`;
my (@pol_atoms, @apol_atoms);
foreach (@rsa_atoms) {
next if (! /^ATOM/);
next if (! (substr($_,25,1) eq '2'));
next if (/H\s*$/); #skip hydrogen
my $atom = substr($_,12,4);
next if (substr($_,1,1) eq "H");
my $c;
$atom =~ s/\s//g;
if (exists $atoms{$name}{$atom}) {
$c = $types{$atoms{$name}{$atom}}{class};
} elsif (exists $atoms{'ANY'}{$atom}) {
$c = $types{$atoms{'ANY'}{$atom}}{class};
} else {
die "Atom $name $atom not in classifier";
}
push(@apol_atoms, $atom) if ($c =~ /FREESASA_ATOM_APOLAR/);
push(@pol_atoms, $atom) if ($c =~ /FREESASA_ATOM_POLAR/);
}
my $select_total = '--select="res2, resi 2"';
my $select_bb = '--select="bb, resi 2 and name c+n+o+ca"';
my $select_sc = '--select="sc, resi 2 and not name c+n+o+ca"';
my ($select_apol, $select_pol);
if (scalar @apol_atoms > 0) {
$select_apol = '--select="apol, resi 2 and name ' . join('+',@apol_atoms) . '"';
}
if (scalar @pol_atoms > 0) {
$select_pol = '--select="pol, resi 2 and name ' . join('+',@pol_atoms) . '"';
}
my @data = `../src/freesasa $p -c $config_file -n 1000 $select_total $select_bb $select_sc $select_apol $select_pol`;
my %subarea;
$subarea{pol} = $subarea{apol} = 0;
$subarea{name} = $name;
foreach (@data) {
if (/^res2 :\ +(\d+.\d+)/) {
$subarea{total} = $1;
}
if (/^bb :\ +(\d+.\d+)/) {
$subarea{bb} = $1;
}
if (/^sc :\ +(\d+.\d+)/) {
$subarea{sc} = $1;
}
if (/^pol :\ +(\d+.\d+)/) {
$subarea{pol} = $1;
}
if (/^apol :\ +(\d+.\d+)/) {
$subarea{apol} = $1;
}
}
$rsa{$name} = "\{\"$name\", $subarea{total}, $subarea{bb}, ".
"$subarea{sc}, $subarea{pol}, $subarea{apol}, 0\}";
}
my @res_array = sort keys %residues;
print "#include \"classifier.h\"\n\n";
print "/* Autogenerated code from the script config2c.pl */\n\n";
print "static const char *$prefix\_residue_name[] = {";
print "\"$_\", "foreach (@res_array);
print "};\n";
foreach my $res (@res_array) {
my @atom_names = keys %{$atoms{$res}};
print "static const char *$prefix\_$res\_atom_name[] = {";
print "\"$_\", " foreach (@atom_names);
print "};\n";
print "static double $prefix\_$res\_atom_radius[] = {";
print $types{$atoms{$res}{$_}}{radius},", " foreach (@atom_names);
print "};\n";
print "static int $prefix\_$res\_atom_class[] = {";
print $types{$atoms{$res}{$_}}{class},", " foreach (@atom_names);
print "};\n";
print "static struct classifier_residue $prefix\_$res\_cfg = {\n";
print " ", scalar keys %{$atoms{$res}}, ",\n";
print " \"$res\",\n";
print " (char**) $prefix\_$res\_atom_name,\n";
print " (double*) $prefix\_$res\_atom_radius,\n";
print " (freesasa_atom_class*) $prefix\_$res\_atom_class,\n";
if (exists $rsa{$res}) {
print " $rsa{$res},";
} else {
print " {NULL, 0, 0, 0, 0, 0},";
}
print"\n};\n\n"
}
print "static struct classifier_residue *$prefix\_residue_cfg[] = {\n ";
foreach my $res (@res_array) {
print "&$prefix\_$res\_cfg, ";
}
print "};\n\n";
print "const freesasa_classifier freesasa_$prefix\_classifier = {\n";
print " $n_residues,";
print " (char**) $prefix\_residue_name,\n";
print " \"$name\",\n";
print " (struct classifier_residue **) $prefix\_residue_cfg,\n";
print "};\n\n";
|