File: nmrpdb_parse.pl

package info (click to toggle)
bioperl 1.6.924-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 50,776 kB
  • ctags: 11,412
  • sloc: perl: 175,865; xml: 27,565; lisp: 2,034; sh: 1,958; makefile: 19
file content (183 lines) | stat: -rwxr-xr-x 4,650 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/perl
use strict;

# This program will read in an NMR derived PDB file containing
# multiple conformers, and will allow the user to extract either
# one or all of the models to separate files.
#
# Although the program will run interactively, the command line
# usage is "nmrsplit [input.file] [number of model to extract]"
#
#
#
# Written 13/12/00 by Simon Andrews (simon.andrews@bbsrc.ac.uk)

# Submitted to bioperl script project 2001/08/06

# Description:  Script which reads  an NMR-derived  multiple structure
# PDB file, and will either extract a single structure from it, or
# will  extract all of the structures into single  files.  This  is
# useful when you want to  work with a single representative structure
# from an NMR  ensemble - especially in conjunction  with the OLDERADO
# database (http://neon.chem.le.ac.uk/olderado/) which  finds  the
# most representative structure from an ensemble.


# Both command-line arguments are optional; anything missing is asked
# for interactively further down.
my ($Input, $Pullout) = @ARGV;  # File to read, and the one model to extract

my @Pdbfile;                    # Declared for the whole PDB file; not referenced again in this script
my $Header = q{};               # Everything in the file before the first MODEL record
my $Model  = q{};               # Records of the model currently being processed
my $Output;                     # Prefix for output file names
my $Modno  = 1;                 # Number of the model being processed

# Keep asking until we hold the name of a readable file.  A name given
# on the command line is tried first; if it is unusable the user is
# told why and prompted for another one.
while (1) {
    if ($Input) {
        last if -r $Input;
        print "\"$Input\" does not exist, or could not be read\n";
    }

    print "\nEnter name of multiple PDB file: ";
    $Input = <STDIN>;

    # End of input (STDIN closed or an exhausted redirect): there is
    # nobody left to answer, so stop instead of re-prompting forever.
    die "\nNo input file name given\n" unless defined $Input;

    # Strip the newline and any surrounding whitespace from the answer.
    chomp $Input;
    $Input =~ s/^\s+//;
    $Input =~ s/\s+$//;
}

# Work out which single model (if any) should be extracted.  A value
# given on the command line is validated first; otherwise the user is
# asked.  An empty answer leaves $Pullout false, which later means
# "write out every model".
while (1) {
    if ($Pullout) {
        # A string made only of digits is already a whole number, so no
        # separate integer test is needed.
        last if $Pullout =~ /^\d+$/;
        print "\"$Pullout\" should be a number\n";
    }

    print "\nEnter number of specific model to extract (Return for none): ";
    $Pullout = <STDIN>;

    # End of input is treated the same as pressing Return: no specific model.
    $Pullout = "" unless defined $Pullout;

    chomp $Pullout;
    $Pullout =~ s/^\s+//;
    $Pullout =~ s/\s+$//;

    last unless $Pullout;
}


# Output files are named after the input file: copy the name and take
# off everything after the last "." to use as the prefix.
($Output = $Input) =~ s/\.\w*$//;


# Three-argument open so the name is taken literally.  The two-argument
# form strips surrounding whitespace and interprets characters such as
# ">" or "|", so it could open something other than the file that was
# checked with -r above.  The bareword handle PDB is kept because the
# rest of the script reads from it by that name.
open(PDB, '<', $Input) || die "Can't open $Input because $!";



########## Read the header information ####################


# Collect every line up to (but not including) the first MODEL record.
# That MODEL line itself is consumed and dropped.
while (my $line = <PDB>) {
    last if $line =~ /^MODEL\b/;
    $Header .= $line;
}


######### Read the separate models #######################


# Pull models out of the file one at a time.  model() fills $Model with
# the next model's records and leaves it empty once there are no more.
while (1) {

    model();
    last unless $Model;                 # Past the last model

    if ($Pullout) {                     # Extracting a single model
        last if $Modno > $Pullout;      # No point continuing if we've got the one we want
        readout();
    }
    else {                              # Extracting every model
        writeout();
    }

    $Model = "";
    ++$Modno;
}
--$Modno;     # Correct last increment which didn't find a model

close(PDB);

# Logical "and" is required here.  A bitwise "&" of the model number
# with the 0/1 result of the comparison is zero for every even model
# number, which silently hid this message.
if ($Pullout && $Modno < $Pullout) {
    print "\nCannot find model $Pullout : Only $Modno models in this file\n";
}

#################### subroutines start here ##########################


# model - read the next model from the PDB filehandle into $Model.
#
# Lines are appended to the file-level $Model until an ENDMDL record or
# the end of the file is reached.  MODEL, END and MASTER records are
# never copied.  Takes no arguments; the result is left in $Model, and
# an empty $Model tells the caller there are no more models.
sub model {

    while (my $line = <PDB>) {
	# Stops you getting MODEL... at the top of the output
	# and makes sure there isn't a file containing just END or MASTER
	next if $line =~ /^(?:MODEL|END|MASTER)\b/;

	return if $line =~ /^ENDMDL\b/;    # End of this model
	$Model .= $line;                   # Append the line to $Model
    }

    # We ran off the end of the file without seeing ENDMDL.  Whatever
    # was collected is only a model if it holds coordinate records;
    # trailing records after the last ENDMDL (CONECT lines, blank
    # lines) must not be reported as an extra model.
    $Model = "" unless $Model =~ /^(?:ATOM|HETATM)/m;
    return;
}

# writeout - save the current model to "<prefix>_<model number>.pdb".
#
# Used when all models are being written out.  Reads the file-level
# $Output, $Modno, $Header and $Model.  If the target file already
# exists the user is asked whether to overwrite it; any answer not
# starting with "y" (or no answer at all) skips the model.  Dies if the
# file cannot be opened or closed.
sub writeout  {

    my $file = "${Output}_$Modno.pdb";

    if (-e $file) {  # Check whether we're overwriting anything

	print "\n$file already exists. Overwrite (y/n)? ";
	my $Question = <STDIN>;
	unless (defined $Question && $Question =~ /^y/i) {
	    print "\nSkipping $file";
	    return;
	}
    }

    # Three-argument open takes the name literally, so the file written
    # is exactly the one tested with -e above (the two-argument form
    # strips whitespace around the name).
    open(my $out, '>', $file) or die "Can't open $file because $!";
    print "\nWriting $file ...";
    print $out $Header;
    print $out $Model;
    print $out "END\n";	# Adds an END statement to the PDB file

    close($out) or die "Couldn't close $file because $!";
    return;
}


# readout - handle one model when only a single model is wanted.
#
# Reads the file-level $Modno, $Pullout, $Output, $Header and $Model.
# For any model other than the requested one it only prints a progress
# message.  For the requested model it writes
# "<prefix>_<model number>.pdb", after asking before overwriting an
# existing file, and then empties $Model.  Dies if the file cannot be
# opened or closed.
sub readout {

    if ($Modno != $Pullout) {
	print "\nReading Model $Modno ...";
	return;
    }

    my $file = "${Output}_$Modno.pdb";

    if (-e $file) {	# Check whether we're overwriting anything

	print "\n$file already exists. Overwrite (y/n)? ";
	my $Question = <STDIN>;
	unless (defined $Question && $Question =~ /^y/i) {
	    print "\nModel not extracted\n";
	    $Model = "";
	    return;
	}
    }

    # Three-argument open takes the name literally, so the file written
    # is exactly the one tested with -e above (the two-argument form
    # strips whitespace around the name).
    open(my $out, '>', $file) or die "Can't open $file because $!";
    print "\nWriting $file ...\n";
    print $out $Header;
    print $out $Model;
    print $out "END\n"; # Adds an END statement to the PDB file

    close($out) or die "Couldn't close $file because $!";

    $Model = "";  # The wanted model has been dealt with
    return;
}