File: TestInterfaceClustering.java

package info (click to toggle)
biojava5-live 5.4.0%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 110,812 kB
  • sloc: java: 257,052; xml: 27,480; python: 64; sh: 43; makefile: 39
file content (277 lines) | stat: -rw-r--r-- 10,112 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.xtal;

import static org.junit.Assert.*;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.asa.GroupAsa;
import org.biojava.nbio.structure.contact.StructureInterface;
import org.biojava.nbio.structure.contact.StructureInterfaceCluster;
import org.biojava.nbio.structure.contact.StructureInterfaceList;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.junit.Ignore;
import org.junit.Test;

import javax.vecmath.Matrix4d;

public class TestInterfaceClustering {

        @Ignore("The test requires the network")
	@Test
	public void test3DDO() throws IOException, StructureException {

		// 3DDO is special in that it contains 6 chains in 1 entity, all of them with different residue numbering

		AtomCache cache = new AtomCache();
		FileParsingParameters params = new FileParsingParameters();
		params.setAlignSeqRes(true);
		cache.setFileParsingParams(params);
		cache.setUseMmCif(true);

		StructureIO.setAtomCache(cache);

		Structure s = StructureIO.getStructure("3DDO");

		CrystalBuilder cb = new CrystalBuilder(s);
		StructureInterfaceList interfaces = cb.getUniqueInterfaces(5.5);
		interfaces.calcAsas(100, 1, 0);
		interfaces.removeInterfacesBelowArea();

		List<StructureInterfaceCluster> clusters = interfaces.getClusters();

		// 22 if below 35A2 interfaces are filtered
		assertEquals(22,interfaces.size());

		// we simply want to test that some interfaces cluster together, for this entry
		// it is problematic because of different residue numbering between different chains of same entity
		assertTrue("Expected fewer than 22 interfaces (some interfaces should cluster together)",clusters.size()<22);

		// first 2 clusters are of size 3
		assertEquals("Cluster 1 should have 3 members",3,clusters.get(0).getMembers().size());
		assertEquals("Cluster 2 should have 3 members",3,clusters.get(1).getMembers().size());

		// detection of isologous test: first 3 interfaces should be isologous

		assertTrue("Interface 1 should be isologous",interfaces.get(1).isIsologous());
		assertTrue("Interface 2 should be isologous",interfaces.get(2).isIsologous());
		assertTrue("Interface 3 should be isologous",interfaces.get(3).isIsologous());



	}

	/**
	 * Test for NCS clustering in viral capsid structures that contain NCS operators.
	 * @throws IOException
	 * @throws StructureException
	 */
        @Ignore("The test requires the network")
	@Test
	public void test1AUY() throws IOException, StructureException {

		// 1AUY is a viral capsid with NCS ops

		AtomCache cache = new AtomCache();
		FileParsingParameters params = new FileParsingParameters();
		params.setAlignSeqRes(true);
		cache.setFileParsingParams(params);
		cache.setUseMmCif(true);

		StructureIO.setAtomCache(cache);

		// 3vbr would be an example of capsids with several chains
		Structure s = StructureIO.getStructure("1auy");

		Map<String,String> chainOrigNames = new HashMap<>();
		Map<String,Matrix4d> chainNcsOps = new HashMap<>();
		CrystalBuilder.expandNcsOps(s,chainOrigNames,chainNcsOps);
		CrystalBuilder cb = new CrystalBuilder(s, chainOrigNames, chainNcsOps);

		StructureInterfaceList interfaces = cb.getUniqueInterfaces(5.5);

		List<StructureInterfaceCluster> clusters = interfaces.getClusters();

		assertNotNull(clusters);

		assertTrue(clusters.size()<=interfaces.size());

		interfaces.calcAsas(100, 1, 0);

		// after calculating ASAs we should have ids for all interfaces
		for (StructureInterface interf : interfaces) {
			assertTrue(interf.getId()>0);
		}


		int numInterfacesShouldbeKept = 0;

		List<StructureInterfaceCluster> ncsClusterShouldbeKept = new ArrayList<>();
		for (StructureInterfaceCluster ncsCluster : interfaces.getClustersNcs()) {
			if (ncsCluster.getMembers().get(0).getTotalArea()>=StructureInterfaceList.DEFAULT_MINIMUM_INTERFACE_AREA) {
				//System.out.println("NCS cluster is above cutoff area and has "+ncsCluster.getMembers().size()+ " members");
				ncsClusterShouldbeKept.add(ncsCluster);
				numInterfacesShouldbeKept += ncsCluster.getMembers().size();
			}
		}

		clusters = interfaces.getClusters();

		assertNotNull(clusters);

		assertTrue(clusters.size()<=interfaces.size());

		interfaces.removeInterfacesBelowArea();

		assertNotNull(interfaces.getClustersNcs());

		// making sure that removeInterfacesBelowArea does not throw away the members for which area wasn't calculated
		for (StructureInterfaceCluster ncsCluster : ncsClusterShouldbeKept) {
			assertTrue(interfaces.getClustersNcs().contains(ncsCluster));
		}

		assertEquals(numInterfacesShouldbeKept, interfaces.size());

		clusters = interfaces.getClusters();

		assertNotNull(clusters);

		assertTrue(clusters.size()<=interfaces.size());

		for (StructureInterface interf : interfaces) {
			GroupAsa groupAsa = interf.getFirstGroupAsas().values().iterator().next();
			String expected = interf.getMoleculeIds().getFirst();
			String actual = groupAsa.getGroup().getChain().getName();
			// in 1auy this works always since there's only 1 chain. But it is useful in testing cases like 3vbr with serveral chains
			assertEquals(expected.charAt(0), actual.charAt(0));
		}
	}


	@Test
	public void test3C5FWithSeqresPdb() throws IOException, StructureException {

		InputStream inStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/3c5f_raw.pdb.gz"));
		assertNotNull(inStream);

		PDBFileParser pdbpars = new PDBFileParser();
		FileParsingParameters params = new FileParsingParameters();
		params.setAlignSeqRes(true);
		pdbpars.setFileParsingParameters(params);

		Structure s = pdbpars.parsePDBFile(inStream) ;

		assertNotNull(s);

		assertEquals(8, s.getPolyChains().size());

		// 1 protein, 3 nucleotide chains, 1 NA nonpoly chain, 1 water: 6 entities
		assertEquals(6, s.getEntityInfos().size());

		CrystalBuilder cb = new CrystalBuilder(s);
		StructureInterfaceList interfaces = cb.getUniqueInterfaces(5.5);
		interfaces.calcAsas(100, 1, 0);
		interfaces.removeInterfacesBelowArea();

		List<StructureInterfaceCluster> clusters = interfaces.getClusters();

		// 23 if below 35A2 interfaces are filtered
		assertEquals(23,interfaces.size());

		// we simply want to test that some interfaces cluster together
		assertTrue("Expected fewer than 23 interfaces (some interfaces should cluster together)",clusters.size()<23);

		// third cluster (index 2) is of size 2
		assertEquals("Cluster 3 should have 2 members",2,clusters.get(2).getMembers().size());

		assertTrue("Interface 3 should be isologous",interfaces.get(3).isIsologous());


	}

	// This doesn't work yet, since for raw files without a SEQRES, the seqres groups are not populated. Instead
	// in that case Compound.getAlignedResIndex() returns residue numbers as given (without insertion codes) and
	// thus in general residues will not be correctly aligned between different chains of same entity. This breaks
	// cases like 3ddo (with no SEQRES records) where residue numbering is different in every chain of the one entity.
	// Then contact overlap calculation will be wrong and interface clustering won't work.
	// see https://github.com/eppic-team/eppic/issues/39
	// See also TestCompoundResIndexMapping
	//@Test
	public void test3DDONoSeqresPdb() throws IOException, StructureException {

		// 3ddo contains 6 chains in 1 entity, with residue numbering completely different in each of the chains

		InputStream inStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/3ddo_raw_noseqres.pdb.gz"));
		assertNotNull(inStream);

		PDBFileParser pdbpars = new PDBFileParser();
		FileParsingParameters params = new FileParsingParameters();
		params.setAlignSeqRes(true);
		pdbpars.setFileParsingParameters(params);

		Structure s = pdbpars.parsePDBFile(inStream) ;

		assertNotNull(s);

		assertEquals(6, s.getChains().size());

		assertEquals(1, s.getEntityInfos().size());

		CrystalBuilder cb = new CrystalBuilder(s);
		StructureInterfaceList interfaces = cb.getUniqueInterfaces(5.5);
		interfaces.calcAsas(100, 1, 0);
		interfaces.removeInterfacesBelowArea();

		List<StructureInterfaceCluster> clusters = interfaces.getClusters();

		// 22 if below 35A2 interfaces are filtered
		assertEquals(22,interfaces.size());

		// we simply want to test that some interfaces cluster together, for this entry
		// it is problematic because of different residue numbering between different chains of same entity
		assertTrue("Expected fewer than 22 interfaces (some interfaces should cluster together)",clusters.size()<22);

		// first 2 clusters are of size 3
		assertEquals("Cluster 1 should have 3 members",3,clusters.get(0).getMembers().size());
		assertEquals("Cluster 2 should have 3 members",3,clusters.get(1).getMembers().size());

		// detection of isologous test: first 3 interfaces should be isologous

		assertTrue("Interface 1 should be isologous",interfaces.get(1).isIsologous());
		assertTrue("Interface 2 should be isologous",interfaces.get(2).isIsologous());
		assertTrue("Interface 3 should be isologous",interfaces.get(3).isIsologous());



	}
}