1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package demo;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.CasePreservingProteinSequenceCreator;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.gui.StructureAlignmentDisplay;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.util.AlignmentTools;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.FastaStructureParser;
import org.biojava.nbio.structure.io.StructureSequenceMatcher;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.ResidueNumber;
/**
* Demo of how to use the {@link FastaStructureParser} class to read protein
* structures from a FASTA file.
*
* @author Spencer Bliven
*
*/
public class DemoAlignmentFromFasta {

    /**
     * Reads a two-entry FASTA alignment held in memory, asks a
     * {@link FastaStructureParser} for the matching structures and residues,
     * converts the alignment into an {@link AFPChain} and passes it to
     * {@link StructureAlignmentDisplay}.
     * <p>
     * Problems while encoding the FASTA text, running the parser, or
     * displaying the result are reported via {@code printStackTrace()} and
     * the method then returns without doing anything further.
     *
     * @throws StructureException if one of the calls made outside the
     *         try blocks raises it
     */
    public static void getAlignmentFromFasta() throws StructureException {
        // Test alignment kept inline for the demo. A real program would
        // normally read it from disk instead, e.g.
        //   File fasta = new File("/path/to/file.fa");
        final String alignmentText =
                "> 1KQ1.A\n" +
                "mianeniqdkalenfkanqtevtvfflngFQ.MKGVIEEYDK.....YVVSLNsqgkQHLIYKh......\n" +
                ".......................AISTYTVetegqastesee\n" +
                "> 1C4Q.D\n" +
                "............................tPDcVTGKVEYTKYndddtFTVKVG....DKELATnranlqs\n" +
                "lllsaqitgmtvtiktnachnggGFSEVIFr...........\n";

        final InputStream fastaStream = toUtf8Stream(alignmentText);
        if (fastaStream == null) {
            // Encoding failed; the stack trace has already been printed.
            return;
        }

        // The header parser turns each header line into a structure accession.
        // Anything AtomCache.getStructure can interpret is acceptable, for
        // instance "4HHB" (whole structure), "d4hhba_" (SCOP domain) or
        // "4HHB.A:1-15" (residue range). The built-in FASTA header parser is
        // enough to pull the right accession out of the headers used here.
        final SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> accessionParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();

        // AtomCache fetches the structures from the PDB.
        final AtomCache atomCache = new AtomCache();

        // The sequence creator converts a String to a ProteinSequence.
        final AminoAcidCompoundSet aminoAcids = AminoAcidCompoundSet.getAminoAcidCompoundSet();
        final SequenceCreatorInterface<AminoAcidCompound> sequenceCreator =
                new CasePreservingProteinSequenceCreator(aminoAcids);

        // Parse the FASTA input.
        final FastaStructureParser fastaParser =
                new FastaStructureParser(fastaStream, accessionParser, sequenceCreator, atomCache);
        try {
            fastaParser.process();
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
            return;
        } catch (StructureException structureFailure) {
            structureFailure.printStackTrace();
            return;
        }

        final ResidueNumber[][] alignedResidues = fastaParser.getResidues();
        final ProteinSequence[] parsedSequences = fastaParser.getSequences();
        final Structure[] parsedStructures = fastaParser.getStructures();

        // Lowercase residues are set to null as well.
        maskLowercaseResidues(parsedSequences, alignedResidues);

        // Drop every alignment column that contains a gap.
        final ResidueNumber[][] ungappedResidues =
                StructureSequenceMatcher.removeGaps(alignedResidues);

        // Build the AFPChain from the alignment.
        final Atom[] firstCaAtoms = StructureTools.getAtomCAArray(parsedStructures[0]);
        final Atom[] secondCaAtoms = StructureTools.getAtomCAArray(parsedStructures[1]);
        final AFPChain alignment = AlignmentTools.createAFPChain(
                firstCaAtoms, secondCaAtoms, ungappedResidues[0], ungappedResidues[1]);

        try {
            StructureAlignmentDisplay.display(alignment, firstCaAtoms, secondCaAtoms);
        } catch (StructureException displayFailure) {
            displayFailure.printStackTrace();
        }
    }

    /**
     * Wraps the given text in an in-memory stream over its UTF-8 bytes.
     *
     * @param text the text to expose as a stream
     * @return the stream, or {@code null} (after printing the stack trace)
     *         when the "UTF-8" encoding is reported as unsupported
     */
    private static InputStream toUtf8Stream(String text) {
        try {
            return new ByteArrayInputStream(text.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException unsupported) {
            unsupported.printStackTrace();
            return null;
        }
    }

    /**
     * Calls {@link CasePreservingProteinSequenceCreator#setLowercaseToNull}
     * once per sequence, pairing each sequence with the residue row at the
     * same index.
     *
     * @param sequences the parsed sequences; its length drives the loop
     * @param residues the residue rows, indexed in parallel with
     *        {@code sequences}
     */
    private static void maskLowercaseResidues(ProteinSequence[] sequences,
            ResidueNumber[][] residues) {
        for (int i = 0; i < sequences.length; i++) {
            CasePreservingProteinSequenceCreator.setLowercaseToNull(sequences[i], residues[i]);
        }
    }

    /**
     * Command-line entry point; runs the demo once.
     *
     * @param args ignored
     * @throws StructureException if {@link #getAlignmentFromFasta()} throws it
     */
    public static void main(String[] args) throws StructureException {
        DemoAlignmentFromFasta.getAlignmentFromFasta();
    }
}
|