File: loop_structure_db.py

package info (click to toggle)
promod3 3.4.2%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 966,596 kB
  • sloc: cpp: 55,820; python: 18,058; makefile: 85; sh: 51
file content (85 lines) | stat: -rw-r--r-- 3,414 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from promod3 import loop
from ost import io, seq
import os

# Short alias for loop.StructureDBDataType, whose members are the
# data flags passed to the StructureDB constructors below.
db_data_type = loop.StructureDBDataType

# First database: all data get extracted.
structure_db_one = loop.StructureDB(db_data_type.All)

# Second database: only the default data (positions and sequence)
# plus residue depths and dihedrals.
# The required flags get combined with a bitwise or before they are
# passed on.
reduced_data_flags = db_data_type.ResidueDepths | db_data_type.Dihedrals
structure_db_two = loop.StructureDB(reduced_data_flags)

# Time to fill in some structures. All required data is assumed to
# lie in the following directories.
structure_dir = "data"
prof_dir = "data"

# File naming inside those directories: e.g. 1CRN.pdb for the
# structure and 1CRNA.hhm for the profile.
# A structure file may contain several chains, whereas an hhm file
# only covers one specific chain.
structure_ids = ["1CRN", "1AKI"]
chain_names = ["A", "A"]

for pdb_id, chain_name in zip(structure_ids, chain_names):

    # Build the file names first, then join them with the data paths.
    pdb_file = pdb_id + ".pdb"
    hhm_file = pdb_id + chain_name + ".hhm"
    structure_path = os.path.join(structure_dir, pdb_file)
    prof_path = os.path.join(prof_dir, hhm_file)

    # Load the structure and keep only the "peptide=True" selection.
    loaded_entity = io.LoadPDB(structure_path)
    peptide_selection = loaded_entity.Select("peptide=True")

    # Load the matching profile, which is stored in hhm format.
    hhm_profile = io.LoadSequenceProfile(prof_path)

    # For simplicity the sequence from the profile serves as SEQRES.
    # In this case the numbering of the structures already matches.
    seqres_from_prof = seq.CreateSequence(chain_name, hhm_profile.sequence)

    # Feed everything, including the profile, to the first StructureDB.
    structure_db_one.AddCoordinates(
        pdb_id, chain_name, peptide_selection, seqres_from_prof,
        hhm_profile)

    # Feed the second StructureDB as well, no profile required here...
    structure_db_two.AddCoordinates(
        pdb_id, chain_name, peptide_selection, seqres_from_prof)

                                
# Both databases contain two structures by now...
# Let's print a summary of what's actually in each of them.
for filled_db in (structure_db_one, structure_db_two):
    filled_db.PrintStatistics()

# So far no profile derived from structures has been assigned to
# structure_db_one; the memory for it is only allocated and set to
# zero. structure_db_two will never store a structure profile, as it
# has not been initialized accordingly.
# Its coordinates and residue depths can nevertheless be used to
# generate profiles!
# To demonstrate, profiles get derived with structure_db_two and are
# then set in structure_db_one.

num_coords = structure_db_one.GetNumCoords()
for coord_idx in range(num_coords):

    # Extract the backbone list and the residue depths of this entry
    # from structure_db_one and let structure_db_two generate a
    # structure profile from them.
    structure_prof = structure_db_two.GenerateStructureProfile(
        structure_db_one.GetBackboneList(coord_idx),
        structure_db_one.GetResidueDepths(coord_idx))

    # Attach the generated profile to the entry in structure_db_one.
    structure_db_one.SetStructureProfile(coord_idx, structure_prof)

# Done! Both databases get written to disk.
# structure_db_two will use much less memory, as it contains less data.
# Sidenote: The portable version is saved here. If you intend to use
# your database on one system only, the Save / Load functions should
# be preferred, as the loading will be much faster.
portable_targets = (
    (structure_db_one, "my_db_one.dat"),
    (structure_db_two, "my_db_two.dat"),
)
for db_to_save, out_path in portable_targets:
    db_to_save.SavePortable(out_path)