File: modelling_loop_scoring.py

package info (click to toggle)
promod3 3.4.2%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 966,596 kB
  • sloc: cpp: 55,820; python: 18,058; makefile: 85; sh: 51
file content (86 lines) | stat: -rw-r--r-- 3,506 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from ost import io, seq
from promod3 import modelling, loop

# setup raw model
tpl = io.LoadPDB('data/1crn_cut.pdb')
seq_trg = 'TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN'
seq_tpl = 'TTCCPSIVARSNFNVCRLPGTPEA------GCIIIPGATCPGDYAN'
aln = seq.CreateAlignment(seq.CreateSequence('trg', seq_trg),
                          seq.CreateSequence('tpl', seq_tpl))
aln.AttachView(1, tpl.CreateFullView())
mhandle = modelling.BuildRawModel(aln)
print(("Number of gaps in raw model: %d" % len(mhandle.gaps)))

# setup default scorers for modelling handle
modelling.SetupDefaultBackboneScoring(mhandle)
modelling.SetupDefaultAllAtomScoring(mhandle)

# setup databases
frag_db = loop.LoadFragDB()
structure_db = loop.LoadStructureDB()
torsion_sampler = loop.LoadTorsionSamplerCoil()

# get data for gap to close
gap = mhandle.gaps[0].Copy()
print(("Gap to close: %s" % str(gap)))
n_stem = gap.before
c_stem = gap.after
start_resnum = n_stem.GetNumber().GetNum()
start_idx = start_resnum - 1   # res. num. starts at 1

# get loop candidates from FragDB
candidates = modelling.LoopCandidates.FillFromDatabase(\
                n_stem, c_stem, gap.full_seq, frag_db, structure_db)
print(("Number of loop candidates: %d" % len(candidates)))

# all scores will be kept in a score container which we update
all_scores = modelling.ScoreContainer()
# the keys used to identify scores are globally defined
print(("Stem RMSD key = '%s'" \
      % modelling.ScoringWeights.GetStemRMSDsKey()))
print(("Profile keys = ['%s', '%s']" \
      % (modelling.ScoringWeights.GetSequenceProfileScoresKey(),
         modelling.ScoringWeights.GetStructureProfileScoresKey())))
print(("Backbone scoring keys = %s" \
      % str(modelling.ScoringWeights.GetBackboneScoringKeys())))
print(("All atom scoring keys = %s" \
      % str(modelling.ScoringWeights.GetAllAtomScoringKeys())))

# get stem RMSDs for each candidate (i.e. how well does it fit?)
# -> this must be done before CCD to be meaningful
candidates.CalculateStemRMSDs(all_scores, n_stem, c_stem)

# close the candidates with CCD
orig_indices = candidates.ApplyCCD(n_stem, c_stem, torsion_sampler)
print(("Number of closed loop candidates: %d" % len(candidates)))

# get subset of previously computed scores
all_scores = all_scores.Extract(orig_indices)

# add profile scores (needs profile for target sequence)
prof = io.LoadSequenceProfile("data/1CRNA.hhm")
candidates.CalculateSequenceProfileScores(all_scores, structure_db,
                                          prof, start_idx)
candidates.CalculateStructureProfileScores(all_scores, structure_db,
                                           prof, start_idx)
# add backbone scores
candidates.CalculateBackboneScores(all_scores, mhandle, start_resnum)
# add all atom scores
candidates.CalculateAllAtomScores(all_scores, mhandle, start_resnum)

# use default weights to combine scores
weights = modelling.ScoringWeights.GetWeights(with_db=True,
                                              with_aa=True)
scores = all_scores.LinearCombine(weights)

# rank them (best = lowest "score")
arg_sorted_scores = sorted([(v,i) for i,v in enumerate(scores)])
print("Ranked candidates: score, index")
for v,i in arg_sorted_scores:
  print(("%g, %d" % (v,i)))

# insert best into model, update scorers and clear gaps
best_candidate = candidates[arg_sorted_scores[0][1]]
modelling.InsertLoopClearGaps(mhandle, best_candidate, gap)
print(("Number of gaps in closed model: %d" % len(mhandle.gaps)))
io.SavePDB(mhandle.model, "model.pdb")