File: SmartProteinManager.cpp

package info (click to toggle)
ausaxs 1.1.8-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 72,592 kB
  • sloc: cpp: 49,853; ansic: 6,901; python: 730; makefile: 18
file content (216 lines) | stat: -rw-r--r-- 9,991 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// SPDX-License-Identifier: LGPL-3.0-or-later
// Author: Kristian Lytje

#include <em/manager/SmartProteinManager.h>
#include <em/detail/ImageStackBase.h>
#include <em/detail/EMGrid.h>
#include <hist/intensity_calculator/CompositeDistanceHistogram.h>
#include <hist/histogram_manager/PartialHistogramManagerMT.h>
#include <data/Molecule.h>
#include <data/Body.h>
#include <utility/Console.h>
#include <utility/Limit3D.h>
#include <utility/Logging.h>
#include <settings/HistogramSettings.h>
#include <settings/ExvSettings.h>
#include <settings/EMSettings.h>
#include <settings/GridSettings.h>
#include <settings/Flags.h>

#include <vector>
#include <cassert>

using namespace ausaxs;
using namespace ausaxs::em::managers;
using namespace ausaxs::data;

SmartProteinManager::~SmartProteinManager() = default;

std::unique_ptr<hist::ICompositeDistanceHistogram> SmartProteinManager::get_histogram(double cutoff) {
    return get_protein(cutoff)->get_histogram();
}

observer_ptr<data::Molecule> SmartProteinManager::get_protein(double cutoff) {
    update_protein(cutoff);

    auto ax = images->get_header()->get_axes();

    // ensure the grid is large enough to contain the entire map
    //? is this a waste of time since it is being overwritten for every iteration anyway?
    Limit3D limits(ax.x.min, ax.x.max, ax.y.min, ax.y.max, ax.z.min, ax.z.max);
    double expand_x = 0.5*limits.x.span()*settings::grid::scaling;
    double expand_y = 0.5*limits.y.span()*settings::grid::scaling;
    double expand_z = 0.5*limits.z.span()*settings::grid::scaling;
    limits.x.min -= expand_x, limits.x.max += expand_x;
    limits.y.min -= expand_y, limits.y.max += expand_y;
    limits.z.min -= expand_z, limits.z.max += expand_z;

    // hydration must be cleared to ensure the grid state will be exactly the same as the one used to generate the hydration
    // when the grid is first created, *only* the body atoms are added to the grid, with the hydration being added later
    // if we do not clear the hydration now, since it can be bound to individual bodies, the hydration will be added in-between the body atoms, 
    // potentially leading to a different grid state than the one used to generate the hydration
    protein->clear_hydration();

    protein->set_grid(std::make_unique<grid::EMGrid>(limits));
    for (auto& body : protein->get_bodies()) {
        protein->get_grid()->add(body);
    }
    if (settings::em::hydrate) {protein->generate_new_hydration();}
    return protein.get();
}

observer_ptr<const data::Molecule> SmartProteinManager::get_protein() const {
    assert(protein != nullptr && "SmartProteinManager::get_protein: Protein has not been initialized yet.");
    return protein.get();
}

void SmartProteinManager::set_charge_levels(const std::vector<double>& levels) noexcept {
    ProteinManager::set_charge_levels(levels);
    protein = nullptr; // the protein must be generated anew to ensure the bodies remains in sync with the new levels
}

std::vector<EMAtom> SmartProteinManager::generate_atoms(double cutoff) const {
    // we use a list since we will have to append quite a few other lists to it
    std::list<EMAtom> atoms;
    const auto& imagestack = images->images();
    unsigned int step = settings::em::sample_frequency;
    for (unsigned int i = 0; i < imagestack.size(); i += step) {
        std::list<EMAtom> im_atoms = imagestack[i].generate_atoms(cutoff);
        atoms.splice(atoms.end(), im_atoms); // move im_atoms to end of atoms
    }

    // convert list to vector
    return std::vector<EMAtom>(std::make_move_iterator(std::begin(atoms)), std::make_move_iterator(std::end(atoms)));
}

std::unique_ptr<data::Molecule> SmartProteinManager::generate_new_protein(double cutoff) const {
    std::vector<EMAtom> atoms = generate_atoms(cutoff);
    std::vector<Body> bodies(charge_levels.size());
    std::vector<AtomFF> current_atoms(atoms.size());

    if (atoms.empty()) {
        console::print_warning("Warning in SmartProteinManager::generate_protein: No voxels found for cutoff \"" + std::to_string(cutoff) + "\".");
        return std::make_unique<data::Molecule>(bodies);
    }

    if (charge_levels.empty()) {
        throw except::out_of_bounds("SmartProteinManager::generate_protein: charge_levels is empty.");
    }

    // sort vector so we can slice it into levels of charge density
    std::sort(atoms.begin(), atoms.end(), [] (const EMAtom& atom1, const EMAtom& atom2) {return atom1.charge_density() < atom2.charge_density();});

    unsigned int charge_index = 0, atom_index = 0, current_index = 0;
    double charge = charge_levels[charge_index]; // initialize charge

    while (atoms[atom_index].charge_density() < cutoff) {++atom_index;} // search for first atom with charge larger than the cutoff
    while (charge < cutoff) {charge = charge_levels[++charge_index];}   // search for first charge level larger than the cutoff 
    while (atom_index < atoms.size()) {
        if (atoms[atom_index].charge_density() < charge) {
            current_atoms[current_index++] = atoms[atom_index++].get_atom_ff();
        } else {
            // create the body for this charge bin
            current_atoms.resize(current_index);
            bodies[charge_index] = Body(current_atoms);

            // prepare the next body
            current_index = 0;
            current_atoms.resize(atoms.size() - atom_index);

            // increment the charge level
            if (charge_index+1 == charge_levels.size()) [[unlikely]] {
                throw except::unexpected("SmartProteinManager::generate_protein: Reached end of charge levels list.");
            }
            charge = charge_levels[++charge_index];
        }
    }
    
    // create the final body of the loop
    current_atoms.resize(current_index);
    bodies[charge_index] = Body(current_atoms);

    return std::make_unique<data::Molecule>(std::move(bodies));
}

void SmartProteinManager::toggle_histogram_manager_init(bool state) {
    settings::flags::init_histogram_manager = state;
}

void SmartProteinManager::update_protein(double cutoff) {
    if (protein == nullptr || protein->size_atom() == 0) {
        // the protein is not initialized, so simply assign it a new one
        toggle_histogram_manager_init(false);
        logging::log("SmartProteinManager::update_protein: protein is nullptr or empty. Generating new protein.");
        protein = generate_new_protein(cutoff); 
        protein->set_histogram_manager(settings::hist::HistogramManagerChoice::PartialHistogramManagerMT);
        previous_cutoff = cutoff;
        return;
    }

    if (cutoff == previous_cutoff) {
        logging::log("SmartProteinManager::update_protein: cutoff is the same as previous. Nothing to do.");
        return;
    }

    // sanity check
    if (charge_levels.empty()) {
        throw except::unexpected("SmartProteinManager::update_protein: charge_levels is empty.");
    }
    logging::log("SmartProteinManager::update_protein: cutoff = " + std::to_string(cutoff) + ", previous_cutoff = " + std::to_string(previous_cutoff));

    // the idea is to generate a new protein with the new cutoff, and then replace the relevant bodies in the old protein with the new ones
    // this will trigger the "update" signals for the replaced bodies, which will inform the histogram manager to only recalculate their contributions
    std::unique_ptr<data::Molecule> new_protein = generate_new_protein(cutoff);
    if (cutoff < previous_cutoff) {
        // since cutoff is smaller than previously, we have to change all bins in the range [cutoff, previous_cutoff]

        // skip all bins before the relevant range
        unsigned int charge_index = 0;
        double current_cutoff = charge_levels[0];
        while (current_cutoff < cutoff && charge_index < charge_levels.size()) {
            current_cutoff = charge_levels[++charge_index];
        }

        // iterate through the remaining bins, and use a break statement to stop when we leave the relevant range
        for (; charge_index < charge_levels.size(); ++charge_index) {
            // check if the current bin is inside the range
            if (charge_levels[charge_index] < previous_cutoff) {
                // if so, we replace it with the new contents
                protein->get_body(charge_index) = std::move(new_protein->get_body(charge_index));
            } else {
                // if we have the same number of atoms as earlier, nothing has changed
                if (new_protein->get_body(charge_index).size_atom() == protein->get_body(charge_index).size_atom()) {
                    break;
                }

                // otherwise we replace it and stop iterating
                protein->get_body(charge_index) = std::move(new_protein->get_body(charge_index));
                break;
            }
        }
    } else {
        // since cutoff is larger than previously, we have to change all bins in the range [previous_cutoff, cutoff]

        // skip all bins before the relevant range
        unsigned int charge_index = 0;
        double current_cutoff = charge_levels[0];
        while (current_cutoff < previous_cutoff && charge_index < charge_levels.size()) {
            current_cutoff = charge_levels[++charge_index];
        }

        // iterate through the remaining bins, and use a break statement to stop when we leave the relevant range
        for (; charge_index < charge_levels.size(); ++charge_index) {
            // check if the current bin is inside the range
            if (charge_levels[charge_index] < cutoff) {
                // if so, we replace it with the new contents
                protein->get_body(charge_index) = std::move(new_protein->get_body(charge_index));
            } else {
                // otherwise we replace it and stop iterating
                protein->get_body(charge_index) = std::move(new_protein->get_body(charge_index));
                break;
            }
        }
    }

    previous_cutoff = cutoff;
}