File: MassExplainer.C

package info (click to toggle)
openms 1.11.1-5
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 436,688 kB
  • ctags: 150,907
  • sloc: cpp: 387,126; xml: 71,547; python: 7,764; ansic: 2,626; php: 2,499; sql: 737; ruby: 342; sh: 325; makefile: 128
file content (384 lines) | stat: -rw-r--r-- 12,571 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
// --------------------------------------------------------------------------
//                   OpenMS -- Open-Source Mass Spectrometry
// --------------------------------------------------------------------------
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
//
// This software is released under a three-clause BSD license:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of any author or any participating institution
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission.
// For a full list of authors, refer to the file AUTHORS.
// --------------------------------------------------------------------------
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// --------------------------------------------------------------------------
// $Maintainer: Chris Bielow $
// $Authors: Chris Bielow $
// --------------------------------------------------------------------------

#include <OpenMS/DATASTRUCTURES/MassExplainer.h>

#include <OpenMS/CHEMISTRY/EmpiricalFormula.h>
#include <OpenMS/DATASTRUCTURES/Compomer.h>


#include <iostream>
#include <algorithm>

namespace OpenMS
{

  MassExplainer::MassExplainer() :
    explanations_(),
    adduct_base_(),
    q_min_(1),
    q_max_(5),
    max_span_(3),
    max_neutrals_(0)
  {
    init_(true);
  }

  /// Constructor
  MassExplainer::MassExplainer(AdductsType adduct_base) :
    explanations_(),
    adduct_base_(adduct_base),
    q_min_(1),
    q_max_(5),
    max_span_(3),
    max_neutrals_(0)
  {
    init_(true);
  }

  /// Constructor
  MassExplainer::MassExplainer(Int q_min, Int q_max, Int max_span, DoubleReal thresh_logp) :
    explanations_(),
    adduct_base_(),
    q_min_(q_min),
    q_max_(q_max),
    max_span_(max_span),
    thresh_p_(thresh_logp),
    max_neutrals_(0)
  {
    init_(false);
  }

  /// Constructor
  MassExplainer::MassExplainer(AdductsType adduct_base, Int q_min, Int q_max, Int max_span, DoubleReal thresh_logp, Size max_neutrals) :
    explanations_(),
    adduct_base_(adduct_base),
    q_min_(q_min),
    q_max_(q_max),
    max_span_(max_span),
    thresh_p_(thresh_logp),
    max_neutrals_(max_neutrals)
  {
    init_(false);
  }

  /// check consistency of input
  /// @param init_thresh_p set default threshold (set to "false" to keep current value)
  void MassExplainer::init_(bool init_thresh_p)
  {
    if (init_thresh_p)
    {
      // every compound with log_p_<thresh_p_ will be discarded
      // we allow at most two Na+
      thresh_p_ = log(0.15) * 2 + log(0.7) * (q_max_ - 2);
    }

    // check consistency of members
    if (q_max_ < q_min_)
    {
      Int tmp = q_max_;
      q_max_ = q_min_;
      q_min_ = tmp;
      std::cerr << __FILE__ << ": Warning! \"q_max < q_min\" needed fixing!\n";
    }

    if (max_span_ > (q_max_ - q_min_ + 1))
    {
      max_span_ = q_max_ - q_min_ + 1;
      std::cerr << __FILE__ << ": Warning! \"max_span_ > (q_max - q_min + 1)\" needed fixing!\n";
    }

    if (adduct_base_.empty())
    {
      //default adducts are: H+, Na+, K+, NH4+
      // do NOT use "+" in empirical formula, as every + will add a proton weight!

      adduct_base_.push_back(createAdduct_("H", 1, 0.7));
      adduct_base_.push_back(createAdduct_("Na", 1, 0.1));
      adduct_base_.push_back(createAdduct_("NH4", 1, 0.1));
      adduct_base_.push_back(createAdduct_("K", 1, 0.1));

    }
  }

  /// Assignment operator
  MassExplainer & MassExplainer::operator=(const MassExplainer & rhs)
  {
    if (this == &rhs)
      return *this;

    explanations_ = rhs.explanations_;
    adduct_base_ = rhs.adduct_base_;
    q_min_ = rhs.q_min_;
    q_max_ = rhs.q_max_;
    max_span_ = rhs.max_span_;
    thresh_p_ = rhs.thresh_p_;

    return *this;
  }

  /// Destructor
  MassExplainer::~MassExplainer()
  {
  }

  //@}


  /// fill map with possible mass-differences along with their explanation
  void MassExplainer::compute()
  {
    // differentiate between neutral and charged adducts
    AdductsType adduct_neutral, adduct_charged;
    for (AdductsType::const_iterator it = adduct_base_.begin(); it != adduct_base_.end(); ++it)
    {
      if (it->getCharge() == 0)
        adduct_neutral.push_back(*it);
      else
        adduct_charged.push_back(*it);
    }

    // calculate some initial boundaries that can be used to shorten the enumeration process
    //Int q_comp_min = -max_span_; //minimal expected charge of compomer
    //Int q_comp_max = max_span_;  //maximal expected charge of compomer
    Int max_pq = q_max_;                 //maximal number of positve adduct-charges for a compomer
    //Int max_nq = q_max_; //maximal number of negative adduct-charges for a compomer

    for (AdductsType::const_iterator it = adduct_charged.begin(); it != adduct_charged.end(); ++it)
    {
      std::vector<Adduct> new_adducts;
      //create new compomers
      Int i = 1;
      //warning: the following code assumes that max_nq == max_pq!!
      while (abs(i * it->getCharge()) <= max_pq)
      {
        Adduct a(*it);
        // positive amount
        a.setAmount(i);
        // this might not be a valid compomer (e.g. due to net_charge excess)
        // ... but when combined with other adducts it might become feasible again
        new_adducts.push_back(a);
        ++i;
      }

      // combine all new compomers with existing compomers
      std::vector<Adduct>::const_iterator new_it;
      std::vector<Adduct>::const_iterator new_begin = new_adducts.begin();
      std::vector<Adduct>::const_iterator new_end   = new_adducts.end();

      std::size_t idx_last = explanations_.size();
      for (size_t ci = 0; ci < idx_last; ++ci)
      {
        for (new_it = new_begin; new_it != new_end; ++new_it)
        {
          Compomer cmpl(explanations_[ci]);
          cmpl.add(*new_it, Compomer::LEFT);
          explanations_.push_back(cmpl);

          Compomer cmpr(explanations_[ci]);
          cmpr.add(*new_it, Compomer::RIGHT);
          explanations_.push_back(cmpr);
        }
      }
      // finally add new compomers to the list itself
      for (new_it = new_begin; new_it != new_end; ++new_it)
      {
        Compomer cmpl;
        cmpl.add(*new_it, Compomer::LEFT);
        explanations_.push_back(cmpl);

        Compomer cmpr;
        cmpr.add(*new_it, Compomer::RIGHT);
        explanations_.push_back(cmpr);
      }

      //std::cout << "valid explanations: " << explanations_.size() << " after " << it->getFormula() << std::endl;

    } // END adduct add


    std::vector<Compomer> valids_only;
    for (size_t ci = 0; ci < explanations_.size(); ++ci)
    {
      if (compomerValid_(explanations_[ci]))
        valids_only.push_back(explanations_[ci]);
    }
    explanations_.swap(valids_only);

    // add neutral adducts
    Size size_of_explanations = explanations_.size();
    for (AdductsType::const_iterator it_neutral = adduct_neutral.begin(); it_neutral != adduct_neutral.end(); ++it_neutral)
    {
      std::cout << "Adding neutral: " << *it_neutral << "\n";
      for (Int n = 1; n <= (SignedSize)max_neutrals_; ++n)
      {
        // neutral itself:
        Compomer cmpr1;
        cmpr1.add((*it_neutral) * n, Compomer::RIGHT);
        explanations_.push_back(cmpr1);

        Compomer cmpr2;
        cmpr2.add((*it_neutral) * n, Compomer::LEFT);
        explanations_.push_back(cmpr2);


        // neutral in combination with others
        for (Size i = 0; i < size_of_explanations; ++i)
        {
          {
            Compomer cmpr(explanations_[i]);
            cmpr.add((*it_neutral) * n, Compomer::RIGHT);
            explanations_.push_back(cmpr);
          }
          {
            Compomer cmpr(explanations_[i]);
            cmpr.add((*it_neutral) * n, Compomer::LEFT);
            explanations_.push_back(cmpr);
          }
        }
      }
    }


    // sort according to (in-order) net-charge, mass, probability
    std::sort(explanations_.begin(), explanations_.end());

    // set Ids of compomers, which allows to uniquely identify them (for later lookup)
    for (size_t i = 0; i < explanations_.size(); ++i)
      explanations_[i].setID(i);

    //#if DEBUG_FD
    for (size_t ci = 0; ci < explanations_.size(); ++ci)
    {
      std::cerr << explanations_[ci] << " ";
    }
    //#endif

    std::cout << "MassExplainer table size: " << explanations_.size() << "\n";

  }

  //@name Accessors
  //@{

  /// Sets the set of possible adducts
  void MassExplainer::setAdductBase(AdductsType adduct_base)
  {
    adduct_base_ = adduct_base;
  }

  /// Returns the set of adducts
  MassExplainer::AdductsType MassExplainer::getAdductBase() const
  {
    return adduct_base_;
  }

  /// return a compomer by its Id (useful after a query() ).
  const Compomer & MassExplainer::getCompomerById(Size id) const
  {
    return explanations_[id];
  }

  //@}


  /// search the mass database for explanations
  /// @param net_charge       net charge of compomer
  /// @param mass_to_explain  mass in Da that needs explanation
  /// @param mass_delta       allowed deviation from exact mass
  /// @param thresh_log_p     minimal log probability required
  /// @param firstExplanation begin range with candidates according to net_charge and mass
  /// @param lastExplanation  end range
  SignedSize MassExplainer::query(const Int net_charge,
                                  const float mass_to_explain,
                                  const float mass_delta,
                                  const float thresh_log_p,
                                  std::vector<Compomer>::const_iterator & firstExplanation,
                                  std::vector<Compomer>::const_iterator & lastExplanation) const
  {
#ifdef DEBUG_FD
    if (fabs(mass_to_explain) < 120.0)
    {
      std::cout << "query: qnet=" << net_charge << "; explain_mass=" << mass_to_explain << "; delta=+-" << mass_delta << "\n";
    }
#endif

    Compomer cmp_low(net_charge, mass_to_explain - fabs(mass_delta), 1);
    firstExplanation = lower_bound(explanations_.begin(), explanations_.end(), cmp_low);

    Compomer cmp_high(net_charge, mass_to_explain + fabs(mass_delta), thresh_log_p);
    lastExplanation  = lower_bound(explanations_.begin(), explanations_.end(), cmp_high);

    return std::distance(firstExplanation, lastExplanation);
  }

  ///check if the generated compomer is valid judged by its probability, charges etc
  bool MassExplainer::compomerValid_(const Compomer & cmp)
  {
    // probability ok?
    if (cmp.getLogP() < thresh_p_)
      return false;

    // limit the net charge by the maximal span of co-features
    if (abs(cmp.getNetCharge()) >= max_span_)
      return false;

    if (cmp.getNegativeCharges() > q_max_)
      return false;

    if (cmp.getPositiveCharges() > q_max_)
      return false;

    //TODO include mass?
    //if (abs(cmp.mass_) > mass_max_) return false;

    //std::cout << "valid:: " << cmp <<"\n";
    return true;
  }

  /// create a proper adduct from formula and charge and probability
  Adduct MassExplainer::createAdduct_(const String & formula, const Int charge, const DoubleReal p) const
  {

    EmpiricalFormula ef(formula);
    //effectively subtract charge electron masses: (-H plus one Proton)*charge
    ef -= ("H" + String(charge)); // subtracts x hydrogen
    ef.setCharge(charge); // adds x protons

    Adduct a(charge, 1, ef.getMonoWeight(), formula, log(p), 0);

    return a;
  }

}