File: formula.cpp

package info (click to toggle)
massxpert 2.3.6-1squeeze1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 20,736 kB
  • ctags: 3,541
  • sloc: cpp: 44,108; xml: 7,381; sh: 604; makefile: 108; ansic: 7
file content (1498 lines) | stat: -rw-r--r-- 40,081 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
/* massXpert - the true massist's program.

   Copyright(C) 2006,2007 Filippo Rusconi

   http://www.massxpert.org/massXpert

   This file is part of the massXpert project.

   The massxpert project is the successor to the "GNU polyxmass"
   project that is an official GNU project package(see
   www.gnu.org). The massXpert project is not endorsed by the GNU
   project, although it is released ---in its entirety--- under the
   GNU General Public License. A huge part of the code in massXpert
   is actually a C++ rewrite of code in GNU polyxmass. As such
   massXpert was started at the Centre National de la Recherche
   Scientifique(FRANCE), that granted me the formal authorization to
   publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License version 3, as published by the Free Software Foundation.
   

   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this software; if not, write to the

   Free Software Foundation, Inc.,

   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/


/////////////////////// Qt includes
#include <QChar>
#include <QString>


/////////////////////// Local includes
#include "formula.hpp"


namespace massXpert
{

  //! Constructs a formula initialized with \p formula.
  /*! Initialization of the formula is done by \p formula.  Upon
    construction of the formula, no parsing occurs.

    \p formula gets simply copied into the member formula.
  
    \param formula formula in the form of a string. Defaults to a null
    string.
  */
  Formula::Formula(const QString &formula)
  {
    // An empty initializer is deliberately not copied: m_formula then
    // keeps its default-constructed state. No parsing happens here.
    if (formula.isEmpty())
      return;

    m_formula = formula;
  }


  //! Constructs a copy of \p other.
  /*! 
  
    \param other formula to be used as a mold.
  */
  Formula::Formula(const Formula &other)
    : m_formula(other.m_formula),
      m_plusFormula(other.m_plusFormula),
      m_minusFormula(other.m_minusFormula)
  {
    // Deep copy of the atom counts: every AtomCount held by other is
    // duplicated on the heap, so that this formula stores (and later
    // deletes) its own instances.
    const int total = other.m_atomCountList.size();
    int index = 0;

    while (index < total)
      {
        m_atomCountList.append(new AtomCount(*other.m_atomCountList.at(index)));
        ++index;
      }
  }


  //! Destroys the formula.
  Formula::~Formula()
  {
    // The list stores pointers to dynamically allocated AtomCount
    // instances: detach them one by one from the front of the list and
    // free each of them until the list is empty.
    for ( ; !m_atomCountList.isEmpty(); )
      {
        AtomCount *doomed = m_atomCountList.takeFirst();
        delete doomed;
      }
  }


  //! Returns the list of atom count objects.
  /*! 
    \return The list of atom count objects.
  */
  const QList<AtomCount *> &
  Formula::atomCountList() const
  {
    // Hand out read-only access to the member list; the list itself is
    // not copied.
    const QList<AtomCount *> &list = m_atomCountList;

    return list;
  }


  //! Creates a new formula initialized using \p this.
  /*! The initialization involved copying all the data from \p this,
    including the atom count lists.

    \return The new formula, which should be deleted when no more in
    use.
  */
  Formula *
  Formula::clone() const
  {
    // The copy constructor does all the work (strings and atom counts);
    // the caller owns the returned heap-allocated instance.
    return new Formula(*this);
  }

  //! Modifies \p other to be identical to \p this.
  /*!
  
    \param other formula.
  */
  void
  Formula::clone(Formula *other) const
  {
    // Cloning a formula onto itself is a no-op.
    if (this == other)
      return;

    Q_ASSERT(other);

    // The three textual members are plainly copied over.
    other->m_formula = m_formula;
    other->m_plusFormula = m_plusFormula;
    other->m_minusFormula = m_minusFormula;

    // Free whatever atom counts the destination was holding...
    while (!other->m_atomCountList.isEmpty())
      {
        AtomCount *stale = other->m_atomCountList.takeFirst();
        delete stale;
      }

    // ... and refill it with fresh copies of ours.
    const int total = m_atomCountList.size();

    for (int index = 0; index < total; ++index)
      {
        AtomCount *copy = new AtomCount();

        m_atomCountList.at(index)->clone(copy);

        other->m_atomCountList.append(copy);
      }
  }


  //! Modifies \p this  to be identical to \p other.
  /*! 
  
    \param other formula to be used as a mold.
  */
  void 
  Formula::mold(const Formula &other)
  {
    if (&other == this)
      return;

    AtomCount *atomCount = 0;
  
    m_formula = other.m_formula;
    m_plusFormula = other.m_plusFormula;
    m_minusFormula = other.m_minusFormula;
    
    while(!m_atomCountList.isEmpty())
      delete m_atomCountList.takeFirst();
  
    for (int iter = 0 ; iter < other.m_atomCountList.size(); ++iter)
      {
	atomCount = new AtomCount();
      
	atomCount->mold(*other.m_atomCountList.at(iter));
      
	m_atomCountList.append(atomCount);
      }
  }


  //! Assigns \p other to \p this formula.
  /*!   
    \param other formula.
  
    \return a reference to \p this formula, after it was made a copy of \p other.
  */
  Formula &
  Formula::operator =(const Formula &other)
  {
    // Self-assignment leaves the object untouched.
    if (this == &other)
      return *this;

    // mold() copies the strings and deep-copies the atom counts.
    mold(other);

    return *this;
  }


  //! Sets the formula.
  /*! 
  
    \param formula formula initializer.
  */
  void
  Formula::setFormula(const QString &formula)
  {
    m_formula = formula;
  }


  //! Sets the formula.
  /*! 
  
    \param formula formula initializer.
  */
  void
  Formula::setFormula(const Formula &formula)
  {
    m_formula = formula.m_formula;
  }


  //! Returns the formula.
  /*!
  
    \return the formula as a string.
  */
  QString
  Formula::formula() const
  {
    // Returned by value, unlike plusFormula() and minusFormula().
    return this->m_formula;
  }


  //! Sets the plus-formula.
  /*! 
  
    \param formula formula initializer.
  */
  void
  Formula::setPlusFormula(const QString &formula)
  {
    m_plusFormula = formula;
  }


  //! Returns the plus-formula.
  /*!
  
    \return the plus-formula as a string.
  */
  const QString &
  Formula::plusFormula() const
  {
    // Reference to the member holding the '+' component.
    return this->m_plusFormula;
  }


  //! Sets the minus-formula.
  /*! 
  
    \param formula formula initializer.
  */
  void
  Formula::setMinusFormula(const QString &formula)
  {
    m_minusFormula = formula;
  }


  //! Returns the minus-formula.
  /*!
  
    \return the minus-formula as a string.
  */
  const QString &
  Formula::minusFormula() const
  {
    // Reference to the member holding the '-' component.
    return this->m_minusFormula;
  }


  //! Tests equality.
  /*! The test only pertains to the formula(not the minus-/plus-
    formulas).
  
    \param other formula to be compared with \p this.

    \return true if the formulas are identical, false otherwise.
  */
  bool
  Formula::operator ==(const Formula &other) const
  {
    // Only the main formula text takes part in the comparison; the
    // plus-/minus-formulas and the atom counts are ignored.
    const bool sameText = (m_formula == other.m_formula);

    return sameText;
  }


  //! Tests inequality.
  /*! The test only pertains to the formula(not the minus-/plus-
    formulas).
  
    \param other formula to be compared with \p this.

    \return true if the formulas differ, false otherwise.
  */
  bool
  Formula::operator !=(const Formula &other) const
  {
    // Only the main formula text takes part in the comparison; the
    // plus-/minus-formulas and the atom counts are ignored.
    const bool differentText = (m_formula != other.m_formula);

    return differentText;
  }


  //! Tells the actions found in the formula.
  /*! Following analysis of the \p formula argument, this function will
    be able to tell if the formula contains only '+'-associated elements
    or also '-'-associated elements. 

    If a formula contains no sign at all, then it is considered to
    contain only '+'-associated members. As soon as one member is
    associated with a '-' action, the minus action prevails.
  
    This function is used to quickly have an indication if the
    splitParts() function is to be run or if it is not necessary.

    \param formula formula to report the actions about.

    \return '+' if no '-' action was found, '-' otherwise.

    \sa splitParts(int times, bool store, bool reset)
  */
  QChar
  Formula::actions(const QString &formula) const
  {
    // A single '-' anywhere in the text is enough for the minus action
    // to prevail; otherwise the formula is considered '+'-only.
    if (formula.count('-', Qt::CaseInsensitive) > 0)
      return '-';

    return '+';
  }


  //! Tells the actions found in the formula.
  /*! Following analysis of the \p formula argument, this function will
    be able to tell if the formula contains only '+'-associated elements
    or also '-'-associated elements. 

    If a formula contains no sign at all, then it is considered to
    contain only '+'-associated members. As soon as one member is
    associated with a '-' action, the minus action prevails.
  
    This function is used to quickly have an indication if the
    splitParts() function is to be run or if it is not necessary.

    \return '+' if no '-' action was found, '-' otherwise.

    \sa splitParts(int times, bool store, bool reset)
  */
  QChar
  Formula::actions() const
  {
    // Same analysis as the overload above, run on the member formula
    // exactly as stored (nothing is stripped from it beforehand).
    return this->actions(m_formula);
  }

  // Returns the number of removed characters.
  int
  Formula::removeTitle()
  {
    int length = m_formula.length();
    
    // We want to remove the possibly-existing title for the formula,
    // which is enclosed between `"' at the beginning of the line.

    // One formula can be like this:

    // "Decomposed adenine" C5H4N5 +H 

    // The "Decomposed adenine" is the title
    // The C5H4N5 +H is the formula.
    
    m_formula.remove(QRegExp("\".*\""));

    // Return the number of removed characters.
    return (length - m_formula.length());
  }
  

  // Returns the number of removed characters.
  int
  Formula::removeSpaces()
  {
    int length = m_formula.length();
    
    // We want to remove all the possibly-existing spaces.
    
    m_formula.remove(QRegExp("\\s+"));

    // Return the number of removed characters.
    return (length - m_formula.length());
  }
  

  //! Splits the formula according to its plus-/minus- actions.
  /*! Analyzes the formula and separates all the minus components of
    that formula from all the plus components. The different components
    are set to their corresponding formula(minus formula and plus
    formula).

    At the end of the split work, each sub-formula(plus- and/or minus-)
    is actually parsed for validity, using the reference atom list.
  
    \param refList List of reference atoms.

    \param times Number of times the formula has to be accounted
    for. Defaults to 1.
  
    \param store Indicates if AtomCount objects created during the
    parsing of the sub-formulas generated by the split of the formula
    have to be stored, or not. Defaults to false.
  
    \param reset Indicates if the list of AtomCount objects has to
    be reset before the splitParts work. This parameter may be useful in
    case the caller needs to "accumulate" the accounting of the
    formula. Defaults to false.
  
    \return MXT_FORMULA_SPLIT_FAIL if the splitting failed,
    MXT_FORMULA_SPLIT_PLUS if the components of the formula are all of
    type plus, MXT_FORMULA_SPLIT_MINUS if all the components of the
    formula are of type minus. The result value can be an OR'ing of
    MXT_FORMULA_SPLIT_PLUS and MXT_FORMULA_SPLIT_MINUS.
  */
  int
  Formula::splitParts(const QList<Atom *> &refList,
		       int times, 
		       bool store, 
		       bool reset)
  {
    QChar curChar;
    QString tempFormula;

    int result = 0;

    bool wasParsingFormula = false;
    bool shouldBeFormula = false;
    bool wasMinusSign = false;
	
    Q_ASSERT(refList.size());
  
    // We are asked to put all the '+' components of the formula
    // into corresponding formula and the same for the '-' components.
	
    m_plusFormula.clear();
    m_minusFormula.clear();

    // Because the formula that we are analyzing might contain a title
    // and spaces , we first remove these. But make a local copy of
    // the member datum.

    QString formula = m_formula;
    
    // One formula can be like this:

    // "Decomposed adenine" C5H4N5 +H 

    // The "Decomposed adenine" is the title
    // The C5H4N5 +H is the formula.
    
    formula.remove(QRegExp("\".*\""));

    // We want to remove all the possibly-existing spaces.
    
    formula.remove(QRegExp("\\s+"));
    
    // If the formula does not contain any '-' character, then we
    // can approximate that all the formula is a '+' formula, that is a
    // plusFormula:
	
    if (actions() == '+')
      {
	m_plusFormula.append(formula);
		
	// At this point we want to make sure that we have a correct
	// formula. Remove all the occurrences of the '+' sign.
	m_plusFormula.replace(QString("+"), QString(""));

	if(m_plusFormula.length() > 0)
	  {
	    if (!parse(refList, m_plusFormula, times, store, reset))
	      return MXT_FORMULA_SPLIT_FAIL;
	    else
	      return MXT_FORMULA_SPLIT_PLUS;
	  }
      }
  
    // At this point, we truly have to iterate in the formula...
  
    for (int iter = 0 ; iter < formula.length() ; ++iter)
      {
	curChar = formula.at(iter);
	//       qDebug() << "curChar:" << curChar;
      
	if(curChar == '+' || curChar == '-')
	  {
	    if (shouldBeFormula)
	      return MXT_FORMULA_SPLIT_FAIL;

	    if (wasParsingFormula)
	      {
		// We were parsing a formula, wich means that we are
		// ending that formula now, by starting another one. For
		// example, if we had "-CH3+COOH" we would typically be
		// at the '+' after having parsed -CH3. So we now have
		// to account for that latter formula.

		if(wasMinusSign)
		  m_minusFormula.append(tempFormula);
		else
		  m_plusFormula.append(tempFormula);

		// Reinit the tempFormula for next round.
		tempFormula.clear();

		// Now set proper bool values for next round.
		shouldBeFormula = true;
		wasMinusSign =(curChar == '-' ? true : false);

		continue;
	      }
	    else
	      {
		wasMinusSign =(curChar == '-' ? true : false);
		shouldBeFormula = true;

		continue;
	      }
	  }
	else
	  {
	    // We are parsing either a digit or an alphabetical
	    // character : we just append it to the tempFormula:
	    tempFormula.append(curChar);

	    wasParsingFormula = true;

	    // We do not necessarily have to expect another formula
	    // component at next round, admitting we were on the
	    // nitrogen atom of CH3CN:
	    shouldBeFormula = false;

	    continue;
	  }
      } // End for (int iter = 0 ; iter < formula.length() ; ++iter)

    // At this point the loop was finished so we might have something
    // interesting cooking:

    if (wasParsingFormula && tempFormula.length() > 0)
      {
	if(wasMinusSign)
	  m_minusFormula.append(tempFormula);
	else
	  m_plusFormula.append(tempFormula);
      }
		
    // At this point we want to make sure that we have a correct
    // formula. First reset the atomcount stuff if required.

    if (reset)
      {
	while(!m_atomCountList.isEmpty())
	  delete m_atomCountList.takeFirst();
      }
  	
    // Now that we have reset if required the atomCountList, we need not
    // and we must not reset during the parsing below, otherwise if we
    // have -H+H3PO4, then we'll compute +H3PO4 first, then we compute
    // -H with reset to true : the +H3PO4 component is destroyed!

    if (m_plusFormula.length() > 0)
      {
	if(!parse(refList, m_plusFormula, times, store, false))
	  return MXT_FORMULA_SPLIT_FAIL;
	else
	  result = MXT_FORMULA_SPLIT_PLUS;
      }
  
    if (m_minusFormula.length() > 0)
      {
	if(!parse(refList, m_minusFormula, -times, store, false))
	  return MXT_FORMULA_SPLIT_FAIL;
	else
	  result |= MXT_FORMULA_SPLIT_MINUS;
      }
  
    //   qDebug() << __FILE__ << __LINE__
    // 	    << formula.toAscii() << "-->" 
    // 	    << "(+)" << m_plusFormula.toAscii() 
    // 	    << "(-)" << m_minusFormula.toAscii();
  
    return result;
  }


  //! Parses the \p formula using the reference atom list.
  /*! Upon parsing of the formula, a list of AtomCount objects are
    created in order to be able to account for the mass of the formula.
  
    \param refList List of reference atoms.

    \param formula Formula to parse.
  
    \param times Number of times that the formula should be accounted
    for. Default value is 1.
  
    \param store Indicates if AtomCount objects created during the
    parsing of the formula have to be stored, or not. Default value is
    false.
  
    \param reset Indicates if AtomCount objects created during the
    parsing of the formula have to be destroyed before doing another
    parsing. This parameter is interesting if the caller needs to
    "accumulate" the accounting of the formula. Default value is false.
  
    \return true if parsing succeeded, false otherwise.
  */
  bool
  Formula::parse(const QList<Atom *> &refList,
		  const QString &formula,
		  int times, 
		  bool store, 
		  bool reset)
  {
    QChar curChar;
    QString parsedCount;
    QString parsedSymbol;
    AtomCount *atomCount = 0;

    bool wasDigit = false;
    bool wasUpper = false;
    bool gotUpper = false;

    Q_ASSERT(refList.size());

    // The formula member is a QString that should hold the formula
    // according to this typical schema: "H2O"(water). That means we
    // only want letters(Upper and lower case and number).

    // The member atomCountList might be reset before starting, or if
    // !reset, then the new atom counts are added to the ones
    // preexisting.

    // The formula should thus not be empty, otherwise there is nothing
    // to do. But it is not an error that the formula be empty.
    if (formula.length() == 0)
      return true;

    if (!checkSyntax(formula))
      return false;

    // Also, the first character of the formula should be an Uppercase
    // letter. If not, logically, the formula is incorrect.
    if (formula.at(0).category() != QChar::Letter_Uppercase)
      return false;

    if (reset)
      {
	// We first want to iterate in the atomCountList and make sure
	// we remove all items from it.

	while(!m_atomCountList.isEmpty())
	  delete m_atomCountList.takeFirst();
      }

    // And now finally start the real parsing stuff.

    for (int iter = 0 ; iter < formula.length() ; ++iter)
      {
	curChar = formula.at(iter);

	if(curChar.category() == QChar::Number_DecimalDigit)
	  {
	    // We are parsing a digit.

	    parsedCount.append(curChar);

	    wasDigit = true;
	    wasUpper = false;

	    continue;
	  }
	else if (curChar.category() == QChar::Letter_Lowercase)
	  {
	    // Current character is lowercase, which means we are inside
	    // of an atom symbol, such as Ca(the 'a') or Nob(either
	    // 'o' or 'b'). Thus, gotUpper should be true !

	    if (!gotUpper)
	      return false;

	    // Make use of the parsed numerical character.
	    parsedSymbol.append(curChar);

	    // Let the people know that we have parsed a lowercase char
	    // and not a digit.
	    wasUpper = false;
	    wasDigit = false;
	  }
	else if (curChar.category() == QChar::Letter_Uppercase)
	  {
	    // Current character is uppercase, which means that we are
	    // at the beginning of an atom symbol. Check if there was a
	    // symbol being parsed before this one.

	    if (parsedSymbol.isEmpty())
	      {
		// Start new parsing round.
		parsedSymbol.append(curChar);

		gotUpper = true;
		wasUpper = true;
		wasDigit = false;
		continue;
	      }
	  
	    // There was a symbol being parsed. Create an object.
	    atomCount = new AtomCount();
	    atomCount->setSymbol(parsedSymbol);
	  
	    // Now we can prepare the field for the next one.
	    parsedSymbol.clear();
	    parsedSymbol.append(curChar);

	    // Before going on, check if the symbol is correct.
	    if (atomCount->isSymbolKnown(refList) == -1)
	      {
		delete atomCount;
	      
		return false;
	      }
	  
	    // If there was a count being parsed, we have to take it
	    // into account.
	    if (wasDigit)
	      {
		// And now we have to convert the string representation
		// of the atom count for that atom to int. In fact, we
		// have to be able to know that water H2O has TWO
		// hydrogen atoms in it.
		bool isok = true;
		atomCount->setCount(parsedCount.toInt(&isok, 10));

		if(atomCount->count() == 0 && !isok)
		  {
		    // The atom counts for nothing ! Or was there
		    // an error in the conversion ?

		    delete atomCount;

		    return false;
		  }
	      
		// But we remember that we have to take into account the
		// times parameter.
	      
		atomCount->setCount(atomCount->count() * times);

		// Clear parsedCount for next count parsing round.
		parsedCount.clear();
	      }
	    else
	      atomCount->setCount(1 * times);
	  
	    // We can now make sure that the atom gets represented
	    // in the formula.atomCountList list of
	    // AtomCount*. But for this we use a function that
	    // will make sure there is not already the same atom
	    // symbol in that List, so as not to duplicate the items
	    // accounting for a single atom symbol.

	    if (store)
	      {
		if(!accountInList(atomCount, 0))
		  delete atomCount;
	      }
	    else
	      delete atomCount;

	    // Let the people know what we got:
	  
	    wasDigit = false;
	    gotUpper = true;
	    wasUpper = true;
	  }
	// end(curChar.category() == QChar::Letter_Uppercase)
      }
    // end for (int iter = 0 ; iter < formula.length() ; ++iter)

    // At this point we are at then end of the string, and we thus might
    // still have something cooking:

    // Thus we have to check that the last parsed atom
    // symbol is correct. First allocate an AtomCount
    // instance and set the symbol to it.

    atomCount = new AtomCount();
    atomCount->setSymbol(parsedSymbol);

    if (atomCount->isSymbolKnown(refList) == -1)
      {
	delete atomCount;

	return false;
      }

    // And now we have to convert the string representation
    // of the atom count for that atom to int. In fact, we
    // have to be able to know that water H2O has TWO
    // hydrogen atoms in it.

    // If there was a count being parsed, we have to take it
    // into account.
    if (wasDigit)
      {
	// And now we have to convert the string representation
	// of the atom count for that atom to int. In fact, we
	// have to be able to know that water H2O has TWO
	// hydrogen atoms in it.
	bool isok = true;
	atomCount->setCount(parsedCount.toInt(&isok, 10));

	if(atomCount->count() == 0 && !isok)
	  {
	    // The atom counts for nothing ! Or was there
	    // an error in the conversion ?

	    delete atomCount;

	    return false;
	  }
	      
	// But we remember that we have to take into account the
	// times parameter.
	      
	atomCount->setCount(atomCount->count() * times);
      }
    else
      atomCount->setCount(1 * times);
	  
    // Finally, if asked by the caller, we can account for
    // this atom symbol/count also !

    if (store)
      {
	if(!accountInList(atomCount, 0))
	  delete atomCount;
      }
    else
      delete atomCount;

    return true;
  }


  //! Accounts the \p atomCount instance into the list.
  /*! The \p atomCount instance passed as parameter is accounted into
    the list. If the list already contains a AtomCount instance by
    the same symbol as \p atomCount, then no other instance is created,
    but the count of the AtomCount object found in the list is
    simply incremented.
  
    \param atomCount instance to be accounted in the list.
  
    \param newcount pointer to an integer into which to report the new
    count for the \p atomCount. Defaults to 0, in which case no report
    is done.
  
    \return true if \p atomCount itself was appended to the list, false
    if an item with the same symbol was found and updated instead, in
    which case \p atomCount was not stored and the caller can delete it.
  */
  bool
  Formula::accountInList(AtomCount *atomCount,
                         int *newcount)
  {
    Q_ASSERT(atomCount != NULL);

    // Look for an item of the member list that has the same symbol as
    // the atomCount passed as parameter. If there is one, its count is
    // updated with the count of the parameter and the parameter object
    // itself is not stored.

    const int listSize = m_atomCountList.size();

    for (int index = 0; index < listSize; ++index)
      {
        AtomCount *existing = m_atomCountList.at(index);

        if (!(existing->symbol() == atomCount->symbol()))
          continue;

        existing->incrementCount(atomCount->count());

        // If the caller wants to get a numerical feedback on the new
        // count for that symbol, report it.
        if (newcount != NULL)
          *newcount = existing->count();

        // If the result of the operation above is a 0-count item, that
        // item is removed from the list and freed.
        if (existing->count() == 0)
          {
            m_atomCountList.removeAt(index);

            delete existing;
          }

        // A pre-existing item was updated, so the atomCount parameter
        // was not put in the list. Return false, so that the caller
        // knows that it can delete that object.
        return false;
      }

    // No item with that symbol was found in the list, so the atomCount
    // passed as parameter is appended to it.
    m_atomCountList.append(atomCount);

    if (newcount != NULL)
      *newcount = atomCount->count();

    // The atomCount object was truly added to the list.
    return true;
  }


  //! Checks the syntax of the \p formula.
  /*! The syntax of the \p formula is checked by verifying that the
    letters and ciphers in the formula are correctly placed. That is, we
    want that the ciphers appear after an atom symbol and not before
    it. We want that the atom symbol be made of one uppercase letter and
    that the following letters be lowercase.
  
    \attention This is a syntax check and not a true validation, as the
    formula can contain symbols that are syntactically valid but
    corresponding to atom definitions not available on the system.

    \param formula the formula.
  
    \return true upon successful check, false otherwise.

    \sa validate().
  */
  bool
  Formula::checkSyntax(const QString &formula)
  {
    QChar curChar;

    bool wasDigit = false;
    bool gotUpper = false;
    bool wasUpper = false;
    bool wasSign = false;

    // Because the formula that we are analyzing might contain a title
    // and spaces , we first remove these. But make a local copy of
    // the member datum.

    QString localFormula = formula;
    
    // One formula can be like this:

    // "Decomposed adenine" C5H4N5 +H 

    // The "Decomposed adenine" is the title
    // The C5H4N5 +H is the formula.
    
    localFormula.remove(QRegExp("\".*\""));

    // We want to remove all the possibly-existing spaces.
    
    localFormula.remove(QRegExp("\\s+"));
    
  
    for (int iter = 0 ; iter < localFormula.length() ; ++iter)
      {
	curChar = localFormula.at(iter);

	if(curChar.category() == QChar::Number_DecimalDigit)
	  {
	    // We are parsing a digit.

	    // We may not have a digit after a +/- sign.
	    if (wasSign)
	      return false;
	  
	    wasSign = false;
	    wasDigit = true;
	    wasUpper = false;

	    continue;
	  }
	else if (curChar.category() == QChar::Letter_Lowercase)
	  {
	    // Current character is lowercase, which means we are inside
	    // of an atom symbol, such as Ca(the 'a') or Nob(either
	    // 'o' or 'b'). Thus, gotUpper should be true !

	    if (!gotUpper)
	      return false;


	    // We may not have a lowercase character after a +/- sign.
	    if (wasSign)
	      return false;
	  
	    // Let the people know that we have parsed a lowercase char
	    // and not a digit.
	    wasSign = false;
	    wasUpper = false;
	    wasDigit = false;
	  }
	else if (curChar.category() == QChar::Letter_Uppercase)
	  {
	    // Current character is uppercase, which means that we are
	    // at the beginning of an atom symbol.
	  
	    // Let the people know what we got:
	  
	    wasSign = false;
	    wasDigit = false;
	    gotUpper = true;
	    wasUpper = true;
	  }
	else 
	  {
	    if (curChar != '+' && curChar != '-')
	      return false;
	    else
	      {
		// We may not have 2 +/- signs in a raw.
		if(wasSign)
		  return false;
	      }
	  
	    wasSign = true;
	    wasDigit = false;
	    gotUpper = false;
	    wasUpper = false;
	  }
      }
    // end for (int iter = 0 ; iter < localFormula.length() ; ++iter)

    // At this point we found no error condition.
    return true;
  }


  //! Checks the syntax of the member formula.
  /*! Convenience overload that runs checkSyntax(const QString &) on
    the m_formula member datum. See that function for the rules that
    the letters, ciphers and +/- signs of the formula must obey.

    \attention This is a syntax check and not a true validation, as the
    formula can contain symbols that are syntactically valid but
    corresponding to atom definitions not available on the system.

    \return true upon successful check, false otherwise.

    \sa checkSyntax(const QString &), validate().
  */
  bool
  Formula::checkSyntax()
  {
    // No formula was specified by the caller: check our own one.
    const bool syntaxIsCorrect = checkSyntax(m_formula);

    return syntaxIsCorrect;
  }


  //! Returns the formula in a simplified form.
  /*! The idea is to turn a formula made of several plus-/minus- parts,
    like "+CH3 -C +C2H6O4N5P", into the net count of each atom. The
    string is the one built by elementalComposition() from the list of
    atom count objects.

    \attention This function does not parse the formula by itself; the
    list of atom count objects presumably has to be filled-in
    beforehand (see accountAtoms()) -- to be confirmed against the
    callers.

    \return The string returned by elementalComposition().

    \sa elementalComposition().
  */
  QString
  Formula::simplify()
  {
    const QString simplifiedFormula = elementalComposition();

    return simplifiedFormula;
  }
  


  //! Validates the formula.
  /*! The validation of the formula involves:

    \li Checking that the formula is not empty;

    \li Splitting that formula into its plus-/minus- parts and parse
    the obtained plus-/minus- formulas. During parsing of the
    minus-/plus- formulas, each atom symbol encountered in the
    formulas is validated against the reference atom list;

    \li Checking that at least the plus- or the minus- part contains
    something(same idea that the formula cannot be empty).
  

    \param refList List of reference atoms.

    \param store Indicates if AtomCount objects created during the
    parsing of the sub-formulas generated by the split of the formula
    have to be stored, or not. Defaults to false.
  
    \param reset Indicates if the list of AtomCount objects has to be
    reset before the splitParts work. This parameter may be useful in
    case the caller needs to "accumulate" the accounting of the
    formula. Defaults to false.
  

    \return true if the validation succeeded, false otherwise.
  */
  bool
  Formula::validate(const QList<Atom *> &refList, 
		     bool store, bool reset)
  {
    if (!m_formula.size())
      return false;
    
    int result = splitParts(refList, 1, store, reset);
  
    if (result == MXT_FORMULA_SPLIT_FAIL)
      return false;

    // The sum of m_plusFormula and m_minusFormula cannot be empty.
    if (m_plusFormula.size() && ! m_plusFormula.size())
      return false;
    
    return true;
  }


  //! Accounts \p this formula's mono/avg masses.
  /*! The masses are calculated first and then the \p mono and \p avg
    parameters are updated using the calculated values. The accounting
    can be compounded \p times times.
  
    \param refList List of atoms to be used as reference.

    \param mono Pointer to the monoisotopic mass to be updated. Defaults
    to 0, in which case the value is not updated.

    \param avg Pointer to the average mass to be updated.  Defaults to 0,
    in which case the value is not updated.

    \param times Times that the masses should be compounded prior to be
    accounted for.

    \return true upon success, false otherwise.
  */
  bool 
  Formula::accountMasses(const QList<Atom *> &refList,
			  double *mono, double *avg, int times)
  {
    // Note the 'times' param below.
    if (splitParts(refList, times, true, true) == MXT_FORMULA_SPLIT_FAIL)
      return false;
  
    for (int iter = 0; iter < m_atomCountList.size(); ++iter)
      {
	AtomCount *atomCount = 0;
      
	atomCount = m_atomCountList.at(iter);
      
	// note the '1' times below because we already accounted
	// for the 'times' parameter in the splitParts() call.
	if(!atomCount->accountMasses(refList, mono, avg, 1))
	  return false;
      }

    return true;
  }


  //! Accounts \p this formula's mono/avg masses.
  /*! The masses are calculated first and then the \p ponderable is
    updated using the calculated values. The accounting can be
    compounded \p times times.
  
    \param refList List of atoms to be used as reference.

    \param ponderable Pointer to the ponderable to be updated. Cannot be 0.

    \param times Times that the masses should be compounded prior to be
    accounted for.

    \return true upon success, false otherwise.
  */
  bool 
  Formula::accountMasses(const QList<Atom *> &refList,
			  Ponderable *ponderable, int times)
  {
    Q_ASSERT(ponderable);
  
    // Note the 'times' param below.
    if (splitParts(refList, times, true, true) == MXT_FORMULA_SPLIT_FAIL)
      return false;
  
    for (int iter = 0; iter < m_atomCountList.size(); ++iter)
      {
	AtomCount *atomCount = m_atomCountList.at(iter);
      
	// note the '1' times below because we already accounted
	// for the 'times' parameter in the splitParts() call.
	if(!atomCount->accountMasses(refList, 
				       &ponderable->rmono(), 
				       &ponderable->ravg(), 1))
	  return false;
      }

    return true;
  }


  //! Accounts the atoms in the formula \p times times.
  /*! Calls splitParts(\p refList, \p times, true, false): the atom
    count objects are stored, but their list is \e not reset before the
    work, so that successive calls accumulate in that list.

    \param refList List of atoms to be used as reference.

    \param times Times that the atom counts should be compounded prior to
    be accounted for.

    \return true upon success, false otherwise.

    \sa splitParts().
  */
  bool
  Formula::accountAtoms(const QList<Atom *> &refList, int times)
  {
    // Store the atom counts (true) without resetting the list (false).
    const int splitResult = splitParts(refList, times, true, false);

    return splitResult != MXT_FORMULA_SPLIT_FAIL;
  }


  //! Computes a formula string.
  /*! Computes a formula string by iterating in the list of atom count
    objects, in the order of that list:

    \li an item with a 0 count is not listed;

    \li an item with a positive count is listed as symbol + count, like
    "C12" (the count is always written, even when it is 1);

    \li an item with a negative count is listed with a leading '-' and
    the absolute value of its count, like "-C12". If the next listed
    item has a positive count, a '+' is appended, so that this next
    item is not read as belonging to the minus- part ("-C12+H3").

    \return A string containing the formula. The string is empty if the
    list holds no item with a non-0 count.
  */
  QString
  Formula::elementalComposition() const
  {
    QString composition;

    for (int iter = 0; iter < m_atomCountList.size(); ++iter)
      {
        AtomCount *atomCount = m_atomCountList.at(iter);

        // Do not list item if it is 0-count.
        if (!atomCount->count())
          continue;

        if (atomCount->count() > 0)
          {
            // The count is positive, just list the item as "C12".
            composition += QString("%1%2")
              .arg(atomCount->symbol())
              .arg(atomCount->count());

            continue;
          }

        // The count is negative, so we have to list the item as
        // "-C12", but then, we do not want to have "-C-12" because
        // count is negative, thus we change its value to positive by
        // multiplying it by -1.

        // Look for the next item that will actually be listed, that
        // is, skip the 0-count items exactly like the main loop does.
        // If that item is positive, a '+' must separate it from the
        // current negative item, otherwise it would erroneously get
        // factorized under the negative sign. If it is negative, or if
        // there is no such item, nothing is needed: either a '-' is
        // going to be added anyways or the string ends here.
        bool nextListedIsPositive = false;

        for (int next = iter + 1; next < m_atomCountList.size(); ++next)
          {
            AtomCount *nextAtomCount = m_atomCountList.at(next);

            if (!nextAtomCount->count())
              continue;

            nextListedIsPositive = (nextAtomCount->count() > 0);
            break;
          }

        composition += QString("-%1%2%3")
          .arg(atomCount->symbol())
          .arg(atomCount->count() * -1)
          .arg(nextListedIsPositive ? "+" : "");
      }

    return composition;
  }


  //! Computes the total number of atoms.
  /*! The number is the sum of the counts of all the atom count objects
    currently in the list. Counts are added as they are, so that an
    item with a negative count decreases the total.

    \return The number of atoms.
  */
  int
  Formula::totalAtoms() const
  {
    int sum = 0;
    int index = 0;

    while (index < m_atomCountList.size())
      {
        sum += m_atomCountList.at(index)->count();

        ++index;
      }

    return sum;
  }


  //! Computes the total number of isotopes.
  /*! 
  
    \param refList List of atoms to be used as reference.

    \return The number of isotopes.
  */
  int 
  Formula::totalIsotopes(const QList<Atom *> &refList) const
  {
    int totalIsotopeCount = 0;
  
    for (int iter = 0; iter < m_atomCountList.size(); ++iter)
      {
	AtomCount *atomCount = m_atomCountList.at(iter);
      
	Atom listAtom;
      
	if(Atom::isSymbolInList(atomCount->symbol(), 
				  refList, &listAtom) == -1)
	  return -1;
      
	// The number of isotopes for current atomCount is the number of
	// atoms compounded per the number of isotopes in the isotope
	// list.
	totalIsotopeCount += 
	 (listAtom.isotopeList().size() * atomCount->count());
      }
  
    return totalIsotopeCount;
  }


  //! Computes the number of entities(atoms and isotopes).
  /*! 
  
    \param refList List of atoms to be used as reference. 

    \param totalAtoms Pointer to a integer in which to store the number of
    atoms. Defaults to 0, in which case the value is not updated.

    \param totalIsotopes Pointer to a integer in which to store the number
    of isotopes. Defaults to 0, in which case the value is not updated.

    \return true upon a successfull computation, false otherwise.
  */
  bool 
  Formula::totalEntities(const QList<Atom *> &refList,
			  int *totalAtoms, int *totalIsotopes) const
  {
    for (int iter = 0; iter < m_atomCountList.size(); ++iter)
      {
	AtomCount *atomCount = m_atomCountList.at(iter);

	if(totalAtoms)
	  *totalAtoms += atomCount->count();
      
	if(totalIsotopes)
	  {
	    Atom listAtom;
	  
	    if (Atom::isSymbolInList(atomCount->symbol(), 
				      refList, &listAtom) == -1)
	      return false;
	  
	    // The number of isotopes for current atomCount is the number of
	    // atoms compounded per the number of isotopes in the isotope
	    // list.
	    *totalIsotopes += 
	     (listAtom.isotopeList().size() * atomCount->count());
	  }
      }
  
    return true;
  }


  //! Performs a deep copy of the atom count objects.
  /*! Each atom count object in the list of such objects is updated
    deeply with the values obtained from the corresponding atom in the
    \p refList list of reference atoms. This ensures that each atom
    count object in the formula has a deep knowledge of its isotopic
    composition. Such kind of process is used when isotopic pattern
    calculations are to be performed for a given formula.
  
    \param refList List of reference atoms.

    \return true upon success, false otherwise.
  */
  bool
  Formula::deepAtomCopy(const QList<Atom *> &refList)
  {
    // When the formula is parsed, the atomCount objects(derived form
    // Atom) are created by only shallow-copying(only the atom
    // symbol is actually copied to identify the atom).

    // Here, we are asked that the Atom component of the AtomCount
    // objects in the list of such instances be deep-copied from the
    // corresponding Atom found in the reference atom list
    // 'refList'. This way, the updated objects have their actual list
    // of isotopes(this is useful for the isotopic pattern calculation,
    // for example).

    for (int iter = 0; iter < m_atomCountList.size(); ++iter)
      {
	AtomCount *atomCount = 0;
      
	atomCount = m_atomCountList.at(iter);

	Atom listAtom;
      
	if(Atom::isSymbolInList(atomCount->symbol(), 
				  refList, atomCount) == -1)
	  return false;
      }

    return true;
  }



  //! Parses a formula XML element and sets the data to the formula.
  /*! The \p element must have the "formula" tag name. Its text is set
    as is to the formula member datum of \p this instance (this is
    called XML rendering), and the syntax of that formula is then
    checked, the result of that check being returned.

    Note that the formula member datum is modified even if the syntax
    check fails. It is left untouched if \p element does not have the
    right tag name.

    \param element XML element to be parsed and rendered.

    \return true if parsing and syntax checking were successful, false
    otherwise.

    \sa checkSyntax().
  */
  bool
  Formula::renderXmlFormulaElement(const QDomElement &element)
  {
    if (element.tagName() == "formula")
      {
        m_formula = element.text();

        // The text might hold a title and spaces along with the
        // formula proper: checkSyntax() takes care of removing these
        // before checking for chemical syntax correctness.
        const bool syntaxIsCorrect = checkSyntax();

        return syntaxIsCorrect;
      }

    // Not a formula element.
    return false;
  }

} // namespace massXpert