1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
|
// --------------------------------------------------------------------------
// OpenMS -- Open-Source Mass Spectrometry
// --------------------------------------------------------------------------
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
// ETH Zurich, and Freie Universitaet Berlin 2002-2020.
//
// This software is released under a three-clause BSD license:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of any author or any participating institution
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// For a full list of authors, refer to the file AUTHORS.
// --------------------------------------------------------------------------
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// --------------------------------------------------------------------------
// $Maintainer: George Rosenberger $
// $Authors: George Rosenberger, Hannes Roest $
// --------------------------------------------------------------------------
#pragma once
#include <OpenMS/ANALYSIS/OPENSWATH/TransitionTSVFile.h>
#include <boost/range/algorithm.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
#include <iostream>
namespace OpenMS
{
/**
@brief This class supports reading and writing of PQP files.
The PQP files are SQLite databases consisting of several tables
representing the data contained in TraML files. For another file format that stores transitions, see also
TransitionTSVFile.
This class can convert TraML and PQP files into each other.
<h2> The file format has the following tables: </h2>
Genes and proteins are described by a primary key as well as a
human-readable gene name or protein accession key.
<table border="0"><tr><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>GENE</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> Primary Key (gene id)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">GENE_NAME</td> <td>TEXT</td> <td> Gene name </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DECOY</td> <td>INT (0 or 1)</td> <td> Whether this is a decoy gene (1: decoy, 0: target) </td> </tr>
</table>
</td><td valign="top">
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PEPTIDE_GENE_MAPPING</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">PEPTIDE_ID</td> <td>INT</td> <td> Foreign Key (PEPTIDE.ID)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">GENE_ID</td> <td>INT</td> <td> Foreign Key (GENE.ID)</td> </tr>
</table>
</td></table>
<table border="0"><tr><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PROTEIN</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> Primary Key (protein id)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PROTEIN_ACCESSION</td> <td>TEXT</td> <td> Protein accession </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DECOY</td> <td>INT (0 or 1)</td> <td> Whether this is a decoy protein (1: decoy, 0: target) </td> </tr>
</table>
</td><td valign="top">
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PEPTIDE_PROTEIN_MAPPING</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">PEPTIDE_ID</td> <td>INT</td> <td> Foreign Key (PEPTIDE.ID)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PROTEIN_ID</td> <td>INT</td> <td> Foreign Key (PROTEIN.ID)</td> </tr>
</table>
</td></table>
Peptides are physical analytes that are present in the sample and can carry post-translational modifications (PTMs). They are described by their amino-acid sequence and the modifications they carry:
<table border="0"><tr><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PEPTIDE</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> Primary Key (peptide id)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">UNMODIFIED_SEQUENCE</td> <td>TEXT</td> <td> Peptide sequence (unmodified) </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">MODIFIED_SEQUENCE</td> <td>TEXT</td> <td> Peptide sequence (modified) <sup>1</sup></td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DECOY</td> <td>INT (0 or 1)</td> <td> Whether this is a decoy peptide (1: decoy, 0: target) </td> </tr>
</table>
</td><td valign="top">
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PRECURSOR_PEPTIDE_MAPPING</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">PRECURSOR_ID</td> <td>INT</td> <td> Foreign Key (PRECURSOR.ID)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PEPTIDE_ID</td> <td>INT</td> <td> Foreign Key (PEPTIDE.ID)</td> </tr>
</table>
</td></table>
Compounds are generic analytes that are present in the sample (but are not peptides). This is used for small molecules which are described by their molecular formula and the SMILES representation (structural representation):
<table border="0"><tr><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>COMPOUND</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> Primary Key (compound id)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">COMPOUND_NAME</td> <td>TEXT</td> <td> Compound name (common name of the analyte)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">SUM_FORMULA</td> <td>TEXT</td> <td> Molecular formula of the compound </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">SMILES</td> <td>TEXT</td> <td> SMILES representation</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">ADDUCTS</td> <td>TEXT</td> <td> List of adducts for the compound</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DECOY</td> <td>INT (0 or 1)</td> <td> Whether this is a decoy compound (1: decoy, 0: target) </td> </tr>
</table>
</td><td valign="top">
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PRECURSOR_COMPOUND_MAPPING</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">PRECURSOR_ID</td> <td>INT</td> <td> Foreign Key (PRECURSOR.ID)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">COMPOUND_ID</td> <td>INT</td> <td> Foreign Key (COMPOUND.ID)</td> </tr>
</table>
</td></table>
Precursors are generated upon ionization from peptides or small molecule analytes (compounds) and are described by their charge state, mass-to-charge ratio, retention time and ion mobility drift time:
<table border="0"><tr><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>PRECURSOR</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> Primary Key (precursor id)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">TRAML_ID</td> <td>TEXT</td> <td> TraML identifiers (maps to the "id=" attribute in TraML)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">GROUP_LABEL</td> <td>TEXT</td> <td> Designates the peptide label group (as defined in MS:1000893) to which the peptide belongs <sup>2</sup></td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PRECURSOR_MZ</td> <td>REAL</td> <td> %Precursor m/z </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">CHARGE</td> <td>TEXT</td> <td> %Precursor charge state </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">LIBRARY_INTENSITY</td> <td>TEXT</td> <td> %Precursor library intensity </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">LIBRARY_RT</td> <td>TEXT</td> <td> Library retention time (RT) </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">LIBRARY_DRIFT_TIME</td> <td>TEXT</td> <td> Library drift time (ion mobility drift time or collisional cross-section) </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DECOY</td> <td>INT (0 or 1)</td> <td> Whether this is a decoy precursor (1: decoy, 0: target) </td> </tr>
</table>
</td><td valign="top">
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>TRANSITION_PRECURSOR_MAPPING</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">TRANSITION_ID</td> <td>INT</td> <td> Foreign Key (TRANSITION.ID)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PRECURSOR_ID</td> <td>INT</td> <td> Foreign Key (PRECURSOR.ID)</td> </tr>
</table>
</td></table>
Transitions are generated upon fragmentation from precursors and are described by their charge state and (fragment) mass-to-charge ratio. For peptide fragments, an ion type (e.g. y for y-ions) and an ordinal (e.g. 6 for a y6 ion) can be recorded. Note that detecting transitions are used for precursor detection and will indicate whether a given precursor is present or not in the sample. Use detecting transitions for the top N transitions of a precursor to detect a set of transitions in the sample. Identifying transitions will be used to discriminate different peptidoforms of the same precursor (see <a href="http://openswath.org/en/latest/docs/ipf.html">IPF Workflow</a> for PTM inference).
<table border="0"><tr><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>TRANSITION</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> Primary Key (transition id)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">TRAML_ID</td> <td>TEXT</td> <td> TraML identifiers (maps to the "id=" attribute in TraML)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PRODUCT_MZ</td> <td>TEXT</td> <td> Fragment ion m/z </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">CHARGE</td> <td>TEXT</td> <td> Fragment ion charge </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">TYPE</td> <td>CHAR</td> <td> Fragment ion type (e.g. "b" or "y")</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">ANNOTATION</td> <td>TEXT</td> <td> Fragment ion annotation </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">ORDINAL</td> <td>INT</td> <td> Fragment ion ordinal </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DETECTING</td> <td>INT (0 or 1)</td> <td>1: use transition to detect peak group, 0: don't use transition for detection</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">IDENTIFYING</td> <td>INT (0 or 1)</td> <td> 1: use transition for peptidoform inference in the <a href="http://openswath.org/en/latest/docs/ipf.html">IPF Workflow</a>, 0: don't use transition for identification</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">QUANTIFYING</td> <td>INT (0 or 1)</td> <td> 1: use transition to quantify peak group, 0: don't use transition for quantification</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">LIBRARY_INTENSITY</td> <td>REAL</td> <td> Fragment ion library intensity </td> </tr>
<tr> <td BGCOLOR="#EBEBEB">DECOY</td> <td>INT (0 or 1)</td> <td> Whether this is a decoy transition (1: decoy, 0: target) </td> </tr>
</table>
</td></table>
There is one extra table directly mapping TRANSITION to PEPTIDE which is mainly used for the
<a href="http://openswath.org/en/latest/docs/ipf.html">IPF Workflow</a> for PTM inference. It directly maps transitions to peptidoforms
(one identifying transition can map to multiple peptidoforms):
<table border="0"><tr><td>
</td><td>
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>TRANSITION_PEPTIDE_MAPPING</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">TRANSITION_ID</td> <td>INT</td> <td> Foreign Key (TRANSITION.ID)</td> </tr>
<tr> <td BGCOLOR="#EBEBEB">PEPTIDE_ID</td> <td>INT</td> <td> Foreign Key (PEPTIDE.ID)</td> </tr>
</table>
</td></table>
Another extra table describes the file format version:
<table>
<tr> <th BGCOLOR="#EBEBEB" colspan=3>VERSION</th> </tr>
<tr> <td BGCOLOR="#EBEBEB">ID</td> <td>INT</td> <td> %File Format version </td> </tr>
</table>
<p>
Remarks:
</p>
<ul>
<li>
1. modifications should be supplied inside the sequence using UniMod
identifiers or freetext identifiers that are understood by %OpenMS. See also @ref OpenMS::AASequence for more information. For example:
<ul>
<li> PEPT(Phosphorylation)IDE(UniMod:27)A </li>
</ul>
</li>
<li>
2. peptide label groups designate groups of peptides that are isotopically
modified forms of the same peptide species. For example, the heavy and
light forms of the same peptide will both be assigned the same peptide
group label. For example:
<ul>
<li> PEPTIDEAK -> gets label "PEPTIDEAK_gr1" </li>
<li> PEPTIDEAK[+8] -> gets label "PEPTIDEAK_gr1" </li>
<li> PEPT(Phosphorylation)IDEAK -> gets label "PEPTIDEAK_gr2" </li>
<li> PEPT(Phosphorylation)IDEAK[+8] -> gets label "PEPTIDEAK_gr2" </li>
</ul>
</li>
</ul>
@htmlinclude OpenMS_TransitionPQPFile.parameters
*/
class OPENMS_DLLAPI TransitionPQPFile :
public TransitionTSVFile
{
private:
/** @brief Read a PQP (SQLite) file into a flat list of transitions
 *
 * @param[in] filename The input file
 * @param[out] transition_list The output list of transitions
 * @param[in] legacy_traml_id Should legacy TraML IDs be used (boolean)?
 *
 * @note NOTE(review): this declaration does not show whether
 *       @p transition_list is cleared before it is filled or appended to;
 *       confirm against the implementation.
 */
void readPQPInput_(const char* filename, std::vector<TSVTransition>& transition_list, bool legacy_traml_id = false);
/** @brief Write a TargetedExperiment to a PQP (SQLite) file
 *
 * @param[in] filename Name of the output file
 * @param targeted_exp The data structure to be written to the file
 *
 * @note @p targeted_exp is taken by non-const reference; this declaration
 *       alone does not show whether it is modified - confirm against the
 *       implementation before relying on it being left untouched.
 */
void writePQPOutput_(const char* filename, OpenMS::TargetedExperiment& targeted_exp);
public:
/** @name Constructors and destructors
 */
//@{
/// Default constructor
TransitionPQPFile();
/// Destructor (overrides the virtual destructor of the base class)
~TransitionPQPFile() override;
//@}
/** @brief Write out a targeted experiment (TraML structure) into a PQP file
 *
 * @param[in] filename The output file
 * @param targeted_exp The targeted experiment to write (passed by
 *        non-const reference; whether it is modified is not visible here)
 *
 */
void convertTargetedExperimentToPQP(const char* filename, OpenMS::TargetedExperiment& targeted_exp);
/** @brief Read in a PQP file and construct a targeted experiment (TraML structure)
 *
 * Overload that produces an OpenMS::TargetedExperiment.
 *
 * @param[in] filename The input file
 * @param[out] targeted_exp The output targeted experiment
 * @param[in] legacy_traml_id Should legacy TraML IDs be used (boolean)?
 *
 */
void convertPQPToTargetedExperiment(const char* filename, OpenMS::TargetedExperiment& targeted_exp, bool legacy_traml_id = false);
/** @brief Read in a PQP file and construct a targeted experiment (Light transition structure)
 *
 * Overload that produces an OpenSwath::LightTargetedExperiment.
 *
 * @param[in] filename The input file
 * @param[out] targeted_exp The output targeted experiment (light representation)
 * @param[in] legacy_traml_id Should legacy TraML IDs be used (boolean)?
 *
 */
void convertPQPToTargetedExperiment(const char* filename, OpenSwath::LightTargetedExperiment& targeted_exp, bool legacy_traml_id = false);
};
}
|