libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spomsspectrum.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief SpecPeptidOMS Spectrum
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#include <algorithm>
33#include <unordered_set>
34#include "spomsspectrum.h"
39
40namespace pappso
41{
42namespace specpeptidoms
43{
/**
 * Constructor taking a qualified mass spectrum, a precision and an amino acid code table.
 *
 * The std::vector base is filled from a specglob::ExperimentalSpectrum built with
 * (qmass_spectrum, precision_ptr). The body then sizes the per-amino-acid and per-peak
 * bookkeeping containers and overwrites the m/z of the first and last data points.
 *
 * NOTE(review): the first line of the signature and a few other lines of this definition are
 * absent from this listing (gaps in the line numbering); they are not reconstructed here.
 */
44// SpOMSSpectrum::SpOMSSpectrum(const specglob::ExperimentalSpectrum &exp_spectrum)
46 pappso::PrecisionPtr precision_ptr,
47 const pappso::AaCode &aaCode)
48 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
49 specglob::ExperimentalSpectrum(qmass_spectrum, precision_ptr)),
50 m_qualifiedMassSpectrum(qmass_spectrum),
51 m_precision_ptr(precision_ptr),
52 m_aaCode(aaCode),
54{
// One position list per amino acid code (m_aaCode.getSize() lists in total).
55 m_aapositions.reserve(m_aaCode.getSize());
56 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
57 {
58 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
// NOTE(review): size() - 1 is unsigned arithmetic and wraps around if the spectrum is empty.
59 m_aapositions.back()->reserve(this->size() - 1);
60 }
// One (initially empty) amino acid list and one reindexing slot per peak. Peak 0 starts with
// the reindexed value 0, every other peak with -1; preprocessSpectrum() tests for -1 before
// calling addSupportedPeak().
61 m_supported_peaks.reserve(this->size());
62 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
63 m_reindexed_peaks.push_back(0);
64 for(std::size_t iter = 1; iter < this->size(); iter++)
65 {
66 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
67 m_reindexed_peaks.push_back(-1);
68 }
// Overwrite the boundary points: the first gets MHPLUS + 2 * MPROTIUM + MASSOXYGEN, the last
// gets the precursor mass + MHPLUS.
69 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
70 this->back().peak_mz = m_qualifiedMassSpectrum.getPrecursorMass() + pappso::MHPLUS;
72}
73
87
/**
 * Constructor deriving a spectrum from another one (`other`) with a precursor mass error.
 *
 * The std::vector base is filled from a specglob::ExperimentalSpectrum built with
 * (other.m_qualifiedMassSpectrum, other.m_precision_ptr, precursor_mass_error). The body mirrors
 * the other constructor, except that the last data point's m/z also includes
 * precursor_mass_error.
 *
 * NOTE(review): the first line of the signature and some initializer lines are absent from this
 * listing (gaps in the line numbering); they are not reconstructed here.
 */
89 double precursor_mass_error)
90 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
91 pappso::specglob::ExperimentalSpectrum(
92 other.m_qualifiedMassSpectrum, other.m_precision_ptr, precursor_mass_error)),
95 m_aaCode(other.m_aaCode),
96 m_precursor_mass_error(precursor_mass_error)
97{
// One position list per amino acid code (m_aaCode.getSize() lists in total).
98 m_aapositions.reserve(m_aaCode.getSize());
99 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
100 {
101 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
// NOTE(review): size() - 1 is unsigned arithmetic and wraps around if the spectrum is empty.
102 m_aapositions.back()->reserve(this->size() - 1);
103 }
// One (initially empty) amino acid list and one reindexing slot per peak. Peak 0 starts with
// the reindexed value 0, every other peak with -1.
104 m_supported_peaks.reserve(this->size());
105 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
106 m_reindexed_peaks.push_back(0);
107 for(std::size_t iter = 1; iter < this->size(); iter++)
108 {
109 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
110 m_reindexed_peaks.push_back(-1);
111 }
// Overwrite the boundary points: the first gets MHPLUS + 2 * MPROTIUM + MASSOXYGEN, the last
// gets the precursor mass + MHPLUS + precursor_mass_error.
112 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
113 this->back().peak_mz =
114 m_qualifiedMassSpectrum.getPrecursorMass() + pappso::MHPLUS + precursor_mass_error;
116}
117
121
122// Preprocess the spectrum.
//
// Scans every ordered pair of peaks (peak1 < peak2) of the mass list and asks m_aaCode whether
// their mass difference matches an amino acid (code 0 means "no match"). For each match:
//  - peak2 is registered as supported (addSupportedPeak) if its reindexed value is still -1;
//  - an AaPosition is recorded with peak2 as right peak; the left peak is peak1 when peak1 is
//    itself supported (l_support = true), otherwise the last supported peak found in
//    [1, peak1), or 0 if there is none (l_support = false).
// Finally the recorded positions are dumped with qDebug().
//
// NOTE(review): some lines of this function (its signature line and a few body lines) are
// absent from this listing (gaps in the line numbering); the comments only describe what is
// visible.
123void
125{
126 // bool found;
127 uint8_t aa;
128 std::vector<double>::iterator iter1, iter2;
129 std::size_t peak1, peak2, next_l_peak;
130 std::vector<double> mass_list = getMassList();
131
// peak1 is unsigned: -1 wraps to the maximum value so that the first peak1++ yields 0.
132 peak1 = -1;
133 for(iter1 = mass_list.begin(); iter1 != mass_list.end(); iter1++)
134 {
135 peak1++;
136 peak2 = peak1;
137 for(iter2 = iter1 + 1; iter2 != mass_list.end(); iter2++)
138 {
139 peak2++;
// Amino acid code matching the mass gap between the two peaks, 0 if none.
140 aa = m_aaCode.getAaCodeByMass(*(iter2) - *(iter1), m_precision_ptr);
141 if(aa != 0)
142 {
143 next_l_peak = 0;
144 for(std::size_t iter = 1; iter < peak1;
145 iter++) // Search for the closest supported left peak.
146 // Possible optimization => search from the right
147 {
148 if(m_reindexed_peaks.at(iter) >= 0)
149 {
150 next_l_peak = iter;
151 }
152 }
// NOTE(review): aa is appended to m_supported_peaks only the first time peak2 becomes
// supported; later amino acids reaching the same peak2 are not appended. Confirm this is
// intended.
153 if(m_reindexed_peaks.at(peak2) == -1)
154 {
155 addSupportedPeak(peak2);
156 m_supported_peaks.at(peak2)->push_back(aa);
157 }
158 if(m_reindexed_peaks.at(peak1) >= 0)
159 {
160 addAaPosition(aa, peak2, peak1, true);
161 }
162 else
163 {
164 addAaPosition(aa, peak2, next_l_peak, false);
165 }
166 }
167 }
168 }
169
172
// Debug dump: for each amino acid code, its letter followed by every recorded position
// (left peak index and m/z, right peak index and m/z, left-support flag).
173 for(uint8_t aa = 1; aa < m_aaCode.getSize() + 1; ++aa)
174 {
175 qDebug() << m_aaCode.getAa(aa).getLetter();
176 for(auto iter = m_aapositions.at(aa - 1)->begin(); iter != m_aapositions.at(aa - 1)->end();
177 ++iter)
178 {
179 qDebug() << iter->l_peak << this->at(iter->l_peak).peak_mz << iter->r_peak
180 << this->at(iter->r_peak).peak_mz << iter->l_support;
181 }
182 }
183
184 // std::size_t i = 0;
185 // for(auto &data_point : *this)
186 // {
187 // data_point.indice = i;
188 // i++;
189 // }
190
192}
193
194// pappso::Aa const *
195// SpOMSSpectrum::findAAMass(double mass, bool *found) const
196// {
197// bool ok;
198// // auto charge = m_qualifiedMassSpectrum.getPrecursorCharge(&ok);
199
200// if(!ok)
201// {
202// throw pappso::PappsoException(
203// QObject::tr("precursor charge is not defined in spectrum %1")
204// .arg(m_qualifiedMassSpectrum.getMassSpectrumId().getNativeId()));
205// }
206// pappso::MzRange mz_range(mass / m_qualifiedMassSpectrum.getPrecursorCharge(),
207// m_precision_ptr);
208
209// for(std::unordered_map<const Aa, double>::const_iterator aa = aaMasses.begin();
210// aa != aaMasses.end();
211// aa++)
212// {
213// if(mz_range.contains(aa->second))
214// {
215// if(found != nullptr)
216// {
217// *found = true;
218// }
219// return &(aa->first);
220// }
221// }
222// if(found != nullptr)
223// {
224// *found = false;
225// }
226// return nullptr;
227// }
228
229// Not sure if optimal
230void
232{
233 std::vector<specglob::ExperimentalSpectrumDataPoint> kept_peaks;
234 for(std::vector<specglob::ExperimentalSpectrumDataPoint>::iterator iter = this->begin();
235 iter != this->end();
236 iter++)
237 {
238 if(m_reindexed_peaks.at(iter->indice) >= 0)
239 {
240 kept_peaks.push_back(*iter);
241 }
242 }
243 this->clear();
244 this->assign(kept_peaks.begin(), kept_peaks.end());
245}
246
247void
249 const std::size_t r_peak,
250 const std::size_t l_peak,
251 bool l_support)
252{
253 // aa=0 corresponds to no amino acid identified, thus aa is always >=1. We substract 1 to aa to
254 // avoid keeping an empty, useless vector.
255 if(l_support)
256 {
257 m_aapositions.at(aa - 1)->push_back(
258 {r_peak, l_peak, computeCondition(l_peak, l_support), l_support});
259 }
260 else
261 {
262 m_aapositions.at(aa - 1)->push_back(
263 {r_peak, l_peak, computeCondition(l_peak, l_support), l_support});
264 }
265}
266
267uint32_t
269 bool l_support) const
270{
271 uint32_t condition;
272 if(l_peak == 0)
273 {
274 condition = 2;
275 }
276 else if(!l_support)
277 {
278 condition = 1;
279 }
280 else
281 {
282 condition = 0;
283 for(std::vector<uint8_t>::iterator aa = m_supported_peaks.at(l_peak)->begin();
284 aa != m_supported_peaks.at(l_peak)->end();
285 aa++)
286 {
287 condition += 2 << *(aa);
288 }
289 }
290 return condition;
291}
292
293
294const std::vector<pappso::specpeptidoms::AaPosition> &
296{
297
298 return *m_aapositions.at(aa_code - 1);
299}
300
// Returns a copy of the positions stored for amino acid code aa_code (slot aa_code - 1),
// leaving out every position whose right peak (r_peak) appears in peaks_to_remove.
// NOTE(review): the line carrying this function's name is absent from this listing;
// presumably an overload of getAaPositions — confirm against the header.
301std::vector<pappso::specpeptidoms::AaPosition>
303 std::uint8_t aa_code, std::vector<std::size_t> &peaks_to_remove) const
304{
305 std::vector<AaPosition> aa_positions;
// Each element is copied into aap (iteration by value).
306 for(auto aap : *m_aapositions.at(aa_code - 1))
307 {
// Linear search: keep the position only when its right peak is not listed for removal.
308 if(std::find(peaks_to_remove.begin(), peaks_to_remove.end(), aap.r_peak) ==
309 peaks_to_remove.end())
310 {
311 aa_positions.push_back(aap);
312 }
313 }
314 return aa_positions;
315}
316
317std::vector<double>
319{
320 std::vector<double> mass_list;
321 for(const specglob::ExperimentalSpectrumDataPoint &n : *this)
322 {
323 mass_list.push_back(n.peak_mz);
324 }
325 return mass_list;
326}
327
// Returns the type of one of the spectrum's peaks: the `type` field of the data point at
// position `indice`, accessed through at().
// NOTE(review): the return-type and signature lines of this function are absent from this
// listing.
330{
331 return this->at(indice).type;
332}
333
334uint
339double
344
345
/**
 * Returns the mz difference between two peaks.
 *
 * @param l_peak index of the left peak
 * @param r_peak index of the right peak
 * @return peak_mz of r_peak minus peak_mz of l_peak
 */
346double
347pappso::specpeptidoms::SpOMSSpectrum::getMZShift(std::size_t l_peak, std::size_t r_peak) const
348{
// NOTE(review): the test uses '>' so an index equal to size() passes this guard and only
// fails later in at(); '>=' looks intended — confirm.
349 if(std::max(r_peak, l_peak) > size())
350 {
// NOTE(review): the line opening this statement is absent from this listing; presumably a
// throw of a project exception built from the message below — confirm.
352 QObject::tr("getMZShift : l_peak %1 or r_peak %2 greater than size %3")
353 .arg(l_peak)
354 .arg(r_peak)
355 .arg(size()));
356 }
357 return this->at(r_peak).peak_mz - this->at(l_peak).peak_mz;
358}
359
// Returns the missing mass between a peak and the precursor's mass (shift at the end):
// precursor mass - m_precursor_mass_error - peak_mz of `peak` + MHPLUS.
// NOTE(review): the signature line of this function is absent from this listing.
360double
362{
// NOTE(review): the test uses '>' so peak == size() passes this guard and only fails later in
// at(); '>=' looks intended — confirm.
363 if(peak > size())
364 {
// NOTE(review): the line opening this statement is absent from this listing; presumably a
// throw of a project exception built from the message below — confirm.
366 QObject::tr("getMissingMass : peak %1 greater than size %2").arg(peak).arg(size()));
367 }
368 return this->m_qualifiedMassSpectrum.getPrecursorMass() - m_precursor_mass_error -
369 this->at(peak).peak_mz + MHPLUS;
370}
371
372void
374{
375 std::size_t counter = 0;
376 for(std::size_t iter = 0; iter < peak; iter++)
377 {
378 if(m_reindexed_peaks.at(iter) >= 0)
379 {
380 counter++;
381 }
382 }
383 m_reindexed_peaks.at(peak) = counter;
384 for(std::size_t iter = peak + 1; iter < m_reindexed_peaks.size(); iter++)
385 {
386 if(m_reindexed_peaks.at(iter) >= 0)
387 {
388 m_reindexed_peaks.at(iter)++;
389 }
390 }
391}
392
393void
395{
396 for(auto aa = m_aapositions.begin(); aa != m_aapositions.end(); aa++)
397 {
398 for(auto aap = aa->get()->begin(); aap != aa->get()->end(); aap++)
399 {
400 aap->l_peak = m_reindexed_peaks.at(aap->l_peak);
401 aap->r_peak = m_reindexed_peaks.at(aap->r_peak);
402 }
403 }
404}
405
406void
408{
409 std::size_t left_index, right_index;
410
411 m_complementary_peak_indexes.reserve(this->size());
412 while(m_complementary_peak_indexes.size() < this->size())
413 {
414 m_complementary_peak_indexes.push_back(0);
415 }
416 left_index = 0;
417 right_index = this->size() - 1;
418 double comp_mass = m_qualifiedMassSpectrum.getPrecursorMass() + 2 * MHPLUS;
419
420 while(left_index < right_index)
421 {
422 pappso::MzRange mz_range(comp_mass - this->at(left_index).peak_mz, m_precision_ptr);
423 if(mz_range.contains(this->at(right_index).peak_mz))
424 {
425 m_complementary_peak_indexes.at(left_index) = right_index;
426 m_complementary_peak_indexes.at(right_index) = left_index;
427 qDebug() << left_index << right_index;
428 }
429 if(comp_mass - this->at(left_index).peak_mz - this->at(right_index).peak_mz >= 0)
430 {
431 left_index++;
432 }
433 else
434 {
435 right_index--;
436 }
437 }
438}
439
440std::size_t
445} // namespace specpeptidoms
446} // namespace pappso
collection of integer codes, one for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
Definition aacode.h:44
bool contains(pappso_double) const
Definition mzrange.cpp:116
Class representing a fully specified mass spectrum.
void preprocessSpectrum()
Preprocess the spectrum.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum, pappso::PrecisionPtr precision_ptr, const pappso::AaCode &aaCode)
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::vector< std::size_t > m_complementary_peak_indexes
std::vector< std::shared_ptr< std::vector< uint8_t > > > m_supported_peaks
uint32_t computeCondition(const std::size_t l_peak, bool l_support) const
Computes the "condition" integer, used to apply the three peaks rule.
void removeUnsupportedMasses()
Removes the unsupported peaks (without an amino acid to the left) from the spectrum.
pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum
std::vector< std::shared_ptr< std::vector< AaPosition > > > m_aapositions
void correctPeakIndexes()
Reindexes the peaks after removal of the unsupported peaks.
void addSupportedPeak(std::size_t peak)
Add a peak to the supported peaks list.
void addAaPosition(uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, bool l_support)
Adds an amino acid position to the data structure.
void fillComplementaryPeakIndexes()
For each point of the spectrum, indicate the index of its complementary peak.
std::size_t getComplementaryPeak(std::size_t peak) const
const std::vector< AaPosition > & getAaPositions(std::uint8_t aa_code) const
Returns the list of aa_positions for a given amino acid code.
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
std::vector< double > getMassList() const
Returns the spectrum's list of masses.
ExperimentalSpectrumDataPointType
Definition types.h:78
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
unsigned int uint
Definition types.h:67
const pappso_double MASSOXYGEN(15.99491461956)
const PrecisionBase * PrecisionPtr
Definition precision.h:122