libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spomsspectrum.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief SpecPeptidOMS Spectrum
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#include <algorithm>
33#include <unordered_set>
34#include "spomsspectrum.h"
39
40namespace pappso
41{
42namespace specpeptidoms
43{
44// SpOMSSpectrum::SpOMSSpectrum(const specglob::ExperimentalSpectrum &exp_spectrum)
46 pappso::PrecisionPtr precision_ptr,
47 const pappso::AaCode &aaCode)
48 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
49 specglob::ExperimentalSpectrum(qmass_spectrum, precision_ptr)),
50 m_qualifiedMassSpectrum(qmass_spectrum),
51 m_precision_ptr(precision_ptr),
52 m_aaCode(aaCode),
54{
55 m_aapositions.reserve(m_aaCode.getSize());
56 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
57 {
58 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
59 m_aapositions.back()->reserve(this->size() - 1);
60 }
61 m_supported_peaks.reserve(this->size());
62 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
63 m_reindexed_peaks.push_back(0);
64 for(std::size_t iter = 1; iter < this->size(); iter++)
65 {
66 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
67 m_reindexed_peaks.push_back(-1);
68 }
69 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
70 this->back().peak_mz = m_qualifiedMassSpectrum.getPrecursorMass() + pappso::MHPLUS;
72}
73
87
89 double precursor_mass_error)
90 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
91 pappso::specglob::ExperimentalSpectrum(
92 other.m_qualifiedMassSpectrum, other.m_precision_ptr, precursor_mass_error)),
95 m_aaCode(other.m_aaCode),
96 m_precursor_mass_error(precursor_mass_error)
97{
98 m_aapositions.reserve(m_aaCode.getSize());
99 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
100 {
101 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
102 m_aapositions.back()->reserve(this->size() - 1);
103 }
104 m_supported_peaks.reserve(this->size());
105 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
106 m_reindexed_peaks.push_back(0);
107 for(std::size_t iter = 1; iter < this->size(); iter++)
108 {
109 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
110 m_reindexed_peaks.push_back(-1);
111 }
112 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
113 this->back().peak_mz =
114 m_qualifiedMassSpectrum.getPrecursorMass() + pappso::MHPLUS + precursor_mass_error;
116}
117
121
122// Add comments !!
123void
125{
126 // bool found;
127 uint8_t aa;
128 std::vector<double>::iterator iter1, iter2;
129 std::size_t peak1, peak2, next_l_peak;
130 std::vector<double> mass_list = getMassList();
131
132 peak1 = -1;
133 for(iter1 = mass_list.begin(); iter1 != mass_list.end(); iter1++)
134 {
135 peak1++;
136 peak2 = peak1;
137 for(iter2 = iter1 + 1; iter2 != mass_list.end(); iter2++)
138 {
139 peak2++;
140 aa = m_aaCode.getAaCodeByMass(*(iter2) - *(iter1), m_precision_ptr);
141 if(aa != 0)
142 {
143 next_l_peak = 0;
144 for(std::size_t iter = 1; iter < peak1;
145 iter++) // Search of the closer supported left peak.
146 // Possible optimization => search from the right
147 {
148 if(m_reindexed_peaks.at(iter) >= 0)
149 {
150 next_l_peak = iter;
151 }
152 }
153 if(m_reindexed_peaks.at(peak2) == -1)
154 {
155 addSupportedPeak(peak2);
156 m_supported_peaks.at(peak2)->push_back(aa);
157 }
158 else
159 {
160 auto it = std::find(
161 m_supported_peaks.at(peak2)->begin(), m_supported_peaks.at(peak2)->end(), aa);
162 if(it == m_supported_peaks.at(peak2)->end())
163 {
164 m_supported_peaks.at(peak2)->push_back(aa);
165 }
166 }
167 if(m_reindexed_peaks.at(peak1) >= 0)
168 {
169 addAaPosition(aa, peak2, peak1, *(iter1), true);
170 }
171 else
172 {
173 addAaPosition(aa, peak2, next_l_peak, *(iter1), false);
174 }
175 }
176 }
177 }
178
181
182 for(uint8_t aa = 1; aa < m_aaCode.getSize() + 1; ++aa)
183 {
184 qDebug() << m_aaCode.getAa(aa).getLetter();
185 for(auto iter = m_aapositions.at(aa - 1)->begin(); iter != m_aapositions.at(aa - 1)->end();
186 ++iter)
187 {
188 qDebug() << iter->l_peak << this->at(iter->l_peak).peak_mz << iter->r_peak
189 << this->at(iter->r_peak).peak_mz << iter->l_support << iter->condition;
190 }
191 }
192
193 // std::size_t i = 0;
194 // for(auto &data_point : *this)
195 // {
196 // data_point.indice = i;
197 // i++;
198 // }
199
201}
202
203// pappso::Aa const *
204// SpOMSSpectrum::findAAMass(double mass, bool *found) const
205// {
206// bool ok;
207// // auto charge = m_qualifiedMassSpectrum.getPrecursorCharge(&ok);
208
209// if(!ok)
210// {
211// throw pappso::PappsoException(
212// QObject::tr("precursor charge is not defined in spectrum %1")
213// .arg(m_qualifiedMassSpectrum.getMassSpectrumId().getNativeId()));
214// }
215// pappso::MzRange mz_range(mass / m_qualifiedMassSpectrum.getPrecursorCharge(),
216// m_precision_ptr);
217
218// for(std::unordered_map<const Aa, double>::const_iterator aa = aaMasses.begin();
219// aa != aaMasses.end();
220// aa++)
221// {
222// if(mz_range.contains(aa->second))
223// {
224// if(found != nullptr)
225// {
226// *found = true;
227// }
228// return &(aa->first);
229// }
230// }
231// if(found != nullptr)
232// {
233// *found = false;
234// }
235// return nullptr;
236// }
237
238// Not sure if optimal
239void
241{
242 std::vector<specglob::ExperimentalSpectrumDataPoint> kept_peaks;
243 for(std::vector<specglob::ExperimentalSpectrumDataPoint>::iterator iter = this->begin();
244 iter != this->end();
245 iter++)
246 {
247 if(m_reindexed_peaks.at(iter->indice) >= 0)
248 {
249 kept_peaks.push_back(*iter);
250 }
251 }
252 this->clear();
253 this->assign(kept_peaks.begin(), kept_peaks.end());
254}
255
256void
258 uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, double l_mass, bool l_support)
259{
260 // aa=0 corresponds to no amino acid identified, thus aa is always >=1. We substract 1 to aa to
261 // avoid keeping an empty, useless vector.
262 m_aapositions.at(aa - 1)->push_back(
263 {r_peak, l_peak, l_mass, computeCondition(l_peak, l_support), l_support});
264 qDebug() << "l_peak" << l_peak << "r_peak" << r_peak << "l_mass" << l_mass << "l_support"
265 << l_support << "condition" << computeCondition(l_peak, l_support);
266}
267
268uint32_t
270 bool l_support) const
271{
272 uint32_t condition;
273 if(!l_support)
274 {
275 condition = 1;
276 }
277 else if(l_peak == 0)
278 {
279 condition = 2;
280 }
281 else
282 {
283 condition = 0;
284 for(std::vector<uint8_t>::iterator aa = m_supported_peaks.at(l_peak)->begin();
285 aa != m_supported_peaks.at(l_peak)->end();
286 aa++)
287 {
288 condition += 2 << *(aa);
289 }
290 }
291 return condition;
292}
293
294
295const std::vector<pappso::specpeptidoms::AaPosition> &
297{
298
299 return *m_aapositions.at(aa_code - 1);
300}
301
302std::vector<pappso::specpeptidoms::AaPosition>
304 std::uint8_t aa_code, std::vector<std::size_t> &peaks_to_remove) const
305{
306 std::vector<AaPosition> aa_positions;
307 for(auto aap : *m_aapositions.at(aa_code - 1))
308 {
309 if(std::find(peaks_to_remove.begin(), peaks_to_remove.end(), aap.r_peak) ==
310 peaks_to_remove.end())
311 {
312 aa_positions.push_back(aap);
313 }
314 }
315 return aa_positions;
316}
317
318std::vector<double>
320{
321 std::vector<double> mass_list;
322 for(const specglob::ExperimentalSpectrumDataPoint &n : *this)
323 {
324 mass_list.push_back(n.peak_mz);
325 }
326 return mass_list;
327}
328
331{
332 return this->at(indice).type;
333}
334
335uint
340double
345
346
347double
348pappso::specpeptidoms::SpOMSSpectrum::getMZShift(std::size_t l_peak, std::size_t r_peak) const
349{
350 if(std::max(r_peak, l_peak) > size())
351 {
353 QObject::tr("getMZShift : l_peak %1 or r_peak %2 greater than size %3")
354 .arg(l_peak)
355 .arg(r_peak)
356 .arg(size()));
357 }
358 return this->at(r_peak).peak_mz - this->at(l_peak).peak_mz;
359}
360
361double
363{
364 if(peak > size())
365 {
367 QObject::tr("getMissingMass : peak %1 greater than size %2").arg(peak).arg(size()));
368 }
369 return this->m_qualifiedMassSpectrum.getPrecursorMass() - m_precursor_mass_error -
370 this->at(peak).peak_mz + MHPLUS;
371}
372
373void
375{
376 std::size_t counter = 0;
377 for(std::size_t iter = 0; iter < peak; iter++)
378 {
379 if(m_reindexed_peaks.at(iter) >= 0)
380 {
381 counter++;
382 }
383 }
384 m_reindexed_peaks.at(peak) = counter;
385 for(std::size_t iter = peak + 1; iter < m_reindexed_peaks.size(); iter++)
386 {
387 if(m_reindexed_peaks.at(iter) >= 0)
388 {
389 m_reindexed_peaks.at(iter)++;
390 }
391 }
392}
393
394void
396{
397 for(auto aa = m_aapositions.begin(); aa != m_aapositions.end(); aa++)
398 {
399 for(auto aap = aa->get()->begin(); aap != aa->get()->end(); aap++)
400 {
401 aap->l_peak = m_reindexed_peaks.at(aap->l_peak);
402 aap->r_peak = m_reindexed_peaks.at(aap->r_peak);
403 }
404 }
405}
406
407void
409{
410 std::size_t left_index, right_index;
411
412 m_complementary_peak_indexes.reserve(this->size());
413 while(m_complementary_peak_indexes.size() < this->size())
414 {
415 m_complementary_peak_indexes.push_back(0);
416 }
417 left_index = 0;
418 right_index = this->size() - 1;
419 double comp_mass = m_qualifiedMassSpectrum.getPrecursorMass() + 2 * MHPLUS;
420
421 while(left_index < right_index)
422 {
423 pappso::MzRange mz_range(comp_mass - this->at(left_index).peak_mz, m_precision_ptr);
424 if(mz_range.contains(this->at(right_index).peak_mz))
425 {
426 m_complementary_peak_indexes.at(left_index) = right_index;
427 m_complementary_peak_indexes.at(right_index) = left_index;
428 qDebug() << left_index << right_index;
429 }
430 if(comp_mass - this->at(left_index).peak_mz - this->at(right_index).peak_mz >= 0)
431 {
432 left_index++;
433 }
434 else
435 {
436 right_index--;
437 }
438 }
439}
440
441std::size_t
446} // namespace specpeptidoms
447} // namespace pappso
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
Definition aacode.h:44
bool contains(pappso_double) const
Definition mzrange.cpp:115
Class representing a fully specified mass spectrum.
void preprocessSpectrum()
Preprocess the spectrum.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum, pappso::PrecisionPtr precision_ptr, const pappso::AaCode &aaCode)
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::vector< std::size_t > m_complementary_peak_indexes
std::vector< std::shared_ptr< std::vector< uint8_t > > > m_supported_peaks
uint32_t computeCondition(const std::size_t l_peak, bool l_support) const
Computes the "condition" integer, used to apply the three peaks rule.
void removeUnsupportedMasses()
Removes the unsupported peaks (without an amino acid to the left) from the spectrum.
void addAaPosition(uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, double l_mass, bool l_support)
Adds an amino acid position to the data structure.
pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum
std::vector< std::shared_ptr< std::vector< AaPosition > > > m_aapositions
void correctPeakIndexes()
Reindexes the peaks after removal of the unsupported peaks.
void addSupportedPeak(std::size_t peak)
Add a peak to the supported peaks list.
void fillComplementaryPeakIndexes()
For each point of the spectrum, indicate the index of its complementary peak;.
std::size_t getComplementaryPeak(std::size_t peak) const
const std::vector< AaPosition > & getAaPositions(std::uint8_t aa_code) const
Returns the list of aa_positions for a given amino acid code.
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
std::vector< double > getMassList() const
Returns the spectrum's list of masses.
ExperimentalSpectrumDataPointType
Definition types.h:78
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
unsigned int uint
Definition types.h:67
const pappso_double MASSOXYGEN(15.99491461956)
const PrecisionBase * PrecisionPtr
Definition precision.h:122