libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfilereaderbase.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
3 * \date 05/07/2025
4 * \author Olivier Langella
5 * \brief Base class to read CBOR PSM file
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfilereaderbase.h"
30#include <QDebug>
32#include <QCborArray>
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
45
47{
48 // qWarning() << "~PsmFileReaderBase";
49}
50
51
52void
54{
55
56 qDebug();
57 initCborReader(cborp);
58
59 qDebug();
60 if(mpa_cborReader->isMap())
61 {
62 readRoot(monitor);
63 }
64 qDebug();
65}
66
67void
69{
70 qDebug();
71 initCborReader(cborp);
72
73 qDebug();
74 if(mpa_cborReader->isMap())
75 {
76 readRoot(monitor);
77 }
78 qDebug();
79}
80
81
82bool
84{
85 for(auto &it : m_currentPsmProteinRefList)
86 {
87 if(!m_proteinMap.getByAccession(it.accession).isTarget)
88 return true;
89 }
90 return false;
91}
92
93bool
95{
96 for(auto &it : m_currentPsmProteinRefList)
97 {
98 if(m_proteinMap.getByAccession(it.accession).isTarget)
99 return true;
100 }
101 return false;
102}
103
104
105void
107{
108 qDebug();
109 mpa_cborReader->enterContainer();
110
112 if(m_expectedString == "informations")
113 {
114 qDebug() << m_expectedString;
115 readInformations(monitor);
117
118 qDebug() << m_expectedString;
119 if(m_expectedString == "log")
120 {
121 qDebug() << m_expectedString;
122 readLog(monitor);
124 }
125
126 logReady(monitor);
127 }
128 else
129 {
130 throw pappso::PappsoException("ERROR: expecting informations element");
131 }
132
133 qDebug() << m_expectedString;
134
135 if(m_expectedString == "parameter_map")
136 {
137 qDebug();
138 readParameterMap(monitor);
139 }
140 else
141 {
142 throw pappso::PappsoException("ERROR: expecting parameter_map element");
143 }
144
145
147 m_targetFastaFiles.clear();
148 m_decoyFastaFiles.clear();
149 if(m_expectedString == "target_fasta_files")
150 {
153 }
154
155 if(m_expectedString == "decoy_fasta_files")
156 {
159 }
160 fastaFilesReady(monitor);
161
162 if(m_expectedString == "protein_map")
163 {
164 readProteinMap(monitor);
166 }
167
168 if(m_expectedString == "sample_list")
169 {
170 sampleListStarted(monitor);
171 mpa_cborReader->enterContainer(); // array
172 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
173 {
174 readSample(monitor);
175 }
176 mpa_cborReader->leaveContainer(); // array
177 sampleListFinished(monitor);
178 }
179 else
180 {
182 QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
183 }
184 mpa_cborReader->leaveContainer(); // whole file
185 if(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
186 {
187 readRoot(monitor);
188 }
189}
190
191void
193{
194 bool is_ok;
195 // m_cborInformations.clear();
196 is_ok = mpa_cborReader->readCborMap(m_cborInformations);
197
198 if(!is_ok)
199 {
200 throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
201 }
202 qDebug() << m_cborInformations.keys();
203 if(m_cborInformations.value("type").toString() != "psm")
204 {
205 QStringList all_keys;
206 for(auto it_k : m_cborInformations.keys())
207 {
208 all_keys << it_k.toString();
209 }
211 QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
212 .arg(m_cborInformations.value("type").toString())
213 .arg(all_keys.join(" ")));
214 }
215 informationsReady(monitor);
216}
217
218void
220{
221 bool is_ok;
222 // m_cborInformations.clear();
223 is_ok = mpa_cborReader->readCborArray(m_cborLog);
224
225 if(!is_ok)
226 {
227 throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
228 }
229}
230
231
232void
234{
235 bool is_ok;
236 m_cborParameterMap.clear();
237 is_ok = mpa_cborReader->readCborMap(m_cborParameterMap);
238
239 if(!is_ok)
240 {
241 throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
242 }
243 parameterMapReady(monitor);
244}
245
246void
252
253
256{
257 PsmProteinRef protein_ref;
258 protein_ref.accession = "";
259 protein_ref.positions.clear();
260 mpa_cborReader->enterContainer();
262 qDebug() << m_expectedString;
263 if(m_expectedString == "accession")
264 {
265 is_ok = mpa_cborReader->decodeString(protein_ref.accession);
266 if(!is_ok)
267 {
268 throw pappso::PappsoException("ERROR: protein accession is not a string");
269 }
270 }
271 else
272 {
273 throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
274 }
275
277 qDebug() << m_expectedString;
278 if(m_expectedString == "positions")
279 {
280 mpa_cborReader->readArray(protein_ref.positions);
281
282 // mpa_cborReader->next();
283 }
284 else
285 {
287 QString("ERROR: expecting positions element in PSM protein_list not %1")
288 .arg(m_expectedString));
289 }
290 mpa_cborReader->leaveContainer();
291
292 qDebug() << "end";
293 return protein_ref;
294}
295
296
299{
300 is_ok = false;
301 PsmFile file;
302 mpa_cborReader->enterContainer();
304 if(m_expectedString == "name")
305 {
306 if(!mpa_cborReader->decodeString(file.name))
307 {
308 throw pappso::PappsoException("file name is not a string");
309 }
310 is_ok = true;
311 }
312 else
313 {
314 throw pappso::PappsoException("ERROR: expecting name element in file");
315 }
316 mpa_cborReader->leaveContainer();
317 return file;
318}
319
320
321void
323{
324 writer.startMap();
325 writer.append("name");
326 writer.append(psm_file.name);
327 writer.endMap();
328}
329
330void
332 const std::vector<PsmFile> &file_list)
333{
334 writer.startArray();
335 for(auto &psm_file : file_list)
336 {
337 writePsmFile(writer, psm_file);
338 }
339 writer.endArray();
340}
341
342
343void
345{
346 //"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
347 qDebug();
348 mpa_cborReader->enterContainer();
350
351 qDebug() << m_expectedString;
352 if(m_expectedString == "name")
353 {
354 if(!mpa_cborReader->decodeString(m_currentSampleName))
355 {
356 throw pappso::PappsoException("sample name is not a string");
357 }
358 }
359 else
360 {
361 throw pappso::PappsoException("ERROR: expecting name element in file");
362 }
363 //"identification_file_list": [{ "name":
364 //"/home/langella/data1/tandem/tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.xml",
365 //}],
366
368
369 qDebug() << m_expectedString;
371 if(m_expectedString == "identification_file_list")
372 {
373 bool is_ok;
374 mpa_cborReader->enterContainer();
375
376 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
377 {
379 }
380 mpa_cborReader->leaveContainer();
381
383 }
384 //"peaklist_file": {"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.mzml"
385 //},
386
387 if(m_expectedString == "peaklist_file")
388 {
389 bool is_ok;
391 }
392 else
393 {
394 throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
395 }
396 //"scan_list": [
397 sampleStarted(monitor);
399 if(m_expectedString == "scan_list")
400 {
401 mpa_cborReader->enterContainer();
402
403 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
404 {
405 readScan(monitor);
406 }
407 mpa_cborReader->leaveContainer();
408 }
409 else
410 {
411 throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
412 }
413 mpa_cborReader->leaveContainer();
414
415 sampleFinished(monitor);
416}
417
418void
420{
421 qDebug();
422 m_cborScanId.clear();
423 mpa_cborReader->enterContainer();
424 //"id": {
425 //"index": 1976
426 //},
427 qDebug() << "scan begin";
428
430 qDebug() << m_expectedString;
431 if(m_expectedString == "id")
432 {
433 if(!mpa_cborReader->readCborMap(m_cborScanId))
434 {
435 throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
436 }
437 }
438 else
439 {
441 QObject::tr("ERROR: expecting id element in scan not %1").arg(m_expectedString));
442 }
443 //"precursor": {
444 //"z": 2,
445 //"mz": 1120.529471
446 //},
447
449 m_cborScanPrecursor.clear();
450 qDebug() << m_expectedString;
451 if(m_expectedString == "precursor")
452 {
453 if(!mpa_cborReader->readCborMap(m_cborScanPrecursor))
454 {
455 throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
456 }
457 }
458 //"ms2": {PSM CBOR format documentation
459 //"rt": 12648.87,
460 //"mz" :[1,2,3,4],
461 //"intensity" : [1,2,3,4]
462 //},
463
465 qDebug() << m_expectedString;
466 m_cborScanMs2.clear();
467 if(m_expectedString == "ms2")
468 {
469 if(!mpa_cborReader->readCborMap(m_cborScanMs2))
470 {
472 QObject::tr("ms2 element in scan is not a cbor map %1 %2:\n%3")
474 .arg(m_cborScanId.value("index").toInteger())
475 .arg(mpa_cborReader->lastError().toString()));
476 }
477 }
478 //"psm_list": [
479 scanStarted(monitor);
480
482 qDebug() << m_expectedString;
483 if(m_expectedString == "psm_list")
484 {
485 mpa_cborReader->enterContainer();
486 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
487 {
488 readPsm(monitor);
489 }
490 mpa_cborReader->leaveContainer();
491 }
492
493 mpa_cborReader->leaveContainer();
494 qDebug() << "scan end";
495 scanFinished(monitor);
496 qDebug();
497}
498
499void
501{
502 bool is_ok;
503 mpa_cborReader->enterContainer();
505 // "proforma": "AQEEM[+15.99491]AQVAK",
506 if(m_expectedString == "proforma")
507 {
508 if(!mpa_cborReader->decodeString(m_currentPsmProforma))
509 {
510 throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
511 }
512 }
513 else
514 {
515 throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
516 }
517 //"protein_list" : [
518 //{
519 //"accession": "GRMZM2G083841_P01",
520 //"positions": [15,236]
521 //}
522 //],
523
526 qDebug() << m_expectedString;
527
528 if(m_expectedString == "protein_list")
529 {
530 mpa_cborReader->enterContainer(); // array
531 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
532 {
534 if(!is_ok)
535 {
536 qDebug();
538 QObject::tr("ERROR: reading protein_list element in psm-scan"));
539 }
540 }
541 // qDebug() << mpa_cborReader->type();
542 mpa_cborReader->leaveContainer(); // array
543 }
544 else
545 {
546 throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
547 }
548 // props: {
549 m_cborScanPsmProps.clear();
550
551 //"eval": {
552 qDebug();
553 m_cborScanPsmEval.clear();
555 qDebug() << m_expectedString;
556
557 if(m_expectedString == "props")
558 {
559 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmProps);
560 if(!is_ok)
561 {
562 throw pappso::PappsoException("ERROR: props element in psm-scan is not well formed");
563 }
564 if(!getExpectedString())
565 {
567 QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
568 }
569 }
570 if(m_expectedString == "eval")
571 {
572 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmEval);
573 if(!is_ok)
574 {
575 throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
576 }
577 }
578 else
579
580 {
582 QObject::tr("ERROR: expecting eval element in psm-scan %1 not %2 in %3 %4 %5")
584 .arg(m_expectedString)
585 .arg(__FILE__)
586 .arg(__FUNCTION__)
587 .arg(__LINE__));
588 }
589
590
591 qDebug() << m_expectedString;
592
593
594 mpa_cborReader->leaveContainer();
595 qDebug();
596 psmReady(monitor);
597}
598
599void
601{
602 // PSM is ready, do what you want :)
603}
604
605void
609
610void
614
615void
617{
618}
619
620
621void
625
626void
630
631void
635
636void
640
641void
645
646void
650
653{
654 pappso::PeptideSp peptide_sp;
655 if(m_currentPsmProforma.isEmpty())
656 {
657 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
658 }
659 else
660 {
662 }
663 return peptide_sp;
664}
665
668{
669 if(m_currentPeaklistFile.name.isEmpty())
670 {
671 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
672 }
673 if(m_cborScanId.isEmpty())
674 {
675 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
676 }
677 if(m_cborScanPrecursor.isEmpty())
678 {
679 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
680 }
681
682 if(!m_cborScanId.keys().contains("index"))
683 {
684 throw pappso::PappsoException("There is no scan index");
685 }
686
687 if(m_cborScanMs2.isEmpty())
688 {
689 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
690 }
691 else
692 {
693 if(!m_cborScanMs2.keys().contains("mz"))
694 {
695 throw pappso::PappsoException("There is no ms2 mz values");
696 }
697 if(!m_cborScanMs2.keys().contains("intensity"))
698 {
699 throw pappso::PappsoException("There is no ms2 intensity values");
700 }
701 }
704 pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
705 pappso::MassSpectrumId ms_id(msrun_id_sp);
706 ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());
707
708 // native_id
709 if(m_cborScanId.keys().contains("native_id"))
710 {
711 ms_id.setNativeId(m_cborScanId.value("native_id").toString());
712 }
713
714 std::vector<DataPoint> data_point_vector;
715 std::size_t i = 0;
716 for(auto cbor_mz_value : m_cborScanMs2.value("mz").toArray())
717 {
718 data_point_vector.push_back(
719 {cbor_mz_value.toDouble(), m_cborScanMs2.value("intensity").toArray().at(i).toDouble()});
720 i++;
721 }
722
723
724 MassSpectrum mass_spectrum(data_point_vector);
725 pappso::PrecursorIonData precursor_ion_data;
726
727 pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
728 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
729 qualified_mass_spectrum.setMsLevel(2);
730
731 if(m_cborScanPrecursor.keys().contains("z"))
732 {
733 precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
734 }
735 if(m_cborScanPrecursor.keys().contains("mz"))
736 {
737 precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
738 }
739 if(m_cborScanPrecursor.keys().contains("intensity"))
740 {
741 precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
742 }
743 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
744 if(m_cborScanMs2.keys().contains("rt"))
745 {
746 qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
747 }
748
749
750 return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
751}
752
753void
757
758void
762
763
764double
765PsmFileReaderBase::getPrecursorMass(double mz_prec, uint charge) const
766{
767 // compute precursor mass given the charge state
768 mz_prec = mz_prec * (double)charge;
769 mz_prec -= (MHPLUS * (double)charge);
770 return mz_prec;
771}
772
773
774} // namespace psm
775} // namespace cbor
776} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setSampleName(const QString &name)
set a sample name for this MsRunId
Definition msrunid.cpp:77
static PeptideSp parseString(const QString &pepstr)
Class representing a fully specified mass spectrum.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
QualifiedMassSpectrumSPtr makeQualifiedMassSpectrumSPtr() const
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
overrides QCborStreamWriter base class to provide convenient functions
std::vector< PsmProteinRef > m_currentPsmProteinRefList
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
get the qualified Spectrum for the current PSM
pappso::PeptideSp getCurrentPsmPeptideSp() const
bool currentProteinRefListContainsTarget() const
tells if the current PSM has a target accession
virtual void sampleListStarted(pappso::UiMonitorInterface &monitor)
void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)
double getPrecursorMass(double mz_prec, uint charge) const
convenience function to compute the precursor ion mass
virtual void logReady(pappso::UiMonitorInterface &monitor)
virtual void scanStarted(pappso::UiMonitorInterface &monitor)
virtual void readPsm(pappso::UiMonitorInterface &monitor)
virtual void readLog(pappso::UiMonitorInterface &monitor)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor)
virtual void sampleStarted(pappso::UiMonitorInterface &monitor)
bool currentProteinRefListContainsDecoy() const
tells if the current PSM has a decoy accession
virtual void readParameterMap(pappso::UiMonitorInterface &monitor)
void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
virtual void readScan(pappso::UiMonitorInterface &monitor)
virtual void readInformations(pappso::UiMonitorInterface &monitor)
virtual void scanFinished(pappso::UiMonitorInterface &monitor)
virtual void sampleListFinished(pappso::UiMonitorInterface &monitor)
virtual void psmReady(pappso::UiMonitorInterface &monitor)
virtual void informationsReady(pappso::UiMonitorInterface &monitor)
void readRoot(pappso::UiMonitorInterface &monitor)
std::vector< PsmFile > m_currentIdentificationFileList
void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)
virtual void fastaFilesReady(pappso::UiMonitorInterface &monitor)
virtual void parameterMapReady(pappso::UiMonitorInterface &monitor)
virtual void readProteinMap(pappso::UiMonitorInterface &monitor)
virtual void readSample(pappso::UiMonitorInterface &monitor)
PsmProteinRef readPsmProteinRef(bool &is_ok)
virtual void sampleFinished(pappso::UiMonitorInterface &monitor)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
const pappso_double MHPLUS(1.007276466879)
unsigned int uint
Definition types.h:67