libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfilereaderbase.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
3 * \date 05/07/2025
4 * \author Olivier Langella
5 * \brief Base class to read CBOR PSM file
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfilereaderbase.h"
30#include <QDebug>
32#include <QCborArray>
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
45
47{
48 // qWarning() << "~PsmFileReaderBase";
49}
50
51
52void
54{
55
56 qDebug();
57 initCborReader(cborp);
58
59 qDebug();
60 if(mpa_cborReader->isMap())
61 {
62 readRoot(monitor);
63 }
64 qDebug();
65}
66
67void
69{
70 qDebug();
71 initCborReader(cborp);
72
73 qDebug();
74 if(mpa_cborReader->isMap())
75 {
76 readRoot(monitor);
77 }
78 qDebug();
79}
80
81
82bool
84{
85 for(auto &it : m_currentPsmProteinRefList)
86 {
87 if(!m_proteinMap.getByAccession(it.accession).isTarget)
88 return true;
89 }
90 return false;
91}
92
93bool
95{
96 for(auto &it : m_currentPsmProteinRefList)
97 {
98 if(m_proteinMap.getByAccession(it.accession).isTarget)
99 return true;
100 }
101 return false;
102}
103
104
105void
107{
108 qDebug();
109 mpa_cborReader->enterContainer();
110
112 if(m_expectedString == "informations")
113 {
114 qDebug() << m_expectedString;
115 readInformations(monitor);
117
118 qDebug() << m_expectedString;
119 if(m_expectedString == "log")
120 {
121 qDebug() << m_expectedString;
122 readLog(monitor);
124 }
125
126 logReady(monitor);
127 }
128 else
129 {
130 throw pappso::PappsoException("ERROR: expecting informations element");
131 }
132
133 qDebug() << m_expectedString;
134
135 if(m_expectedString == "parameter_map")
136 {
137 qDebug();
138 readParameterMap(monitor);
139 }
140 else
141 {
142 throw pappso::PappsoException("ERROR: expecting parameter_map element");
143 }
144
145
147 m_targetFastaFiles.clear();
148 m_decoyFastaFiles.clear();
149 if(m_expectedString == "target_fasta_files")
150 {
153 }
154
155 if(m_expectedString == "decoy_fasta_files")
156 {
159 }
160 fastaFilesReady(monitor);
161
162 if(m_expectedString == "protein_map")
163 {
164 readProteinMap(monitor);
166 }
167
168 if(m_expectedString == "sample_list")
169 {
170 sampleListStarted(monitor);
171 mpa_cborReader->enterContainer(); // array
172 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
173 {
174 readSample(monitor);
175 }
176 mpa_cborReader->leaveContainer(); // array
177 sampleListFinished(monitor);
178 }
179 else
180 {
182 QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
183 }
184 mpa_cborReader->leaveContainer(); // whole file
185 if(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
186 {
187 readRoot(monitor);
188 }
189}
190
191void
193{
194 bool is_ok;
195 // m_cborInformations.clear();
196 is_ok = mpa_cborReader->readCborMap(m_cborInformations);
197
198 if(!is_ok)
199 {
200 throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
201 }
202 qDebug() << m_cborInformations.keys();
203 if(m_cborInformations.value("type").toString() != "psm")
204 {
205 QStringList all_keys;
206 for(auto it_k : m_cborInformations.keys())
207 {
208 all_keys << it_k.toString();
209 }
211 QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
212 .arg(m_cborInformations.value("type").toString())
213 .arg(all_keys.join(" ")));
214 }
215 informationsReady(monitor);
216}
217
218void
220{
221 bool is_ok;
222 // m_cborInformations.clear();
223 is_ok = mpa_cborReader->readCborArray(m_cborLog);
224
225 if(!is_ok)
226 {
227 throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
228 }
229}
230
231
232void
234{
235 bool is_ok;
236 m_cborParameterMap.clear();
237 is_ok = mpa_cborReader->readCborMap(m_cborParameterMap);
238
239 if(!is_ok)
240 {
241 throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
242 }
243 parameterMapReady(monitor);
244}
245
246void
252
253
256{
257 PsmProteinRef protein_ref;
258 protein_ref.accession = "";
259 protein_ref.positions.clear();
260 mpa_cborReader->enterContainer();
262 qDebug() << m_expectedString;
263 if(m_expectedString == "accession")
264 {
265 is_ok = mpa_cborReader->decodeString(protein_ref.accession);
266 if(!is_ok)
267 {
268 throw pappso::PappsoException("ERROR: protein accession is not a string");
269 }
270 }
271 else
272 {
273 throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
274 }
275
277 qDebug() << m_expectedString;
278 if(m_expectedString == "positions")
279 {
280 mpa_cborReader->readArray(protein_ref.positions);
281
282 // mpa_cborReader->next();
283 }
284 else
285 {
287 QString("ERROR: expecting positions element in PSM protein_list not %1")
288 .arg(m_expectedString));
289 }
290 mpa_cborReader->leaveContainer();
291
292 qDebug() << "end";
293 return protein_ref;
294}
295
296
299{
300 is_ok = false;
301 PsmFile file;
302 mpa_cborReader->enterContainer();
304 if(m_expectedString == "name")
305 {
306 if(!mpa_cborReader->decodeString(file.name))
307 {
308 throw pappso::PappsoException("file name is not a string");
309 }
310 is_ok = true;
311 }
312 else
313 {
314 throw pappso::PappsoException("ERROR: expecting name element in file");
315 }
316 mpa_cborReader->leaveContainer();
317 return file;
318}
319
320
321void
323{
324 writer.startMap();
325 writer.append("name");
326 writer.append(psm_file.name);
327 writer.endMap();
328}
329
330void
332 const std::vector<PsmFile> &file_list)
333{
334 writer.startArray();
335 for(auto &psm_file : file_list)
336 {
337 writePsmFile(writer, psm_file);
338 }
339 writer.endArray();
340}
341
342
343void
345{
346 //"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
347 qDebug();
348 mpa_cborReader->enterContainer();
350
351 qDebug() << m_expectedString;
352 if(m_expectedString == "name")
353 {
354 if(!mpa_cborReader->decodeString(m_currentSampleName))
355 {
356 throw pappso::PappsoException("sample name is not a string");
357 }
358 }
359 else
360 {
361 throw pappso::PappsoException("ERROR: expecting name element in file");
362 }
363 //"identification_file_list": [{ "name":
364 //"/home/langella/data1/tandem/tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.xml",
365 //}],
366
368
369 qDebug() << m_expectedString;
371 if(m_expectedString == "identification_file_list")
372 {
373 bool is_ok;
374 mpa_cborReader->enterContainer();
375
376 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
377 {
379 }
380 mpa_cborReader->leaveContainer();
381
383 }
384 //"peaklist_file": {"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.mzml"
385 //},
386
387 if(m_expectedString == "peaklist_file")
388 {
389 bool is_ok;
391 }
392 else
393 {
394 throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
395 }
396 //"scan_list": [
397 sampleStarted(monitor);
399 if(m_expectedString == "scan_list")
400 {
401 mpa_cborReader->enterContainer();
402
403 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
404 {
405 readScan(monitor);
406 }
407 mpa_cborReader->leaveContainer();
408 }
409 else
410 {
411 throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
412 }
413 mpa_cborReader->leaveContainer();
414
415 sampleFinished(monitor);
416}
417
418void
420{
421 qDebug();
422 m_cborScanId.clear();
423 mpa_cborReader->enterContainer();
424 //"id": {
425 //"index": 1976
426 //},
427 qDebug() << "scan begin";
428
430 qDebug() << m_expectedString;
431 if(m_expectedString == "id")
432 {
433 if(!mpa_cborReader->readCborMap(m_cborScanId))
434 {
435 throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
436 }
437 }
438 else
439 {
441 QObject::tr("ERROR: expecting id element in scan not %1").arg(m_expectedString));
442 }
443 //"precursor": {
444 //"z": 2,
445 //"mz": 1120.529471
446 //},
447
449 m_cborScanPrecursor.clear();
450 qDebug() << m_expectedString;
451 if(m_expectedString == "precursor")
452 {
453 if(!mpa_cborReader->readCborMap(m_cborScanPrecursor))
454 {
455 throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
456 }
457 }
458 //"ms2": {PSM CBOR format documentation
459 //"rt": 12648.87,
460 //"mz" :[1,2,3,4],
461 //"intensity" : [1,2,3,4]
462 //},
463
465 qDebug() << m_expectedString;
466 m_cborScanMs2.clear();
467 if(m_expectedString == "ms2")
468 {
469 if(!mpa_cborReader->readCborMap(m_cborScanMs2))
470 {
472 QObject::tr("ms2 element in scan is not a cbor map %1 %2:\n%3")
474 .arg(m_cborScanId.value("index").toInteger())
475 .arg(mpa_cborReader->lastError().toString()));
476 }
477 }
478
479
481 qDebug() << m_expectedString;
482
483
484 if(m_expectedString == "props")
485 {
486 bool is_ok;
487 is_ok = mpa_cborReader->readCborMap(m_cborScanProps);
488 if(!is_ok)
489 {
490 throw pappso::PappsoException("ERROR: props element in scan is not well formed");
491 }
492 if(!getExpectedString())
493 {
495 QObject::tr("ERROR: expecting psm_list element in scan %1").arg(m_currentPsmProforma));
496 }
497 }
498
499 //"psm_list": [
500 scanStarted(monitor);
501 if(m_expectedString == "psm_list")
502 {
503 mpa_cborReader->enterContainer();
504 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
505 {
506 readPsm(monitor);
507 }
508 mpa_cborReader->leaveContainer();
509 }
510
511 mpa_cborReader->leaveContainer();
512 qDebug() << "scan end";
513 scanFinished(monitor);
514 qDebug();
515}
516
517void
519{
520 bool is_ok;
521 mpa_cborReader->enterContainer();
523 // "proforma": "AQEEM[+15.99491]AQVAK",
524 if(m_expectedString == "proforma")
525 {
526 if(!mpa_cborReader->decodeString(m_currentPsmProforma))
527 {
528 throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
529 }
530 }
531 else
532 {
533 throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
534 }
535 //"protein_list" : [
536 //{
537 //"accession": "GRMZM2G083841_P01",
538 //"positions": [15,236]
539 //}
540 //],
541
544 qDebug() << m_expectedString;
545
546 if(m_expectedString == "protein_list")
547 {
548 mpa_cborReader->enterContainer(); // array
549 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
550 {
552 if(!is_ok)
553 {
554 qDebug();
556 QObject::tr("ERROR: reading protein_list element in psm-scan"));
557 }
558 }
559 // qDebug() << mpa_cborReader->type();
560 mpa_cborReader->leaveContainer(); // array
561 }
562 else
563 {
564 throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
565 }
566 // props: {
567 m_cborScanPsmProps.clear();
568
569 //"eval": {
570 qDebug();
571 m_cborScanPsmEval.clear();
573 qDebug() << m_expectedString;
574
575 if(m_expectedString == "props")
576 {
577 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmProps);
578 if(!is_ok)
579 {
580 throw pappso::PappsoException("ERROR: props element in psm-scan is not well formed");
581 }
582 if(!getExpectedString())
583 {
585 QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
586 }
587 }
588 if(m_expectedString == "eval")
589 {
590 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmEval);
591 if(!is_ok)
592 {
593 throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
594 }
595 }
596 else
597
598 {
600 QObject::tr("ERROR: expecting eval element in psm-scan %1 not %2 in %3 %4 %5")
602 .arg(m_expectedString)
603 .arg(__FILE__)
604 .arg(__FUNCTION__)
605 .arg(__LINE__));
606 }
607
608
609 qDebug() << m_expectedString;
610
611
612 mpa_cborReader->leaveContainer();
613 qDebug();
614 psmReady(monitor);
615}
616
617void
619{
620 // PSM is ready, do what you want :)
621}
622
623void
627
628void
632
633void
635{
636}
637
638
639void
643
644void
648
649void
653
654void
658
659void
663
664void
668
671{
672 pappso::PeptideSp peptide_sp;
673 if(m_currentPsmProforma.isEmpty())
674 {
675 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
676 }
677 else
678 {
680 }
681 return peptide_sp;
682}
683
686{
687 if(m_currentPeaklistFile.name.isEmpty())
688 {
689 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
690 }
691 if(m_cborScanId.isEmpty())
692 {
693 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
694 }
695 if(m_cborScanPrecursor.isEmpty())
696 {
697 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
698 }
699
700 if(!m_cborScanId.keys().contains("index"))
701 {
702 throw pappso::PappsoException("There is no scan index");
703 }
704
705 if(m_cborScanMs2.isEmpty())
706 {
707 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
708 }
709 else
710 {
711 if(!m_cborScanMs2.keys().contains("mz"))
712 {
713 throw pappso::PappsoException("There is no ms2 mz values");
714 }
715 if(!m_cborScanMs2.keys().contains("intensity"))
716 {
717 throw pappso::PappsoException("There is no ms2 intensity values");
718 }
719 }
722 pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
723 pappso::MassSpectrumId ms_id(msrun_id_sp);
724 ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());
725
726 // native_id
727 if(m_cborScanId.keys().contains("native_id"))
728 {
729 ms_id.setNativeId(m_cborScanId.value("native_id").toString());
730 }
731
732 std::vector<DataPoint> data_point_vector;
733 std::size_t i = 0;
734 for(auto cbor_mz_value : m_cborScanMs2.value("mz").toArray())
735 {
736 data_point_vector.push_back(
737 {cbor_mz_value.toDouble(), m_cborScanMs2.value("intensity").toArray().at(i).toDouble()});
738 i++;
739 }
740
741
742 MassSpectrum mass_spectrum(data_point_vector);
743 pappso::PrecursorIonData precursor_ion_data;
744
745 pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
746 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
747 qualified_mass_spectrum.setMsLevel(2);
748
749 if(m_cborScanPrecursor.keys().contains("z"))
750 {
751 precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
752 }
753 if(m_cborScanPrecursor.keys().contains("mz"))
754 {
755 precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
756 }
757 if(m_cborScanPrecursor.keys().contains("intensity"))
758 {
759 precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
760 }
761 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
762 if(m_cborScanMs2.keys().contains("rt"))
763 {
764 qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
765 }
766
767
768 return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
769}
770
771void
775
776void
780
781
782double
783PsmFileReaderBase::getPrecursorMass(double mz_prec, uint charge) const
784{
785 // compute precursor mass given the charge state
786 mz_prec = mz_prec * (double)charge;
787 mz_prec -= (MHPLUS * (double)charge);
788 return mz_prec;
789}
790
791
792} // namespace psm
793} // namespace cbor
794} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setSampleName(const QString &name)
set a sample name for this MsRunId
Definition msrunid.cpp:77
static PeptideSp parseString(const QString &pepstr)
Class representing a fully specified mass spectrum.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
QualifiedMassSpectrumSPtr makeQualifiedMassSpectrumSPtr() const
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
overrides QCborStreamWriter base class to provide convenient functions
std::vector< PsmProteinRef > m_currentPsmProteinRefList
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
get the qualified Spectrum for the current PSM
pappso::PeptideSp getCurrentPsmPeptideSp() const
bool currentProteinRefListContainsTarget() const
tells if the current PSM has a target accession
virtual void sampleListStarted(pappso::UiMonitorInterface &monitor)
void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)
double getPrecursorMass(double mz_prec, uint charge) const
convenient function to compute precursor ion mass
virtual void logReady(pappso::UiMonitorInterface &monitor)
virtual void scanStarted(pappso::UiMonitorInterface &monitor)
virtual void readPsm(pappso::UiMonitorInterface &monitor)
virtual void readLog(pappso::UiMonitorInterface &monitor)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor)
virtual void sampleStarted(pappso::UiMonitorInterface &monitor)
bool currentProteinRefListContainsDecoy() const
tells if the current PSM has a decoy accession
virtual void readParameterMap(pappso::UiMonitorInterface &monitor)
void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
virtual void readScan(pappso::UiMonitorInterface &monitor)
virtual void readInformations(pappso::UiMonitorInterface &monitor)
virtual void scanFinished(pappso::UiMonitorInterface &monitor)
virtual void sampleListFinished(pappso::UiMonitorInterface &monitor)
virtual void psmReady(pappso::UiMonitorInterface &monitor)
virtual void informationsReady(pappso::UiMonitorInterface &monitor)
void readRoot(pappso::UiMonitorInterface &monitor)
std::vector< PsmFile > m_currentIdentificationFileList
void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)
virtual void fastaFilesReady(pappso::UiMonitorInterface &monitor)
virtual void parameterMapReady(pappso::UiMonitorInterface &monitor)
virtual void readProteinMap(pappso::UiMonitorInterface &monitor)
virtual void readSample(pappso::UiMonitorInterface &monitor)
PsmProteinRef readPsmProteinRef(bool &is_ok)
virtual void sampleFinished(pappso::UiMonitorInterface &monitor)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
const pappso_double MHPLUS(1.007276466879)
unsigned int uint
Definition types.h:67