libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
sagereader.cpp
Go to the documentation of this file.
1/**
2 * \file input/sage/sagereader.cpp
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#include "sagereader.h"
30#include <QJsonObject>
31#include <QJsonArray>
32#include <odsstream/tsvreader.h>
33#include <odsstream/odsexception.h>
34#include "sagetsvhandler.h"
39
42 const pappso::cbor::psm::SageFileReader &sage_file_reader,
43 const QString &sage_json_file)
44 : m_sageFileReader(sage_file_reader)
45{
46 mp_monitor = p_monitor;
47 mp_cborWriter = p_output;
48 m_jsonAbsoluteFilePath = sage_json_file;
49}
50
54
55const QString &
60
61
62void
67
73
74void
76 const QString &sequence_in)
77{
78 QString accession = description_in.split(" ", Qt::SkipEmptyParts).at(0);
79 try
80 {
81 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
82 psm_protein.protein_sp.get()->setSequence(sequence_in);
83 psm_protein.protein_sp.get()->setDescription(description_in);
84 }
86 {
87 }
88 try
89 {
90 accession = accession.prepend(m_decoyTag);
91 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
92 psm_protein.protein_sp.get()->setSequence(sequence_in);
93 psm_protein.protein_sp.get()->setDescription(description_in);
94 psm_protein.protein_sp.get()->reverse();
95 }
97 {
98 }
99}
100
106
107
108void
110{
111
112 extractMzmlPathList(m_sageFileReader.getJsonDocument());
113 // getTsvFilePath(mp_identificationDataSource->getJsonDocument().object());
114 QFileInfo tsv_file_info(getTsvFilePath(m_sageFileReader.getJsonDocument()));
116 try
117 {
118 TsvReader tsv_reader(handler);
119
120 QFile tsv_file(tsv_file_info.absoluteFilePath());
121 tsv_reader.parse(tsv_file);
122 tsv_file.close();
123 }
124 catch(OdsException &error_ods)
125 {
126 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
127 .arg(tsv_file_info.absoluteFilePath())
128 .arg(error_ods.qwhat()));
129 }
130
131
132 // collect protein sequences
133 QFile fastaFile(getFastaFilePath(m_sageFileReader.getJsonDocument()));
134 SageReader::FastaSeq seq(this);
135 pappso::FastaReader reader(seq);
136 reader.parse(fastaFile);
137
138 qDebug();
139 mp_cborWriter->append("protein_map");
141
142
143 mp_cborWriter->append("sample_list");
144 mp_cborWriter->startArray();
145 try
146 {
147 handler.writeSampleList();
148 }
149 catch(OdsException &error_ods)
150 {
151 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
152 .arg(tsv_file_info.absoluteFilePath())
153 .arg(error_ods.qwhat()));
154 }
155
156 mp_cborWriter->endArray();
157}
158
159void
161{
162
163 QJsonObject sage_object = json_doc.object();
164 QJsonValue json_mzml_path_list = sage_object.value("mzml_paths");
165 if(json_mzml_path_list.isUndefined())
166 {
167 throw pappso::ExceptionNotFound(QObject::tr("mzml_paths not found in Sage json document"));
168 }
169 m_mzmlPathList.clear();
170
171 for(auto path_mzml : json_mzml_path_list.toArray())
172 {
173 m_mzmlPathList << path_mzml.toString();
174 }
175}
176
177const QString &
178pappso::cbor::psm::SageReader::getMzmlPath(const QString &file_msrun) const
179{
180 for(auto &file_path : m_mzmlPathList)
181 {
182 if(file_path.endsWith(file_msrun))
183 return file_path;
184 }
186 QObject::tr("MS run %1 not found in Sage json document").append(file_msrun));
187}
188
189
190QString
192{
193 QString path;
194 QJsonObject sage_object = json_doc.object();
195 QJsonValue output_path = sage_object.value("output_paths");
196 if(output_path.isUndefined())
197 {
198 throw pappso::ExceptionNotFound(QObject::tr("output_paths not found in Sage json document"));
199 }
200
201 if(!output_path.isArray())
202 {
203 throw pappso::ExceptionNotFound(QObject::tr("output_paths is not an array"));
204 }
205 for(auto element : output_path.toArray())
206 {
207 if(element.isString())
208 {
209 if(element.toString().endsWith(".tsv"))
210 {
211 path = element.toString();
212 }
213 }
214 }
215 return path;
216}
217
218QString
220{
221 QString path;
222 QJsonObject sage_object = json_doc.object();
223 QJsonValue database = sage_object.value("database");
224 if(database.isUndefined())
225 {
226 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
227 }
228 path = database.toObject().value("fasta").toString();
229 if(path.isEmpty())
230 {
231 throw pappso::ExceptionNotFound(QObject::tr("fasta value is empty"));
232 }
233 return path;
234}
235
236
237std::vector<pappso::cbor::psm::SageReader::SageModification>
239{
240 std::vector<SageReader::SageModification> list;
241 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
242 QJsonValue database = sage_object.value("database");
243 if(database.isUndefined())
244 {
245 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
246 }
247
248 QJsonValue static_mods = database.toObject().value("static_mods");
249 if(static_mods.isUndefined())
250 {
251 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
252 }
253 for(QString residue_str : static_mods.toObject().keys())
254 {
255 SageModification modif;
256 modif.residue = residue_str.at(0);
258 (Enums::AminoAcidChar)modif.residue.toLatin1(),
259 static_mods.toObject().value(residue_str).toDouble());
260 modif.strModification =
261 QString::number(static_mods.toObject().value(residue_str).toDouble(), 'f', 6);
262 if(modif.strModification.isEmpty())
263 {
264 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
265 }
266 if(modif.modification->getMass() < 0)
267 {
268 modif.strModification = QString("[%1]").arg(modif.strModification);
269 }
270 else
271 {
272 modif.strModification = QString("[+%1]").arg(modif.strModification);
273 }
274 list.push_back(modif);
275 }
276 return list;
277}
278
279std::vector<pappso::cbor::psm::SageReader::SageModification>
281{
282 std::vector<SageReader::SageModification> list;
283 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
284 QJsonValue database = sage_object.value("database");
285 if(database.isUndefined())
286 {
287 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
288 }
289
290 QJsonValue var_mods = database.toObject().value("variable_mods");
291 if(var_mods.isUndefined())
292 {
293 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
294 }
295 for(QString residue_str : var_mods.toObject().keys())
296 {
297 SageModification modif;
298 modif.residue = residue_str.at(0);
299 for(QJsonValue one_mass : var_mods.toObject().value(residue_str).toArray())
300 {
302 (Enums::AminoAcidChar)modif.residue.toLatin1(), one_mass.toDouble());
303 modif.strModification = QString::number(one_mass.toDouble(), 'f', 6);
304 if(modif.strModification.isEmpty())
305 {
306 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
307 }
308 if(modif.modification->getMass() < 0)
309 {
310 modif.strModification = QString("[%1]").arg(modif.strModification);
311 }
312 else
313 {
314 modif.strModification = QString("[+%1]").arg(modif.strModification);
315 }
316 list.push_back(modif);
317 }
318 }
319 return list;
320}
321
322QString
324{
325 QString path;
326 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
327 QJsonValue database = sage_object.value("database");
328 if(database.isUndefined())
329 {
330 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
331 }
332 path = database.toObject().value("decoy_tag").toString();
333 if(path.isEmpty())
334 {
335 throw pappso::ExceptionNotFound(QObject::tr("decoy_tag value is empty"));
336 }
337 return path;
338}
339
pappso_double getMass() const
void parse(QFile &fastaFile)
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:658
overrides QCborStreamWriter base class to provide convenient functions
void setSequence(const QString &description_in, const QString &sequence_in) override
const QString & getMzmlPath(const QString &file_msrun) const
SageReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const SageFileReader &sage_file_reader, const QString &sage_json_file)
std::vector< SageModification > getStaticModificationList() const
pappso::cbor::CborStreamWriter * mp_cborWriter
Definition sagereader.h:99
const QString & getmJsonAbsoluteFilePath() const
void extractMzmlPathList(const QJsonDocument &json_doc)
pappso::cbor::CborStreamWriter & getCborStreamWriter() const
const SageFileReader & m_sageFileReader
Definition sagereader.h:97
std::vector< SageModification > getVariableModificationList() const
QString getFastaFilePath(const QJsonDocument &json_doc)
const SageFileReader & getSageFileReader() const
QString getTsvFilePath(const QJsonDocument &json_doc)
pappso::UiMonitorInterface * mp_monitor
Definition sagereader.h:98
std::shared_ptr< Protein > protein_sp