libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::BafAsciiFileReader Class Reference

#include <bafasciifilereader.h>

Inheritance diagram for pappso::BafAsciiFileReader:
pappso::MsFileReader

Public Member Functions

 BafAsciiFileReader (const QString &file_name)
virtual ~BafAsciiFileReader ()
virtual Enums::MsDataFormat getFileFormat () override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds (const QString &run_prefix) override
MsRunReader * selectMsRunReader (const QString &file_name) const

Private Member Functions

virtual bool initialize (std::size_t &line_count)
Private Member Functions inherited from pappso::MsFileReader
 MsFileReader (const QString &file_name)
virtual ~MsFileReader ()

Additional Inherited Members

Private Attributes inherited from pappso::MsFileReader
QString m_fileName
Enums::MsDataFormat m_fileFormat = Enums::MsDataFormat::unknown

Detailed Description

Definition at line 15 of file bafasciifilereader.h.

Constructor & Destructor Documentation

◆ BafAsciiFileReader()

pappso::BafAsciiFileReader::BafAsciiFileReader ( const QString & file_name)

Definition at line 29 of file bafasciifilereader.cpp.

29 : MsFileReader{file_name}
30{
31 // To avoid initializing multiple times (costly process), we
32 // only initialize when needed, that is, upon getMsRunIds().
33 // initialize();
34}
MsFileReader(const QString &file_name)

References pappso::MsFileReader::MsFileReader().

◆ ~BafAsciiFileReader()

pappso::BafAsciiFileReader::~BafAsciiFileReader ( )
virtual

Definition at line 37 of file bafasciifilereader.cpp.

38{
39}

Member Function Documentation

◆ getFileFormat()

Enums::MsDataFormat pappso::BafAsciiFileReader::getFileFormat ( )
override virtual

Implements pappso::MsFileReader.

Definition at line 258 of file bafasciifilereader.cpp.

259{
260 return m_fileFormat;
261}
Enums::MsDataFormat m_fileFormat

References pappso::MsFileReader::m_fileFormat.

Referenced by pappso::MsFileAccessor::getMsRunIds().

◆ getMsRunIds()

std::vector< MsRunIdCstSPtr > pappso::BafAsciiFileReader::getMsRunIds ( const QString & run_prefix)
override virtual

Implements pappso::MsFileReader.

Definition at line 265 of file bafasciifilereader.cpp.

266{
267 std::vector<MsRunIdCstSPtr> ms_run_ids;
268
269 std::size_t ms_data_line_count = 0;
270
271 if(!initialize(ms_data_line_count))
272 return ms_run_ids;
273
274 // Finally create the MsRunId with the file name.
275 MsRunId ms_run_id(m_fileName);
276 ms_run_id.setMsDataFormat(m_fileFormat);
277
278 // We need to set the unambiguous xmlId string.
279 ms_run_id.setXmlId(QString("%1%2").arg(run_prefix).arg(Utils::getLexicalOrderedString(0)));
280
281 // Craft a meaningful sample name because otherwise all the files loaded from
282 // text files will have the same sample name and it will be difficult to
283 // differentiate them.
284 // Orig version:
285 // ms_run_id.setRunId("Single spectrum");
286 // Now the sample name is nothing but the file name without the path.
287
288 QFileInfo file_info(m_fileName);
289
290 // qDebug() << "file name:" << m_fileName;
291
292 QString sample_name = file_info.fileName();
293
294 // qDebug() << "sample name:" << sample_name;
295
296 ms_run_id.setRunId(sample_name);
297
298 // Now set the sample name to the run id:
299
300 ms_run_id.setSampleName(ms_run_id.getRunId());
301
302 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
303 //<< "Current ms_run_id:" << ms_run_id.toString();
304
305 // Finally make a shared pointer out of it and append it to the vector.
306 ms_run_ids.push_back(std::make_shared<MsRunId>(ms_run_id));
307
308 return ms_run_ids;
309}
virtual bool initialize(std::size_t &line_count)
static const QString getLexicalOrderedString(unsigned int num)
Definition utils.cpp:72

References pappso::Utils::getLexicalOrderedString(), pappso::MsRunId::getRunId(), initialize(), pappso::MsFileReader::m_fileFormat, pappso::MsFileReader::m_fileName, pappso::MsRunId::setMsDataFormat(), pappso::MsRunId::setRunId(), pappso::MsRunId::setSampleName(), and pappso::MsRunId::setXmlId().

Referenced by pappso::MsFileAccessor::getMsRunIds().

◆ initialize()

bool pappso::BafAsciiFileReader::initialize ( std::size_t & line_count)
private virtual

Definition at line 42 of file bafasciifilereader.cpp.

43{
44 // Here we just test some the lines of the file to check that they comply with
45 // the brukerBafAscii format.
46
47 line_count = 0;
48
49 QFile file(m_fileName);
50
51 if(!file.open(QFile::ReadOnly | QFile::Text))
52 {
53 qDebug() << "Failed to open file" << m_fileName;
54
55 return false;
56 }
57
58 // Construct the regular expression pattern, piecemeal...
59
60 // The retention time as the very first value in the line.
61
62 QString regexp_pattern =
63 QString("^(%1)").arg(Utils::unsignedDoubleNumberNoExponentialRegExp.pattern());
64
65 // The ionization mode (positive or negative)
66 regexp_pattern += QString(",([+-])");
67
68 regexp_pattern += QString(",(ESI|MALDI)");
69
70 // The MS level (ms1 for full scan mass spectrum)
71 regexp_pattern += QString(",ms(\\d)");
72
73 // Do no know what this is for.
74 regexp_pattern += QString(",(-)");
75
76 // The type of peak (profile or centroid).
77 regexp_pattern += QString(",(profile|line)");
78
79 // The m/z range of the mass spectrum.
80
81 regexp_pattern += QString(",(%1-%2)")
84
85 // The count of peaks following this element in the remaining of the line.
86
87 regexp_pattern += QString(",(\\d+)");
88
89 regexp_pattern += QString("(.*$)");
90
91 // qDebug() << "The full regexp_pattern:" << regexp_pattern;
92
93 QRegularExpression line_regexp(regexp_pattern);
94
95 QRegularExpressionMatch regexp_match;
96
97 QString line;
98 bool file_reading_failed = false;
99 bool ok = false;
100
101 // Reading, parsing and checking lines is extremely time consuming.
102 // What we want here is reduce the time all the file's lines are
103 // read. We could say that we want to parse and check the first
104 // CHECKED_LINES_COUNT lines and then avoid parsing and checking, just go
105 // through the lines. At the end of the file, the number of lines that have
106 // been read is stored in the out parameter line_count.
107 std::size_t iter = 0;
108
109 while(!file.atEnd())
110 {
111 line = file.readLine().trimmed();
112
113 ++iter;
114 // qDebug() << "Read one line more: (not yet checked)" << iter;
115 if(iter > CHECKED_LINES_COUNT)
116 continue;
117
118 if(line.startsWith('#') || line.isEmpty() || Utils::endOfLineRegExp.match(line).hasMatch())
119 continue;
120
121 // qDebug() << "Current brukerBafAscii format line " << line_count << ": "
122 // << line.left(30) << " ... " << line.right(30);
123
124 regexp_match = line_regexp.match(line);
125
126 if(regexp_match.hasMatch())
127 {
128 // qDebug() << "The match succeeded.";
129
130 regexp_match.captured(1).toDouble(&ok);
131 if(!ok)
132 {
133 qDebug() << "Failed to extract the retention time of the mass spectrum.";
134
135 file_reading_failed = true;
136
137 break;
138 }
139
140 QString ionization_mode = regexp_match.captured(2);
141 QString source_type = regexp_match.captured(3);
142
143 regexp_match.captured(4).toInt(&ok);
144 if(!ok)
145 {
146 qDebug() << "Failed to extract the MS level of the mass spectrum.";
147
148 file_reading_failed = true;
149
150 break;
151 }
152
153 QString peak_shape_type = regexp_match.captured(6);
154
155 QString mz_range = regexp_match.captured(7);
156
157 mz_range.left(mz_range.indexOf("-")).toDouble(&ok);
158 if(!ok)
159 {
160 qDebug() << "Failed to extract the start of the m/z range.";
161
162 file_reading_failed = true;
163
164 break;
165 }
166
167 mz_range.right(mz_range.indexOf("-") + 1).toDouble(&ok);
168 if(!ok)
169 {
170 qDebug() << "Failed to extract the end of the m/z range.";
171
172 file_reading_failed = true;
173
174 break;
175 }
176
177 // qDebug() << qSetRealNumberPrecision(10)
178 // << "mz_range_start: " << mz_range_start
179 // << "mz_range_end: " << mz_range_end;
180
181 int peak_count = regexp_match.captured(8).toInt(&ok);
182 if(!ok)
183 {
184 qDebug() << "Failed to extract the number of peaks in the mass "
185 "spectrum.";
186
187 file_reading_failed = true;
188
189 break;
190 }
191
192 QString peaks = regexp_match.captured(9);
193 QStringList peaks_stringlist = peaks.split(",", Qt::SkipEmptyParts);
194
195 // qDebug() << "The number of peaks:" << peaks_stringlist.size();
196
197 // Sanity check:
198 if(peaks_stringlist.size() != peak_count)
199 {
200 // qDebug() << "The number of peaks in the mass spectrum does not
201 // "
202 // "match the advertised one.";
203
204 file_reading_failed = true;
205
206 break;
207 }
208
209 // qDebug() << "The retention time:" << retention_time
210 // << "the ionization mode: " << ionization_mode
211 // << "the source type: " << source_type
212 // << "MS level is:" << ms_level
213 // << "peak shape type: " << peak_shape_type
214 // << "m/z range: " << mz_range << "peak count: " <<
215 // peak_count
216 // << "and peaks: " << peaks.left(100) << " ... "
217 // << peaks.right(100) << "";
218
219 // If we are here, that means that the read line has conformed
220 // to the format expected.
221 ++line_count;
222 // qDebug() << "Checked one line more:" << line_count;
223 }
224 // End end of
225 // if(regexp_match.hasMatch())
226 else
227 {
228 qDebug() << "The match failed.";
229 file_reading_failed = true;
230
231 break;
232 }
233 }
234 // End of
235 // while(!file.atEnd())
236
237 file.close();
238
239 if(!file_reading_failed && line_count >= 1)
240 {
242 return true;
243 }
244
246
247 // qDebug() << "The number of parsed mass spectra: " << line_count;
248
249 // qDebug() << "Detected file format:"
250 // << Utils::msDataFormatAsString(m_fileFormat)
251 // << "with number of spectra: " << line_count;
252
253 return false;
254}
static QRegularExpression unsignedDoubleNumberNoExponentialRegExp
Definition utils.h:54
static QRegularExpression endOfLineRegExp
Regular expression that tracks the end of line in text files.
Definition utils.h:69
@ unknown
unknown format
Definition types.h:149
static const std::size_t CHECKED_LINES_COUNT

References pappso::Enums::brukerBafAscii, pappso::CHECKED_LINES_COUNT, pappso::Utils::endOfLineRegExp, line, pappso::MsFileReader::m_fileFormat, pappso::MsFileReader::m_fileName, pappso::Enums::unknown, and pappso::Utils::unsignedDoubleNumberNoExponentialRegExp.

Referenced by getMsRunIds().

◆ selectMsRunReader()

MsRunReader * pappso::BafAsciiFileReader::selectMsRunReader ( const QString & file_name) const

The documentation for this class was generated from the following files: