libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
binarydataarray.cpp
Go to the documentation of this file.
/**
 * \file pappsomspp/core/processing/cbor/mzcbor/binarydataarray.cpp
 * \date 25/11/2025
 * \author Olivier Langella
 * \brief PSI BinaryDataArray object for mzML/mzCBOR
 */
7
/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of PAPPSOms-tools.
 *
 * PAPPSOms-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PAPPSOms-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
27
28
#include "binarydataarray.h"
#include "cvparam.h"
#include <cstdint>
#include <cstring>
#include <vector>
#include <qjsonarray.h>
#include <zlib.h>
34
35void
37{
38 QString txt_value;
39 reader.enterContainer();
40 // qDebug() << txt_value;
41 while(reader.hasNext() && (!reader.isInvalid()))
42 {
43 if(reader.isString())
44 {
45 if(reader.decodeString(txt_value))
46 {
47 // qDebug() << txt_value;
48 if(txt_value == "bits")
49 {
50 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
51 bits = reader.toUnsignedInteger();
52 reader.next();
53 }
54 else if(txt_value == "isInt")
55 {
56 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
57 isInt = reader.toBool();
58 reader.next();
59 }
60 else if(txt_value == "unit")
61 {
62 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
63 reader.decodeString(txt_value);
64 unit = txt_value;
65 }
66 else if(txt_value == "compress")
67 {
68 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
69 reader.decodeString(txt_value);
70 compress = txt_value;
71 }
72 else if(txt_value == "data")
73 {
74 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
75 // reader.next();
76 // qDebug() << reader.type();
77 auto r = reader.readByteArray();
78 while(r.status == QCborStreamReader::Ok)
79 {
80 byteArray += r.data;
81 r = reader.readByteArray();
82 }
83
84 if(r.status == QCborStreamReader::Error)
85 {
86 // handle error condition
87 // qDebug() << "error";
88 byteArray.clear();
89 }
90 }
91 else
92 {
93 reader.next();
94 }
95 }
96 else
97 {
98 reader.next();
99 }
100 }
101 else
102 {
103 reader.next();
104 }
105 }
106 reader.leaveContainer();
107}
108
109void
111{
112
113 writer.startMap();
114 writer.append("unit");
115 writer.append(unit);
116
117 writer.append("bits");
118 writer.append(bits);
119 writer.append("isInt");
120 writer.append(isInt);
121
122 writer.append("compress");
123 writer.append(compress);
124
125 writer.append("data");
126 writer.append(byteArray);
127 writer.endMap();
128}
129
130void
132{
133
134 qDebug();
135 //<binaryDataArray encodedLength="6380">
136 std::size_t encodedLength = reader.attributes().value("encodedLength").toULongLong();
137 qDebug() << "encodedLength=" << encodedLength;
138 while(reader.readNext() && !reader.isEndElement())
139 {
140 if(reader.isStartElement())
141 {
142 if(reader.name().toString() == "cvParam")
143 {
144 QString accession = reader.attributes().value("accession").toString();
145
146 qDebug() << "accession=" << accession;
147 //<cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />
148 if(accession == "MS:1000523")
149 {
150 bits = 64;
151 isInt = false;
152 }
153 else if(accession == "MS:1000519")
154 {
155 /*
156 *
157[Term]
158id: MS:1000519
159name: 32-bit integer
160def: "Signed 32-bit little-endian integer." [PSI:MS]
161is_a: MS:1000518 ! binary data type
162*/
163 bits = 32;
164 isInt = true;
165 }
166 else if(accession == "MS:1000521")
167 {
168 /*
169 [Term]
170 id: MS:1000521
171 name: 32-bit float
172 def: "32-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
173 is_a: MS:1000518 ! binary data type
174 */
175 bits = 32;
176 isInt = false;
177 }
178 else if(accession == "MS:1000522")
179 {
180
181 /*
182 [Term]
183 id: MS:1000522
184 name: 64-bit integer
185 def: "Signed 64-bit little-endian integer." [PSI:MS]
186 is_a: MS:1000518 ! binary data type*/
187 bits = 64;
188 isInt = true;
189 }
190
191 //<cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />
192 else if(accession == "MS:1000574")
193 {
194 compress = "zlib";
195 }
196 else if(accession == "MS:1000576")
197 {
198 /*
199 [Term]
200 id: MS:1000576
201 name: no compression
202 def: "No Compression." [PSI:MS]
203 is_a: MS:1000572 ! binary data compression type
204 */
205 compress = "none";
206 }
207
208 else if(accession == "MS:1000515")
209 {
210 unit = accession;
211 }
212 else if(accession == "MS:1000514")
213 {
214 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
215 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
216
217 unit = accession;
218 }
219 else if(accession == "MS:1000595")
220 {
221 // <cvParam cvRef="MS" accession="MS:1000595" name="time array" value=""
222 // unitCvRef="UO" unitAccession="UO:0000031" unitName="minute"/>
223
224 unit = accession;
225 }
226 else if(accession == "MS:1000786")
227 {
228 // <cvParam cvRef="MS" accession="MS:1000786" name="non-standard data
229 // array" value="ms level" unitCvRef="UO" unitAccession="UO:0000186"
230 // unitName="dimensionless unit"/>
231 //
232 unit = accession;
233 }
234 else
235 {
236 reader.raiseError(
237 QObject::tr("cvParam accession %1 is not known in binaryDataArray")
238 .arg(accession));
240 QObject::tr("cvParam accession %1 is not known in binaryDataArray")
241 .arg(accession));
242 }
243 reader.skipCurrentElement();
244 }
245 else if(reader.name().toString() == "binary")
246 {
247
248 while(reader.readNext() && !reader.isEndElement())
249 {
250 if(reader.isCharacters())
251 {
252 // clean content:
253 QStringView content = reader.text().trimmed();
254 if((reader.text().toString() == "\n") || (reader.text().toString() == "\n\t"))
255 {
256 }
257 else
258 {
259 // text node
260 if(!content.isEmpty())
261 {
262 // qDebug() << "text isCharacters" << content.mid(0, 10);
263
264 if((std::size_t)reader.text().size() != encodedLength)
265 {
266 qWarning() << "reader.text().size() != encodedLength"
267 << reader.text().size() << " " << encodedLength;
268 }
269
270 // mp_cborWriter->append("@text@");
271 // mp_cborWriter->append(content);
272 byteArray = byteArray.fromBase64(reader.text().trimmed().toLatin1());
273 }
274 }
275 }
276 }
277 }
278 else
279 {
280 reader.skipCurrentElement();
281 }
282 }
283 }
284 qDebug();
285}
286
287
288void
290{
291 //<binaryDataArray encodedLength="1152">
292 writer.writeStartElement("binaryDataArray");
293 auto base64 = byteArray.toBase64();
294 writer.writeAttribute("encodedLength", QString("%1").arg(base64.size()));
295 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
296 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
297 CvParam cv_param;
298 cv_param.cvRef = "MS";
299
300 if(unit == "MS:1000514")
301 {
302 cv_param.accession = unit;
303 cv_param.name = "m/z array";
304 cv_param.unitCvRef = "MS";
305 cv_param.unitAccession = "MS:1000040";
306 cv_param.unitName = "m/z";
307 cv_param.setValue("");
308 cv_param.toMzml(writer);
309 }
310 else if(unit == "MS:1000515")
311 {
312 //<cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
313 // unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />
314
315 cv_param.accession = unit;
316 cv_param.name = "intensity array";
317 cv_param.unitCvRef = "MS";
318 cv_param.unitAccession = "MS:1000131";
319 cv_param.unitName = "number of counts";
320 cv_param.setValue("");
321 cv_param.toMzml(writer);
322 }
323 else if(unit == "MS:1000595")
324 {
325 // <cvParam cvRef="MS" accession="MS:1000595" name="time array" value=""
326 // unitCvRef="UO" unitAccession="UO:0000031" unitName="minute"/>
327
328 cv_param.accession = unit;
329 cv_param.name = "time array";
330 cv_param.unitCvRef = "UO";
331 cv_param.unitAccession = "UO:0000031";
332 cv_param.unitName = "minute";
333 cv_param.setValue("");
334 cv_param.toMzml(writer);
335 }
336 else if(unit == "MS:1000786")
337 {
338 // <cvParam cvRef="MS" accession="MS:1000786" name="non-standard data
339 // array" value="ms level" unitCvRef="UO" unitAccession="UO:0000186"
340 // unitName="dimensionless unit"/>
341 //
342 cv_param.accession = unit;
343 cv_param.name = "non-standard data array";
344 cv_param.unitCvRef = "UO";
345 cv_param.unitAccession = "UO:0000186";
346 cv_param.unitName = "dimensionless unit";
347 cv_param.setValue("ms level");
348 cv_param.toMzml(writer);
349 }
350 else
351 {
353 QObject::tr("unit accession %1 is not known in binaryDataArray").arg(unit));
354 }
355
356 // <cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />
357
358 cv_param.unitCvRef.clear();
359 cv_param.unitAccession.clear();
360 cv_param.unitName.clear();
361 cv_param.setValue("");
362 if(isInt)
363 {
364 /*
365 id: MS:1000519
366 name: 32-bit integer
367 def: "Signed 32-bit little-endian integer." [PSI:MS]
368 is_a: MS:1000518 ! binary data type*/
369 if(bits == 32)
370 {
371 cv_param.accession = "MS:1000519";
372 cv_param.name = "32-bit integer";
373 cv_param.toMzml(writer);
374 }
375 else if(bits == 64)
376 {
377 /*
378 [Term]
379 id: MS:1000522
380 name: 64-bit integer
381 def: "Signed 64-bit little-endian integer." [PSI:MS]
382 is_a: MS:1000518 ! binary data type*/
383 cv_param.accession = "MS:1000522";
384 cv_param.name = "64-bit integer";
385 cv_param.toMzml(writer);
386 }
387 }
388 else
389 {
390 if(bits == 64)
391 {
392 cv_param.accession = "MS:1000523";
393 cv_param.name = "64-bit float";
394 cv_param.toMzml(writer);
395 }
396 else if(bits == 32)
397 {
398 /*
399 [Term]
400 id: MS:1000521
401 name: 32-bit float
402 def: "32-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
403 is_a: MS:1000518 ! binary data type
404 */
405 cv_param.accession = "MS:1000521";
406 cv_param.name = "32-bit float";
407 cv_param.toMzml(writer);
408 }
409 }
410 // <cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />
411 /*
412
413[Term]
414id: MS:1000520
415name: 16-bit float
416def: "OBSOLETE Signed 16-bit float." [PSI:MS]
417is_a: MS:1000518 ! binary data type
418is_obsolete: true
419
420
421[Term]
422id: MS:1000523
423name: 64-bit float
424def: "64-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
425is_a: MS:1000518 ! binary data type
426*/
427
428 if(compress == "zlib")
429 {
430 cv_param.accession = "MS:1000574";
431 cv_param.name = "zlib compression";
432 cv_param.toMzml(writer);
433 }
434 else if(compress == "none")
435 { /*
436[Term]
437id: MS:1000576
438name: no compression
439def: "No Compression." [PSI:MS]
440is_a: MS:1000572 ! binary data compression type
441*/
442 cv_param.accession = "MS:1000576";
443 cv_param.name = "no compression";
444 cv_param.toMzml(writer);
445 }
446
447 // <binary>eJwl0W9oW1U
448 // writer.writeStartElement("binary");
449 writer.writeTextElement("binary", base64);
450 // </binary>
451 // writer.writeEndElement();
452 // </binaryDataArray>
453 writer.writeEndElement();
454}
455
456
457void
459 std::vector<double> &double_list) const
460{
461
462 int size_in_byte = 8;
463 if(bits == 32)
464 {
465 size_in_byte = 4;
466 }
467
468 // if(result.decodingStatus == QByteArray::Base64DecodingStatus::Ok)
469 // { // Allocate buffer for decompressed data
470 if(compress == "zlib")
471 {
472 std::vector<unsigned char> data_heap;
473 uLongf decompressedSize = estimated_length * size_in_byte; // Estimate size
474 data_heap.resize(decompressedSize);
475
476 // Decompress the data
477 int result_zlib = uncompress(
478 data_heap.data(), &decompressedSize, (Bytef *)byteArray.constData(), byteArray.size());
479
480 if(result_zlib != Z_OK)
481 {
482 throw pappso::PappsoException(QObject::tr("Decompression failed: %1").arg(result_zlib));
483 }
484
485 // Resize the vector to the actual decompressed size
486 data_heap.resize(decompressedSize);
487 double_list.resize(decompressedSize / size_in_byte);
488
489
490 // double *double_ptr = (double *)&decompressedData[0];
491 std::size_t j = 0;
492 for(std::size_t i = 0; i < data_heap.size(); i += size_in_byte)
493 {
494 if(bits == 32)
495 {
496 if(isInt)
497 {
498 double_list[j] = *(std::int32_t *)&data_heap[i];
499 }
500 else
501 {
502 double_list[j] = *(std::float_t *)&data_heap[i];
503 }
504 }
505 else
506 {
507 if(isInt)
508 {
509 double_list[j] = *(std::int64_t *)&data_heap[i];
510 }
511 else
512 {
513 double_list[j] = *(double *)&data_heap[i];
514 }
515 }
516 // double_ptr++;
517 j++;
518 }
519 }
520 else if(compress == "none")
521 {
522
523 // double *double_ptr = (double *)&decompressedData[0];
524 std::size_t j = 0;
525 for(std::size_t i = 0; i < (std::size_t)byteArray.size(); i += size_in_byte)
526 {
527 if(bits == 32)
528 {
529 if(isInt)
530 {
531 double_list[j] = *(std::int32_t *)&byteArray.constData()[i];
532 }
533 else
534 {
535 double_list[j] = *(std::float_t *)&byteArray.constData()[i];
536 }
537 }
538 else
539 {
540 if(isInt)
541 {
542 double_list[j] = *(std::int64_t *)&byteArray.constData()[i];
543 }
544 else
545 {
546 double_list[j] = *(double *)&byteArray.constData()[i];
547 }
548 }
549 // double_ptr++;
550 j++;
551 }
552 }
553
554
555 // std::vector<double> v(decompressedData.cbegin(), decompressedData.cend());
556 // qDebug() << j << " " << double_list.size();
557}
558
/**
 * \brief Tell whether this object describes an intensity array.
 *
 * \return true when unit equals the accession "MS:1000515", which the
 * cvParam example below names "intensity array".
 *
 * NOTE(review): the line carrying this function's qualified name is absent
 * from this listing — confirm the declaration in binarydataarray.h.
 */
559bool
561{
562 // <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
563 // unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />
564
565 return unit == "MS:1000515";
566}
567
/**
 * \brief Tell whether this object describes an m/z array.
 *
 * \return true when unit equals the accession "MS:1000514", which the
 * cvParam example below names "m/z array".
 *
 * NOTE(review): the line carrying this function's qualified name is absent
 * from this listing — confirm the declaration in binarydataarray.h.
 */
568bool
570{
571 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
572 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
573
574 return unit == "MS:1000514";
575}
576
577QJsonObject
579{
580 QJsonObject binary_data;
581 binary_data.insert("unit", unit);
582 std::vector<double> double_list;
583 decodeVector(estimated_length, double_list);
584
585 QJsonArray double_array;
586
587 for(auto &value : double_list)
588 {
589 double_array.append(value);
590 }
591
592 binary_data.insert("array", double_array);
593 return binary_data;
594}
PSI BinaryDataArray object for mzML/mzCBOR.
simple override of the raw QCborStreamReader This adds convenient functions to put CBOR data into C++...
bool decodeString(QString &the_str)
decode the current cbor value as a string the point to the next value the current value is decoded as...
overrides QCborStreamWriter base class to provide convenient functions
PSI cvParam object for mzML/mzCBOR.
void fromCbor(CborStreamReader &reader)
void fromMzml(QXmlStreamReader &reader)
void toMzml(QXmlStreamWriter &writer)
void toCbor(CborStreamWriter &writer)
void decodeVector(std::size_t estimated_length, std::vector< double > &double_list) const
QJsonObject toJsonObject(std::size_t estimated_length) const
write the structure to a JSON object needs to decode the binary array
void setValue(const QString &value_str)
Definition cvparam.cpp:225
void toMzml(QXmlStreamWriter &writer)
Definition cvparam.cpp:233