libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
aastringcodec.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/aastringcodec.cpp
3 * \date 09/05/2023
4 * \author Olivier Langella
5 * \brief encode amino acid strings to integers and decode them back
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2023 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
29#include <QDebug>
30
31using namespace pappso;
32
/* Constructor body. The signature line is missing from this listing; the
 * member index of this page declares it as AaStringCodec(const AaCode &aaCode).
 *
 * Sets up the positional numeral system used by the codec:
 *  - m_base is the number of amino acid codes reported by m_aaCode.getSize()
 *    plus one;
 *  - m_units receives 10 entries, entry k holding m_base to the power k
 *    (1, m_base, m_base * m_base, ...).
 */
34{
35
36 m_base = m_aaCode.getSize() + 1;
// Fixed number of positional weights; methods indexing m_units by position
// can therefore address at most 10 positions.
37 m_units.resize(10);
// NOTE(review): unit is a uint32_t, so the running product wraps modulo 2^32
// once m_base^k no longer fits in 32 bits - confirm which positions are
// actually meant to be usable.
38 uint32_t unit = 1;
39 for(auto &this_unit : m_units)
40 {
41 this_unit = unit;
42 unit *= m_base;
43 }
44}
45
/* Body of what appears to be the copy constructor (the signature line is
 * missing from this listing): copies the numeric base and the table of
 * positional weights from other.
 * NOTE(review): m_aaCode is declared as a reference member in the member
 * index of this page, so it is presumably bound in the initializer list on
 * the missing line - confirm.
 */
47{
48 m_base = other.m_base;
49 m_units = other.m_units;
50}
51
55
56
57uint32_t
58pappso::AaStringCodec::code(const QString &aa_str) const
59{
60
61 std::size_t pos = 0;
62 uint32_t code = 0;
63 for(auto &aa_char : aa_str)
64 {
65 code += m_aaCode.getAaCode(aa_char.toLatin1()) * m_units[pos];
66 pos++;
67 }
68 return code;
69}
70
71uint32_t
72pappso::AaStringCodec::codeLlc(const QString &aa_str) const
73{
74 std::vector<uint8_t> llc_vec;
75
76 for(auto &aa_char : aa_str)
77 {
78 llc_vec.push_back(m_aaCode.getAaCode(aa_char.toLatin1()));
79 }
80 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
81
82
83 std::size_t pos = 0;
84 uint32_t code = 0;
85 for(auto &aa_code : llc_vec)
86 {
87 code += (uint32_t)aa_code * m_units[pos];
88 pos++;
89 }
90 return code;
91}
92
93uint32_t
94pappso::AaStringCodec::codeLlc(std::vector<uint8_t>::const_iterator it_begin,
95 std::size_t size) const
96{
97 std::vector<uint8_t> llc_vec;
98
99 for(std::size_t i = 0; i < size; i++)
100 {
101 llc_vec.push_back(*it_begin);
102 it_begin++;
103 }
104 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
105
106
107 std::size_t pos = 0;
108 uint32_t code = 0;
109 for(auto &aa_code : llc_vec)
110 {
111 code += (uint32_t)aa_code * m_units[pos];
112 pos++;
113 }
114 return code;
115}
116
117
/* Decodes one integer code back into a string of amino acid letters. The
 * signature line is missing from this listing; the member index of this page
 * declares it as QString decode(uint32_t code) const.
 *
 * The code is consumed digit by digit in base m_base, least significant
 * digit first: each digit is turned into a letter with
 * m_aaCode.getAa(digit).getLetter() and appended to the result. The loop ends
 * as soon as the remaining value is 0, so a code of 0 gives an empty string.
 */
118QString
120{
121 QString aa_suite;
122
123 while(code > 0)
124 {
// Lowest remaining base-m_base digit -> amino acid letter.
125 aa_suite.append(m_aaCode.getAa((uint8_t)(code % m_base)).getLetter());
// Drop the digit that has just been decoded.
126 code /= m_base;
127 }
128
129 // qDebug() << aa_suite;
130
131 return aa_suite;
132}
133
134QStringList
135pappso::AaStringCodec::decode(const std::vector<uint32_t> &code_list) const
136{
137 QStringList aa_string_list;
138 for(auto code : code_list)
139 {
140 aa_string_list << decode(code);
141 }
142 return aa_string_list;
143}
144
145
/* Computes the mass associated with an integer code. The signature line is
 * missing from this listing; the member index of this page declares it as
 * double getMass(uint32_t code) const.
 *
 * The code is consumed digit by digit in base m_base and the value returned
 * by m_aaCode.getMass(digit) is accumulated for every digit. A code of 0
 * yields a mass of 0.
 */
146double
148{
149 double mass = 0;
150
151 while(code > 0)
152 {
// Add the mass of the amino acid stored in the lowest remaining digit.
153 mass += m_aaCode.getMass((uint8_t)(code % m_base));
// Move on to the next digit.
154 code /= m_base;
155 }
156
157 return mass;
158}
159
160
/* Builds the list of code/mass pairs for every model length from 1 up to and
 * including size. The signature line is missing from this listing; this is
 * presumably generateLlcCodeListUpToMaxPeptideSize(std::size_t size) const
 * from the member index of this page - confirm.
 *
 * For each length p the first slot of the model takes every value from 1 to
 * m_base - 1; the remaining slots are filled by recGenerateModel(), which
 * only assigns values not greater than the previous slot. Returns an empty
 * list when size is 0.
 */
161std::vector<CodeToMass>
163{
164 std::vector<CodeToMass> llc_list;
165 if(size == 0)
166 return llc_list;
167 std::vector<uint8_t> model;
// NOTE(review): p is a uint8_t compared against size; if size is a
// std::size_t of 256 or more, p wraps around and the loop cannot terminate -
// confirm callers only pass small sizes.
168 for(uint8_t p = 1; p <= size; p++)
169 {
// Grow the model by one slot; slots beyond the first are overwritten by
// recGenerateModel() before any code is generated from them.
170 model.resize(p, 0);
171
172 for(uint8_t i = 1; i < m_base; i++)
173 {
174 model[0] = i;
175 if(p == 1)
176 {
// Single-slot model: nothing to recurse on, emit it directly.
177 llc_list.push_back(generateCodeMassFromModel(model));
178 }
179 else
180 {
181 recGenerateModel(llc_list, model, 1);
182 }
183 }
184 }
185 return llc_list;
186}
187
188
/* Builds the list of code/mass pairs for models of exactly size slots. The
 * signature line is missing from this listing; this is presumably
 * generateLlcCodeListByMaxPeptideSize(std::size_t size) const from the member
 * index of this page - confirm.
 *
 * The first slot takes every value from 1 to m_base - 1 and the remaining
 * slots are filled by recGenerateModel(). Returns an empty list when size
 * is 0.
 */
189std::vector<CodeToMass>
191{
192 std::vector<CodeToMass> llc_list;
193 if(size == 0)
194 return llc_list;
195 std::vector<uint8_t> model;
196 model.resize(size, 0);
197
198 for(uint8_t i = 1; i < m_base; i++)
199 {
200 model[0] = i;
// NOTE(review): with size == 1 the call below starts at position 1, which
// equals model.size(), so recGenerateModel() returns immediately and nothing
// is added to the list - confirm whether an empty result is intended for
// single-residue models.
201 recGenerateModel(llc_list, model, 1);
202 }
203 return llc_list;
204}
205
206void
207pappso::AaStringCodec::recGenerateModel(std::vector<CodeToMass> &glist,
208 std::vector<uint8_t> &model,
209 std::size_t position) const
210{
211 if(position == model.size())
212 return;
213
214 if(position == model.size() - 1)
215 {
216 uint8_t max = model[position - 1];
217 for(uint8_t i = 1; i <= max; i++)
218 {
219 model[position] = i;
220 glist.push_back(generateCodeMassFromModel(model));
221 }
222 }
223 else
224 {
225 uint8_t max = model[position - 1];
226 for(uint8_t i = 1; i <= max; i++)
227 {
228 model[position] = i;
229 recGenerateModel(glist, model, position + 1);
230 }
231 }
232}
233
/* Turns a model (a sequence of amino acid codes) into a code/mass pair. The
 * return-type line is missing from this listing; the member index of this
 * page declares the return type as CodeToMass.
 *
 * For every slot of the model, the mass returned by m_aaCode.getMass() is
 * added to code_mass.mass and (amino acid code * m_units[slot index]) is
 * added to code_mass.code.
 */
235pappso::AaStringCodec::generateCodeMassFromModel(const std::vector<uint8_t> &model) const
236{
// NOTE(review): the accumulation below assumes that a default-constructed
// CodeToMass starts with mass and code at zero - CodeToMass is defined
// elsewhere, confirm.
237 CodeToMass code_mass;
238 std::size_t pos = 0;
239 for(auto aacode : model)
240 {
241 code_mass.mass += m_aaCode.getMass(aacode);
242
// NOTE(review): m_units is indexed by slot position without a bounds check;
// the model must not be longer than m_units.
243 code_mass.code += (uint32_t)aacode * m_units[pos];
244 pos++;
245 }
246
247 // qDebug() << code_mass.code << " " << code_mass.mass;
248 return code_mass;
249}
250
251
/* Computes the largest code value reachable with size positions. The
 * signature line is missing from this listing; the member index of this page
 * declares it as std::size_t getLimitMax(std::size_t size) const ("get the
 * maximum code number for a given peptide size").
 *
 * The result is the sum, over the first size positions, of the highest digit
 * value (m_base - 1) multiplied by the positional weight m_units[pos]. The
 * sum is accumulated in std::size_t.
 */
252std::size_t
254{
255
256 std::size_t code = 0;
// NOTE(review): m_units is indexed without a bounds check; size must not
// exceed the number of entries in m_units.
257 for(std::size_t pos = 0; pos < size; pos++)
258 {
259 code += (std::size_t)(m_base - 1) * (std::size_t)m_units[pos];
260 }
261 return code;
262}
263
264bool
265pappso::AaStringCodec::codeOnlyContains(uint32_t code, const std::vector<uint8_t> &aa_ok) const
266{
267
268 while(code > 0)
269 {
270 if(std::find(aa_ok.begin(), aa_ok.end(), (uint8_t)(code % m_base)) == aa_ok.end())
271 return false;
272
273 code /= m_base;
274 }
275 return true;
276}
277
278bool
279pappso::AaStringCodec::uniqueCodeContainsAminoAcid(uint32_t code, uint8_t aa_ok, int times) const
280{
281
282 int number = 0;
283 while(code > 0)
284 {
285 if(aa_ok == (uint8_t)(code % m_base))
286 {
287 number++;
288 if(number == times)
289 return true;
290 }
291
292 code /= m_base;
293 }
294 return false;
295}
296
297
/* Accessor returning a const reference to the amino acid code table
 * (m_aaCode) used by this codec. The signature line is missing from this
 * listing; the member index of this page declares it as
 * const AaCode & getAaCode() const.
 */
298const pappso::AaCode &
300{
301 return m_aaCode;
302}
collection of integer codes for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
Definition aacode.h:44
std::size_t getLimitMax(std::size_t size) const
get the maximum code number for a given peptide size
double getMass(uint32_t code) const
const AaCode & getAaCode() const
QString decode(uint32_t code) const
uint32_t codeLlc(const QString &aa_str) const
get the lowest common denominator integer from an amino acid sequence string
void recGenerateModel(std::vector< CodeToMass > &glist, std::vector< uint8_t > &model, std::size_t position) const
recursive method to generate models
std::vector< uint32_t > m_units
bool uniqueCodeContainsAminoAcid(uint32_t code, uint8_t aa_ok, int times) const
tell if a unique code only contains one amino acid 1 or n times
uint32_t code(const QString &aa_str) const
get the integer code from an amino acid sequence string
const AaCode & m_aaCode
CodeToMass generateCodeMassFromModel(const std::vector< uint8_t > &model) const
AaStringCodec(const AaCode &aaCode)
std::vector< CodeToMass > generateLlcCodeListByMaxPeptideSize(std::size_t size) const
generates all possible combination of llc code mass llc : the lowest common code denominator for a gi...
bool codeOnlyContains(uint32_t code, const std::vector< uint8_t > &aa_ok) const
tell if a code only contains a list of amino acid
std::vector< CodeToMass > generateLlcCodeListUpToMaxPeptideSize(std::size_t size) const
generates all possible combination of llc code mass llc : the lowest common code denominator for a gi...
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39