libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::Enzyme Class Reference

#include <enzyme.h>

Public Member Functions

 Enzyme ()
 build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"
 Enzyme (const QString &recognition_site)
 build any enzyme given a recognition_site
 ~Enzyme ()
void eat (std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, EnzymeProductInterface &enzyme_product) const
 digest a protein into enzyme products
void setMiscleavage (unsigned int miscleavage)
 sets the maximum number of missed cleavages allowed in the digestion
unsigned int getMiscleavage () const
 gets the maximum number of missed cleavages allowed in the digestion
void setTakeOnlyFirstWildcard (bool take_only_first_wildcard)
 if true, take only the first possibility when the sequence contains X, B or Z wildcards
void setMaxPeptideVariantListSize (std::size_t max_peptide_variant_list_size)
 if there are wildcards in the protein sequence : restrict the number of possible peptide sequences
const QRegularExpression & getQRegExpRecognitionSite () const

Private Member Functions

void sanityCheck (EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
void replaceWildcards (std::vector< std::string > *p_peptide_variant_list) const

Private Attributes

QRegularExpression m_recognitionSite
 example with a kinase == [K,R]
unsigned int m_miscleavage = 0
bool m_takeOnlyFirstWildcard = false
std::size_t m_maxPeptideVariantListSize = 100
std::vector< char > m_wildCardX
std::vector< char > m_wildCardB
std::vector< char > m_wildCardZ

Detailed Description

Definition at line 31 of file enzyme.h.

Constructor & Destructor Documentation

◆ Enzyme() [1/2]

pappso::Enzyme::Enzyme ( )

build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"

Definition at line 32 of file enzyme.cpp.

33{
34 m_recognitionSite.setPattern("([KR])([^P])");
35 m_miscleavage = 0;
36
37
38 char vv1[] = {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
39 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
40 m_wildCardX.assign(std::begin(vv1), std::end(vv1));
41
42 char vv2[] = {'N', 'D'};
43 m_wildCardB.assign(std::begin(vv2), std::end(vv2));
44
45 char vv3[] = {'Q', 'E'};
46 m_wildCardZ.assign(std::begin(vv3), std::end(vv3));
47}
QRegularExpression m_recognitionSite
example with a kinase == [K,R]
Definition enzyme.h:89
std::vector< char > m_wildCardB
Definition enzyme.h:97
std::vector< char > m_wildCardZ
Definition enzyme.h:98
std::vector< char > m_wildCardX
Definition enzyme.h:96
unsigned int m_miscleavage
Definition enzyme.h:90

References m_miscleavage, m_recognitionSite, m_wildCardB, m_wildCardX, and m_wildCardZ.

◆ Enzyme() [2/2]

pappso::Enzyme::Enzyme ( const QString & recognition_site)

build any enzyme given a recognition_site

Parameters
recognition_site: a regular expression that must identify 2 motifs: one on the Nter side, one on the Cter side

Definition at line 49 of file enzyme.cpp.

50{
51 m_recognitionSite.setPattern(recognition_site);
52 m_miscleavage = 0;
53
54
55 char vv1[] = {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
56 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
57 m_wildCardX.assign(std::begin(vv1), std::end(vv1));
58
59 char vv2[] = {'N', 'D'};
60 m_wildCardB.assign(std::begin(vv2), std::end(vv2));
61
62 char vv3[] = {'Q', 'E'};
63 m_wildCardZ.assign(std::begin(vv3), std::end(vv3));
64}

References m_miscleavage, m_recognitionSite, m_wildCardB, m_wildCardX, and m_wildCardZ.

◆ ~Enzyme()

pappso::Enzyme::~Enzyme ( )

Definition at line 66 of file enzyme.cpp.

67{
68}

Member Function Documentation

◆ eat()

void pappso::Enzyme::eat ( std::int8_t sequence_database_id,
const ProteinSp & protein_sp,
bool is_decoy,
EnzymeProductInterface & enzyme_product ) const

digest a protein into enzyme products

Parameters
sequence_database_id: integer that references the sequence database (file, stream, url...)
protein_sp: the original protein to be digested
is_decoy: tells whether the current protein is a decoy (true) or a normal (false) protein
enzyme_product: the object that will receive the digestion products

Definition at line 87 of file enzyme.cpp.

91{
92 /*
93 * for aa in self.aa_to_cut:
94 seq = seq.replace(aa, aa + ' ')
95 seq_stack = []
96 for s in seq.strip().split(' '):
97 seq_stack.append(s)
98 if len(seq_stack) > self.misscleavage + 1:
99 seq_stack.pop(0)
100 s2 = ""
101 for s_miss in seq_stack[::-1]:
102 s2 = s_miss + s2
103 yield s2
104 */
105 qDebug() << "Enzyme::eat begin ";
106 const QString sequence = protein_sp.get()->getSequence();
107 qDebug() << sequence;
108 QStringList peptide_list;
109 int pos = 0;
110 int peptide_start = 0;
111 int peptide_size = sequence.size();
112 QRegularExpressionMatch match_recognition_site = m_recognitionSite.match(sequence, pos);
113 while(match_recognition_site.hasMatch())
114 {
115 pos = match_recognition_site.capturedStart(0);
116 peptide_size = pos + match_recognition_site.captured(1).length() - peptide_start;
117 // qDebug() << "pos=" << pos << " peptide_start=" << peptide_start << "
118 // peptide_size=" << peptide_size << " " <<
119 // sequence.mid(peptide_start,peptide_size);
120 if(peptide_size > 0)
121 {
122 peptide_list.append(sequence.mid(peptide_start, peptide_size));
123 }
124 peptide_start += peptide_size;
125 pos = peptide_start; // all peptides MUST be consecutive
126 match_recognition_site = m_recognitionSite.match(sequence, pos);
127 }
128 peptide_size = sequence.size() - peptide_start;
129 if(peptide_size > 0)
130 {
131 peptide_list.append(sequence.mid(peptide_start, peptide_size));
132 }
133
134 unsigned int start = 1;
135 bool is_nter = true;
136 foreach(const QString &peptide, peptide_list)
137 {
138 // enzyme_product.setPeptide(sequence_database_id, protein_sp,is_decoy,
139 // peptide, start,is_nter,0, false);
140 sanityCheck(enzyme_product,
141 sequence_database_id,
142 protein_sp,
143 is_decoy,
144 peptide,
145 start,
146 is_nter,
147 0,
148 false);
149 is_nter = false;
150 start += peptide.size();
151 }
152
153 unsigned int miscleavage_i = 0;
154 while(miscleavage_i < m_miscleavage)
155 {
156 miscleavage_i++;
157 qDebug() << "miscleavage_i=" << miscleavage_i;
158 int chunk_number = miscleavage_i + 1;
159 unsigned int start = 1;
160 bool is_nter = true;
161
162 for(auto i = 0; i < peptide_list.size(); ++i)
163 {
164 qDebug() << "start=" << start;
165 QStringList peptide_mis_list;
166 for(auto j = 0; (j < chunk_number) && ((i + j) < peptide_list.size()); j++)
167 {
168 peptide_mis_list << peptide_list.at(i + j);
169 }
170 if(peptide_mis_list.size() == chunk_number)
171 {
172 // enzyme_product.setPeptide(sequence_database_id,
173 // protein_sp,is_decoy, peptide_mis_list.join(""), start,is_nter,
174 // miscleavage_i, false);
175 sanityCheck(enzyme_product,
176 sequence_database_id,
177 protein_sp,
178 is_decoy,
179 peptide_mis_list.join(""),
180 start,
181 is_nter,
182 miscleavage_i,
183 false);
184 }
185 is_nter = false;
186 start += peptide_list.at(i).size();
187 }
188 }
189}
void sanityCheck(EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
Definition enzyme.cpp:271

References m_miscleavage, m_recognitionSite, and sanityCheck().

◆ getMiscleavage()

unsigned int pappso::Enzyme::getMiscleavage ( ) const

gets the maximum number of missed cleavages allowed in the digestion

Returns
miscleavage: maximum number of missed cleavages to allow (default is 0)

Definition at line 76 of file enzyme.cpp.

77{
78 return m_miscleavage;
79}

References m_miscleavage.

◆ getQRegExpRecognitionSite()

const QRegularExpression & pappso::Enzyme::getQRegExpRecognitionSite ( ) const

Definition at line 348 of file enzyme.cpp.

349{
350 return m_recognitionSite;
351}

References m_recognitionSite.

◆ replaceWildcards()

void pappso::Enzyme::replaceWildcards ( std::vector< std::string > * p_peptide_variant_list) const
private

Definition at line 192 of file enzyme.cpp.

193{
194 std::string new_peptide = p_peptide_variant_list->at(0);
195 qDebug() << "Enzyme::replaceWildcards begin " << new_peptide.c_str();
196 std::vector<std::string> old_peptide_variant_list;
197 old_peptide_variant_list.assign(p_peptide_variant_list->begin(), p_peptide_variant_list->end());
198
199
200 for(char wildcard : {'X', 'B', 'Z'})
201 {
202
203 std::size_t position = new_peptide.find(wildcard);
204 if(position == std::string::npos)
205 {
206 continue;
207 }
208 else
209 {
210 p_peptide_variant_list->clear();
211 /*
212 new_peptide[position] = 'A';
213 p_peptide_variant_list->push_back(new_peptide);
214 break;
215 */
216
217 const std::vector<char> *p_x_replace_wildcard = nullptr;
218 if(wildcard == 'X')
219 {
220 p_x_replace_wildcard = &m_wildCardX;
221 }
222 else if(wildcard == 'B')
223 {
224 p_x_replace_wildcard = &m_wildCardB;
225 }
226 else if(wildcard == 'Z')
227 {
228 p_x_replace_wildcard = &m_wildCardZ;
229 }
230
231 if(p_x_replace_wildcard != nullptr)
232 {
233 for(std::string orig_peptide : old_peptide_variant_list)
234 {
235 for(char replace : *p_x_replace_wildcard)
236 {
237 orig_peptide[position] = replace;
238 p_peptide_variant_list->push_back(orig_peptide);
239 }
240 }
241 }
242 else
243 {
244 throw ExceptionNotPossible(QObject::tr("x_replace_wildcard is empty"));
245 }
246 // new_peptide[position] = 'A';
247 // p_peptide_variant_list->push_back(new_peptide);
248 // p_peptide_variant_list->resize(1);
249 // std::cerr << "Enzyme::replaceWildcards begin
250 // p_peptide_variant_list.size()=" << p_peptide_variant_list->size()
251 // <<
252 // endl;
253 break;
254 }
255 }
256 std::vector<std::string>().swap(
257 old_peptide_variant_list); // clear old_peptide_variant_list reallocating
258
259
260 qDebug() << "Enzyme::replaceWildcards end " << new_peptide.c_str();
261}

References m_wildCardB, m_wildCardX, and m_wildCardZ.

Referenced by sanityCheck().

◆ sanityCheck()

void pappso::Enzyme::sanityCheck ( EnzymeProductInterface & enzyme_product,
std::int8_t sequence_database_id,
const ProteinSp & protein_sp,
bool is_decoy,
const PeptideStr & peptide,
unsigned int start,
bool is_nter,
unsigned int missed_cleavage_number,
bool semi_enzyme ) const
private

Definition at line 271 of file enzyme.cpp.

280{
281 if(peptide.contains('X') || peptide.contains('B') || peptide.contains('Z'))
282 {
283
284 std::vector<std::string> peptide_variant_list;
285 peptide_variant_list.push_back(peptide.toStdString());
286
287 while((peptide_variant_list.at(0).find('X') != std::string::npos) ||
288 (peptide_variant_list.at(0).find('B') != std::string::npos) ||
289 (peptide_variant_list.at(0).find('Z') != std::string::npos))
290 {
291 replaceWildcards(&peptide_variant_list);
292 if(peptide_variant_list.size() > m_maxPeptideVariantListSize)
293 {
294 peptide_variant_list.resize(m_maxPeptideVariantListSize);
295 peptide_variant_list.shrink_to_fit();
296 }
297 }
298
299 // peptide_variant_list.resize(2);
300 if(m_takeOnlyFirstWildcard)
301 {
302 enzyme_product.setPeptide(sequence_database_id,
303 protein_sp,
304 is_decoy,
305 QString(peptide_variant_list.at(0).c_str()),
306 start,
307 is_nter,
308 missed_cleavage_number,
309 semi_enzyme);
310 }
311 else
312 {
313 std::string peptide_variant = peptide_variant_list.back();
314 while(peptide_variant_list.size() > 0)
315 {
316 enzyme_product.setPeptide(sequence_database_id,
317 protein_sp,
318 is_decoy,
319 QString(peptide_variant.c_str()),
320 start,
321 is_nter,
322 missed_cleavage_number,
323 semi_enzyme);
324 peptide_variant_list.pop_back();
325 if(peptide_variant_list.size() > 0)
326 {
327 peptide_variant = peptide_variant_list.back();
328 }
329 }
330 }
331 std::vector<std::string>().swap(
332 peptide_variant_list); // clear peptide_variant_list reallocating
333 }
334 else
335 {
336 enzyme_product.setPeptide(sequence_database_id,
337 protein_sp,
338 is_decoy,
339 peptide,
340 start,
341 is_nter,
342 missed_cleavage_number,
343 semi_enzyme);
344 }
345}
std::size_t m_maxPeptideVariantListSize
Definition enzyme.h:93
void replaceWildcards(std::vector< std::string > *p_peptide_variant_list) const
Definition enzyme.cpp:192
bool m_takeOnlyFirstWildcard
Definition enzyme.h:91

References m_maxPeptideVariantListSize, m_takeOnlyFirstWildcard, replaceWildcards(), and pappso::EnzymeProductInterface::setPeptide().

Referenced by eat().

◆ setMaxPeptideVariantListSize()

void pappso::Enzyme::setMaxPeptideVariantListSize ( std::size_t max_peptide_variant_list_size)

if there are wildcards in the protein sequence : restrict the number of possible peptide sequences

Parameters
max_peptide_variant_list_size: maximum number of peptide variants (default is 100)

Definition at line 81 of file enzyme.cpp.

82{
83 m_maxPeptideVariantListSize = max_peptide_variant_list_size;
84}

References m_maxPeptideVariantListSize.

◆ setMiscleavage()

void pappso::Enzyme::setMiscleavage ( unsigned int miscleavage)

sets the maximum number of missed cleavages allowed in the digestion

Parameters
miscleavage: maximum number of missed cleavages to allow (default is 0)

Definition at line 71 of file enzyme.cpp.

72{
73 m_miscleavage = miscleavage;
74}

References m_miscleavage.

◆ setTakeOnlyFirstWildcard()

void pappso::Enzyme::setTakeOnlyFirstWildcard ( bool take_only_first_wildcard)

if true, take only the first possibility when the sequence contains X, B or Z wildcards

Parameters
take_only_first_wildcard: if true, take only the first possibility when there are X, B or Z wildcards in the sequence

Definition at line 264 of file enzyme.cpp.

265{
266 m_takeOnlyFirstWildcard = take_only_first_wildcard;
267}

References m_takeOnlyFirstWildcard.

Member Data Documentation

◆ m_maxPeptideVariantListSize

std::size_t pappso::Enzyme::m_maxPeptideVariantListSize = 100
private

Definition at line 93 of file enzyme.h.

Referenced by sanityCheck(), and setMaxPeptideVariantListSize().

◆ m_miscleavage

unsigned int pappso::Enzyme::m_miscleavage = 0
private

Definition at line 90 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), eat(), getMiscleavage(), and setMiscleavage().

◆ m_recognitionSite

QRegularExpression pappso::Enzyme::m_recognitionSite
private

example with a kinase == [K,R]

Definition at line 89 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), eat(), and getQRegExpRecognitionSite().

◆ m_takeOnlyFirstWildcard

bool pappso::Enzyme::m_takeOnlyFirstWildcard = false
private

Definition at line 91 of file enzyme.h.

Referenced by sanityCheck(), and setTakeOnlyFirstWildcard().

◆ m_wildCardB

std::vector<char> pappso::Enzyme::m_wildCardB
private

Definition at line 97 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), and replaceWildcards().

◆ m_wildCardX

std::vector<char> pappso::Enzyme::m_wildCardX
private

Definition at line 96 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), and replaceWildcards().

◆ m_wildCardZ

std::vector<char> pappso::Enzyme::m_wildCardZ
private

Definition at line 98 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), and replaceWildcards().


The documentation for this class was generated from the following files: