libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
proteinpresenceabsencematrix.cpp
Go to the documentation of this file.
1/**
2 * \file protein/proteinpresenceabsencematrix.cpp
3 * \date 07/02/2024
4 * \author Olivier Langella
5 * \brief presence/absence matrix of amino acid code along the protein sequence
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of PAPPSOms++.
13 *
14 * PAPPSOms++ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * PAPPSOms++ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
30#include "proteinintegercode.h"
31
35
39
40
46
54
55
56const boost::numeric::ublas::matrix<
62
63void
65 const pappso::ProteinIntegerCode &coded_protein,
66 const std::vector<uint32_t> &code_list_from_spectrum)
67{
68 const std::vector<std::uint8_t> &seq_aa_code = coded_protein.getSeqAaCode();
69
70 m_presenceAbsenceMatrix.resize(seq_aa_code.size(), 5);
71 std::vector<std::uint8_t>::const_iterator it_aa = seq_aa_code.begin();
72 auto it_couple = coded_protein.getPeptideCodedFragment(2).begin();
73 auto it_trio = coded_protein.getPeptideCodedFragment(3).begin();
74 auto it_quatro = coded_protein.getPeptideCodedFragment(4).begin();
75 auto it_cinqo = coded_protein.getPeptideCodedFragment(5).begin();
76
77 for(std::size_t i = 0; i < seq_aa_code.size(); i++)
78 {
79 if(std::binary_search(
80 code_list_from_spectrum.begin(), code_list_from_spectrum.end(), (std::uint32_t)(*it_aa)))
81 {
82 // presence
84 }
85 else
86 {
88 }
89 if(std::binary_search(
90 code_list_from_spectrum.begin(), code_list_from_spectrum.end(), *it_couple))
91 {
92 // presence
94 }
95 else
96 {
98 }
99 if(std::binary_search(
100 code_list_from_spectrum.begin(), code_list_from_spectrum.end(), *it_trio))
101 {
102 // presence
104 }
105 else
106 {
108 }
109 if(std::binary_search(
110 code_list_from_spectrum.begin(), code_list_from_spectrum.end(), *it_quatro))
111 {
112 // presence
114 }
115 else
116 {
118 }
119 if(std::binary_search(
120 code_list_from_spectrum.begin(), code_list_from_spectrum.end(), *it_cinqo))
121 {
122 // presence
124 }
125 else
126 {
128 }
129 it_aa++;
130 it_couple++;
131 it_trio++;
132 it_quatro++;
133 it_cinqo++;
134 }
135}
136
137
138std::vector<double>
140{
141
142 std::vector<double> convolution_score;
143
144 for(std::size_t ipos = 0; ipos < m_presenceAbsenceMatrix.size1(); ipos++)
145 {
146 convolution_score.push_back(convolutionKernel(ipos));
147 }
148
149 return convolution_score;
150}
151
152double
154 std::size_t aa_fragment_size) const
155{
156 if(m_presenceAbsenceMatrix(seq_position, aa_fragment_size) ==
158 {
159 switch(aa_fragment_size)
160 {
161 case 0:
162 return 1;
163 case 1:
164 return 3;
165 case 2:
166 return 6;
167 case 3:
168 return 8;
169 case 4:
170 return 10;
171 default:
172 break;
173 }
174 }
175 else
176 {
177 // absent
178 switch(aa_fragment_size)
179 {
180 case 0:
181 return 0.1;
182 case 1:
183 return 0.3;
184 case 2:
185 return 0.6;
186 case 3:
187 return 0.8;
188 case 4:
189 return 1;
190 default:
191 break;
192 }
193 }
194 return 0.1;
195}
196
197double
199{
200 double score = 0;
201
202 std::size_t size_seq = m_presenceAbsenceMatrix.size1();
203 // single :
204 double single_score = 0;
205 std::size_t endpos = std::min(position + 5, size_seq);
206 for(std::size_t ipos = position; ipos < endpos; ipos++)
207 {
208 single_score += getScore(ipos, 0);
209 }
210
211 // duo
212 double duo_score = 0;
213 endpos = std::min(position + 4, size_seq);
214 for(std::size_t ipos = position; ipos < endpos; ipos++)
215 {
216 duo_score += getScore(ipos, 1);
217 }
218
219
220 // trio
221 double trio_score = 0;
222 endpos = std::min(position + 3, size_seq);
223 for(std::size_t ipos = position; ipos < endpos; ipos++)
224 {
225 trio_score += getScore(ipos, 2);
226 }
227
228
229 // quatro
230 double quatro_score = 0;
231 endpos = std::min(position + 2, size_seq);
232 for(std::size_t ipos = position; ipos < endpos; ipos++)
233 {
234 quatro_score += getScore(ipos, 3);
235 }
236
237 // cinqo
238 score += getScore(position, 4);
239 return score * single_score * duo_score * trio_score * quatro_score;
240}
const std::vector< std::uint32_t > & getPeptideCodedFragment(std::size_t size) const
const std::vector< std::uint8_t > & getSeqAaCode() const
std::vector< double > convolution() const
process convolution of spectrum code list along protein sequence
double getScore(std::size_t seq_position, std::size_t aa_fragment_size) const
ProteinPresenceAbsenceMatrix & operator=(const ProteinPresenceAbsenceMatrix &other)
double convolutionKernel(std::size_t position) const
void fillMatrix(const pappso::ProteinIntegerCode &coded_protein, const std::vector< uint32_t > &code_list_from_spectrum)
boost::numeric::ublas::matrix< ProteinPresenceAbsenceMatrixElement > m_presenceAbsenceMatrix
const boost::numeric::ublas::matrix< pappso::ProteinPresenceAbsenceMatrix::ProteinPresenceAbsenceMatrixElement > & getPresenceAbsenceMatrix() const
transform protein amino acid sequence into vectors of amino acid codes
presence/absence matrix of amino acid code along the protein sequence