67 double sum_of_elems = std::accumulate(
shifts.begin(),
shifts.end(), 0);
88 key_cell_init_first.
n_row = 0;
89 key_cell_init_first.
score = 0;
91 key_cell_init_first.
tree_id = 0;
95const std::vector<pappso::specpeptidoms::KeyCell> &
105 std::size_t sequence_length = protein_ptr->size();
109 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
130 key_cell_init.
n_row = 0;
147 const std::size_t beginning,
148 const std::size_t length)
153 const QString &protein_seq = protein_ptr->
getSequence();
155 if((qsizetype)(beginning + length) <= protein_seq.size())
161 length2 = protein_seq.size() - beginning;
165 QString sequence_str = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
170 std::vector<AaPosition> aa_positions;
174 m_scenario.reserve(length2 + 1, spectrum.size());
188 for(std::size_t iter =
m_interest_cells.size(); iter < spectrum.size(); iter++)
195 for(std::size_t row_number = 1; row_number <= length2; row_number++)
198 qDebug() <<
"row_number - 1=" << row_number - 1 <<
" sequence.size()=" << sequence.size();
237 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
238 if(corrections.size() > 0)
244 for(
auto peaks_to_remove : corrections)
251 protein_seq.size() - beginning);
270 QObject::tr(
"SemiGlobalAlignment::preciseAlign failed :\n%1").arg(err.
qwhat()));
278 std::vector<std::size_t> &peaks_to_remove,
281 std::vector<AaPosition> aa_positions;
283 std::vector<std::size_t> final_peaks_to_remove;
287 key_cell_init.
n_row = 0;
297 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
299 qDebug() << row_number - 1 <<
" " << sequence.size();
300 qDebug() <<
"sequence[row_number - 1].aa" << (char)sequence[row_number - 1].aa;
302 aa_positions = spectrum.
getAaPositions(sequence[row_number - 1].code, peaks_to_remove);
339 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
340 if(corrections.size() > 0)
342 for(
auto new_peaks_to_remove : corrections)
344 final_peaks_to_remove = std::vector<std::size_t>(new_peaks_to_remove);
345 final_peaks_to_remove.insert(
346 final_peaks_to_remove.end(), peaks_to_remove.begin(), peaks_to_remove.end());
347 correctAlign(sequence, protein_ptr, spectrum, final_peaks_to_remove, offset);
361 std::size_t beginning,
363 const std::vector<double> &shifts)
368 for(
double precursor_mass_error : shifts)
370 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
371 preciseAlign(corrected_spectrum, protein_ptr, beginning, length);
387 const std::size_t row_number,
388 const std::vector<AaPosition> &aa_positions,
390 const bool fast_align,
396 int score_found, score_shift, best_score, alt_score, tree_id;
398 std::size_t best_column,
shift, beginning, missing_aas, length, perfect_shift_origin;
399 KeyCell *current_cell_ptr, *tested_cell_ptr;
400 AlignType alignment_type, temp_align_type;
402 double smallest_aa_mass =
m_aaCode.getMass((std::uint8_t)1);
412 qDebug() << (char)sequence.at(row_number - 2).aa;
413 qDebug() <<
"condition" << condition;
414 condition += 2 << sequence.at(row_number - 2).code;
416 qDebug() <<
"condition" << condition;
420 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
421 aa_position != aa_positions.end();
426 if(((condition & aa_position->condition) != 0) ||
430 if(spectrum.
peakType(aa_position->r_peak) ==
443 best_column = aa_position->r_peak;
444 best_score = current_cell_ptr->
score + (row_number - current_cell_ptr->
n_row) *
447 tree_id = current_cell_ptr->
tree_id;
451 if(aa_position->l_support)
454 if(aa_position->l_peak == 0)
456 alt_score = tested_cell_ptr->
score + score_found;
460 if(tested_cell_ptr->
n_row == row_number - 1)
462 alt_score = tested_cell_ptr->
score +
463 (row_number - tested_cell_ptr->
n_row - 1) *
469 alt_score = tested_cell_ptr->
score +
470 (row_number - tested_cell_ptr->
n_row - 1) *
475 if(alt_score >= best_score)
478 best_score = alt_score;
479 best_column = aa_position->l_peak;
499 tree_id = tested_cell_ptr->
tree_id;
506 if(aa_position->l_support)
520 tested_cell_ptr->
n_row,
522 aa_position->l_peak -
shift,
523 aa_position->r_peak) &&
526 alt_score = tested_cell_ptr->
score +
527 (row_number - tested_cell_ptr->
n_row - 1) *
534 alt_score = tested_cell_ptr->
score +
535 (row_number - tested_cell_ptr->
n_row - 1) *
540 if(alt_score > best_score)
542 alignment_type = temp_align_type;
543 best_score = alt_score;
544 best_column = aa_position->l_peak -
shift;
546 tree_id = tested_cell_ptr->
tree_id;
557 perfect_shift_origin =
562 perfect_shift_origin = row_number;
565 if(perfect_shift_origin != row_number)
567 alt_score = tested_cell_ptr->
score + score_found;
572 alt_score = tested_cell_ptr->
score + score_shift;
577 if(alt_score > best_score)
579 alignment_type = temp_align_type;
580 best_score = alt_score;
583 std::floor(spectrum.
getMZShift(0, aa_position->l_peak) / smallest_aa_mass);
602 if(best_column != aa_position->r_peak)
605 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
612 row_number - beginning + 1 +
613 std::ceil(spectrum.
getMissingMass(aa_position->r_peak) / smallest_aa_mass) +
616 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein_ptr);
626 perfect_shift_origin,
657 catch(
const std::exception &error)
660 QObject::tr(
"updateAlignmentMatrix failed std::exception :\n%1 %2")
667 QObject::tr(
"updateAlignmentMatrix failed :\n%1").arg(err.
qwhat()));
675 const std::size_t origin_row,
676 const std::size_t current_row,
677 const std::size_t l_peak,
678 const std::size_t r_peak)
const
682 double missing_mass = 0;
683 auto it_end = sequence.begin() + current_row;
684 for(
auto iter = sequence.begin() + origin_row; (iter != it_end) && (iter != sequence.end());
687 missing_mass += iter->mass;
699 catch(
const std::exception &error)
702 QObject::tr(
"perfectShiftPossible failed std exception:\n%1").arg(error.what()));
707 QObject::tr(
"perfectShiftPossible failed :\n%1").arg(err.
qwhat()));
715 const std::size_t current_row,
716 const std::size_t r_peak)
const
718 std::size_t perfect_shift_origin = current_row;
719 double missing_mass = spectrum.
getMZShift(0, r_peak);
722 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.
contains(aa_mass))
724 aa_mass += sequence.at(perfect_shift_origin - 1)
726 perfect_shift_origin--;
730 return perfect_shift_origin;
743 std::size_t end_peak)
const
747 std::size_t perfect_shift_end = end_row + 1;
751 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
754 aa_mass += sequence.at(perfect_shift_end - 1)
760 return perfect_shift_end - 1;
770 QObject::tr(
"perfectShiftPossibleEnd failed :\n%1").arg(err.
qwhat()));
800 std::size_t previous_row;
801 std::size_t previous_column = 0;
802 std::size_t perfect_shift_end;
803 std::pair<std::vector<ScenarioCell>,
int> best_alignment =
m_scenario.getBestAlignment();
805 std::vector<SpOMSAa> skipped_aa;
808 if(best_alignment.first.front().previous_row > offset)
811 QString(
"best_alignment.first.front().previous_row > offset %1 %2")
813 .arg(best_alignment.first.front().previous_row));
815 if(best_alignment.first.back().previous_row > offset)
818 QString(
"best_alignment.first.back().previous_row > offset %1 %2")
820 .arg(best_alignment.first.back().previous_row));
822 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
823 m_best_alignment.end = offset - best_alignment.first.back().previous_row - 1;
829 for(
auto cell : best_alignment.first)
831 switch(cell.alignment_type)
834 aa_model.
m_aminoAcid = sequence.at(previous_row - 1).aa;
838 if(previous_row > cell.previous_row + 1)
840 skipped_mass = sequence.at(previous_row - 1)
843 sequence.
sliced(cell.previous_row, previous_row - cell.previous_row - 1);
846 for(
auto aa : skipped_aa)
850 skipped_mass += aa.mass;
853 spectrum.
getMZShift(cell.previous_column, previous_column) - skipped_mass;
858 aa_model.
m_aminoAcid = sequence.at(previous_row - 1).aa;
865 aa_model.
m_aminoAcid = sequence.at(previous_row - 1).aa;
872 spectrum.
getMZShift(cell.previous_column, previous_column) -
873 sequence.at(previous_row - 1).mass);
877 skipped_aa = sequence.
sliced(cell.previous_row, previous_row - cell.previous_row);
878 std::reverse(skipped_aa.begin(), skipped_aa.end());
881 for(
auto aa : skipped_aa)
888 previous_row = cell.previous_row;
889 previous_column = cell.previous_column;
893 previous_row = cell.previous_row;
894 previous_column = cell.previous_column;
914 switch(spectrum.at(peak).type)
917 qDebug() << peak <<
"native";
921 qDebug() << peak <<
"both";
925 qDebug() << peak <<
"synthetic";
928 qDebug() << peak <<
"symmetric";
940 best_alignment.first.front().previous_row,
942 if(perfect_shift_end != best_alignment.first.front().previous_row)
945 sequence.
sliced(best_alignment.first.front().previous_row,
946 perfect_shift_end - best_alignment.first.front().previous_row);
949 for(
auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
989 const QString &protein_seq)
992 if(alignment.
end > (std::size_t)protein_seq.size())
996 .arg(protein_seq.size()));
998 std::vector<double> potential_mass_errors(alignment.
shifts);
1004 while(
shift > 0 && index > 0)
1006 potential_mass_errors.push_back(
shift);
1010 protein_seq.at(index).toLatin1());
1017 index = alignment.
end + 1;
1018 while(
shift > 0 && index < (std::size_t)protein_seq.size())
1020 potential_mass_errors.push_back(
shift);
1021 qDebug() <<
" shift=" <<
shift <<
" index=" << index
1022 <<
" letter=" << protein_seq.at(index).toLatin1();
1024 protein_seq.at(index).toLatin1());
1028 return potential_mass_errors;
1034 std::size_t minimum_aa_diversity)
1036 qDebug() <<
"sequence=" << sequence <<
" window=" << window
1037 <<
" minimum_aa_diversity=" << minimum_aa_diversity;
1038 if(sequence.size() < window)
1040 auto it_begin = sequence.begin();
1041 auto it_end = sequence.begin() + window;
1042 QString window_copy(sequence.mid(0, window));
1043 while(it_end != sequence.end())
1045 std::partial_sort_copy(it_begin, it_end, window_copy.begin(), window_copy.end());
1047 qDebug() << window_copy;
1048 std::size_t uniqueCount =
1049 std::unique(window_copy.begin(), window_copy.end()) - window_copy.begin();
1051 qDebug() << uniqueCount;
1052 if(uniqueCount < minimum_aa_diversity)
1060const std::vector<pappso::specpeptidoms::KeyCell> &
1063 const std::size_t row_number,
1064 const std::vector<AaPosition> &aa_positions,
1066 const bool fast_align,
collection of integer codes for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
pappso_double getMass() const override
bool contains(pappso_double) const
virtual const QString & qwhat() const
std::vector< std::vector< std::size_t > > getPeaks() const
void addPeaks(std::size_t peak1, std::size_t peak2)
LocationSaver m_location_saver
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
Scenario getScenario() const
Returns a copy of m_scenario.
std::size_t perfectShiftPossibleEnd(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
void updateAlignmentMatrix(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void postProcessingAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
void preciseAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
Alignment m_best_alignment
void correctAlign(const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
const std::vector< KeyCell > & oneAlignStep(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
function made for testing the fastAlign process: processes one line and returns the alignment matrix
const ScoreValues & m_scorevalues
Alignment m_best_post_processed_alignment
const std::vector< KeyCell > & getInterestCells() const
convenience function for debugging purposes
Alignment m_best_corrected_alignment
bool perfectShiftPossible(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
void fastAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
static bool checkSequenceDiversity(const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
check that the sequence has a minimum amino acid diversity
std::size_t perfectShiftPossibleFrom0(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak....
std::vector< KeyCell > m_interest_cells
void initFastAlign(const SpOMSSpectrum &spectrum)
function made for testing the fastAlign process: initializes the variables for alignment
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
void saveBestAlignment(const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
SemiGlobalAlignment(const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
pappso::PrecisionPtr m_precision_ptr
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
const QString & getSequence() const
std::vector< SpOMSAa > sliced(std::size_t position, std::size_t length) const
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::size_t getComplementaryPeak(std::size_t peak) const
const std::vector< AaPosition > & getAaPositions(std::uint8_t aa_code) const
Returns the list of aa_positions for a given amino acid code.
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
double getPrecursorMass() const
@ synthetic
does not correspond to an existing peak; used for computational purposes
@ both
both the ion and its complement exist in the original spectrum
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
const uint ALIGNMENT_SURPLUS(5)
const int MIN_ALIGNMENT_SCORE(15)
const uint TOL_PEAKS_MISSING(4)
const uint TOL_PEAKS_MISSING_FIRST_COLUMN(5)
const PrecisionBase * PrecisionPtr
void reset()
reinitialize to default score_values
QString getPeptideString(const QString &protein_sequence) const
convenient function to get peptide sequence from location
double getNonAlignedMass() const
convenient function to get the remaining non explained mass shift
std::vector< double > shifts
std::vector< std::size_t > peaks
std::size_t getPositionStart() const
get position of start on the protein sequence
PeptideModel m_peptideModel