/// <summary>
/// Creates a searcher from the supplied settings. Each argument is copied unchanged into the
/// instance member of the same name; this constructor performs no validation and starts no work.
/// </summary>
public DatabaseSearcher(
    IList<string> dataFilepaths,
    int minimumAssumedPrecursorChargeState,
    int maximumAssumedPrecursorChargeState,
    double absoluteThreshold,
    double relativeThresholdPercent,
    int maximumNumberOfPeaks,
    bool assignChargeStates,
    bool deisotope,
    string proteinFastaDatabaseFilepath,
    bool onTheFlyDecoys,
    Protease protease,
    int maximumMissedCleavages,
    InitiatorMethionineBehavior initiatorMethionineBehavior,
    IEnumerable<Modification> fixedModifications,
    IEnumerable<Modification> variableModifications,
    int maximumVariableModificationIsoforms,
    MassTolerance precursorMassTolerance,
    MassType precursorMassType,
    bool precursorMonoisotopicPeakCorrection,
    int minimumPrecursorMonoisotopicPeakOffset,
    int maximumPrecursorMonoisotopicPeakOffset,
    MassTolerance productMassTolerance,
    MassType productMassType,
    double maximumFalseDiscoveryRate,
    bool considerModifiedFormsAsUniquePeptides,
    int maximumThreads,
    bool minimizeMemoryUsage,
    string outputFolder)
{
    // Input data files and spectrum pre-processing options.
    this.dataFilepaths = dataFilepaths;
    this.minimumAssumedPrecursorChargeState = minimumAssumedPrecursorChargeState;
    this.maximumAssumedPrecursorChargeState = maximumAssumedPrecursorChargeState;
    this.absoluteThreshold = absoluteThreshold;
    this.relativeThresholdPercent = relativeThresholdPercent;
    this.maximumNumberOfPeaks = maximumNumberOfPeaks;
    this.assignChargeStates = assignChargeStates;
    this.deisotope = deisotope;

    // Protein database and digestion options.
    this.proteinFastaDatabaseFilepath = proteinFastaDatabaseFilepath;
    this.onTheFlyDecoys = onTheFlyDecoys;
    this.protease = protease;
    this.maximumMissedCleavages = maximumMissedCleavages;
    this.initiatorMethionineBehavior = initiatorMethionineBehavior;

    // Modification options.
    this.fixedModifications = fixedModifications;
    this.variableModifications = variableModifications;
    this.maximumVariableModificationIsoforms = maximumVariableModificationIsoforms;

    // Precursor matching options.
    this.precursorMassTolerance = precursorMassTolerance;
    this.precursorMassType = precursorMassType;
    this.precursorMonoisotopicPeakCorrection = precursorMonoisotopicPeakCorrection;
    this.minimumPrecursorMonoisotopicPeakOffset = minimumPrecursorMonoisotopicPeakOffset;
    this.maximumPrecursorMonoisotopicPeakOffset = maximumPrecursorMonoisotopicPeakOffset;

    // Product (fragment) matching options.
    this.productMassTolerance = productMassTolerance;
    this.productMassType = productMassType;

    // Result filtering, execution and output options.
    this.maximumFalseDiscoveryRate = maximumFalseDiscoveryRate;
    this.considerModifiedFormsAsUniquePeptides = considerModifiedFormsAsUniquePeptides;
    this.maximumThreads = maximumThreads;
    this.minimizeMemoryUsage = minimizeMemoryUsage;
    this.outputFolder = outputFolder;
}
/// <summary>
/// Command-line entry point. Reads the search settings from <paramref name="args"/>, applying the
/// defaults coded below for every optional setting, constructs a <c>DatabaseSearcher</c>, subscribes
/// the console event handlers and runs the search synchronously.
/// </summary>
/// <remarks>
/// When no arguments are given only the product name/version banner is printed. When a required
/// argument is missing or a variable modification name cannot be resolved, a message is written to
/// standard error, the process exit code is set to 1 and no search is started.
/// </remarks>
/// <param name="args">Raw command-line arguments, interpreted by <c>Arguments</c>.</param>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine(Program.GetProductNameAndVersion() + " USAGE");
        return;
    }

    Arguments arguments = new Arguments(args);

    // "d" and "db" have no defaults; fail with a clear message instead of a NullReferenceException.
    if (arguments["d"] == null)
    {
        ReportArgumentError("Missing required argument: d (comma-separated data file paths)");
        return;
    }
    if (arguments["db"] == null)
    {
        ReportArgumentError("Missing required argument: db (protein database file path)");
        return;
    }

    List <string> data = new List <string>(arguments["d"].Split(','));

    int min_assumed_precursor_charge_state = ParseInt32Argument(arguments["minprecz"], 2);
    int max_assumed_precursor_charge_state = ParseInt32Argument(arguments["maxprecz"], 4);
    double abs_threshold = ParseDoubleArgument(arguments["at"], -1.0);
    double rel_threshold_percent = ParseDoubleArgument(arguments["rt"], -1.0);
    int max_peaks = ParseInt32Argument(arguments["mp"], 400);
    bool assign_charge_states = ParseBooleanArgument(arguments["acs"], true);
    bool deisotope = ParseBooleanArgument(arguments["di"], true);

    string database = arguments["db"];
    bool is_xml_database = Path.GetExtension(database).Equals(".xml", StringComparison.InvariantCultureIgnoreCase);

    // Modifications annotated in an XML database are searched as variable modifications unless "noup" is true.
    Dictionary <string, Modification> known_variable_modifications = null;
    HashSet <Modification> variable_mods = new HashSet <Modification>();
    if (is_xml_database && !ParseBooleanArgument(arguments["noup"], false))
    {
        known_variable_modifications = ProteomeDatabaseReader.ReadUniProtXmlModifications(database);
        variable_mods.UnionWith(known_variable_modifications.Values);
    }

    // Unless "ad" is given explicitly, decoys are appended for XML databases and for
    // databases that do not already contain decoy proteins.
    bool append_decoys;
    if (arguments["ad"] != null)
    {
        append_decoys = bool.Parse(arguments["ad"]);
    }
    else
    {
        append_decoys = is_xml_database || !ProteomeDatabaseReader.HasDecoyProteins(database);
    }

    ProteaseDictionary proteases = ProteaseDictionary.Instance;
    Protease protease = proteases["trypsin (no proline rule)"];
    if (arguments["p"] != null)
    {
        protease = proteases[arguments["p"]];
    }
    int max_missed_cleavages = ParseInt32Argument(arguments["mmc"], 2);
    InitiatorMethionineBehavior initiator_methionine_behavior = ParseEnumArgument(arguments["imb"], InitiatorMethionineBehavior.Variable);

    ModificationDictionary mods = ModificationDictionary.Instance;
    List <Modification> fixed_mods = new List <Modification>();
    if (arguments["fm"] != null)
    {
        foreach (string fixed_mod in arguments["fm"].Split(';'))
        {
            fixed_mods.Add(mods[fixed_mod]);
        }
    }
    if (arguments["vm"] != null)
    {
        foreach (string variable_mod in arguments["vm"].Split(';'))
        {
            // Look in the built-in dictionary first, then in the modifications read from the
            // database. The latter is null when none were read, and a name found in neither
            // place must not be added to the set as a null modification.
            Modification mod;
            if (!mods.TryGetValue(variable_mod, out mod)
                && (known_variable_modifications == null || !known_variable_modifications.TryGetValue(variable_mod, out mod)))
            {
                ReportArgumentError("Unknown variable modification: " + variable_mod);
                return;
            }
            variable_mods.Add(mod);
        }
    }
    int max_variable_mod_isoforms_per_peptide = ParseInt32Argument(arguments["mvmi"], 1024);

    double precursor_mass_tolerance_value = ParseDoubleArgument(arguments["precmtv"], 2.1);
    MassToleranceUnits precursor_mass_tolerance_units = ParseEnumArgument(arguments["precmtu"], MassToleranceUnits.Da);
    MassTolerance precursor_mass_tolerance = new MassTolerance(precursor_mass_tolerance_value, precursor_mass_tolerance_units);
    MassType precursor_mass_type = ParseEnumArgument(arguments["precmt"], MassType.Monoisotopic);

    List <double> accepted_precursor_mass_errors = new List <double>();
    if (!string.IsNullOrEmpty(arguments["apme"]))
    {
        foreach (string accepted_precursor_mass_error in arguments["apme"].Split(';'))
        {
            accepted_precursor_mass_errors.Add(double.Parse(accepted_precursor_mass_error, CultureInfo.InvariantCulture));
        }
    }
    else
    {
        accepted_precursor_mass_errors.Add(0.0);
    }

    double product_mass_tolerance_value = ParseDoubleArgument(arguments["prodmtv"], 0.015);
    MassToleranceUnits product_mass_tolerance_units = ParseEnumArgument(arguments["prodmtu"], MassToleranceUnits.Da);
    MassTolerance product_mass_tolerance = new MassTolerance(product_mass_tolerance_value, product_mass_tolerance_units);
    MassType product_mass_type = ParseEnumArgument(arguments["prodmt"], MassType.Monoisotopic);

    // "fdr" is given as a percentage on the command line and converted to a fraction here.
    double max_fdr = 0.01;
    if (arguments["fdr"] != null)
    {
        max_fdr = double.Parse(arguments["fdr"], CultureInfo.InvariantCulture) / 100.0;
    }

    bool consider_mods_unique = ParseBooleanArgument(arguments["cmu"], false);
    int max_threads = ParseInt32Argument(arguments["mt"], Environment.ProcessorCount);
    bool minimize_memory_usage = ParseBooleanArgument(arguments["mmu"], false);

    string output_folder = Environment.CurrentDirectory;
    if (arguments["o"] != null)
    {
        output_folder = arguments["o"];
    }

    DatabaseSearcher database_searcher = new DatabaseSearcher(data,
        min_assumed_precursor_charge_state, max_assumed_precursor_charge_state,
        abs_threshold, rel_threshold_percent, max_peaks,
        assign_charge_states, deisotope,
        database, append_decoys,
        protease, max_missed_cleavages, initiator_methionine_behavior,
        fixed_mods, variable_mods, max_variable_mod_isoforms_per_peptide,
        precursor_mass_tolerance, precursor_mass_type,
        accepted_precursor_mass_errors,
        product_mass_tolerance, product_mass_type,
        max_fdr, consider_mods_unique,
        max_threads, minimize_memory_usage,
        output_folder);

    database_searcher.Starting += HandleStarting;
    database_searcher.StartingFile += HandleStartingFile;
    database_searcher.UpdateStatus += HandleUpdateStatus;
    database_searcher.UpdateProgress += HandleUpdateProgress;
    database_searcher.ThrowException += HandleThrowException;
    database_searcher.FinishedFile += HandleFinishedFile;
    database_searcher.Finished += HandleFinished;

    database_searcher.Search();
}

// Writes a command-line usage error to standard error and marks the process as failed.
private static void ReportArgumentError(string message)
{
    Console.Error.WriteLine(message);
    Environment.ExitCode = 1;
}

// Parses an integer argument with the invariant culture, or returns the default when the argument is absent.
private static int ParseInt32Argument(string text, int defaultValue)
{
    return text != null ? int.Parse(text, CultureInfo.InvariantCulture) : defaultValue;
}

// Parses a floating-point argument with the invariant culture, or returns the default when the argument is absent.
private static double ParseDoubleArgument(string text, double defaultValue)
{
    return text != null ? double.Parse(text, CultureInfo.InvariantCulture) : defaultValue;
}

// Parses a true/false argument, or returns the default when the argument is absent.
private static bool ParseBooleanArgument(string text, bool defaultValue)
{
    return text != null ? bool.Parse(text) : defaultValue;
}

// Parses an enum argument case-insensitively, or returns the default when the argument is absent.
private static TEnum ParseEnumArgument<TEnum>(string text, TEnum defaultValue) where TEnum : struct
{
    return text != null ? (TEnum)Enum.Parse(typeof(TEnum), text, true) : defaultValue;
}
/// <summary>
/// Groups the proteins that can explain the peptide-spectrum matches scoring at or above
/// <paramref name="morpheusScoreThreshold"/>, merges groups with identical peptide sets and equal
/// summed score, and drops groups whose peptides are all covered by higher-ranked groups.
/// </summary>
/// <param name="peptideSpectrumMatches">Matches to explain; enumerated twice.</param>
/// <param name="morpheusScoreThreshold">Matches scoring below this value are ignored.</param>
/// <param name="proteinFastaDatabase">Open protein database stream passed to <c>ProteomeDatabaseReader.ReadProteins</c>.</param>
/// <param name="onTheFlyDecoys">Forwarded to <c>ProteomeDatabaseReader.ReadProteins</c>.</param>
/// <param name="knownVariableModifications">Forwarded to the reader only when known-modification matching is required.</param>
/// <param name="protease">Protease used to digest every protein read.</param>
/// <param name="maximumMissedCleavages">Forwarded to <c>Protein.Digest</c>.</param>
/// <param name="initiatorMethionineBehavior">Forwarded to <c>Protein.Digest</c>.</param>
/// <param name="maximumThreads">Maximum degree of parallelism for the digestion pass.</param>
/// <returns>The remaining protein groups, ordered by the descending summed-score comparison.</returns>
public static List <ProteinGroup> ApplyProteinParsimony(IEnumerable <PeptideSpectrumMatch> peptideSpectrumMatches, double morpheusScoreThreshold, FileStream proteinFastaDatabase, bool onTheFlyDecoys, IDictionary <string, Modification> knownVariableModifications, Protease protease, int maximumMissedCleavages, InitiatorMethionineBehavior initiatorMethionineBehavior, int maximumThreads)
{
    // Pass 1: one (initially empty) protein list per distinct base-leucine sequence of an accepted match.
    Dictionary<string, List<Protein>> sourceProteinsBySequence = new Dictionary<string, List<Protein>>();
    foreach (PeptideSpectrumMatch match in peptideSpectrumMatches)
    {
        if (!(match.MorpheusScore >= morpheusScoreThreshold))
        {
            continue;
        }

        if (!sourceProteinsBySequence.ContainsKey(match.Peptide.BaseLeucineSequence))
        {
            sourceProteinsBySequence.Add(match.Peptide.BaseLeucineSequence, new List<Protein>());
        }
    }

    // Pass 2: digest every database protein in parallel and remember which proteins yield each wanted sequence.
    ParallelOptions digestionOptions = new ParallelOptions();
    digestionOptions.MaxDegreeOfParallelism = maximumThreads;
    IDictionary<string, Modification> modificationsForReader =
        REQUIRE_MATCHING_KNOWN_MODIFICATIONS_IN_PROTEIN_PARSIMONY ? knownVariableModifications : null;

    Parallel.ForEach(
        ProteomeDatabaseReader.ReadProteins(proteinFastaDatabase, onTheFlyDecoys, modificationsForReader),
        digestionOptions,
        protein =>
        {
            foreach (Peptide digested in protein.Digest(protease, maximumMissedCleavages, initiatorMethionineBehavior, null, null))
            {
                // The shared dictionary and its lists are only touched while holding this lock.
                lock (sourceProteinsBySequence)
                {
                    List<Protein> sources;
                    if (!sourceProteinsBySequence.TryGetValue(digested.BaseLeucineSequence, out sources))
                    {
                        continue;
                    }

                    List<Peptide> identified;
                    if (!protein.IdentifiedPeptides.TryGetValue(digested.BaseLeucineSequence, out identified))
                    {
                        identified = new List<Peptide>();
                        protein.IdentifiedPeptides.Add(digested.BaseLeucineSequence, identified);
                    }
                    identified.Add(digested);

                    sources.Add(protein);
                }
            }
        });

    // Pass 3: one group per protein description, collecting every accepted match that protein can explain.
    Dictionary<string, ProteinGroup> groupsByDescription = new Dictionary<string, ProteinGroup>();
    foreach (PeptideSpectrumMatch match in peptideSpectrumMatches)
    {
        if (!(match.MorpheusScore >= morpheusScoreThreshold))
        {
            continue;
        }

        foreach (Protein protein in sourceProteinsBySequence[match.Peptide.BaseLeucineSequence])
        {
            if (REQUIRE_MATCHING_KNOWN_MODIFICATIONS_IN_PROTEIN_PARSIMONY && !KnownModificationsAgree(protein, match))
            {
                continue;
            }

            ProteinGroup group;
            if (!groupsByDescription.TryGetValue(protein.Description, out group))
            {
                group = new ProteinGroup();
                group.Add(protein);
                groupsByDescription.Add(protein.Description, group);
            }
            group.PeptideSpectrumMatches.Add(match);
        }
    }

    List<ProteinGroup> rankedGroups = new List<ProteinGroup>(groupsByDescription.Values);
    rankedGroups.Sort(ProteinGroup.DescendingSummedMorpheusScoreProteinGroupComparison);

    // todo: remove shared peptides from lower-scoring protein group?

    // Merge indistinguishable groups: same peptide set and a summed score that is not lower.
    for (int upper = 0; upper < rankedGroups.Count - 1; upper++)
    {
        ProteinGroup keeper = rankedGroups[upper];
        for (int lower = upper + 1; lower < rankedGroups.Count; )
        {
            ProteinGroup candidate = rankedGroups[lower];
            if (candidate.SummedMorpheusScore < keeper.SummedMorpheusScore)
            {
                // the list is sorted, so nothing further down can tie with the keeper
                break;
            }

            if (candidate.BaseLeucinePeptideSequences.SetEquals(keeper.BaseLeucinePeptideSequences))
            {
                keeper.UnionWith(candidate);
                rankedGroups.RemoveAt(lower); // the next candidate slides into this slot
            }
            else
            {
                lower++;
            }
        }
    }

    // Remove subset and subsumable groups, walking upward from the lowest-ranked group.
    for (int current = rankedGroups.Count - 1; current >= 1; current--)
    {
        HashSet<string> unexplained = new HashSet<string>(rankedGroups[current].BaseLeucinePeptideSequences);
        for (int higher = 0; higher < current; higher++)
        {
            unexplained.ExceptWith(rankedGroups[higher].BaseLeucinePeptideSequences);
            if (unexplained.Count == 0)
            {
                break;
            }
        }

        if (unexplained.Count == 0)
        {
            rankedGroups.RemoveAt(current);
        }
    }

    return rankedGroups;
}

// True when every known variable modification on the match's peptide is also listed at the same residue of the protein.
private static bool KnownModificationsAgree(Protein protein, PeptideSpectrumMatch match)
{
    if (match.Peptide.VariableModifications == null || match.Peptide.VariableModifications.Count <= 0)
    {
        return true;
    }

    foreach (KeyValuePair<int, Modification> site in match.Peptide.VariableModifications)
    {
        if (!site.Value.Known)
        {
            continue;
        }

        List<Modification> modificationsAtResidue = null;
        bool presentInProtein = protein.KnownModifications != null
            && protein.KnownModifications.TryGetValue(match.Peptide.StartResidueNumber - 1 + site.Key, out modificationsAtResidue)
            && modificationsAtResidue.Contains(site.Value);
        if (!presentInProtein)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Handles the Search button. Validates the form, collects every setting into a
/// <c>DatabaseSearcher</c>, subscribes this form's handlers to its events and starts the search
/// on a background thread. On the first invalid input a message box is shown, the offending
/// control receives focus and no search is started.
/// </summary>
private void btnSearch_Click(object sender, EventArgs e)
{
    if (lstData.Items.Count == 0)
    {
        MessageBox.Show("No data files selected");
        btnAdd.Focus();
        return;
    }
    List <string> data_filepaths = new List <string>(lstData.Items.Count);
    foreach (string data_filepath in lstData.Items)
    {
        data_filepaths.Add(data_filepath);
    }

    bool assign_charge_states = chkAssignChargeStates.Checked;
    bool deisotope = chkDeisotope.Checked;

    string fasta_filepath = txtFastaFile.Text;
    if (!File.Exists(fasta_filepath))
    {
        if (fasta_filepath.Length > 0)
        {
            MessageBox.Show("Invalid protein database file: " + fasta_filepath);
        }
        else
        {
            MessageBox.Show("Invalid protein database file");
        }
        txtFastaFile.Focus();
        return;
    }
    bool on_the_fly_decoys = chkOnTheFlyDecoys.Checked;

    Protease protease = (Protease)cboProtease.SelectedItem;
    int max_missed_cleavages = (int)numMaxMissedCleavages.Value;
    InitiatorMethionineBehavior initiator_methionine_behavior = (InitiatorMethionineBehavior)Enum.Parse(typeof(InitiatorMethionineBehavior), cboInitiatorMethionineBehavior.Text, true);

    List <Modification> fixed_modifications = new List <Modification>(clbFixedModifications.CheckedItems.Count);
    foreach (object fixed_modification in clbFixedModifications.CheckedItems)
    {
        fixed_modifications.Add((Modification)fixed_modification);
    }
    List <Modification> variable_modifications = new List <Modification>(clbVariableModifications.CheckedItems.Count);
    foreach (object variable_modification in clbVariableModifications.CheckedItems)
    {
        variable_modifications.Add((Modification)variable_modification);
    }
    int max_variable_mod_isoforms = (int)numMaxVariableModIsoforms.Value;

    int min_assumed_precursor_charge_state = (int)numMinimumAssumedPrecursorChargeState.Value;
    int max_assumed_precursor_charge_state = (int)numMaximumAssumedPrecursorChargeState.Value;

    // -1 is passed for each peak filter whose checkbox is not ticked.
    double absolute_threshold = -1.0;
    if (chkAbsoluteThreshold.Checked)
    {
        if (!double.TryParse(txtAbsoluteThreshold.Text, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out absolute_threshold))
        {
            MessageBox.Show("Cannot parse absolute MS/MS peak threshold: " + txtAbsoluteThreshold.Text);
            txtAbsoluteThreshold.Focus();
            return;
        }
    }
    double relative_threshold_percent = -1.0;
    if (chkRelativeThreshold.Checked)
    {
        if (!double.TryParse(txtRelativeThresholdPercent.Text, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out relative_threshold_percent))
        {
            MessageBox.Show("Cannot parse relative MS/MS peak threshold: " + txtRelativeThresholdPercent.Text);
            txtRelativeThresholdPercent.Focus();
            return;
        }
    }
    int max_peaks = -1;
    if (chkMaxNumPeaks.Checked)
    {
        max_peaks = (int)numMaxPeaks.Value;
    }

    MassTolerance precursor_mass_tolerance = new MassTolerance((double)numPrecursorMassTolerance.Value, (MassToleranceUnits)cboPrecursorMassToleranceUnits.SelectedIndex);
    MassType precursor_mass_type = (MassType)cboPrecursorMassType.SelectedIndex;

    // Semicolon-separated list; an empty box means only a mass error of 0 is accepted.
    List <double> accepted_precursor_mass_errors = new List <double>();
    if (txtAcceptedPrecursorMassErrors.Text.Length > 0)
    {
        foreach (string accepted_precursor_mass_error_text in txtAcceptedPrecursorMassErrors.Text.Split(';'))
        {
            double accepted_precursor_mass_error;
            if (!double.TryParse(accepted_precursor_mass_error_text, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out accepted_precursor_mass_error))
            {
                // Show the text of the box that actually failed to parse (previously this
                // displayed the relative-threshold box by mistake).
                MessageBox.Show("Cannot parse accepted precursor mass errors: " + txtAcceptedPrecursorMassErrors.Text);
                txtAcceptedPrecursorMassErrors.Focus();
                return;
            }
            accepted_precursor_mass_errors.Add(accepted_precursor_mass_error);
        }
    }
    else
    {
        accepted_precursor_mass_errors.Add(0.0);
    }

    MassTolerance product_mass_tolerance = new MassTolerance((double)numProductMassTolerance.Value, (MassToleranceUnits)cboProductMassToleranceUnits.SelectedIndex);
    MassType product_mass_type = (MassType)cboProductMassType.SelectedIndex;

    // The control holds a percentage; the searcher receives a fraction.
    double max_false_discovery_rate = (double)numMaximumFalseDiscoveryRatePercent.Value / 100.0;
    bool consider_modified_unique = chkConsiderModifiedUnique.Checked;

    int max_threads = (int)numMaxThreads.Value;
    bool minimize_memory_usage = chkMinimizeMemoryUsage.Checked;

    string output_folder = txtOutputFolder.Text;
    if (!Directory.Exists(output_folder))
    {
        try
        {
            Directory.CreateDirectory(output_folder);
        }
        catch
        {
            // Any failure to create the folder is reported to the user as an invalid folder.
            if (output_folder.Length > 0)
            {
                MessageBox.Show("Invalid output folder: " + output_folder);
            }
            else
            {
                MessageBox.Show("Invalid output folder");
            }
            txtOutputFolder.Focus();
            return;
        }
    }

    DatabaseSearcher database_searcher = new DatabaseSearcher(data_filepaths,
        min_assumed_precursor_charge_state, max_assumed_precursor_charge_state,
        absolute_threshold, relative_threshold_percent, max_peaks,
        assign_charge_states, deisotope,
        fasta_filepath, on_the_fly_decoys,
        protease, max_missed_cleavages, initiator_methionine_behavior,
        fixed_modifications, variable_modifications, max_variable_mod_isoforms,
        precursor_mass_tolerance, precursor_mass_type,
        accepted_precursor_mass_errors,
        product_mass_tolerance, product_mass_type,
        max_false_discovery_rate, consider_modified_unique,
        max_threads, minimize_memory_usage,
        output_folder);

    database_searcher.Starting += HandleStarting;
    database_searcher.StartingFile += HandleStartingFile;
    database_searcher.UpdateStatus += HandleUpdateStatus;
    database_searcher.ReportTaskWithoutProgress += HandleReportTaskWithoutProgress;
    database_searcher.ReportTaskWithProgress += HandleReportTaskWithProgress;
    database_searcher.UpdateProgress += HandleUpdateProgress;
    database_searcher.ThrowException += HandleThrowException;
    database_searcher.FinishedFile += HandleFinishedFile;
    database_searcher.Finished += HandleFinished;

    lstData.SelectedItem = null;
    tspbProgress.Value = tspbProgress.Minimum;

    // Run the search on a background thread so this handler returns immediately.
    Thread thread = new Thread(new ThreadStart(database_searcher.Search));
    thread.IsBackground = true;
    thread.Start();
}
/// <summary>
/// Lazily yields the peptides produced by digesting this protein with <paramref name="protease"/>.
/// Nothing is yielded when <c>Length</c> is 0.
/// </summary>
/// <param name="protease">Supplies the cleavage specificity and the digestion site indices.</param>
/// <param name="maximumMissedCleavages">Peptides are generated for 0 through this many missed cleavages (ignored for <c>CleavageSpecificity.None</c>).</param>
/// <param name="initiatorMethionineBehavior">
/// Applies when the first residue is 'M': <c>Cleave</c> suppresses the peptides that keep it,
/// <c>Retain</c> suppresses the peptides that drop it, and any other value yields both forms.
/// </param>
/// <param name="minimumPeptideLength">Inclusive lower bound on peptide length; no lower bound when null.</param>
/// <param name="maximumPeptideLength">Inclusive upper bound on peptide length; no upper bound when null.</param>
/// <returns>
/// Peptides constructed with 1-based start and end residue numbers and their missed cleavage count
/// (-1 is passed for <c>CleavageSpecificity.None</c>).
/// </returns>
public IEnumerable <Peptide> Digest(Protease protease, int maximumMissedCleavages, InitiatorMethionineBehavior initiatorMethionineBehavior, int?minimumPeptideLength, int?maximumPeptideLength)
{
    if (Length > 0)
    {
        if (protease.CleavageSpecificity != CleavageSpecificity.None)
        {
            // these are the 0-based residue indices the protease cleaves AFTER
            List <int> indices = protease.GetDigestionSiteIndices(this);
            // sentinels: -1 stands for "before the first residue", Length - 1 for the protein's last residue
            indices.Insert(0, -1);
            indices.Add(Length - 1);

            if (protease.CleavageSpecificity == CleavageSpecificity.Full)
            {
                // every peptide spans from one digestion site to the site (missed_cleavages + 1) entries later
                for (int missed_cleavages = 0; missed_cleavages <= maximumMissedCleavages; missed_cleavages++)
                {
                    for (int i = 0; i < indices.Count - missed_cleavages - 1; i++)
                    {
                        // skipped only for an N-terminal peptide (indices[i] == -1) starting with 'M' when the methionine must be cleaved
                        if (initiatorMethionineBehavior != InitiatorMethionineBehavior.Cleave || indices[i] + 1 != 0 || this[0] != 'M')
                        {
                            int length = indices[i + missed_cleavages + 1] - indices[i];
                            if ((!minimumPeptideLength.HasValue || length >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || length <= maximumPeptideLength.Value))
                            {
                                // start residue number: +1 for starting at the next residue after the cleavage, +1 for 0->1 indexing
                                // end residue number: +1 for 0->1 indexing
                                Peptide peptide = new Peptide(this, indices[i] + 1 + 1, indices[i + missed_cleavages + 1] + 1, missed_cleavages);
                                yield return(peptide);
                            }
                        }

                        // N-terminal peptide with the initiator methionine removed
                        if (initiatorMethionineBehavior != InitiatorMethionineBehavior.Retain && indices[i] + 1 == 0 && this[0] == 'M')
                        {
                            // -1 because the methionine is not part of this peptide
                            int length = indices[i + missed_cleavages + 1] - indices[i] - 1;
                            if (length > 0 && (!minimumPeptideLength.HasValue || length >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || length <= maximumPeptideLength.Value))
                            {
                                // start residue number: +1 for skipping initiator methionine, +1 for starting at the next residue after the cleavage, +1 for 0->1 indexing
                                // end residue number: +1 for 0->1 indexing
                                Peptide peptide_without_initiator_methionine = new Peptide(this, indices[i] + 1 + 1 + 1, indices[i + missed_cleavages + 1] + 1, missed_cleavages);
                                yield return(peptide_without_initiator_methionine);
                            }
                        }
                    }
                }
            }
            else // protease.CleavageSpecificity == CleavageSpecificity.Semi || protease.CleavageSpecificity == CleavageSpecificity.SemiN || protease.CleavageSpecificity == CleavageSpecificity.SemiC
            {
                // Semi / SemiN: the start is fixed just after a digestion site and the end is shortened one residue at a time
                if (protease.CleavageSpecificity == CleavageSpecificity.Semi || protease.CleavageSpecificity == CleavageSpecificity.SemiN)
                {
                    for (int missed_cleavages = 0; missed_cleavages <= maximumMissedCleavages; missed_cleavages++)
                    {
                        for (int i = 0; i < indices.Count - missed_cleavages - 1; i++)
                        {
                            if (initiatorMethionineBehavior != InitiatorMethionineBehavior.Cleave || indices[i] + 1 != 0 || this[0] != 'M')
                            {
                                // conditional ensures that we are generating peptides at their lowest missed cleavage state
                                for (int length = indices[i + missed_cleavages + 1] - indices[i]; length > (indices[i + missed_cleavages + 1] - indices[i]) - (indices[i + missed_cleavages + 1] - indices[(i + missed_cleavages + 1) - 1]); length--)
                                {
                                    // keep the end residue number within the protein
                                    if ((indices[i] + 1 + 1) + length - 1 <= Length)
                                    {
                                        if ((!minimumPeptideLength.HasValue || length >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || length <= maximumPeptideLength.Value))
                                        {
                                            // start residue number: +1 for starting at the next residue after the cleavage, +1 for 0->1 indexing
                                            // end residue number: start residue number + length - 1
                                            Peptide peptide = new Peptide(this, indices[i] + 1 + 1, (indices[i] + 1 + 1) + length - 1, missed_cleavages);
                                            yield return(peptide);
                                        }
                                    }
                                }
                            }

                            if (initiatorMethionineBehavior != InitiatorMethionineBehavior.Retain && indices[i] + 1 == 0 && this[0] == 'M')
                            {
                                // NOTE(review): unlike the Full branch above, the lengths here are not reduced by 1 for the
                                // skipped methionine, so the longest peptide ends one residue past digestion site
                                // indices[i + missed_cleavages + 1] (when that is still inside the protein) - confirm this is intended.
                                // conditional ensures that we are generating peptides at their lowest missed cleavage state
                                for (int length = indices[i + missed_cleavages + 1] - indices[i]; length > (indices[i + missed_cleavages + 1] - indices[i]) - (indices[i + missed_cleavages + 1] - indices[(i + missed_cleavages + 1) - 1]); length--)
                                {
                                    // keep the end residue number within the protein
                                    if ((indices[i] + 1 + 1 + 1) + length - 1 <= Length)
                                    {
                                        if ((!minimumPeptideLength.HasValue || length >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || length <= maximumPeptideLength.Value))
                                        {
                                            // start residue number: +1 for skipping initiator methionine, +1 for starting at the next residue after the cleavage, +1 for 0->1 indexing
                                            // end residue number: start residue number + length - 1
                                            Peptide peptide_without_initiator_methionine = new Peptide(this, indices[i] + 1 + 1 + 1, (indices[i] + 1 + 1 + 1) + length - 1, missed_cleavages);
                                            yield return(peptide_without_initiator_methionine);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                // Semi / SemiC: the end is fixed at a digestion site and the start is moved one residue at a time
                if (protease.CleavageSpecificity == CleavageSpecificity.Semi || protease.CleavageSpecificity == CleavageSpecificity.SemiC)
                {
                    for (int missed_cleavages = 0; missed_cleavages <= maximumMissedCleavages; missed_cleavages++)
                    {
                        for (int i = 0; i < indices.Count - missed_cleavages - 1; i++)
                        {
                            // handling for initiator methionine not required
                            // - (protease.CleavageSpecificity == CleavageSpecificity.Semi ? 1 : 0) ensures that we don't repeat the same peptides we generated above in the SemiN digestion
                            // conditional ensures that we are generating peptides at their lowest missed cleavage state
                            for (int length = indices[i + missed_cleavages + 1] - indices[i] - (protease.CleavageSpecificity == CleavageSpecificity.Semi ? 1 : 0); length > (indices[i + missed_cleavages + 1] - indices[i]) - (indices[i + 1] - indices[i]); length--)
                            {
                                // keep the start residue number at 1 or later
                                if ((indices[i + missed_cleavages + 1] + 1) - length + 1 >= 1)
                                {
                                    if ((!minimumPeptideLength.HasValue || length >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || length <= maximumPeptideLength.Value))
                                    {
                                        // start residue number: end residue number - length + 1
                                        // end residue number: +1 for 0->1 indexing
                                        Peptide peptide = new Peptide(this, (indices[i + missed_cleavages + 1] + 1) - length + 1, indices[i + missed_cleavages + 1] + 1, missed_cleavages);
                                        yield return(peptide);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        else // protease.CleavageSpecificity == CleavageSpecificity.None
        {
            // the whole protein is yielded as a single peptide; -1 is passed as its missed cleavage count
            if (initiatorMethionineBehavior != InitiatorMethionineBehavior.Cleave || this[0] != 'M')
            {
                if ((!minimumPeptideLength.HasValue || Length >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || Length <= maximumPeptideLength.Value))
                {
                    Peptide peptide = new Peptide(this, 1, Length, -1);
                    yield return(peptide);
                }
            }

            // the same peptide without its initiator methionine (residues 2..Length), provided something remains
            if (initiatorMethionineBehavior != InitiatorMethionineBehavior.Retain && this[0] == 'M')
            {
                if (Length > 1 && (!minimumPeptideLength.HasValue || Length - 1 >= minimumPeptideLength.Value) && (!maximumPeptideLength.HasValue || Length - 1 <= maximumPeptideLength.Value))
                {
                    Peptide peptide_without_initiator_methionine = new Peptide(this, 2, Length, -1);
                    yield return(peptide_without_initiator_methionine);
                }
            }
        }
    }
}