/// <summary>
/// Checks whether one of the precursors of <paramref name="nodePep"/> explains the
/// modifications written in the sequence of <paramref name="target"/>.
/// </summary>
/// <param name="target">Target whose (possibly UniMod-annotated) sequence is being matched.</param>
/// <param name="nodePep">Peptide node whose child transition groups are tested.</param>
/// <param name="nodeGroup">
/// The transition group being examined when the method returns from inside the loop;
/// null when every child was examined without a match.
/// </param>
/// <returns>True when a child transition group matches the sequence; false otherwise.</returns>
protected override bool IsMatch(Target target, PeptideDocNode nodePep, out TransitionGroupDocNode nodeGroup)
{
    string simplified = SimplifyUnimodSequence(target.Sequence);
    // Two views of the same sequence: one with whatever RGX_HEAVY matches removed,
    // one with whatever RGX_LIGHT matches removed.
    var lightOnly = FastaSequence.StripModifications(simplified, FastaSequence.RGX_HEAVY);
    var heavyOnly = FastaSequence.StripModifications(simplified, FastaSequence.RGX_LIGHT);
    var lightCalc = Settings.TryGetPrecursorCalc(IsotopeLabelType.light, nodePep.ExplicitMods);

    foreach (TransitionGroupDocNode candidate in nodePep.Children)
    {
        nodeGroup = candidate;
        var labelType = candidate.TransitionGroup.LabelType;

        if (!labelType.IsLight)
        {
            // Non-light precursor: compare against the heavy-only view of the sequence.
            var labelCalc = Settings.TryGetPrecursorCalc(labelType, nodePep.ExplicitMods);
            if (labelCalc != null && EqualsModifications(heavyOnly, labelCalc, lightCalc))
            {
                return true;
            }
            continue;
        }

        // Light modifications must match, otherwise no precursor of this peptide can.
        if (!EqualsModifications(lightOnly, lightCalc, null))
        {
            return false;
        }
        // A sequence that carries only light modifications is fully explained here.
        if (Equals(lightOnly, simplified))
        {
            return true;
        }
    }

    nodeGroup = null;
    return false;
}
/// <summary>
/// Tries to build a peptide node for a modified sequence string using only the
/// modifications available through the current settings.
/// </summary>
/// <param name="seq">Modified sequence; charge indicators are stripped before use.</param>
/// <param name="peptide">
/// Peptide to enumerate nodes for, or null to create one from the unmodified sequence.
/// </param>
/// <param name="diff">Settings diff passed through to the per-node overload.</param>
/// <param name="nodeGroupMatched">Matched transition group reported by the per-node overload, or null.</param>
/// <returns>
/// The first non-null node produced by the per-node overload, or null when none is found
/// or when constructing the peptide throws <see cref="InvalidDataException"/>.
/// </returns>
public PeptideDocNode CreateDocNodeFromSettings(string seq, Peptide peptide, SrmSettingsDiff diff, out TransitionGroupDocNode nodeGroupMatched)
{
    seq = Transition.StripChargeIndicators(seq, TransitionGroup.MIN_PRECURSOR_CHARGE, TransitionGroup.MAX_PRECURSOR_CHARGE);

    if (peptide == null)
    {
        string bareSequence = FastaSequence.StripModifications(seq);
        try
        {
            int cleavagePoints = Settings.PeptideSettings.Enzyme.CountCleavagePoints(bareSequence);
            peptide = new Peptide(null, bareSequence, null, null, cleavagePoints);
        }
        catch (InvalidDataException)
        {
            nodeGroupMatched = null;
            return null;
        }
    }

    // Cap the enumeration at the number of modifications written in the sequence when
    // that is below the settings maximum; over-enumerating can be slow.
    int writtenModCount = seq.Count(c => c == '[' || c == '(');
    int maxMods = Math.Min(writtenModCount, Settings.PeptideSettings.Modifications.MaxVariableMods);
    var filter = new MaxModFilter(maxMods);

    foreach (var candidate in peptide.CreateDocNodes(Settings, filter))
    {
        var matched = CreateDocNodeFromSettings(seq, candidate, diff, out nodeGroupMatched);
        if (matched == null)
        {
            continue;
        }
        return matched;
    }

    nodeGroupMatched = null;
    return null;
}
/// <summary>
/// Compares the modifications indicated in the sequence string to the calculated masses.
/// </summary>
/// <param name="seq">The modified sequence.</param>
/// <param name="calc">Calculator used to calculate the masses.</param>
/// <param name="calcLight">
/// Additional light calculator if necessary to isolate mass changes
/// caused by heavy modifications alone. When null, modifications named in the
/// sequence are looked up among the static modifications; otherwise among the
/// heavy modifications.
/// </param>
/// <returns>
/// True if the given calculators explain the modifications indicated on the sequence,
/// false otherwise.
/// </returns>
private bool EqualsModifications(string seq, IPrecursorMassCalc calc, IPrecursorMassCalc calcLight)
{
    var modifications = Settings.PeptideSettings.Modifications;
    bool structural = calcLight == null;
    string aas = FastaSequence.StripModifications(seq);
    foreach (var info in EnumerateSequenceInfos(seq, true))
    {
        int indexAA = info.IndexAA;
        var aa = aas[indexAA];
        var roundedTo = info.RoundedTo;

        // If the user has indicated the modification by name, find that modification
        // and calculate the mass.
        double massKey;
        if (info.Mass != null)
        {
            massKey = (double)info.Mass;
        }
        else
        {
            var info1 = info;
            StaticMod modMatch = null;
            int index;
            if (structural)
            {
                index = modifications.StaticModifications.IndexOf(mod => Equals(mod.Name, info1.Name));
                if (index != -1)
                {
                    modMatch = modifications.StaticModifications[index];
                }
            }
            else
            {
                index = modifications.HeavyModifications.IndexOf(mod => Equals(mod.Name, info1.Name));
                if (index != -1)
                {
                    modMatch = modifications.HeavyModifications[index];
                }
            }
            if (modMatch == null)
            {
                return false;
            }
            roundedTo = DEFAULT_ROUNDING_DIGITS;
            massKey = Math.Round(GetDefaultModMass(aa, modMatch), roundedTo);
        }

        double massMod = Math.Round(calc.GetAAModMass(aa, indexAA, aas.Length), roundedTo);
        // Subtract the mass difference of the light
        // modifications to isolate the masses of the heavy modifications.
        if (calcLight != null)
        {
            double massLight = Math.Round(calcLight.GetAAModMass(aa, indexAA, aas.Length), roundedTo);
            // Subtracting two rounded doubles can leave binary representation error
            // (e.g. 0.3 - 0.1 != 0.2), so round the difference again before the exact
            // comparison below.
            massMod = Math.Round(massMod - massLight, roundedTo);
        }
        if (!Equals(massKey, massMod))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Rewrites the brackets around every UniMod-style modification in a sequence so the
/// bracket type tells whether the modification is structural: "[...]" when the
/// modification name is in <c>UniMod.DictStructuralModNames</c>, "{...}" otherwise.
/// Modifications that are not recognized by <c>TryGetIdFromUnimod</c> are left untouched.
/// </summary>
/// <param name="seq">Modified sequence string.</param>
/// <returns>
/// The sequence with the adjusted brackets, or null on the path following
/// <c>ThrowUnimodException</c> (present only to satisfy flow analysis).
/// </returns>
public string SimplifyUnimodSequence(string seq)
{
    var rewritten = new StringBuilder(seq);
    string aas = FastaSequence.StripModifications(seq);
    int indexAA = 0;
    int pos = 0;
    while (pos < seq.Length)
    {
        var aa = aas[indexAA];
        int indexOpen = pos + 1;
        bool modFollows = indexOpen < seq.Length && (FastaSequence.OPEN_MOD.Contains(seq[indexOpen]));
        if (modFollows)
        {
            char openBracket = seq[indexOpen];
            char closeBracket = FastaSequence.CLOSE_MOD[FastaSequence.OPEN_MOD.IndexOf(c => c == openBracket)];
            int indexClose = seq.IndexOf(closeBracket, indexOpen);
            int indexModText = indexOpen + 1;
            string mod = seq.Substring(indexModText, indexClose - indexModText);
            pos = indexClose;

            // The last residue takes precedence over the first when they coincide.
            ModTerminus? modTerminus = null;
            if (indexAA == aas.Length - 1)
            {
                modTerminus = ModTerminus.C;
            }
            else if (indexAA == 0)
            {
                modTerminus = ModTerminus.N;
            }

            // Here we are only interested in uniMod
            int uniModId;
            if (TryGetIdFromUnimod(mod, out uniModId))
            {
                var staticMod = GetStaticMod(uniModId, aa, modTerminus);
                if (staticMod == null)
                {
                    ThrowUnimodException(seq, uniModId, indexAA, indexOpen, indexClose);
                    return null;    // Keep ReSharper happy
                }
                bool isStructural = UniMod.DictStructuralModNames.ContainsKey(staticMod.Name);
                rewritten[indexOpen] = isStructural ? '[' : '{';
                rewritten[indexClose] = isStructural ? ']' : '}';
            }
        }

        // When another bracket follows immediately, stay on the same amino acid and
        // leave the position where it is; otherwise move on to the next residue.
        int next = pos + 1;
        bool anotherModFollows = next < seq.Length && FastaSequence.OPEN_MOD.Contains(seq[next]);
        if (!anotherModFollows)
        {
            indexAA++;
            pos++;
        }
    }
    return rewritten.ToString();
}
/// <summary>
/// Creates a peptide, stores the supplied values and then runs <c>Validate()</c>
/// on the result.
/// </summary>
/// <param name="fastaSequence">Protein sequence the peptide belongs to, or null.</param>
/// <param name="sequence">Amino acid sequence of the peptide.</param>
/// <param name="begin">Start position within the protein sequence, if any.</param>
/// <param name="end">End position within the protein sequence, if any.</param>
/// <param name="missedCleavages">Number of missed cleavages recorded for the peptide.</param>
/// <param name="isDecoy">Whether the peptide is flagged as a decoy.</param>
public Peptide(FastaSequence fastaSequence, string sequence, int? begin, int? end, int missedCleavages, bool isDecoy)
{
    // Sequence and its location
    Sequence = sequence;
    _fastaSequence = fastaSequence;
    Begin = begin;
    End = end;

    // Remaining attributes
    IsDecoy = isDecoy;
    MissedCleavages = missedCleavages;

    // Validation must see the fully populated instance.
    Validate();
}
/// <summary>
/// Value equality: two instances are equal when name, description, sequence,
/// alternatives (compared with <c>ArrayUtil.EqualsDeep</c>) and decoy flag all agree.
/// </summary>
/// <param name="obj">Instance to compare with; null is never equal.</param>
/// <returns>True when <paramref name="obj"/> is this instance or has equal members.</returns>
public bool Equals(FastaSequence obj)
{
    if (ReferenceEquals(null, obj))
    {
        return false;
    }
    if (ReferenceEquals(this, obj))
    {
        return true;
    }

    // Compare member by member, bailing out on the first difference.
    if (!Equals(obj._name, _name))
    {
        return false;
    }
    if (!Equals(obj._description, _description))
    {
        return false;
    }
    if (!Equals(obj._sequence, _sequence))
    {
        return false;
    }
    if (!ArrayUtil.EqualsDeep(obj.Alternatives, Alternatives))
    {
        return false;
    }
    return obj.IsDecoy == IsDecoy;
}
/// <summary>
/// Checks that the peptide is internally consistent.
/// <list type="bullet">
/// <item>Custom ions must have no protein sequence and no peptide sequence; the custom ion validates itself.</item>
/// <item>Peptides without a protein sequence must not have begin/end values; the sequence is checked with <c>FastaSequence.ValidateSequence</c>.</item>
/// <item>Peptides with a protein sequence must have begin/end values inside the protein, and the peptide
/// sequence must agree with the protein sequence over [Begin, End).</item>
/// </list>
/// </summary>
/// <exception cref="InvalidDataException">
/// Thrown when begin/end are present without a protein sequence, missing with one, outside the
/// protein bounds, or when the peptide sequence disagrees with (or is shorter than) the
/// corresponding stretch of the protein sequence.
/// </exception>
private void Validate()
{
    if (IsCustomIon)
    {
        Assume.IsNull(_fastaSequence);
        Assume.IsNull(Sequence);
        CustomIon.Validate();
    }
    else if (_fastaSequence == null)
    {
        if (Begin.HasValue || End.HasValue)
        {
            throw new InvalidDataException(Resources.Peptide_Validate_Peptides_without_a_protein_sequence_do_not_support_the_start_and_end_properties);
        }

        // No FastaSequence checked the sequence, so check it here.
        FastaSequence.ValidateSequence(Sequence);
    }
    else
    {
        // Otherwise, validate the peptide sequence against the group sequence
        if (!Begin.HasValue || !End.HasValue)
        {
            throw new InvalidDataException(Resources.Peptide_Validate_Peptides_from_protein_sequences_must_have_start_and_end_values);
        }
        if (0 > Begin.Value || End.Value > _fastaSequence.Sequence.Length)
        {
            throw new InvalidDataException(Resources.Peptide_Validate_Peptide_sequence_exceeds_the_bounds_of_the_protein_sequence);
        }

        for (int i = Begin.Value, j = 0; i < End.Value; i++, j++)
        {
            // A peptide sequence shorter than the [Begin, End) range cannot agree with
            // the protein; report it as a mismatch instead of letting the indexer throw
            // IndexOutOfRangeException, which callers catching InvalidDataException miss.
            if (j >= Sequence.Length || Sequence[j] != _fastaSequence.Sequence[i])
            {
                string sequenceCheck = _fastaSequence.Sequence.Substring(Begin.Value, End.Value - Begin.Value);
                throw new InvalidDataException(
                    string.Format(Resources.Peptide_Validate_The_peptide_sequence__0__does_not_agree_with_the_protein_sequence__1__at__2__3__,
                                  Sequence, sequenceCheck, Begin.Value, End.Value));
            }
        }
    }
    // CONSIDER: Validate missed cleavages some day?
}
/// <summary>
/// Builds a peptide node for a modified sequence string. The current settings are
/// tried first; when they yield nothing, an unmodified node is created and passed to
/// <c>CreateDocNodeFromMatches</c> together with the modifications enumerated from
/// the sequence.
/// </summary>
/// <param name="seq">Modified sequence string.</param>
/// <param name="fastaSequence">Protein the peptide comes from, or null for a free-standing peptide.</param>
/// <returns>The node from the settings when available, otherwise the result of <c>CreateDocNodeFromMatches</c>.</returns>
public PeptideDocNode GetModifiedNode(string seq, FastaSequence fastaSequence)
{
    var seqUnmod = FastaSequence.StripModifications(seq);

    Peptide peptide;
    if (fastaSequence == null)
    {
        int cleavagePoints = Settings.PeptideSettings.Enzyme.CountCleavagePoints(seqUnmod);
        peptide = new Peptide(null, seqUnmod, null, null, cleavagePoints);
    }
    else
    {
        peptide = fastaSequence.CreateFullPeptideDocNode(Settings, new Target(seqUnmod)).Peptide;
    }

    // First, try to create the peptide using the current settings.
    TransitionGroupDocNode matchedGroup;
    PeptideDocNode fromSettings = CreateDocNodeFromSettings(new Target(seq), peptide, SrmSettingsDiff.ALL, out matchedGroup);
    if (fromSettings != null)
    {
        return fromSettings;
    }

    // Fall back to an unmodified node and let the matching code apply the modifications.
    PeptideDocNode unmodified;
    if (fastaSequence == null)
    {
        unmodified = new PeptideDocNode(peptide);
    }
    else
    {
        unmodified = fastaSequence.CreateFullPeptideDocNode(Settings, new Target(seqUnmod));
    }
    return CreateDocNodeFromMatches(unmodified, EnumerateSequenceInfos(seq, false));
}
/// <summary>
/// Creates a non-decoy peptide by forwarding to the six-argument constructor
/// with <c>isDecoy</c> set to false.
/// </summary>
/// <param name="fastaSequence">Protein sequence the peptide belongs to, or null.</param>
/// <param name="sequence">Amino acid sequence of the peptide.</param>
/// <param name="begin">Start position within the protein sequence, if any.</param>
/// <param name="end">End position within the protein sequence, if any.</param>
/// <param name="missedCleavages">Number of missed cleavages recorded for the peptide.</param>
public Peptide(FastaSequence fastaSequence, string sequence, int? begin, int? end, int missedCleavages)
    : this(fastaSequence, sequence, begin, end, missedCleavages, isDecoy: false)
{
}
/// <summary>
/// Lazily walks a modified sequence string and yields one <c>AAModInfo</c> per
/// bracketed modification. A modification is described by UniMod id (name and
/// monoisotopic mass looked up), by a parsable mass, or otherwise by its literal name.
/// </summary>
/// <param name="seq">Modified sequence string.</param>
/// <param name="includeUnmod">
/// When true, amino acids that are not followed by a modification are also yielded,
/// with a mass of 0.
/// </param>
/// <returns>A deferred sequence of modification infos in sequence order.</returns>
private IEnumerable<AAModInfo> EnumerateSequenceInfos(string seq, bool includeUnmod)
{
    string aas = FastaSequence.StripModifications(seq);
    // True only when every opening bracket type occurs fewer times than there are residues.
    bool isSpecificHeavy = FastaSequence.OPEN_MOD.All(paren => aas.Length > seq.Count(c => c == paren));
    int indexAA = 0;
    int indexAAInSeq = 0;
    int pos = 0;
    while (pos < seq.Length)
    {
        var aa = aas[indexAA];
        int indexOpen = pos + 1;
        bool modFollows = indexOpen < seq.Length && (FastaSequence.OPEN_MOD.Contains(seq[indexOpen]));
        if (modFollows)
        {
            char openBracket = seq[indexOpen];
            bool isHeavy = openBracket == '{';
            char closeBracket = FastaSequence.CLOSE_MOD[FastaSequence.OPEN_MOD.IndexOf(c => c == openBracket)];
            int indexClose = seq.IndexOf(closeBracket, indexOpen);
            int indexModText = indexOpen + 1;
            string mod = seq.Substring(indexModText, indexClose - indexModText);
            pos = indexClose;

            // The last residue takes precedence over the first when they coincide.
            ModTerminus? modTerminus = null;
            if (indexAA == aas.Length - 1)
            {
                modTerminus = ModTerminus.C;
            }
            else if (indexAA == 0)
            {
                modTerminus = ModTerminus.N;
            }

            string name = null;
            double? mass = null;
            int roundedTo = 0;

            // If passed in modification in UniMod notation, look up the id and find the name and mass
            int uniModId;
            if (!TryGetIdFromUnimod(mod, out uniModId))
            {
                MassModification massModification = MassModification.Parse(mod);
                if (massModification == null)
                {
                    // Neither a UniMod id nor a mass: treat the text as a modification name.
                    name = mod;
                }
                else
                {
                    mass = massModification.Mass;
                    roundedTo = Math.Min(massModification.Precision, DEFAULT_ROUNDING_DIGITS);
                }
            }
            else
            {
                var staticMod = GetStaticMod(uniModId, aa, modTerminus);
                if (staticMod == null)
                {
                    throw ThrowUnimodException(seq, uniModId, indexAA, indexOpen, indexClose);
                }
                name = staticMod.Name;
                isHeavy = !UniMod.IsStructuralModification(name);
                // CONSIDER: Mass depends on TransitionPrediction settings for precursors
                mass = staticMod.MonoisotopicMass;
                roundedTo = DEFAULT_ROUNDING_DIGITS;
            }

            if (mass.HasValue)
            {
                mass = Math.Round(mass.Value, roundedTo);
            }

            var modKey = new AAModKey
            {
                Name = name,
                Mass = mass,
                AA = aa,
                Terminus = modTerminus,
                UserIndicatedHeavy = isHeavy,
                RoundedTo = roundedTo,
                AppearsToBeSpecificMod = isSpecificHeavy
            };
            yield return new AAModInfo
            {
                ModKey = modKey,
                IndexAA = indexAA,
                IndexAAInSeq = indexAAInSeq,
            };
        }
        else if (includeUnmod)
        {
            // Unmodified amino acids are needed by callers that check for equality.
            var unmodKey = new AAModKey
            {
                AA = aa,
                Mass = 0
            };
            yield return new AAModInfo
            {
                ModKey = unmodKey,
                IndexAA = indexAA,
            };
        }

        // When another bracket follows immediately, stay on the same amino acid and
        // leave the position where it is; otherwise move on to the next residue.
        int next = pos + 1;
        bool anotherModFollows = next < seq.Length && FastaSequence.OPEN_MOD.Contains(seq[next]);
        if (!anotherModFollows)
        {
            indexAAInSeq = next;
            pos = next;
            indexAA++;
        }
    }
}
/// <summary>
/// Adds one protein (peptide group) to the document for every row of the protein grid
/// that has a protein name. Rows are processed from the last to the first. Does nothing
/// unless the protein list tab is the selected tab.
/// </summary>
/// <param name="document">Document to add the proteins to.</param>
/// <param name="selectedPath">
/// Insertion point. After each added protein it is replaced with the path of that protein.
/// </param>
/// <returns>
/// The modified document; the unchanged document when the protein list tab is not selected;
/// or null after an error has been shown for a row.
/// </returns>
private SrmDocument AddProteins(SrmDocument document, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPageProteinList)
        return document;

    var backgroundProteome = GetBackgroundProteome(document);
    for (int i = gridViewProteins.Rows.Count - 1; i >= 0; i--)
    {
        var row = gridViewProteins.Rows[i];
        var proteinName = Convert.ToString(row.Cells[colProteinName.Index].Value);
        if (String.IsNullOrEmpty(proteinName))
        {
            continue;
        }
        var pastedMetadata = new ProteinMetadata(proteinName,
            Convert.ToString(row.Cells[colProteinDescription.Index].Value),
            NullForEmpty(Convert.ToString(row.Cells[colProteinPreferredName.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinAccession.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinGene.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinSpecies.Index].Value)));
        FastaSequence fastaSequence = null;
        if (!backgroundProteome.IsNone)
        {
            ProteinMetadata protdbMetadata;
            fastaSequence = backgroundProteome.GetFastaSequence(proteinName, out protdbMetadata);
            // Fill in any gaps in pasted metadata with that in protdb
            pastedMetadata = pastedMetadata.Merge(protdbMetadata);
        }
        // Strip any whitespace (tab, newline etc) In case it was copied out of a FASTA file
        var fastaSequenceString = new string(Convert.ToString(row.Cells[colProteinSequence.Index].Value).Where(c => !Char.IsWhiteSpace(c)).ToArray());
        if (!string.IsNullOrEmpty(fastaSequenceString))
        {
            try
            {
                if (fastaSequence == null) // Didn't match anything in protdb
                {
                    fastaSequence = new FastaSequence(pastedMetadata.Name, pastedMetadata.Description,
                                                      new ProteinMetadata[0], fastaSequenceString);
                }
                else if (fastaSequence.Sequence != fastaSequenceString)
                {
                    // The pasted sequence overrides the one from the background proteome.
                    fastaSequence = new FastaSequence(pastedMetadata.Name, pastedMetadata.Description,
                                                      fastaSequence.Alternatives, fastaSequenceString);
                }
            }
            catch (Exception exception)
            {
                // The invalid value came from the sequence cell, so report the error
                // against the sequence column (it was previously the description column).
                ShowProteinError(new PasteError
                {
                    Line = i,
                    Column = colProteinSequence.Index,
                    Message = string.Format(Resources.PasteDlg_AddProteins_Invalid_protein_sequence__0__, exception.Message)
                });
                return null;
            }
        }
        if (fastaSequence == null)
        {
            ShowProteinError(
                new PasteError
                {
                    Line = i,
                    Message = backgroundProteome.IsNone
                        ? Resources.PasteDlg_AddProteins_Missing_protein_sequence
                        : Resources.PasteDlg_AddProteins_This_protein_was_not_found_in_the_background_proteome_database
                });
            return null;
        }
        // A pasted description takes precedence over the one on the sequence.
        var description = pastedMetadata.Description;
        if (!string.IsNullOrEmpty(description) && description != fastaSequence.Description)
        {
            fastaSequence = new FastaSequence(fastaSequence.Name, description, fastaSequence.Alternatives, fastaSequence.Sequence);
        }
        pastedMetadata = pastedMetadata.ChangeName(fastaSequence.Name).ChangeDescription(fastaSequence.Description);  // Make sure these agree
        var nodeGroupPep = new PeptideGroupDocNode(fastaSequence, pastedMetadata, new PeptideDocNode[0]);
        nodeGroupPep = nodeGroupPep.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
        var to = selectedPath;
        if (to == null || to.Depth < (int)SrmDocument.Level.MoleculeGroups)
        {
            document = (SrmDocument)document.Add(nodeGroupPep);
        }
        else
        {
            Identity toId = selectedPath.GetIdentity((int)SrmDocument.Level.MoleculeGroups);
            document = (SrmDocument)document.Insert(toId, nodeGroupPep);
        }
        selectedPath = new IdentityPath(nodeGroupPep.Id);
    }
    return document;
}
/// <summary>
/// Lazily enumerates the peptide nodes available for this group under the given settings.
/// </summary>
/// <param name="settings">Settings used to create and filter peptide nodes.</param>
/// <param name="useFilter">
/// When true the settings act as the peptide filter; otherwise
/// <c>PeptideFilter.UNFILTERED</c> is used where a filter is needed.
/// </param>
/// <returns>
/// For a FASTA sequence, the nodes it creates; for a peptide list without variable
/// modifications, the existing children that have no variable modifications; otherwise
/// the children expanded through <c>Peptide.CreateDocNodes</c>.
/// </returns>
public IEnumerable<PeptideDocNode> GetPeptideNodes(SrmSettings settings, bool useFilter)
{
    // FASTA sequences can generate a comprehensive list of available peptides.
    FastaSequence fastaSeq = Id as FastaSequence;
    if (fastaSeq != null)
    {
        foreach (PeptideDocNode nodePep in fastaSeq.CreatePeptideDocNodes(settings, useFilter, null))
        {
            yield return nodePep;
        }
        yield break;
    }

    // Peptide lists without variable modifications just return their existing children.
    if (!settings.PeptideSettings.Modifications.HasVariableModifications)
    {
        foreach (PeptideDocNode nodePep in Children)
        {
            if (nodePep.HasVariableMods)
            {
                continue;
            }
            yield return nodePep;
        }
        yield break;
    }

    // If there are variable modifications, fill out the available list.
    var expandedPeptides = new HashSet<Peptide>();
    IPeptideFilter filter = (useFilter ? settings : PeptideFilter.UNFILTERED);
    foreach (PeptideDocNode nodePep in Children)
    {
        // Modifications mean nothing to custom ions, and nodes with purely explicit
        // modifications are returned as they are.
        if (nodePep.Peptide.IsCustomIon || (nodePep.HasExplicitMods && !nodePep.HasVariableMods))
        {
            yield return nodePep;
            continue;
        }
        if (expandedPeptides.Contains(nodePep.Peptide))
        {
            continue;
        }

        // The peptide will be returned as the Id of the unmodified instance of this
        // peptide. If the peptide DocNode is explicitly modified this will cause
        // two nodes in the tree to have the same Id. So, use a copy instead.
        var peptide = nodePep.Peptide;
        if (nodePep.HasExplicitMods)
        {
            peptide = (Peptide)peptide.Copy();
        }

        bool anyCreated = false;
        foreach (PeptideDocNode created in peptide.CreateDocNodes(settings, filter))
        {
            yield return created;
            anyCreated = true;
        }
        // Make sure the peptide is not removed due to filtering
        if (!anyCreated)
        {
            yield return nodePep;
        }
        expandedPeptides.Add(nodePep.Peptide);
    }
}
/// <summary>
/// Builds a peptide node for a modified sequence string. The current settings are
/// tried first; when they yield nothing, an unmodified node is created and passed to
/// <c>CreateDocNodeFromMatches</c> together with the modifications enumerated from
/// the sequence.
/// </summary>
/// <param name="seq">Modified sequence string.</param>
/// <param name="fastaSequence">Protein the peptide comes from, or null for a free-standing peptide.</param>
/// <returns>The node from the settings when available, otherwise the result of <c>CreateDocNodeFromMatches</c>.</returns>
public PeptideDocNode GetModifiedNode(string seq, FastaSequence fastaSequence)
{
    string seqUnmod = FastaSequence.StripModifications(seq);
    bool hasProtein = fastaSequence != null;

    Peptide peptide;
    if (hasProtein)
        peptide = fastaSequence.CreateFullPeptideDocNode(Settings, seqUnmod).Peptide;
    else
        peptide = new Peptide(null, seqUnmod, null, null, Settings.PeptideSettings.Enzyme.CountCleavagePoints(seqUnmod));

    // First, try to create the peptide using the current settings; only when that
    // fails is the unmodified node created and matched against the sequence.
    TransitionGroupDocNode matchedGroup;
    return CreateDocNodeFromSettings(seq, peptide, SrmSettingsDiff.ALL, out matchedGroup)
        ?? CreateDocNodeFromMatches(
            hasProtein
                ? fastaSequence.CreateFullPeptideDocNode(Settings, seqUnmod)
                : new PeptideDocNode(peptide),
            EnumerateSequenceInfos(seq, false));
}
/// <summary>
/// Feeds one imported transition into the builder. The transition is either appended
/// to the active peptide/precursor state, or the active state is completed first and a
/// new one is started. Problems are recorded in <c>_peptideGroupErrorInfo</c> and the
/// transition is then dropped (the method returns without adding it).
/// </summary>
/// <param name="info">Transition information, including peptide sequence, precursor m/z,
/// potential variable modifications and transition explanations.</param>
/// <param name="irt">iRT value given for the transition, or null.</param>
/// <param name="libraryIntensity">Library intensity for the product ion, or null when none is given.</param>
/// <param name="productMz">Product m/z stored together with the library intensity.</param>
/// <param name="lineNum">Line number passed to any recorded import error.</param>
/// <exception cref="InvalidDataException">
/// Thrown when an active FASTA sequence exists and the peptide sequence is not found in it.
/// </exception>
public void AppendTransition(ExTransitionInfo info, double? irt, double? libraryIntensity, double productMz, long lineNum)
{
    _autoManageChildren = false;
    // Treat this like a peptide list from now on.
    PeptideList = true;

    // Lazily create the FASTA sequence from the accumulated amino acids, if there are any.
    if (_activeFastaSeq == null && AA.Length > 0)
        _activeFastaSeq = new FastaSequence(Name, Description, Alternatives, AA);

    string sequence = info.PeptideSequence;
    if (_activePeptide != null)
    {
        if (IsPeptideChanged(info))
        {
            CompletePeptide(true);
        }
        else
        {
            // Narrow the candidate variable modification sets to those that also
            // explain the new transition.
            var intersectVariableMods = new List<ExplicitMods>(_activeVariableMods.Intersect(
                info.PotentialVarMods));

            // If unable to explain the next transition with the existing peptide, but the
            // transition has the same precursor m/z as the last, try completing the existing
            // peptide, and see if the current precursor can be completed as a new peptide
            if (intersectVariableMods.Count == 0 && _activePrecursorMz == info.PrecursorMz)
            {
                CompletePeptide(false);
                intersectVariableMods = new List<ExplicitMods>(info.PotentialVarMods);
                foreach (var infoActive in _activeTransitionInfos)
                {
                    intersectVariableMods = new List<ExplicitMods>(intersectVariableMods.Intersect(
                        infoActive.PotentialVarMods));
                }
            }

            if (intersectVariableMods.Count > 0)
            {
                _activeVariableMods = intersectVariableMods;
            }
            else if (_activePrecursorMz == info.PrecursorMz)
            {
                // Same precursor m/z but no common modification set: record an error
                // and skip this transition.
                var precursorMz = Math.Round(info.PrecursorMz, MassListImporter.MZ_ROUND_DIGITS);
                var errorInfo = new TransitionImportErrorInfo(string.Format(Resources.PeptideGroupBuilder_AppendTransition_Failed_to_explain_all_transitions_for_0__m_z__1__with_a_single_set_of_modifications, info.PeptideSequence, precursorMz), null, lineNum);
                _peptideGroupErrorInfo.Add(errorInfo);
                return;
            }
            else
            {
                CompletePeptide(true);
            }
        }
    }
    // NOTE(review): presumably CompletePeptide clears _activePeptide so that this
    // branch starts a new peptide - confirm against CompletePeptide.
    if (_activePeptide == null)
    {
        int? begin = null;
        int? end = null;
        if (_activeFastaSeq != null)
        {
            begin = _activeFastaSeq.Sequence.IndexOf(sequence, StringComparison.Ordinal);
            if (begin == -1)
            {
                // CONSIDER: Use fasta sequence format code currently in SrmDocument to show formatted sequence.
                throw new InvalidDataException(string.Format(Resources.PeptideGroupBuilder_AppendTransition_The_peptide__0__was_not_found_in_the_sequence__1__, sequence, _activeFastaSeq.Name));
            }
            end = begin + sequence.Length;
        }
        // The decoy flag is taken from the first transition explanation.
        _activePeptide = new Peptide(_activeFastaSeq, sequence, begin, end, _enzyme.CountCleavagePoints(sequence), info.TransitionExps[0].IsDecoy);
        _activeModifiedSequence = info.ModifiedSequence;
        _activePrecursorMz = info.PrecursorMz;
        _activeVariableMods = new List<ExplicitMods>(info.PotentialVarMods.Distinct());
        _activePrecursorExps = new List<PrecursorExp>(info.TransitionExps.Select(exp => exp.Precursor));
    }
    // Narrow the candidate precursor explanations to those shared with the new transition.
    var intersectPrecursors = new List<PrecursorExp>(_activePrecursorExps.Intersect(
        info.TransitionExps.Select(exp => exp.Precursor)));
    if (intersectPrecursors.Count > 0)
    {
        _activePrecursorExps = intersectPrecursors;
    }
    else if (_activePrecursorMz == info.PrecursorMz)
    {
        // Same precursor m/z but no common precursor explanation: record an error
        // and skip this transition.
        var precursorMz = Math.Round(_activePrecursorMz, MassListImporter.MZ_ROUND_DIGITS);
        var errorInfo = new TransitionImportErrorInfo(string.Format(Resources.PeptideGroupBuilder_AppendTransition_Failed_to_explain_all_transitions_for_m_z__0___peptide__1___with_a_single_precursor, precursorMz, info.PeptideSequence), null, lineNum);
        _peptideGroupErrorInfo.Add(errorInfo);
        return;
    }
    else
    {
        CompleteTransitionGroup();
    }
    // An iRT value is already stored: the new transition must carry an iRT value
    // within IRT_MIN_DIFF of it, otherwise an error is recorded and the transition skipped.
    if (_irtValue.HasValue && (irt == null || Math.Abs(_irtValue.Value - irt.Value) > DbIrtPeptide.IRT_MIN_DIFF))
    {
        var precursorMz = Math.Round(info.PrecursorMz, MassListImporter.MZ_ROUND_DIGITS);
        var errorInfo = new TransitionImportErrorInfo(string.Format(Resources.PeptideGroupBuilder_FinalizeTransitionGroups_Two_transitions_of_the_same_precursor___0___m_z__1_____have_different_iRT_values___2__and__3___iRT_values_must_be_assigned_consistently_in_an_imported_transition_list_, info.PeptideSequence, precursorMz, _irtValue, irt), null, lineNum);
        _peptideGroupErrorInfo.Add(errorInfo);
        return;
    }
    // NOTE(review): a precursor m/z of 0 appears to mean "no active transition group",
    // presumably reset by CompleteTransitionGroup - confirm.
    if (_activePrecursorMz == 0)
    {
        _activePrecursorMz = info.PrecursorMz;
        _activePrecursorExps = new List<PrecursorExp>(info.TransitionExps.Select(exp => exp.Precursor));
    }
    _activeTransitionInfos.Add(info);

    if (libraryIntensity != null)
    {
        _activeLibraryIntensities.Add(new SpectrumPeaksInfo.MI { Intensity = (float)libraryIntensity.Value, Mz = productMz });
    }
    // Remember the iRT value so the next transition can be checked against it.
    _irtValue = irt;
}
/// <summary>
/// Creates a builder for one peptide group. When a FASTA sequence is supplied, its
/// name, description and alternatives are copied onto the builder.
/// </summary>
/// <param name="fastaSequence">Protein sequence to start from, or null.</param>
/// <param name="settings">Settings the builder works with; the enzyme is read from its peptide settings.</param>
public PeptideGroupBuilder(FastaSequence fastaSequence, SrmSettings settings)
{
    _activeFastaSeq = fastaSequence;
    _autoManageChildren = true;
    if (fastaSequence != null)
    {
        string proteinName = fastaSequence.Name;
        Name = proteinName;
        BaseName = proteinName;
        Description = fastaSequence.Description;
        Alternatives = fastaSequence.Alternatives.ToArray();
    }

    _settings = settings;
    _enzyme = settings.PeptideSettings.Enzyme;

    // Results accumulated for the whole group
    _peptides = new List<PeptideDocNode>();
    _charges = new Dictionary<int, int>();
    _groupLibTriples = new List<TransitionGroupLibraryIrtTriple>();
    _irtPeptides = new List<MeasuredRetentionTime>();
    _librarySpectra = new List<SpectrumMzInfo>();
    _peptideGroupErrorInfo = new List<TransitionImportErrorInfo>();

    // State of the peptide/precursor currently being assembled
    _activeTransitionInfos = new List<ExTransitionInfo>();
    _activeLibraryIntensities = new List<SpectrumPeaksInfo.MI>();
    _activeModifiedSequence = null;
}
/// <summary>
/// Value equality: two instances are equal when name, description, sequence,
/// alternatives (compared with <c>ArrayUtil.EqualsDeep</c>) and decoy flag all agree.
/// </summary>
/// <param name="obj">Instance to compare with; null is never equal.</param>
/// <returns>True when <paramref name="obj"/> is this instance or has equal members.</returns>
public bool Equals(FastaSequence obj)
{
    // Null never matches; the same reference always does; otherwise compare members.
    return !ReferenceEquals(null, obj) &&
           (ReferenceEquals(this, obj) ||
            (Equals(obj._name, _name) &&
             Equals(obj._description, _description) &&
             Equals(obj._sequence, _sequence) &&
             ArrayUtil.EqualsDeep(obj.Alternatives, Alternatives) &&
             obj.IsDecoy == IsDecoy));
}