/// <summary>
        /// Draws one circle on <c>SequenceDrawingCanvas</c> for every modification found in
        /// <paramref name="fullSequence"/>.
        /// </summary>
        /// <param name="fullSequence">Full (modified) sequence, parsed with <c>GlobalVariables.AllModsKnownDictionary</c>.</param>
        /// <param name="yLoc">Vertical position of the annotated sequence; circles are drawn 2 units below it.</param>
        /// <remarks>
        /// Modifications of type "O-Glycosylation" are drawn in <c>Brushes.Gray</c> unless a localized glycan
        /// is reported at the same position, in which case (like every other modification) they use
        /// <c>MetaDrawSettings.ModificationAnnotationColor</c>.
        /// </remarks>
        protected void AnnotateModifications(string fullSequence, int yLoc)
        {
            var peptide = new PeptideWithSetModifications(fullSequence, GlobalVariables.AllModsKnownDictionary);

            // Localized glycans are only available when the match carries a localization level.
            List<Tuple<int, string, double>> localGlycans = null;

            if (SpectrumMatch.GlycanLocalizationLevel != null)
            {
                localGlycans = PsmFromTsv.ReadLocalizedGlycan(SpectrumMatch.LocalizedGlycan);
            }

            // The vertical position does not depend on the modification, so compute it once.
            double yLocation = yLoc + 2;

            foreach (var mod in peptide.AllModsOneIsNterminus)
            {
                double xLocation = (mod.Key - 1) * MetaDrawSettings.AnnotatedSequenceTextSpacing - 12;

                // Without any localization data (localGlycans == null) an O-glycan cannot be
                // confirmed as localized, so it is drawn gray instead of dereferencing null.
                // Item1 + 1 is compared against the modification's key, as in the original lookup.
                bool isUnlocalizedGlycan =
                    mod.Value.ModificationType == "O-Glycosylation" &&
                    (localGlycans == null || !localGlycans.Any(p => p.Item1 + 1 == mod.Key));

                if (isUnlocalizedGlycan)
                {
                    DrawCircle(SequenceDrawingCanvas, new Point(xLocation, yLocation), Brushes.Gray);
                }
                else
                {
                    DrawCircle(SequenceDrawingCanvas, new Point(xLocation, yLocation), MetaDrawSettings.ModificationAnnotationColor);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Checks for an intersection between a peptide and applied variant that shows a sequence change.
        /// </summary>
        /// <param name="pep">Peptide whose one-based start/end residues in the protein are compared with the variation.</param>
        /// <param name="appliedVariation">Variation providing the one-based begin/end positions and the original and variant sequences.</param>
        /// <param name="checkUnique">
        /// When false, any positional overlap is reported as an intersection. When true, the overlap must
        /// additionally show a difference between the original and the variant sequence.
        /// </param>
        /// <returns>True when the peptide intersects the variation under the rules selected by <paramref name="checkUnique"/>.</returns>
        private static bool IntersectsWithVariation(PeptideWithSetModifications pep, SequenceVariation appliedVariation, bool checkUnique)
        {
            // Overlapping residue range (one-based, inclusive) of peptide and variation.
            int overlapStart = Math.Max(pep.OneBasedStartResidueInProtein, appliedVariation.OneBasedBeginPosition);
            int overlapEnd = Math.Min(pep.OneBasedEndResidueInProtein, appliedVariation.OneBasedEndPosition);

            if (overlapEnd < overlapStart)
            {
                return false;
            }

            if (!checkUnique)
            {
                return true;
            }

            int overlapLength = overlapEnd - overlapStart + 1;
            int offsetInVariation = overlapStart - appliedVariation.OneBasedBeginPosition;
            string originalSequence = appliedVariation.OriginalSequence;

            // If the original sequence is too short or too long, the overlap of peptide and variant is unique.
            bool originalIsTooShort = originalSequence.Length - offsetInVariation < overlapLength;
            bool originalIsTooLong = originalSequence.Length > overlapLength && pep.OneBasedEndResidueInProtein > overlapEnd;
            if (originalIsTooShort || originalIsTooLong)
            {
                return true;
            }

            // Otherwise compare the overlapping stretch of the original and the variant sequence.
            string originalAtOverlap = originalSequence.Substring(offsetInVariation, overlapLength);
            string variantAtOverlap = appliedVariation.VariantSequence.Substring(offsetInVariation, overlapLength);
            return originalAtOverlap != variantAtOverlap;
        }
Example #3
0
 /// <summary>
 /// Creates a peptide that takes its modifications from one peptide and every other property from another.
 /// </summary>
 /// <param name="modsFromThisOne">Source of the modification dictionary and the fixed-modification count.</param>
 /// <param name="everythingElseFromThisOne">Source of protein, start/end residue, missed cleavages and description.</param>
 public PeptideWithSetModifications(PeptideWithSetModifications modsFromThisOne, PeptideWithSetModifications everythingElseFromThisOne)
     : base(
         everythingElseFromThisOne.Protein,
         everythingElseFromThisOne.OneBasedStartResidueInProtein,
         everythingElseFromThisOne.OneBasedEndResidueInProtein,
         everythingElseFromThisOne.MissedCleavages,
         everythingElseFromThisOne.PeptideDescription)
 {
     // The dictionary reference is assigned as-is (no copy is made here).
     allModsOneIsNterminus = modsFromThisOne.allModsOneIsNterminus;
     numFixedMods = modsFromThisOne.numFixedMods;
 }
Example #4
0
        /// <summary>
        /// Makes the string representing a detected sequence variation, including any modifications on a variant amino acid
        /// </summary>
        /// <param name="p">Peptide whose modifications are filtered down to those on the variation.</param>
        /// <param name="applied">Variation whose original sequence, begin position and variant residues form the string.</param>
        /// <returns>Original sequence, one-based begin position and the full sequence of the variant region, concatenated.</returns>
        private static string SequenceVariantString(PeptideWithSetModifications p, SequenceVariation applied)
        {
            var modsOnVariantOneIsNTerm = p.AllModsOneIsNterminus
                .Where(kv =>
                {
                    // Key 1 is kept only when the variation itself begins at position 1.
                    bool isFirstKeyOnVariantAtStart = kv.Key == 1 && applied.OneBasedBeginPosition == 1;

                    // Key shifted by the peptide's start residue, compared against the variation's range.
                    int positionInProtein = kv.Key - 2 + p.OneBasedStartResidueInProtein;
                    bool liesWithinVariation =
                        applied.OneBasedBeginPosition <= positionInProtein &&
                        positionInProtein <= applied.OneBasedEndPosition;

                    return isFirstKeyOnVariantAtStart || liesWithinVariation;
                })
                .ToDictionary(kv => kv.Key - applied.OneBasedBeginPosition + 1, kv => kv.Value);

            var variantWithAnyMods = new PeptideWithSetModifications(
                p.Protein,
                p.DigestionParams,
                applied.OneBasedBeginPosition,
                applied.OneBasedEndPosition,
                p.CleavageSpecificityForFdrCategory,
                p.PeptideDescription,
                p.MissedCleavages,
                modsOnVariantOneIsNTerm,
                p.NumFixedMods);

            return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{variantWithAnyMods.FullSequence}";
        }
Example #5
0
        /// <summary>
        /// Clones <paramref name="psm"/> with each best-matching peptide replaced by the result of
        /// <c>CreateSilacPwsm</c> for the given label; notches are kept unchanged.
        /// </summary>
        /// <param name="psm">Match whose best-matching peptides are converted.</param>
        /// <param name="silacLabel">Label passed to <c>CreateSilacPwsm</c> for every peptide.</param>
        /// <returns>The clone produced by <c>psm.Clone</c> with the converted peptide list.</returns>
        public static PeptideSpectralMatch GetSilacPsm(PeptideSpectralMatch psm, SilacLabel silacLabel)
        {
            var convertedPeptides = new List<(int Notch, PeptideWithSetModifications Peptide)>();

            foreach ((int notch, PeptideWithSetModifications peptide) in psm.BestMatchingPeptides)
            {
                convertedPeptides.Add((notch, CreateSilacPwsm(silacLabel, peptide)));
            }

            return psm.Clone(convertedPeptides);
        }
Example #6
0
 /// <summary>
 /// Builds a compact peptide holding the fragment masses requested by <paramref name="terminusType"/>.
 /// </summary>
 /// <param name="peptideWithSetModifications">Peptide whose fragment masses and monoisotopic mass are stored.</param>
 /// <param name="terminusType">
 /// <c>None</c> fills both mass arrays, <c>N</c> only the N-terminal one and <c>C</c> only the C-terminal one;
 /// an array that is not requested is left null.
 /// </param>
 public CompactPeptide(PeptideWithSetModifications peptideWithSetModifications, TerminusType terminusType)
 {
     bool wantsNTerminal = terminusType == TerminusType.None || terminusType == TerminusType.N;
     bool wantsCTerminal = terminusType == TerminusType.None || terminusType == TerminusType.C;

     // Walk forward starting at index 0.
     NTerminalMasses = wantsNTerminal
         ? ComputeFollowingFragmentMasses(peptideWithSetModifications, 0, 0, 1).ToArray()
         : null;

     // Walk backward starting at index Length + 1.
     CTerminalMasses = wantsCTerminal
         ? ComputeFollowingFragmentMasses(peptideWithSetModifications, 0, peptideWithSetModifications.Length + 1, -1).ToArray()
         : null;

     MonoisotopicMassIncludingFixedMods = peptideWithSetModifications.MonoisotopicMass;
 }
Example #7
0
        /// <summary>
        /// Returns a new peptide in which the entry at key <c>j + 2</c> of the modification dictionary
        /// carries <paramref name="massToLocalize"/> plus the mass of any modification already stored there.
        /// </summary>
        /// <param name="j">Index used to derive the modification key (<c>j + 2</c>).</param>
        /// <param name="massToLocalize">Mass to place at that key.</param>
        /// <returns>A new peptide built from a copy of this peptide's modifications; this instance is not changed.</returns>
        public PeptideWithSetModifications Localize(int j, double massToLocalize)
        {
            int modKey = j + 2;
            var modsCopy = new Dictionary<int, ModificationWithMass>(allModsOneIsNterminus);

            // Fold the mass of a modification already at this key into the new one.
            double combinedMass = massToLocalize;
            if (modsCopy.TryGetValue(modKey, out ModificationWithMass existingMod))
            {
                combinedMass = massToLocalize + existingMod.monoisotopicMass;
                modsCopy.Remove(modKey);
            }

            modsCopy.Add(modKey, new ModificationWithMass(null, null, null, TerminusLocalization.Any, combinedMass));

            return new PeptideWithSetModifications(numFixedMods, Protein, OneBasedStartResidueInProtein, OneBasedEndResidueInProtein, modsCopy, MissedCleavages);
        }
Example #8
0
        /// <summary>
        /// Lazily yields cumulative fragment masses while walking along <paramref name="yyy"/> in steps of
        /// <paramref name="direction"/>, starting at <paramref name="oneBasedIndexToLookAt"/>.
        /// Every yielded value is rounded with <c>digitsForRoundingMasses</c>.
        /// </summary>
        /// <param name="yyy">Peptide providing residues (via its indexer), its length and its modification dictionary.</param>
        /// <param name="prevMass">Mass accumulated before the starting index.</param>
        /// <param name="oneBasedIndexToLookAt">
        /// Index to start from. Indices 0 and <c>Length + 1</c> contribute no residue mass and never yield a value.
        /// </param>
        /// <param name="direction">Step added to the index each iteration; the loop conditions only handle 1 and -1.</param>
        /// <returns>
        /// One mass per visited residue index, plus, where a modification does not take the single-neutral-loss
        /// shortcut, one recursive continuation per neutral loss of that modification.
        /// </returns>
        protected static IEnumerable <double> ComputeFollowingFragmentMasses(PeptideWithSetModifications yyy, double prevMass, int oneBasedIndexToLookAt, int direction)
        {
            ModificationWithMass currentModification = null;

            do
            {
                // Only real residue positions add a residue mass (the indexer is zero-based).
                if (oneBasedIndexToLookAt != 0 && oneBasedIndexToLookAt != yyy.Length + 1)
                {
                    prevMass += Residue.ResidueMonoisotopicMass[yyy[oneBasedIndexToLookAt - 1]];
                }

                // If modification exists
                // (the dictionary key is the index shifted by one)
                if (yyy.allModsOneIsNterminus.TryGetValue(oneBasedIndexToLookAt + 1, out currentModification))
                {
                    // Exactly one neutral loss on a residue position: fold it into the running mass and keep iterating.
                    if (currentModification.neutralLosses.Count == 1 && oneBasedIndexToLookAt != 0 && oneBasedIndexToLookAt != yyy.Length + 1)
                    {
                        prevMass += currentModification.monoisotopicMass - currentModification.neutralLosses.First();
                        yield return(Math.Round(prevMass, digitsForRoundingMasses));
                    }
                    else
                    {
                        // Otherwise branch: each neutral loss gets its own mass and its own continuation.
                        foreach (double nl in currentModification.neutralLosses)
                        {
                            var theMass = prevMass + currentModification.monoisotopicMass - nl;
                            if (oneBasedIndexToLookAt != 0 && oneBasedIndexToLookAt != yyy.Length + 1)
                            {
                                yield return(Math.Round(theMass, digitsForRoundingMasses));
                            }
                            // Recurse only if the next index is still inside the range this direction covers.
                            if ((direction == 1 && oneBasedIndexToLookAt + direction < yyy.Length) ||
                                (direction == -1 && oneBasedIndexToLookAt + direction > 1))
                            {
                                foreach (var nextMass in ComputeFollowingFragmentMasses(yyy, theMass, oneBasedIndexToLookAt + direction, direction))
                                {
                                    yield return(Math.Round(nextMass, digitsForRoundingMasses));
                                }
                            }
                        }
                        // The recursive calls already produced everything past this index, so stop this loop.
                        break;
                    }
                }
                else if (oneBasedIndexToLookAt != 0 && oneBasedIndexToLookAt != yyy.Length + 1) // No modification exists
                {
                    yield return(Math.Round(prevMass, digitsForRoundingMasses));
                }
                oneBasedIndexToLookAt += direction;
                // Continue while the index is above 1 (backward walk) or below Length (forward walk).
            } while ((oneBasedIndexToLookAt > 1 && direction == -1) || (oneBasedIndexToLookAt < yyy.Length && direction == 1));
        }
Example #9
0
        /// <summary>
        /// Reports whether the peptide identifies at least one of the sequence variations applied to its protein.
        /// </summary>
        /// <param name="pwsm">Peptide tested against <c>pwsm.Protein.AppliedSequenceVariations</c>.</param>
        /// <returns>
        /// True as soon as <c>IntersectsAndIdentifiesVariation</c> reports <c>identifies</c> for one variation;
        /// false when there are no applied variations or none is identified.
        /// </returns>
        private static bool PeptideIsVariant(PeptideWithSetModifications pwsm)
        {
            // Any() stops at the first identifying variation and yields false for an empty sequence.
            return pwsm.Protein.AppliedSequenceVariations
                .Any(variation => pwsm.IntersectsAndIdentifiesVariation(variation).identifies);
        }
Example #10
0
        /// <summary>
        /// Modifies the proteins to appear only light (a protein sequence should look like PROTEINK instead of PROTEINa).
        /// </summary>
        /// <param name="originalPsms">Matches whose best-matching peptides are rebuilt.</param>
        /// <param name="labels">Labels searched with <c>GetRelevantLabelFromBaseSequence</c> for each peptide's base sequence.</param>
        /// <returns>A new list holding one clone per input match, in the same order.</returns>
        public static List<PeptideSpectralMatch> UpdateProteinSequencesToLight(List<PeptideSpectralMatch> originalPsms, List<SilacLabel> labels)
        {
            var lightPsms = new List<PeptideSpectralMatch>();

            foreach (PeptideSpectralMatch psm in originalPsms)
            {
                var lightPeptides = new List<(int Notch, PeptideWithSetModifications Peptide)>();

                // Iterate over a snapshot of the best-matching peptides.
                foreach ((int notch, PeptideWithSetModifications pwsm) in psm.BestMatchingPeptides.ToList())
                {
                    SilacLabel label = GetRelevantLabelFromBaseSequence(pwsm.BaseSequence, labels);
                    Protein lightProtein = pwsm.Protein;

                    if (label != null)
                    {
                        // Swap the label (and any additional labels) for the original amino acid.
                        string lightSequence = lightProtein.BaseSequence.Replace(label.AminoAcidLabel, label.OriginalAminoAcid);
                        if (label.AdditionalLabels != null)
                        {
                            foreach (SilacLabel extraLabel in label.AdditionalLabels)
                            {
                                lightSequence = lightSequence.Replace(extraLabel.AminoAcidLabel, extraLabel.OriginalAminoAcid);
                            }
                        }

                        lightProtein = new Protein(pwsm.Protein, lightSequence);
                    }

                    // Same peptide in every respect except for the (possibly replaced) protein.
                    var lightPwsm = new PeptideWithSetModifications(
                        lightProtein,
                        pwsm.DigestionParams,
                        pwsm.OneBasedStartResidueInProtein,
                        pwsm.OneBasedEndResidueInProtein,
                        pwsm.CleavageSpecificityForFdrCategory,
                        pwsm.PeptideDescription,
                        pwsm.MissedCleavages,
                        pwsm.AllModsOneIsNterminus,
                        pwsm.NumFixedMods,
                        pwsm.BaseSequence);

                    lightPeptides.Add((notch, lightPwsm));
                }

                lightPsms.Add(psm.Clone(lightPeptides));
            }

            return lightPsms;
        }
Example #11
0
        /// <summary>
        /// Clones <paramref name="psm"/> so that its only peptide is a copy of <paramref name="pwsm"/>
        /// carrying <paramref name="labeledBaseSequence"/> as base sequence.
        /// </summary>
        /// <param name="psm">Match to clone.</param>
        /// <param name="notch">Notch stored alongside the new peptide.</param>
        /// <param name="pwsm">Peptide whose other properties are copied unchanged.</param>
        /// <param name="labeledBaseSequence">Base sequence given to the new peptide.</param>
        /// <returns>The clone produced by <c>psm.Clone</c> with a single-entry peptide list.</returns>
        public static PeptideSpectralMatch GetLabeledPsm(PeptideSpectralMatch psm, int notch, PeptideWithSetModifications pwsm, string labeledBaseSequence)
        {
            var labeledPeptide = new PeptideWithSetModifications(
                pwsm.Protein,
                pwsm.DigestionParams,
                pwsm.OneBasedStartResidueInProtein,
                pwsm.OneBasedEndResidueInProtein,
                pwsm.CleavageSpecificityForFdrCategory,
                pwsm.PeptideDescription,
                pwsm.MissedCleavages,
                pwsm.AllModsOneIsNterminus,
                pwsm.NumFixedMods,
                labeledBaseSequence);

            var singlePeptide = new List<(int Notch, PeptideWithSetModifications Peptide)>();
            singlePeptide.Add((notch, labeledPeptide));

            return psm.Clone(singlePeptide);
        }
        /// <summary>
        /// Needed for parsimony, where there are ambiguous psms. Quantification ignores ambiguity.
        /// </summary>
        /// <param name="psm">Match whose best-matching peptides are processed one by one.</param>
        /// <param name="silacLabels">Labels searched with <c>GetRelevantLabelFromBaseSequence</c> using each peptide's protein base sequence.</param>
        /// <returns>
        /// A clone of <paramref name="psm"/> in which peptides with a relevant label are replaced by
        /// <c>CreateSilacPwsm(true, ...)</c> results and all other peptides are kept as they are.
        /// </returns>
        public static PeptideSpectralMatch GetSilacPsmFromAmbiguousPsm(PeptideSpectralMatch psm, List<SilacLabel> silacLabels)
        {
            var resultingPeptides = new List<(int Notch, PeptideWithSetModifications Peptide)>();

            foreach ((int notch, PeptideWithSetModifications pwsm) in psm.BestMatchingPeptides)
            {
                SilacLabel relevantLabel = GetRelevantLabelFromBaseSequence(pwsm.Protein.BaseSequence, silacLabels);

                // No relevant label: keep the peptide; otherwise create the light pwsm.
                PeptideWithSetModifications peptideToKeep = relevantLabel == null
                    ? pwsm
                    : CreateSilacPwsm(true, relevantLabel, pwsm);

                resultingPeptides.Add((notch, peptideToKeep));
            }

            return psm.Clone(resultingPeptides);
        }
Example #13
0
        /// <summary>
        /// Creates a copy of <paramref name="pwsm"/> whose base sequence has every label amino acid
        /// replaced by its original amino acid (the light sequence).
        /// </summary>
        /// <param name="silacLabel">Label to replace; its <c>AdditionalLabels</c>, when present, are replaced as well.</param>
        /// <param name="pwsm">Peptide to copy; only the base sequence differs in the result.</param>
        /// <returns>A new peptide with the light base sequence.</returns>
        public static PeptideWithSetModifications CreateSilacPwsm(SilacLabel silacLabel, PeptideWithSetModifications pwsm)
        {
            // Replace the primary label first, then each additional label in order.
            string lightSequence = pwsm.BaseSequence.Replace(silacLabel.AminoAcidLabel, silacLabel.OriginalAminoAcid);

            if (silacLabel.AdditionalLabels != null)
            {
                foreach (SilacLabel extraLabel in silacLabel.AdditionalLabels)
                {
                    lightSequence = lightSequence.Replace(extraLabel.AminoAcidLabel, extraLabel.OriginalAminoAcid);
                }
            }

            // Every argument except the final base sequence is copied from the source peptide.
            var lightPwsm = new PeptideWithSetModifications(
                pwsm.Protein,
                pwsm.DigestionParams,
                pwsm.OneBasedStartResidueInProtein,
                pwsm.OneBasedEndResidueInProtein,
                pwsm.CleavageSpecificityForFdrCategory,
                pwsm.PeptideDescription,
                pwsm.MissedCleavages,
                pwsm.AllModsOneIsNterminus,
                pwsm.NumFixedMods,
                lightSequence);

            return lightPwsm;
        }
Example #14
0
 /// <summary>
 /// Creates a peptide covering residues <paramref name="proteinOneBasedStart"/> to
 /// <paramref name="proteinOneBasedEnd"/> of the protein of <paramref name="modsFromThisOne"/>,
 /// keeping only the modifications whose keys fall inside that range and re-keying them to the new start.
 /// </summary>
 /// <param name="modsFromThisOne">Peptide providing the protein, description and modifications.</param>
 /// <param name="proteinOneBasedStart">One-based start residue of the new peptide.</param>
 /// <param name="proteinOneBasedEnd">One-based end residue of the new peptide.</param>
 public PeptideWithSetModifications(PeptideWithSetModifications modsFromThisOne, int proteinOneBasedStart, int proteinOneBasedEnd)
     // NOTE(review): the fourth base argument is (end - start); confirm this is the intended missed-cleavage value.
     : base(
         modsFromThisOne.Protein,
         proteinOneBasedStart,
         proteinOneBasedEnd,
         proteinOneBasedEnd - proteinOneBasedStart,
         modsFromThisOne.PeptideDescription)
 {
     int sourceStart = modsFromThisOne.OneBasedStartResidueInProtein;

     // Key window (exclusive lower bound, inclusive upper bound) in the source peptide's numbering.
     int lowerKeyExclusive = 1 + proteinOneBasedStart - sourceStart;
     int upperKeyInclusive = 2 + proteinOneBasedEnd - sourceStart;

     allModsOneIsNterminus = modsFromThisOne.allModsOneIsNterminus
         .Where(b => b.Key > lowerKeyExclusive && b.Key <= upperKeyInclusive)
         .ToDictionary(b => b.Key + sourceStart - proteinOneBasedStart, b => b.Value);
 }
        /// <summary>
        /// TODO: Summarize parsimony;
        /// Parsimony algorithm based on: https://www.ncbi.nlm.nih.gov/pubmed/14632076 Anal Chem. 2003 Sep 1;75(17):4646-58.
        /// TODO: Note describing that peptide objects with the same sequence are associated with different proteins
        /// </summary>
        private List <ProteinGroup> RunProteinParsimonyEngine()
        {
            // parsimonious list of proteins built by this protein parsimony engine
            HashSet <Protein> parsimoniousProteinList = new HashSet <Protein>();

            // list of peptides that can only be digestion products of one protein in the proteome (considering different protease digestion rules)
            HashSet <PeptideWithSetModifications> uniquePeptides = new HashSet <PeptideWithSetModifications>();

            // if there are no peptides observed, there are no proteins; return an empty list of protein groups
            if (_fdrFilteredPeptides.Count == 0)
            {
                return(new List <ProteinGroup>());
            }

            // Parsimony stage 0: create peptide-protein associations if needed because the user wants a modification-agnostic parsimony
            if (!_treatModPeptidesAsDifferentPeptides)
            {
                foreach (var protease in _fdrFilteredPsms.GroupBy(p => p.DigestionParams.Protease))
                {
                    Dictionary <string, List <PeptideSpectralMatch> > sequenceWithPsms = new Dictionary <string, List <PeptideSpectralMatch> >();

                    // for each protease, match the base sequence of each peptide to its PSMs
                    foreach (PeptideSpectralMatch psm in protease)
                    {
                        if (sequenceWithPsms.TryGetValue(psm.BaseSequence, out List <PeptideSpectralMatch> peptidesForThisBaseSequence))
                        {
                            peptidesForThisBaseSequence.Add(psm);
                        }
                        else
                        {
                            sequenceWithPsms[psm.BaseSequence] = new List <PeptideSpectralMatch> {
                                psm
                            };
                        }
                    }

                    // create new peptide-protein associations
                    foreach (var baseSequence in sequenceWithPsms)
                    {
                        var peptidesWithNotchInfo = baseSequence.Value.SelectMany(p => p.BestMatchingPeptides).Distinct().ToList();

                        // if the base seq has >1 PeptideWithSetMods object and has >0 mods, it might need to be matched to new proteins
                        if (peptidesWithNotchInfo.Count > 1 && peptidesWithNotchInfo.Any(p => p.Peptide.NumMods > 0))
                        {
                            // list of proteins along with start/end residue in protein and the # missed cleavages
                            // this is needed to create new PeptideWithSetModification objects
                            var peptideInProteinInfo = new List <Tuple <Protein, DigestionParams, int, int, int, int> >();
                            foreach (var peptide in peptidesWithNotchInfo)
                            {
                                peptideInProteinInfo.Add(new Tuple <Protein, DigestionParams, int, int, int, int>(peptide.Peptide.Protein, peptide.Peptide.DigestionParams,
                                                                                                                  peptide.Peptide.OneBasedStartResidueInProtein, peptide.Peptide.OneBasedEndResidueInProtein, peptide.Peptide.MissedCleavages, peptide.Notch));
                            }

                            // add the protein associations to the PSM
                            foreach (PeptideSpectralMatch psm in baseSequence.Value)
                            {
                                foreach (var proteinInfo in peptideInProteinInfo)
                                {
                                    var originalPep = psm.BestMatchingPeptides.First().Peptide;
                                    var pep         = new PeptideWithSetModifications(proteinInfo.Item1, proteinInfo.Item2, proteinInfo.Item3, proteinInfo.Item4,
                                                                                      originalPep.CleavageSpecificityForFdrCategory, originalPep.PeptideDescription, proteinInfo.Item5, originalPep.AllModsOneIsNterminus,
                                                                                      originalPep.NumFixedMods);
                                    _fdrFilteredPeptides.Add(pep);
                                    psm.AddProteinMatch((proteinInfo.Item6, pep));
                                }
                            }
                        }
                    }
                }
            }

            // Parsimony stage 1: add proteins with unique peptides (for each protease)
            var peptidesGroupedByProtease = _fdrFilteredPeptides.GroupBy(p => p.DigestionParams.Protease);

            foreach (var peptidesForThisProtease in peptidesGroupedByProtease)
            {
                Dictionary <string, List <Protein> > peptideSequenceToProteinsForThisProtease = new Dictionary <string, List <Protein> >();
                Dictionary <string, List <PeptideWithSetModifications> > sequenceToPwsm       = new Dictionary <string, List <PeptideWithSetModifications> >();

                foreach (PeptideWithSetModifications peptide in peptidesForThisProtease)
                {
                    string sequence = peptide.BaseSequence;
                    if (_treatModPeptidesAsDifferentPeptides)
                    {
                        //these and next set to full sequence but might be base sequence. treat modified as unique makes sense to use full
                        sequence = peptide.FullSequence;
                    }

                    if (peptideSequenceToProteinsForThisProtease.TryGetValue(sequence, out List <Protein> proteinsForThisPeptideSequence))
                    {
                        proteinsForThisPeptideSequence.Add(peptide.Protein);
                    }
                    else
                    {
                        peptideSequenceToProteinsForThisProtease.Add(sequence, new List <Protein> {
                            peptide.Protein
                        });
                    }

                    if (sequenceToPwsm.TryGetValue(sequence, out List <PeptideWithSetModifications> peptidesForThisSequence))
                    {
                        peptidesForThisSequence.Add(peptide);
                    }
                    else
                    {
                        sequenceToPwsm.Add(sequence, new List <PeptideWithSetModifications> {
                            peptide
                        });
                    }
                }

                foreach (var uniquePeptide in peptideSequenceToProteinsForThisProtease.Where(p => p.Value.Count == 1))
                {
                    // add the protein with the unique peptide to the parsimonious protein list
                    Protein proteinWithUniquePeptideSequence = uniquePeptide.Value.First();
                    parsimoniousProteinList.Add(proteinWithUniquePeptideSequence);

                    // add the unique peptide to the list of unique peptides
                    PeptideWithSetModifications uniquePwsm = sequenceToPwsm[uniquePeptide.Key].First();
                    uniquePeptides.Add(uniquePwsm);
                }
            }

            // Parsimony stage 2: build the peptide-protein matching structure for the parsimony greedy algorithm
            // and remove all peptides observed by proteins with unique peptides
            Dictionary <ParsimonySequence, List <Protein> > peptideSequenceToProteins = new Dictionary <ParsimonySequence, List <Protein> >();

            // this dictionary associates proteins w/ all peptide sequences (list will NOT shrink over time)
            // this is used in case of greedy algorithm ties to figure out which protein has more total peptides observed
            Dictionary <Protein, HashSet <ParsimonySequence> > proteinToPepSeqMatch = new Dictionary <Protein, HashSet <ParsimonySequence> >();

            foreach (var peptide in _fdrFilteredPeptides)
            {
                ParsimonySequence sequence = new ParsimonySequence(peptide, _treatModPeptidesAsDifferentPeptides);

                if (peptideSequenceToProteins.TryGetValue(sequence, out List <Protein> proteinsForThisPeptideSequence))
                {
                    proteinsForThisPeptideSequence.Add(peptide.Protein);
                }
                else
                {
                    peptideSequenceToProteins.Add(sequence, new List <Protein> {
                        peptide.Protein
                    });
                }

                if (proteinToPepSeqMatch.TryGetValue(peptide.Protein, out var peptideSequences))
                {
                    peptideSequences.Add(sequence);
                }
                else
                {
                    proteinToPepSeqMatch.Add(peptide.Protein, new HashSet <ParsimonySequence> {
                        sequence
                    });
                }
            }

            // remove the peptides observed by proteins with unique peptides
            HashSet <ParsimonySequence> toRemove = new HashSet <ParsimonySequence>();

            foreach (var seq in peptideSequenceToProteins)
            {
                bool observedAlready = seq.Value.Any(p => parsimoniousProteinList.Contains(p));

                if (observedAlready)
                {
                    toRemove.Add(seq.Key);
                }
            }
            foreach (var sequence in toRemove)
            {
                peptideSequenceToProteins.Remove(sequence);
            }

            if (peptideSequenceToProteins.Any())
            {
                // Parsimony stage 3: greedy algorithm

                // dictionary with proteins as keys and list of associated peptide sequences as the values.
                // this data structure makes parsimony easier because the algorithm can look up a protein's peptides
                // to remove them from the list of available peptides. this list will shrink as the algorithm progresses
                var algDictionary         = new Dictionary <Protein, HashSet <string> >();
                var algDictionaryProtease = new Dictionary <Protein, HashSet <ParsimonySequence> >();
                foreach (var kvp in peptideSequenceToProteins)
                {
                    foreach (var protein in kvp.Value)
                    {
                        if (algDictionaryProtease.TryGetValue(protein, out HashSet <ParsimonySequence> peptideSequencesWithProtease))
                        {
                            peptideSequencesWithProtease.Add(kvp.Key);
                        }
                        else
                        {
                            algDictionaryProtease.Add(protein, new HashSet <ParsimonySequence> {
                                kvp.Key
                            });
                        }

                        if (algDictionary.TryGetValue(protein, out HashSet <string> peptideSequences))
                        {
                            peptideSequences.Add(kvp.Key.Sequence);
                        }
                        else
                        {
                            algDictionary.Add(protein, new HashSet <string> {
                                kvp.Key.Sequence
                            });
                        }
                    }
                }

                // *** greedy algorithm loop
                int numNewSeqs = algDictionary.Max(p => p.Value.Count);
                while (numNewSeqs != 0)
                {
                    // gets list of proteins with the most unaccounted-for peptide sequences
                    var possibleBestProteinList = algDictionary.Where(p => p.Value.Count == numNewSeqs).ToList();

                    Protein bestProtein = possibleBestProteinList.First().Key;

                    // may need to select different protein in case of a greedy algorithm tie
                    // the protein with the most total peptide sequences wins in this case (doesn't matter if parsimony has grabbed them or not)
                    if (possibleBestProteinList.Count > 1)
                    {
                        int highestNumTotalPep = proteinToPepSeqMatch[bestProtein].Count;
                        foreach (var kvp in possibleBestProteinList)
                        {
                            if (proteinToPepSeqMatch[kvp.Key].Count > highestNumTotalPep)
                            {
                                highestNumTotalPep = proteinToPepSeqMatch[kvp.Key].Count;
                                bestProtein        = kvp.Key;
                            }
                        }
                    }

                    parsimoniousProteinList.Add(bestProtein);

                    // remove observed peptide seqs
                    List <ParsimonySequence> temp = algDictionaryProtease[bestProtein].ToList();
                    foreach (ParsimonySequence peptideSequence in temp)
                    {
                        List <Protein> proteinsWithThisPeptide = peptideSequenceToProteins[peptideSequence];

                        foreach (var protein in proteinsWithThisPeptide)
                        {
                            algDictionary[protein].Remove(peptideSequence.Sequence);
                            algDictionaryProtease[protein].Remove(peptideSequence);
                        }
                    }

                    algDictionary.Remove(bestProtein);
                    algDictionaryProtease.Remove(bestProtein);
                    numNewSeqs = algDictionary.Any() ? algDictionary.Max(p => p.Value.Count) : 0;
                }

                // *** done with greedy algorithm

                // Parsimony stage 4: add back indistinguishable proteins (proteins that have identical peptide sets as parsimonious proteins)
                var allProteinsGroupedByNumPeptides       = proteinToPepSeqMatch.GroupBy(p => p.Value.Count);
                var parsimonyProteinsGroupedByNumPeptides = parsimoniousProteinList.GroupBy(p => proteinToPepSeqMatch[p].Count);
                var indistinguishableProteins             = new ConcurrentBag <Protein>();

                foreach (var group in allProteinsGroupedByNumPeptides)
                {
                    var parsimonyProteinsWithSameNumPeptides = parsimonyProteinsGroupedByNumPeptides.FirstOrDefault(p => p.Key == group.Key);
                    var list = group.ToList();

                    if (parsimonyProteinsWithSameNumPeptides != null)
                    {
                        Parallel.ForEach(Partitioner.Create(0, list.Count),
                                         new ParallelOptions {
                            MaxDegreeOfParallelism = CommonParameters.MaxThreadsToUsePerFile
                        },
                                         (range, loopState) =>
                        {
                            for (int i = range.Item1; i < range.Item2; i++)
                            {
                                Protein otherProtein = list[i].Key;

                                foreach (var parsimonyProtein in parsimonyProteinsWithSameNumPeptides)
                                {
                                    // if the two proteins have the same set of peptide sequences, they're indistinguishable
                                    if (parsimonyProtein != otherProtein && proteinToPepSeqMatch[parsimonyProtein].SetEquals(proteinToPepSeqMatch[otherProtein]))
                                    {
                                        indistinguishableProteins.Add(otherProtein);
                                    }
                                }
                            }
                        }
                                         );
                    }
                }

                foreach (Protein protein in indistinguishableProteins)
                {
                    parsimoniousProteinList.Add(protein);
                }
            }

            // Parsimony stage 5: remove peptide objects that do not have proteins in the parsimonious list
            foreach (PeptideSpectralMatch psm in _allPsms)
            {
                // if this PSM has a protein in the parsimonious list, it removes the proteins NOT in the parsimonious list
                // otherwise, no proteins are removed (i.e., for PSMs that cannot be explained by a parsimonious protein,
                // no protein associations are removed)
                if (psm.BestMatchingPeptides.Any(p => parsimoniousProteinList.Contains(p.Peptide.Protein)))
                {
                    psm.TrimProteinMatches(parsimoniousProteinList);
                }
            }

            // construct protein groups
            List <ProteinGroup> proteinGroups = ConstructProteinGroups(uniquePeptides);

            // finished with parsimony
            return(proteinGroups);
        }
        /// <summary>
        /// Applies protein parsimony to <c>compactPeptideToProteinPeptideMatching</c> and returns the protein
        /// groups produced by <c>ConstructProteinGroups</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. If modified peptides are not treated as distinct, every peptide sharing a base sequence is
        ///    re-created at each protein position recorded for that base sequence.
        /// 2. Peptides that map to exactly one protein are recorded as that protein's unique peptides.
        /// 3. For peptides matching a decoy, non-decoy associations are removed; for peptides matching a
        ///    contaminant, non-contaminant associations are removed.
        /// 4. Proteins are picked one at a time: proteins with unique peptides first, then the protein that
        ///    explains the most not-yet-explained peptide sequences.
        /// 5. Proteins whose peptide set equals that of a picked protein are added back.
        /// 6. Associations to proteins that were not picked are removed.
        /// The sets stored in <c>compactPeptideToProteinPeptideMatching</c> are modified in place.
        /// </remarks>
        /// <returns>
        /// The constructed protein groups, or an empty list when there are no peptide-to-protein associations.
        /// </returns>
        private List <ProteinGroup> ApplyProteinParsimony()
        {
            if (!compactPeptideToProteinPeptideMatching.Values.Any())
            {
                return(new List <ProteinGroup>());
            }
            // digesting an XML database results in a non-mod-agnostic digestion; need to fix this if mod-agnostic parsimony enabled
            if (!treatModPeptidesAsDifferentPeptides)
            {
                // base sequence -> every peptide (on any protein) that has that base sequence
                Dictionary <string, HashSet <PeptideWithSetModifications> > baseSeqToProteinMatch = new Dictionary <string, HashSet <PeptideWithSetModifications> >();
                foreach (var peptide in compactPeptideToProteinPeptideMatching.SelectMany(b => b.Value))
                {
                    if (baseSeqToProteinMatch.TryGetValue(peptide.BaseSequence, out HashSet <PeptideWithSetModifications> value))
                    {
                        value.Add(peptide);
                    }
                    else
                    {
                        baseSeqToProteinMatch[peptide.BaseSequence] = new HashSet <PeptideWithSetModifications> {
                            peptide
                        };
                    }
                }

                // reverse lookup: peptide -> compact peptides it is currently stored under
                var pwsmToCompactPeptides = new Dictionary <PeptideWithSetModifications, List <CompactPeptideBase> >();
                foreach (var pep in compactPeptideToProteinPeptideMatching)
                {
                    foreach (var pepWithSetMods in pep.Value)
                    {
                        if (pwsmToCompactPeptides.TryGetValue(pepWithSetMods, out List <CompactPeptideBase> list))
                        {
                            list.Add(pep.Key);
                        }
                        else
                        {
                            pwsmToCompactPeptides.Add(pepWithSetMods, new List <CompactPeptideBase> {
                                pep.Key
                            });
                        }
                    }
                }

                foreach (var baseSequence in baseSeqToProteinMatch)
                {
                    if (baseSequence.Value.Count > 1 && baseSequence.Value.Any(p => p.NumMods > 0))
                    {
                        // list of proteins along with start/end residue in protein and the # missed cleavages
                        var peptideInProteinInfo = new List <Tuple <Protein, int, int, int> >();
                        foreach (var peptide in baseSequence.Value)
                        {
                            peptideInProteinInfo.Add(new Tuple <Protein, int, int, int>(peptide.Protein, peptide.OneBasedStartResidueInProtein, peptide.OneBasedEndResidueInProtein, (int)peptide.MissedCleavages));
                        }

                        // re-create each peptide (with its own mods) at every recorded protein position and
                        // store the copy under the compact peptides the original was stored under
                        foreach (var peptide in baseSequence.Value)
                        {
                            foreach (var proteinInfo in peptideInProteinInfo)
                            {
                                var pep = new PeptideWithSetModifications(proteinInfo.Item1, proteinInfo.Item2, proteinInfo.Item3, peptide.PeptideDescription, proteinInfo.Item4, peptide.allModsOneIsNterminus, peptide.numFixedMods);
                                foreach (var compactPeptide in pwsmToCompactPeptides[peptide])
                                {
                                    compactPeptideToProteinPeptideMatching[compactPeptide].Add(pep);
                                }
                            }
                        }
                    }
                }
            }

            var proteinToPeptidesMatching  = new Dictionary <Protein, HashSet <CompactPeptideBase> >();
            var parsimonyProteinList       = new Dictionary <Protein, HashSet <CompactPeptideBase> >();
            var proteinsWithUniquePeptides = new Dictionary <Protein, HashSet <PeptideWithSetModifications> >();

            // peptide matched to fullseq (used depending on user preference)
            var compactPeptideToFullSeqMatch = compactPeptideToProteinPeptideMatching.ToDictionary(x => x.Key, x => x.Value.First().Sequence);

            foreach (var kvp in compactPeptideToProteinPeptideMatching)
            {
                // finds unique peptides (peptides that can belong to only one protein)
                HashSet <Protein> proteinsAssociatedWithThisPeptide = new HashSet <Protein>(kvp.Value.Select(p => p.Protein));
                if (proteinsAssociatedWithThisPeptide.Count == 1)
                {
                    if (!proteinsWithUniquePeptides.TryGetValue(kvp.Value.First().Protein, out HashSet <PeptideWithSetModifications> peptides))
                    {
                        proteinsWithUniquePeptides.Add(kvp.Value.First().Protein, new HashSet <PeptideWithSetModifications>(kvp.Value));
                    }
                    else
                    {
                        peptides.UnionWith(kvp.Value);
                    }
                }

                // if a peptide is associated with a decoy protein, remove all target protein associations with the peptide
                if (kvp.Value.Any(p => p.Protein.IsDecoy))
                {
                    kvp.Value.RemoveWhere(p => !p.Protein.IsDecoy);
                }

                // if a peptide is associated with a contaminant protein, remove all target protein associations with the peptide
                if (kvp.Value.Any(p => p.Protein.IsContaminant))
                {
                    kvp.Value.RemoveWhere(p => !p.Protein.IsContaminant);
                }
            }

            // makes dictionary with proteins as keys and list of associated peptides as the value (makes parsimony algo easier)
            foreach (var kvp in compactPeptideToProteinPeptideMatching)
            {
                foreach (var peptide in kvp.Value)
                {
                    if (!proteinToPeptidesMatching.TryGetValue(peptide.Protein, out HashSet <CompactPeptideBase> peptides))
                    {
                        proteinToPeptidesMatching.Add(peptide.Protein, new HashSet <CompactPeptideBase>()
                        {
                            kvp.Key
                        });
                    }
                    else
                    {
                        peptides.Add(kvp.Key);
                    }
                }
            }

            // build protein list for each peptide before parsimony has been applied
            var peptideSeqProteinListMatch = new Dictionary <string, HashSet <Protein> >();

            foreach (var kvp in proteinToPeptidesMatching)
            {
                foreach (var peptide in kvp.Value)
                {
                    // key is either a string built from the compact peptide's masses (mod-agnostic mode)
                    // or the full sequence of the first peptide stored under it
                    string pepSequence;
                    if (!treatModPeptidesAsDifferentPeptides)
                    {
                        pepSequence = string.Join("", peptide.NTerminalMasses.Select(b => b.ToString(CultureInfo.InvariantCulture))) + string.Join("", peptide.CTerminalMasses.Select(b => b.ToString(CultureInfo.InvariantCulture))) + peptide.MonoisotopicMassIncludingFixedMods.ToString(CultureInfo.InvariantCulture);
                    }
                    else
                    {
                        pepSequence = compactPeptideToFullSeqMatch[peptide];
                    }
                    if (!peptideSeqProteinListMatch.TryGetValue(pepSequence, out HashSet <Protein> proteinListHere))
                    {
                        peptideSeqProteinListMatch.Add(pepSequence, new HashSet <Protein>()
                        {
                            kvp.Key
                        });
                    }
                    else
                    {
                        proteinListHere.Add(kvp.Key);
                    }
                }
            }

            // dictionary associates proteins w/ unused base seqs (list will shrink over time)
            var algDictionary = new Dictionary <Protein, HashSet <string> >();

            foreach (var kvp in peptideSeqProteinListMatch)
            {
                foreach (var protein in kvp.Value)
                {
                    if (algDictionary.TryGetValue(protein, out HashSet <string> newPeptideBaseSeqs))
                    {
                        newPeptideBaseSeqs.Add(kvp.Key);
                    }
                    else
                    {
                        algDictionary.Add(protein, new HashSet <string> {
                            kvp.Key
                        });
                    }
                }
            }

            // dictionary associates proteins w/ all of their base seqs (list will NOT shrink over time)
            // each set is copied: sharing the HashSet instances with algDictionary would let the removals
            // in the loop below shrink these sets too and skew the total-count tie-break
            var proteinToPepSeqMatch = algDictionary.ToDictionary(x => x.Key, x => new HashSet <string>(x.Value));

            // *** main parsimony loop
            bool uniquePeptidesLeft = proteinsWithUniquePeptides.Any();
            int  numNewSeqs         = algDictionary.Max(p => p.Value.Count);

            while (numNewSeqs != 0)
            {
                var possibleBestProteinList = new List <KeyValuePair <Protein, HashSet <string> > >();

                // proteins with unique peptides are taken first, one per iteration
                if (uniquePeptidesLeft)
                {
                    var proteinsWithUniquePeptidesLeft = algDictionary.Where(p => proteinsWithUniquePeptides.ContainsKey(p.Key));
                    if (proteinsWithUniquePeptidesLeft.Any())
                    {
                        possibleBestProteinList.Add(proteinsWithUniquePeptidesLeft.First());
                    }
                    else
                    {
                        uniquePeptidesLeft = false;
                    }
                }

                // gets list of proteins with the most unaccounted-for peptide base sequences
                if (!uniquePeptidesLeft)
                {
                    possibleBestProteinList = algDictionary.Where(p => p.Value.Count == numNewSeqs).ToList();
                }

                Protein          bestProtein = possibleBestProteinList.First().Key;
                HashSet <string> newSeqs     = new HashSet <string>(algDictionary[bestProtein]);

                // may need to select different protein
                if (possibleBestProteinList.Count > 1)
                {
                    // tied candidates that explain (at least) the same remaining sequences as the first candidate
                    var proteinsWithTheseBaseSeqs = new HashSet <Protein>();

                    foreach (var kvp in possibleBestProteinList)
                    {
                        if (newSeqs.IsSubsetOf(kvp.Value))
                        {
                            proteinsWithTheseBaseSeqs.Add(kvp.Key);
                        }
                    }

                    if (proteinsWithTheseBaseSeqs.Count > 1)
                    {
                        // among those, the protein with the most sequences overall wins
                        var proteinsOrderedByTotalPeptideCount = new Dictionary <Protein, HashSet <string> >();
                        foreach (var protein in proteinsWithTheseBaseSeqs)
                        {
                            proteinsOrderedByTotalPeptideCount.Add(protein, proteinToPepSeqMatch[protein]);
                        }

                        bestProtein = proteinsOrderedByTotalPeptideCount.OrderByDescending(kvp => kvp.Value.Count).First().Key;
                    }
                }

                parsimonyProteinList.Add(bestProtein, proteinToPeptidesMatching[bestProtein]);

                // remove used peptides from their proteins
                foreach (var newBaseSeq in newSeqs)
                {
                    HashSet <Protein> proteinsWithThisPeptide = peptideSeqProteinListMatch[newBaseSeq];

                    foreach (var protein in proteinsWithThisPeptide)
                    {
                        algDictionary[protein].Remove(newBaseSeq);
                    }
                }

                algDictionary.Remove(bestProtein);
                numNewSeqs = algDictionary.Any() ? algDictionary.Max(p => p.Value.Count) : 0;
            }

            // *** done with parsimony

            // add indistinguishable proteins
            var proteinsGroupedByNumPeptides          = proteinToPeptidesMatching.GroupBy(p => p.Value.Count);
            var parsimonyProteinsGroupedByNumPeptides = parsimonyProteinList.GroupBy(p => p.Value.Count);
            var indistinguishableProteins             = new ConcurrentDictionary <Protein, HashSet <CompactPeptideBase> >();

            foreach (var group in proteinsGroupedByNumPeptides)
            {
                // only proteins with the same number of peptides can have identical peptide sets
                var parsimonyProteinsWithSameNumPeptides = parsimonyProteinsGroupedByNumPeptides.FirstOrDefault(p => p.Key == group.Key);
                var list = group.ToList();
                if (parsimonyProteinsWithSameNumPeptides != null)
                {
                    Parallel.ForEach(Partitioner.Create(0, list.Count),
                                     new ParallelOptions {
                        MaxDegreeOfParallelism = -1
                    },
                                     (range, loopState) =>
                    {
                        for (int i = range.Item1; i < range.Item2; i++)
                        {
                            foreach (var parsimonyProteinWithThisNumPeptides in parsimonyProteinsWithSameNumPeptides)
                            {
                                if (parsimonyProteinWithThisNumPeptides.Key != list[i].Key)
                                {
                                    if (proteinToPeptidesMatching[parsimonyProteinWithThisNumPeptides.Key].SetEquals(proteinToPeptidesMatching[list[i].Key]))
                                    {
                                        indistinguishableProteins.GetOrAdd(list[i].Key, proteinToPeptidesMatching[list[i].Key]);
                                    }
                                }
                            }
                        }
                    }
                                     );
                }
            }
            foreach (var protein in indistinguishableProteins)
            {
                parsimonyProteinList.Add(protein.Key, protein.Value);
            }

            // drop every association to a protein that parsimony did not keep
            foreach (var kvp in compactPeptideToProteinPeptideMatching)
            {
                kvp.Value.RemoveWhere(p => !parsimonyProteinList.ContainsKey(p.Protein));
            }

            Status("Finished Parsimony");

            return(ConstructProteinGroups(new HashSet <PeptideWithSetModifications>(proteinsWithUniquePeptides.Values.SelectMany(p => p)), new HashSet <PeptideWithSetModifications>(compactPeptideToProteinPeptideMatching.Values.SelectMany(p => p))));
        }
        /// <summary>
        /// Creates a new <see cref="PeptideWithSetModifications"/> from <paramref name="pwsm"/> whose protein is
        /// the result of <c>CreateSilacProtein(heavyToLight, silacLabel, pwsm.Protein)</c>.
        /// Every other constructor argument is taken from <paramref name="pwsm"/> as-is.
        /// </summary>
        /// <param name="heavyToLight">Forwarded unchanged to <c>CreateSilacProtein</c>.</param>
        /// <param name="silacLabel">Forwarded unchanged to <c>CreateSilacProtein</c>.</param>
        /// <param name="pwsm">Peptide supplying the protein to convert and all remaining constructor arguments.</param>
        /// <returns>The newly constructed peptide.</returns>
        public static PeptideWithSetModifications CreateSilacPwsm(bool heavyToLight, SilacLabel silacLabel, PeptideWithSetModifications pwsm)
        {
            Protein relabeledProtein = CreateSilacProtein(heavyToLight, silacLabel, pwsm.Protein);

            var relabeledPeptide = new PeptideWithSetModifications(
                relabeledProtein,
                pwsm.DigestionParams,
                pwsm.OneBasedStartResidueInProtein,
                pwsm.OneBasedEndResidueInProtein,
                pwsm.CleavageSpecificityForFdrCategory,
                pwsm.PeptideDescription,
                pwsm.MissedCleavages,
                pwsm.AllModsOneIsNterminus,
                pwsm.NumFixedMods);

            return(relabeledPeptide);
        }
Example #18
0
        /// <summary>
        /// Builds the <see cref="PsmData"/> feature record for one PSM, stores it in
        /// <c>psm.PsmData_forPEPandPercolator</c>, and returns it.
        /// </summary>
        /// <remarks>
        /// A feature is computed only when its name is present in <paramref name="trainingVariables"/>;
        /// otherwise it keeps its default (0 for most features, 1 for PsmCount, NaN for HydrophobicityZScore).
        /// IsVariantPeptide and Label are always filled in.
        /// </remarks>
        /// <param name="psm">The PSM to describe; its <c>PsmData_forPEPandPercolator</c> is overwritten.</param>
        /// <param name="sequenceToPsmCount">PSM counts keyed by the "|"-joined full sequences of the PSM's best matching peptides.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_unmodified">Lookup passed to the hydrophobicity z-score helper when the selected peptide's base and full sequences are equal.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_modified">Lookup passed to the hydrophobicity z-score helper otherwise.</param>
        /// <param name="chargeStateMode">Charge the PSM's precursor charge is compared against.</param>
        /// <param name="selectedPeptide">Peptide used for the mod count, missed cleavages, ion series, hydrophobicity and variant features.</param>
        /// <param name="trainingVariables">Names of the features to compute.</param>
        /// <param name="notchToUse">Value stored as Notch when "Notch" is requested.</param>
        /// <param name="label">Overwritten before use: the stored label is false for decoy PSMs and true otherwise, whatever is passed in.</param>
        /// <returns>The record that was stored on <paramref name="psm"/>.</returns>
        public static PsmData CreateOnePsmDataEntry(PeptideSpectralMatch psm, Dictionary <string, int> sequenceToPsmCount, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_modified, int chargeStateMode, PeptideWithSetModifications selectedPeptide, string[] trainingVariables, int notchToUse, bool label)
        {
            // whole-number part of the score
            float totalMatchingFragmentCount = trainingVariables.Contains("TotalMatchingFragmentCount")
                ? (float)Math.Floor(psm.Score)
                : 0f;

            // number of additional matched peptides, capped at 10
            float ambiguity = trainingVariables.Contains("Ambiguity")
                ? Math.Min((float)(psm.PeptidesToMatchingFragments.Keys.Count - 1), 10)
                : 0f;

            // what remains of the score after truncating it to an int
            float intensity = trainingVariables.Contains("Intensity")
                ? (float)(psm.Score - (int)psm.Score)
                : 0f;

            // negated distance between the precursor charge and the supplied mode
            float chargeDifference = trainingVariables.Contains("PrecursorChargeDiffToMode")
                ? -Math.Abs(chargeStateMode - psm.ScanPrecursorCharge)
                : 0f;

            float deltaScore = trainingVariables.Contains("DeltaScore")
                ? (float)psm.DeltaScore
                : 0f;

            float psmCount = 1;
            if (trainingVariables.Contains("PsmCount"))
            {
                string joinedSequences = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence));
                float  observedCount   = sequenceToPsmCount[joinedSequences];

                // Counts are snapped to the nearest bucket for stability: low counts give good statistics,
                // but the few peptides with very high counts can be targets or decoys and would otherwise
                // make the value swing between extremes.
                int[] countBuckets = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50, 75, 100, 200, 300, 400, 500 };
                psmCount = countBuckets.OrderBy(bucket => Math.Abs(observedCount - bucket)).First();
            }

            int notch = trainingVariables.Contains("Notch") ? notchToUse : 0;

            // number of modification positions, capped at 10
            float modCount = trainingVariables.Contains("ModsCount")
                ? Math.Min((float)selectedPeptide.AllModsOneIsNterminus.Keys.Count(), 10)
                : 0f;

            float missedCleavages = trainingVariables.Contains("MissedCleavagesCount")
                ? selectedPeptide.MissedCleavages
                : 0f;

            float longestSeq = trainingVariables.Contains("LongestFragmentIonSeries")
                ? psm.GetLongestIonSeriesBidirectional(selectedPeptide)
                : 0f;

            // peptides whose base and full sequences are equal use the unmodified lookup, all others the modified one
            bool  sequencesAreEqual    = selectedPeptide.BaseSequence.Equals(selectedPeptide.FullSequence);
            float hydrophobicityZscore = float.NaN;
            if (trainingVariables.Contains("HydrophobicityZScore"))
            {
                var hydrophobicityLookup = sequencesAreEqual
                    ? timeDependantHydrophobicityAverageAndDeviation_unmodified
                    : timeDependantHydrophobicityAverageAndDeviation_modified;
                hydrophobicityZscore = GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, hydrophobicityLookup);
            }

            bool isVariantPeptide = PeptideIsVariant(selectedPeptide);

            // the incoming label value is not consulted; decoy status alone decides it
            label = !psm.IsDecoy;

            psm.PsmData_forPEPandPercolator = new PsmData
            {
                TotalMatchingFragmentCount = totalMatchingFragmentCount,
                Intensity                  = intensity,
                PrecursorChargeDiffToMode  = chargeDifference,
                DeltaScore                 = deltaScore,
                Notch                      = notch,
                PsmCount                   = psmCount,
                ModsCount                  = modCount,
                MissedCleavagesCount       = missedCleavages,
                Ambiguity                  = ambiguity,
                LongestFragmentIonSeries   = longestSeq,
                HydrophobicityZScore       = hydrophobicityZscore,
                IsVariantPeptide           = Convert.ToSingle(isVariantPeptide),
                Label                      = label
            };

            return(psm.PsmData_forPEPandPercolator);
        }
Example #19
0
        /// <summary>
        /// Returns how far the SSRCalc3-predicted hydrophobicity of <paramref name="Peptide"/> lies from the
        /// stored average for the PSM's file and retention-time bin, in units of the stored deviation.
        /// </summary>
        /// <param name="psm">Supplies the file path and scan retention time used for the lookup.</param>
        /// <param name="Peptide">Peptide scored by the SSRCalc3 calculator.</param>
        /// <param name="d">File path -> time bin -> (Item1, Item2); Item1 is subtracted from the prediction and Item2 divides the result.</param>
        /// <returns>
        /// |Item1 - predicted| / Item2, or 10 when no entry exists for the file or time bin, or when the
        /// quotient is NaN, infinite, or greater than 10.
        /// </returns>
        private static float GetSSRCalcHydrophobicityZScore(PeptideSpectralMatch psm, PeptideWithSetModifications Peptide, Dictionary <string, Dictionary <int, Tuple <double, double> > > d)
        {
            // each "Z" is one standard deviation, so a ceiling of 10 is quite large
            const double zScoreCeiling = 10;

            // Using SSRCalc3 but probably any number of different calculators could be used instead. One could also use the CE mobility.
            SSRCalc3 calculator = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
            double   zScore     = double.NaN;

            if (d.TryGetValue(psm.FullFilePath, out Dictionary <int, Tuple <double, double> > statsByTimeBin))
            {
                // retention time halved, rounded to a whole number, then doubled
                int timeBin = (int)(2 * Math.Round(psm.ScanRetentionTime / 2d, 0));
                if (statsByTimeBin.TryGetValue(timeBin, out Tuple <double, double> averageAndDeviation))
                {
                    double predictedHydrophobicity = calculator.ScoreSequence(Peptide);
                    zScore = Math.Abs(averageAndDeviation.Item1 - predictedHydrophobicity) / averageAndDeviation.Item2;
                }
            }

            // missing, NaN, infinite, and over-ceiling values all collapse to the ceiling
            bool usable = !double.IsNaN(zScore) && !double.IsInfinity(zScore) && zScore <= zScoreCeiling;

            return((float)(usable ? zScore : zScoreCeiling));
        }
        private List <ProteinGroup> ApplyProteinParsimony()
        {
            //if dictionary is empty return an empty list of protein groups
            if (!CompactPeptideToProteinPeptideMatching.Values.Any())
            {
                return(new List <ProteinGroup>());
            }
            // digesting an XML database results in a non-mod-agnostic digestion; need to fix this if mod-agnostic parsimony enabled
            if (!TreatModPeptidesAsDifferentPeptides)//user want modified and unmodified peptides treated the same
            {
                Dictionary <string, HashSet <PeptideWithSetModifications> > baseSeqToProteinMatch = new Dictionary <string, HashSet <PeptideWithSetModifications> >();
                // dictionary where string key is the base sequence and the HashSet is all PeptidesWithSetModificatiosn with the same sequence
                // can access which protein these matching peptides came from through the PeptideWithSetModifications object
                foreach (var peptide in CompactPeptideToProteinPeptideMatching.SelectMany(b => b.Value))
                {
                    if (baseSeqToProteinMatch.TryGetValue(peptide.BaseSequence, out HashSet <PeptideWithSetModifications> value))
                    {
                        value.Add(peptide);
                    }
                    else
                    {
                        baseSeqToProteinMatch[peptide.BaseSequence] = new HashSet <PeptideWithSetModifications> {
                            peptide
                        };
                    }
                }

                var blah = new Dictionary <PeptideWithSetModifications, List <CompactPeptideBase> >();
                // where to store results
                foreach (var pep in CompactPeptideToProteinPeptideMatching)
                {
                    foreach (var pepWithSetMods in pep.Value)
                    {
                        if (blah.TryGetValue(pepWithSetMods, out List <CompactPeptideBase> list))
                        {
                            list.Add(pep.Key);
                        }
                        else
                        {
                            blah.Add(pepWithSetMods, new List <CompactPeptideBase> {
                                pep.Key
                            });
                        }
                    }
                }

                foreach (var baseSequence in baseSeqToProteinMatch)
                {
                    if (baseSequence.Value.Count > 1 && baseSequence.Value.Any(p => p.NumMods > 0))
                    {
                        // list of proteins along with start/end residue in protein and the # missed cleavages
                        var peptideInProteinInfo = new List <Tuple <Protein, DigestionParams, int, int, int> >();
                        foreach (var peptide in baseSequence.Value)
                        {
                            peptideInProteinInfo.Add(new Tuple <Protein, DigestionParams, int, int, int>(peptide.Protein, peptide.DigestionParams, peptide.OneBasedStartResidueInProtein, peptide.OneBasedEndResidueInProtein, (int)peptide.MissedCleavages));
                        }

                        foreach (var peptide in baseSequence.Value)
                        {
                            foreach (var proteinInfo in peptideInProteinInfo)
                            {
                                var pep = new PeptideWithSetModifications(proteinInfo.Item1, proteinInfo.Item2, proteinInfo.Item3, proteinInfo.Item4, peptide.PeptideDescription, proteinInfo.Item5, peptide.AllModsOneIsNterminus, peptide.NumFixedMods);
                                foreach (var compactPeptide in blah[peptide])
                                {
                                    CompactPeptideToProteinPeptideMatching[compactPeptide].Add(pep);
                                }
                            }
                        }
                    }
                }
            }

            var proteinToPeptidesMatching  = new Dictionary <Protein, HashSet <CompactPeptideBase> >();
            var parsimonyProteinList       = new Dictionary <Protein, HashSet <CompactPeptideBase> >();
            var proteinsWithUniquePeptides = new Dictionary <Protein, HashSet <PeptideWithSetModifications> >();

            // peptide matched to fullseq (used depending on user preference)
            var compactPeptideToFullSeqMatch = CompactPeptideToProteinPeptideMatching.ToDictionary(x => x.Key, x => x.Value.First().Sequence);

            foreach (var kvp in CompactPeptideToProteinPeptideMatching)
            {
                HashSet <Protein> proteinsAssociatedWithThisPeptide = new HashSet <Protein>(kvp.Value.Select(p => p.Protein));
                if (proteinsAssociatedWithThisPeptide.Count == 1)
                {
                    if (!proteinsWithUniquePeptides.TryGetValue(kvp.Value.First().Protein, out HashSet <PeptideWithSetModifications> peptides))
                    {
                        proteinsWithUniquePeptides.Add(kvp.Value.First().Protein, new HashSet <PeptideWithSetModifications>(kvp.Value));
                    }
                    else
                    {
                        peptides.UnionWith(kvp.Value);
                    }
                }
                // multiprotease parsimony is "weird" because a peptide sequence can be shared between
                // two proteins but technically be a "unique" peptide because it is unique in that protease digestion
                // this code marks these types of peptides as unique
                else
                {
                    foreach (var peptide in kvp.Value)
                    {
                        Protease protease          = peptide.DigestionParams.Protease;
                        int      sameProteaseCount = kvp.Value.Count(v => v.DigestionParams.Protease == protease);

                        if (sameProteaseCount == 1)
                        {
                            if (!proteinsWithUniquePeptides.TryGetValue(peptide.Protein, out HashSet <PeptideWithSetModifications> peps))
                            {
                                proteinsWithUniquePeptides.Add(peptide.Protein, new HashSet <PeptideWithSetModifications> {
                                    peptide
                                });
                            }
                            else
                            {
                                peps.UnionWith(kvp.Value);
                            }
                        }
                    }
                }

                // if a peptide is associated with a decoy protein, remove all target protein associations with the peptide
                if (kvp.Value.Any(p => p.Protein.IsDecoy))
                {
                    kvp.Value.RemoveWhere(p => !p.Protein.IsDecoy);
                }

                // if a peptide is associated with a contaminant protein, remove all target protein associations with the peptide
                if (kvp.Value.Any(p => p.Protein.IsContaminant))
                {
                    kvp.Value.RemoveWhere(p => !p.Protein.IsContaminant);
                }
            }
            // makes dictionary with proteins as keys and list of associated peptides as the value (makes parsimony algo easier)
            foreach (var kvp in CompactPeptideToProteinPeptideMatching)
            {
                foreach (var peptide in kvp.Value)
                {
                    if (!proteinToPeptidesMatching.TryGetValue(peptide.Protein, out HashSet <CompactPeptideBase> peptides))
                    {
                        proteinToPeptidesMatching.Add(peptide.Protein, new HashSet <CompactPeptideBase>()
                        {
                            kvp.Key
                        });
                    }
                    else
                    {
                        peptides.Add(kvp.Key);
                    }
                }
            }

            // build protein list for each peptide before parsimony has been applied
            var peptideSeqProteinListMatch = new Dictionary <string, HashSet <Protein> >();

            foreach (var kvp in proteinToPeptidesMatching)
            {
                foreach (var peptide in kvp.Value)
                {
                    string pepSequence;
                    if (!TreatModPeptidesAsDifferentPeptides)
                    {
                        string nTerminalMasses = peptide.NTerminalMasses == null ? "" : string.Join("", peptide.NTerminalMasses.Select(b => b.ToString(CultureInfo.InvariantCulture)));
                        string cTerminalMasses = peptide.CTerminalMasses == null ? "" : string.Join("", peptide.CTerminalMasses.Select(b => b.ToString(CultureInfo.InvariantCulture)));
                        pepSequence = nTerminalMasses + cTerminalMasses + peptide.MonoisotopicMassIncludingFixedMods.ToString(CultureInfo.InvariantCulture);
                    }
                    else
                    {
                        pepSequence = compactPeptideToFullSeqMatch[peptide];
                    }
                    if (!peptideSeqProteinListMatch.TryGetValue(pepSequence, out HashSet <Protein> proteinListHere))
                    {
                        peptideSeqProteinListMatch.Add(pepSequence, new HashSet <Protein>()
                        {
                            kvp.Key
                        });
                    }
                    else
                    {
                        proteinListHere.Add(kvp.Key);
                    }
                }
            }

            // dictionary associates proteins w/ unused base seqs (list will shrink over time)
            var algDictionary = new Dictionary <Protein, HashSet <string> >();

            foreach (var kvp in peptideSeqProteinListMatch)
            {
                foreach (var protein in kvp.Value)
                {
                    if (algDictionary.TryGetValue(protein, out HashSet <string> newPeptideBaseSeqs))
                    {
                        newPeptideBaseSeqs.Add(kvp.Key);
                    }
                    else
                    {
                        algDictionary.Add(protein, new HashSet <string> {
                            kvp.Key
                        });
                    }
                }
            }

            // dictionary associates proteins w/ unused base seqs (list will NOT shrink over time)
            var proteinToPepSeqMatch = algDictionary.ToDictionary(x => x.Key, x => x.Value);

            // *** main parsimony loop
            bool uniquePeptidesLeft = proteinsWithUniquePeptides.Any();

            int numNewSeqs = algDictionary.Max(p => p.Value.Count);

            while (numNewSeqs != 0)
            {
                var possibleBestProteinList = new List <KeyValuePair <Protein, HashSet <string> > >();

                if (uniquePeptidesLeft)
                {
                    var proteinsWithUniquePeptidesLeft = algDictionary.Where(p => proteinsWithUniquePeptides.ContainsKey(p.Key));
                    if (proteinsWithUniquePeptidesLeft.Any())
                    {
                        possibleBestProteinList.Add(proteinsWithUniquePeptidesLeft.First());
                    }
                    else
                    {
                        uniquePeptidesLeft = false;
                    }
                }

                // gets list of proteins with the most unaccounted-for peptide base sequences
                if (!uniquePeptidesLeft)
                {
                    possibleBestProteinList = algDictionary.Where(p => p.Value.Count == numNewSeqs).ToList();
                }

                Protein          bestProtein = possibleBestProteinList.First().Key;
                HashSet <string> newSeqs     = new HashSet <string>(algDictionary[bestProtein]);

                // may need to select different protein
                if (possibleBestProteinList.Count > 1)
                {
                    var proteinsWithTheseBaseSeqs = new HashSet <Protein>();

                    foreach (var kvp in possibleBestProteinList)
                    {
                        if (newSeqs.IsSubsetOf(kvp.Value))
                        {
                            proteinsWithTheseBaseSeqs.Add(kvp.Key);
                        }
                    }

                    if (proteinsWithTheseBaseSeqs.Count > 1)
                    {
                        var proteinsOrderedByTotalPeptideCount = new Dictionary <Protein, HashSet <string> >();
                        foreach (var protein in proteinsWithTheseBaseSeqs)
                        {
                            proteinsOrderedByTotalPeptideCount.Add(protein, proteinToPepSeqMatch[protein]);
                        }
                        bestProtein = proteinsOrderedByTotalPeptideCount.OrderByDescending(kvp => kvp.Value.Count).First().Key;
                    }
                }

                parsimonyProteinList.Add(bestProtein, proteinToPeptidesMatching[bestProtein]);

                // remove used peptides from their proteins
                foreach (var newBaseSeq in newSeqs)
                {
                    HashSet <Protein> proteinsWithThisPeptide = peptideSeqProteinListMatch[newBaseSeq];

                    foreach (var protein in proteinsWithThisPeptide)
                    {
                        algDictionary[protein].Remove(newBaseSeq);
                    }
                }
                algDictionary.Remove(bestProtein);
                numNewSeqs = algDictionary.Any() ? algDictionary.Max(p => p.Value.Count) : 0;
            }

            // *** done with parsimony

            // add indistinguishable proteins
            var proteinsGroupedByNumPeptides          = proteinToPeptidesMatching.GroupBy(p => p.Value.Count);
            var parsimonyProteinsGroupedByNumPeptides = parsimonyProteinList.GroupBy(p => p.Value.Count);
            var indistinguishableProteins             = new ConcurrentDictionary <Protein, HashSet <CompactPeptideBase> >();

            foreach (var group in proteinsGroupedByNumPeptides)
            {
                var parsimonyProteinsWithSameNumPeptides = parsimonyProteinsGroupedByNumPeptides.FirstOrDefault(p => p.Key == group.Key);
                var list = group.ToList();
                if (parsimonyProteinsWithSameNumPeptides != null)
                {
                    Parallel.ForEach(Partitioner.Create(0, list.Count),
                                     new ParallelOptions {
                        MaxDegreeOfParallelism = commonParameters.MaxThreadsToUsePerFile
                    },
                                     (range, loopState) =>
                    {
                        for (int i = range.Item1; i < range.Item2; i++)
                        {
                            foreach (var parsimonyProteinWithThisNumPeptides in parsimonyProteinsWithSameNumPeptides)
                            {
                                if (parsimonyProteinWithThisNumPeptides.Key != list[i].Key &&
                                    proteinToPeptidesMatching[parsimonyProteinWithThisNumPeptides.Key].SetEquals(proteinToPeptidesMatching[list[i].Key]))
                                {
                                    indistinguishableProteins.GetOrAdd(list[i].Key, proteinToPeptidesMatching[list[i].Key]);
                                }
                            }
                        }
                    }
                                     );
                }
            }
            foreach (var protein in indistinguishableProteins)
            {
                if (!parsimonyProteinList.ContainsKey(protein.Key))
                {
                    parsimonyProteinList.Add(protein.Key, protein.Value);
                }
            }

            // multiprotease parsimony:
            // this code is a workaround to add back proteins to the parsimonious list that were removed
            // because unique peptides were mistaken for shared peptides. see line 139 for more info
            if (ListOfDigestionParams.Select(v => v.Protease).Distinct().Count() > 1)
            {
                HashSet <Protein> parsimonyProteinSet = new HashSet <Protein>(parsimonyProteinList.Keys);

                // add back in proteins that contain unique peptides
                foreach (var prot in proteinsWithUniquePeptides)
                {
                    if (!parsimonyProteinSet.Contains(prot.Key))
                    {
                        parsimonyProteinList.Add(prot.Key, proteinToPeptidesMatching[prot.Key]);
                    }
                }
            }
            foreach (var kvp in CompactPeptideToProteinPeptideMatching)
            {
                kvp.Value.RemoveWhere(p => !parsimonyProteinList.ContainsKey(p.Protein));
            }

            return(ConstructProteinGroups(new HashSet <PeptideWithSetModifications>(proteinsWithUniquePeptides.Values.SelectMany(p => p)), new HashSet <PeptideWithSetModifications>(CompactPeptideToProteinPeptideMatching.Values.SelectMany(p => p))));
        }
        /// <summary>
        /// Builds one <see cref="PsmData"/> record from a peptide-spectrum match and the peptide chosen to represent it.
        /// </summary>
        /// <param name="psm">The peptide-spectrum match the values are read from.</param>
        /// <param name="sequenceToPsmCount">PSM counts keyed by the full sequences of the PSM's best-matching peptides joined with "|"; the key must be present.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_unmodified">Table handed to GetSSRCalcHydrophobicityZScore when the peptide's base sequence equals its full sequence.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_modified">Table handed to GetSSRCalcHydrophobicityZScore when the peptide's base sequence differs from its full sequence.</param>
        /// <param name="selectedPeptide">Peptide to describe; when null, the first best-matching peptide of <paramref name="psm"/> is used.</param>
        /// <param name="searchType">The hydrophobicity z-score is only computed when this equals "standard"; otherwise it stays NaN.</param>
        /// <param name="notchToUse">Notch to record; when null, the PSM's own notch is used, and 0 if that is null too.</param>
        /// <param name="trueOrFalse">Explicit label; when null, the label is true for non-decoy PSMs and false for decoys.</param>
        /// <returns>A new <see cref="PsmData"/> populated with the values described above.</returns>
        public static PsmData CreateOnePsmDataEntry(PeptideSpectralMatch psm, Dictionary <string, int> sequenceToPsmCount, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_modified, PeptideWithSetModifications selectedPeptide, string searchType, int?notchToUse, bool?trueOrFalse = null)
        {
            float matchedPeptideCount = psm.PeptidesToMatchingFragments.Keys.Count;

            // the fractional part of the score is what gets stored as "Intensity"
            float fractionalScore = (float)(psm.Score - (int)psm.Score);
            float precursorCharge = psm.ScanPrecursorCharge;
            float scoreDelta      = (float)psm.DeltaScore;

            // lookup key: every best-matching full sequence, '|'-separated
            string psmCountKey     = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence));
            float  psmsForSequence = sequenceToPsmCount[psmCountKey];

            // precedence: explicit argument, then the PSM's own notch, then 0
            int notch = notchToUse ?? psm.Notch ?? 0;

            // fall back to the first best-matching peptide when the caller did not pick one
            PeptideWithSetModifications peptide = selectedPeptide ?? psm.BestMatchingPeptides.First().Peptide;

            float numberOfMods        = peptide.AllModsOneIsNterminus.Count;
            float numMissedCleavages  = peptide.MissedCleavages;
            float longestIonSeries    = psm.GetLongestIonSeriesBidirectional(peptide);

            // evaluated unconditionally so the sequence comparison happens for every search type
            bool  peptideIsUnmodified = peptide.BaseSequence.Equals(peptide.FullSequence);
            float zScore              = float.NaN;

            if (searchType == "standard")
            {
                var hydrophobicityTable = peptideIsUnmodified
                    ? timeDependantHydrophobicityAverageAndDeviation_unmodified
                    : timeDependantHydrophobicityAverageAndDeviation_modified;

                zScore = GetSSRCalcHydrophobicityZScore(psm, peptide, hydrophobicityTable);
            }

            // an explicit label wins; otherwise targets are true and decoys are false
            bool isTrueMatch = trueOrFalse ?? !psm.IsDecoy;

            return new PsmData
            {
                Intensity = fractionalScore,
                ScanPrecursorCharge = precursorCharge,
                DeltaScore = scoreDelta,
                Notch = notch,
                PsmCount = psmsForSequence,
                ModsCount = numberOfMods,
                MissedCleavagesCount = numMissedCleavages,
                Ambiguity = matchedPeptideCount,
                LongestFragmentIonSeries = longestIonSeries,
                HydrophobicityZScore = zScore,
                Label = isTrueMatch
            };
        }
Example #22
0
        /// <summary>
        /// Runs the protein parsimony engine over the FDR-filtered peptides and PSMs.
        /// Parsimony algorithm based on: https://www.ncbi.nlm.nih.gov/pubmed/14632076 Anal Chem. 2003 Sep 1;75(17):4646-58.
        /// Stage 0 runs only when modified peptides are NOT treated as different peptides. Per protease, for each
        /// base sequence that has more than one distinct best-matching peptide (at least one of them modified) and
        /// more than one protein, it creates the peptide-protein associations that are missing so that every PSM of
        /// that base sequence is linked to every protein seen for that base sequence.
        /// TODO: Note describing that peptide objects with the same sequence are associated with different proteins
        /// </summary>
        /// <returns>The protein groups; an empty list when there are no FDR-filtered peptides.</returns>
        private List <ProteinGroup> RunProteinParsimonyEngine()
        {
            // parsimonious list of proteins built by this protein parsimony engine
            HashSet <Protein> parsimoniousProteinList = new HashSet <Protein>();

            // list of peptides that can only be digestion products of one protein in the proteome (considering different protease digestion rules)
            HashSet <PeptideWithSetModifications> uniquePeptides = new HashSet <PeptideWithSetModifications>();

            // if there are no peptides observed, there are no proteins; return an empty list of protein groups
            if (_fdrFilteredPeptides.Count == 0)
            {
                return(new List <ProteinGroup>());
            }

            // Parsimony stage 0: create peptide-protein associations if needed because the user wants a modification-agnostic parsimony
            // this is needed for edge cases digesting a protein .xml from UniProt that has peptide sequences shared between proteins
            // that have unevenly-shared modifications
            if (!_treatModPeptidesAsDifferentPeptides)
            {
                // PSMs are handled one protease at a time, so base sequences are only compared within the same protease
                foreach (var protease in _fdrFilteredPsms.GroupBy(p => p.DigestionParams.Protease))
                {
                    Dictionary <string, List <PeptideSpectralMatch> > sequenceWithPsms = new Dictionary <string, List <PeptideSpectralMatch> >();

                    // for each protease, match the base sequence of each peptide to its PSMs
                    foreach (PeptideSpectralMatch psm in protease)
                    {
                        if (sequenceWithPsms.TryGetValue(psm.BaseSequence, out List <PeptideSpectralMatch> peptidesForThisBaseSequence))
                        {
                            peptidesForThisBaseSequence.Add(psm);
                        }
                        else
                        {
                            sequenceWithPsms[psm.BaseSequence] = new List <PeptideSpectralMatch> {
                                psm
                            };
                        }
                    }

                    // copied to a list so the parallel loop below can address entries by index range
                    var sequenceWithPsmsList = sequenceWithPsms.ToList();

                    // create new peptide-protein associations as needed
                    Parallel.ForEach(Partitioner.Create(0, sequenceWithPsmsList.Count),
                                     new ParallelOptions {
                        MaxDegreeOfParallelism = CommonParameters.MaxThreadsToUsePerFile
                    },
                                     (range, loopState) =>
                    {
                        for (int i = range.Item1; i < range.Item2; i++)
                        {
                            // key = base sequence, value = all PSMs (for this protease) with that base sequence
                            var baseSequence = sequenceWithPsmsList[i];

                            var peptidesWithNotchInfo = baseSequence.Value.SelectMany(p => p.BestMatchingPeptides).Distinct().ToList();

                            // if the base seq has >1 PeptideWithSetMods object and has >0 mods, it might need to be matched to new proteins
                            if (peptidesWithNotchInfo.Count > 1 && peptidesWithNotchInfo.Any(p => p.Peptide.NumMods > 0))
                            {
                                bool needToAddPeptideToProteinAssociations = false;

                                // numProteinsForThisBaseSequence is the total number of proteins that this base sequence is a digestion product of
                                int numProteinsForThisBaseSequence = peptidesWithNotchInfo.Select(p => p.Peptide.Protein).Distinct().Count();

                                // with a single protein there is nothing to re-associate; move on to the next base sequence
                                if (numProteinsForThisBaseSequence == 1)
                                {
                                    continue;
                                }

                                foreach (var psm in baseSequence.Value)
                                {
                                    // numProteinsForThisPsm is the number of proteins that this PSM's peptides are associated with
                                    int numProteinsForThisPsm = psm.BestMatchingPeptides.Select(p => p.Peptide.Protein).Distinct().Count();

                                    if (numProteinsForThisPsm != numProteinsForThisBaseSequence)
                                    {
                                        // this PSM is not matched to all the proteins that it should be matched to
                                        // at this point we know that we need to make some new peptide-protein associations
                                        needToAddPeptideToProteinAssociations = true;
                                    }
                                }

                                // every PSM already covers every protein of this base sequence; nothing to add
                                if (!needToAddPeptideToProteinAssociations)
                                {
                                    continue;
                                }

                                // this gets the digestion info for all of the peptide-protein associations that should exist
                                var proteinToPeptideInfo = new Dictionary <Protein,
                                                                           (DigestionParams DigestParams, int OneBasedStart, int OneBasedEnd, int MissedCleavages, int Notch,
                                                                            CleavageSpecificity CleavageSpecificity)>();

                                foreach (PeptideSpectralMatch psm in baseSequence.Value)
                                {
                                    foreach (var peptideWithNotch in psm.BestMatchingPeptides)
                                    {
                                        PeptideWithSetModifications peptide = peptideWithNotch.Peptide;
                                        Protein protein = peptide.Protein;

                                        // only the first peptide encountered for a protein supplies that protein's digestion info
                                        if (!proteinToPeptideInfo.ContainsKey(protein))
                                        {
                                            proteinToPeptideInfo.Add(protein,
                                                                     (peptideWithNotch.Peptide.DigestionParams,
                                                                      peptideWithNotch.Peptide.OneBasedStartResidueInProtein,
                                                                      peptideWithNotch.Peptide.OneBasedEndResidueInProtein,
                                                                      peptideWithNotch.Peptide.MissedCleavages,
                                                                      peptideWithNotch.Notch,
                                                                      peptideWithNotch.Peptide.CleavageSpecificityForFdrCategory));
                                        }
                                    }
                                }

                                // create any new associations that need to be made
                                foreach (PeptideSpectralMatch psm in baseSequence.Value)
                                {
                                    // the PSM's first best-matching peptide supplies the description, mods and fixed-mod count for new peptides
                                    PeptideWithSetModifications originalPeptide = psm.BestMatchingPeptides.First().Peptide;
                                    HashSet <Protein> psmProteins = new HashSet <Protein>(psm.BestMatchingPeptides.Select(p => p.Peptide.Protein));

                                    foreach (var proteinWithDigestInfo in proteinToPeptideInfo)
                                    {
                                        // only proteins this PSM is not yet associated with need a new peptide
                                        if (!psmProteins.Contains(proteinWithDigestInfo.Key))
                                        {
                                            var pep = new PeptideWithSetModifications(
                                                proteinWithDigestInfo.Key,
                                                proteinWithDigestInfo.Value.DigestParams,
                                                proteinWithDigestInfo.Value.OneBasedStart,
                                                proteinWithDigestInfo.Value.OneBasedEnd,
                                                proteinWithDigestInfo.Value.CleavageSpecificity,
                                                originalPeptide.PeptideDescription,
                                                proteinWithDigestInfo.Value.MissedCleavages,
                                                originalPeptide.AllModsOneIsNterminus,
                                                originalPeptide.NumFixedMods);

                                            // this lambda runs inside Parallel.ForEach, so additions to the shared collection are serialized
                                            lock (_fdrFilteredPeptides)
                                            {
                                                _fdrFilteredPeptides.Add(pep);
                                            }

                                            psm.AddProteinMatch((proteinWithDigestInfo.Value.Notch, pep));
                                        }
                                    }
                                }
                            }
                        }
                    }
Example #23
0
 /// <summary>
 /// Tests whether a peptide's residue range fully covers a splice site.
 /// </summary>
 /// <param name="pep">Peptide whose one-based start and end residues in the protein are compared.</param>
 /// <param name="site">Splice site whose one-based begin and end positions must fall inside the peptide.</param>
 /// <returns>
 /// True when the peptide starts at or before the site's begin position and ends at or after the site's end position.
 /// </returns>
 private static bool Includes(PeptideWithSetModifications pep, SpliceSite site)
 {
     // peptide begins after the site begins: it cannot contain the site
     if (pep.OneBasedStartResidueInProtein > site.OneBasedBeginPosition)
     {
         return false;
     }

     return pep.OneBasedEndResidueInProtein >= site.OneBasedEndPosition;
 }
Example #24
0
        /// <summary>
        /// Builds one <see cref="PsmData"/> record from a peptide-spectrum match, using the supplied peptide for the
        /// peptide-level values and the supplied notch as-is.
        /// </summary>
        /// <param name="psm">The peptide-spectrum match the values are read from.</param>
        /// <param name="notch">Notch value stored on the result unchanged.</param>
        /// <param name="firstPeptide">Peptide used for mod count, missed cleavages, longest ion series and protein accession.</param>
        /// <param name="accessionCounts">Appearance counts keyed by protein accession; an accession that is absent counts as 1.</param>
        /// <param name="sequenceToPsmCount">PSM counts keyed by the full sequences of the PSM's best-matching peptides joined with "|"; the key must be present.</param>
        /// <param name="trueOrFalse">Explicit label; when null, the label is true for non-decoy PSMs and false for decoys.</param>
        /// <returns>A new <see cref="PsmData"/> populated with the values described above.</returns>
        public static PsmData CreateOnePsmDataFromPsm2(PeptideSpectralMatch psm, int notch, PeptideWithSetModifications firstPeptide, Dictionary <string, int> accessionCounts, Dictionary <string, int> sequenceToPsmCount, bool?trueOrFalse = null)
        {
            // NOTE(review): the original comment said ambiguity "is not helping so not using currently", yet the value
            // is still stored on the returned PsmData; presumably it is ignored further downstream - confirm.
            float ambiguity = (float)psm.PeptidesToMatchingFragments.Keys.Count;

            // the fractional part of the score is what gets stored as "Intensity"
            float intensity  = (float)(psm.Score - (int)psm.Score);
            float charge     = psm.ScanPrecursorCharge;
            float deltaScore = (float)psm.DeltaScore;

            // lookup key: every best-matching full sequence, '|'-separated (String.Join accepts the sequence directly)
            float psmCount = sequenceToPsmCount[String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence))];

            float modCount = firstPeptide.AllModsOneIsNterminus.Count;

            //todo: for non-specific cleavage, ignore missed cleavages
            float missedCleavages = firstPeptide.MissedCleavages;

            float longestSeq = psm.GetLongestIonSeriesBidirectional(firstPeptide);

            string accession   = firstPeptide.Protein.Accession;
            float  appearances = 1;

            // single lookup via TryGetValue; the emptiness check is kept so that an empty dictionary is never
            // queried at all (preserves the original behaviour when the accession is null)
            if (accessionCounts.Count != 0 && accessionCounts.TryGetValue(accession, out int accessionCount))
            {
                appearances = accessionCount;
            }

            // an explicit label wins; otherwise targets are true and decoys are false
            bool label = trueOrFalse ?? !psm.IsDecoy;

            return(new PsmData()
            {
                Intensity = intensity,
                ScanPrecursorCharge = charge,
                DeltaScore = deltaScore,
                Notch = notch,
                PsmCount = psmCount,
                ModsCount = modCount,
                MissedCleavagesCount = missedCleavages,
                Ambiguity = ambiguity,
                LongestFragmentIonSeries = longestSeq,
                AccessionAppearances = appearances,
                Label = label
            });
        }