Beispiel #1
0
        /// <summary>
        /// Scores how far a peptide's predicted hydrophobicity lies from the average stored for the PSM's
        /// file and retention-time bin, expressed in standard deviations and capped at 10.
        /// </summary>
        /// <param name="psm">PSM supplying the file path and scan retention time used for the lookup.</param>
        /// <param name="Peptide">Peptide whose hydrophobicity is predicted with SSRCalc3.</param>
        /// <param name="d">Per file path, per retention-time bin: Item1 is the average hydrophobicity, Item2 the standard deviation.</param>
        /// <returns>The absolute z-score, or 10 when no bin matches or the score is NaN, infinite, or larger than 10.</returns>
        private static float GetSSRCalcHydrophobicityZScore(PeptideSpectralMatch psm, PeptideWithSetModifications Peptide, Dictionary <string, Dictionary <int, Tuple <double, double> > > d)
        {
            // each "Z" is one standard deviation. so, a maximum of 10 is quite large
            const double maxHydrophobicityZscore = 10;
            double       hydrophobicityZscore    = double.NaN;

            if (d.TryGetValue(psm.FullFilePath, out Dictionary <int, Tuple <double, double> > binsForFile))
            {
                // Retention times are bucketed to the nearest even integer, i.e. bins are 2 units wide.
                int time = (int)(2 * Math.Round(psm.ScanRetentionTime / 2d, 0));
                if (binsForFile.TryGetValue(time, out Tuple <double, double> averageAndStDev))
                {
                    //Using SSRCalc3 but probably any number of different calculators could be used instead. One could also use the CE mobility.
                    //The calculator is only built once a matching bin is known to exist.
                    SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
                    double   predictedHydrophobicity = calc.ScoreSequence(Peptide);

                    // A zero standard deviation yields infinity (or NaN for 0/0); both are clamped below.
                    hydrophobicityZscore = Math.Abs(averageAndStDev.Item1 - predictedHydrophobicity) / averageAndStDev.Item2;
                }
            }

            if (double.IsNaN(hydrophobicityZscore) || double.IsInfinity(hydrophobicityZscore) || hydrophobicityZscore > maxHydrophobicityZscore)
            {
                hydrophobicityZscore = maxHydrophobicityZscore;
            }

            return((float)hydrophobicityZscore);
        }
Beispiel #2
0
        /// <summary>
        /// Compares ScoreSequence output for the 100A column with the expected value of every row in _peptides100A.
        /// </summary>
        // Not registered as a test: there are problems with the results from the article.
        // [TestMethod]
        public void ScoreSequence_100A_Test()
        {
            var calculator = new SSRCalc3(RetentionTimeRegression.SSRCALC_100_A, SSRCalc3.Column.A100);
            int rowCount   = _peptides100A.GetLength(0);

            for (int row = 0; row < rowCount; row++)
            {
                var    sequence  = (string)_peptides100A[row, 0];
                var    reference = (double)_peptides100A[row, 1];
                double score     = calculator.ScoreSequence(sequence) ?? 0;

                // The assertion is only reached when the raw score is off by more than 0.005;
                // the guard exists to make failures easier to debug.
                bool outsideTolerance = Math.Abs(reference - score) > 0.005;
                if (outsideTolerance)
                {
                    // The score is rounded to two decimals to match the values presented in the
                    // supporting information of the SSRCalc 3 publication. It is cast to float
                    // first because the extra precision of a double made 12.805 round to 12.80
                    // instead of 12.81 (the diff against 12.81 was 0.005000000000002558).
                    double roundedScore = Math.Round((float)score, 2);
                    Assert.AreEqual(reference, roundedScore, "Peptide {0}", sequence);
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Verifies SSRCalc3 (300A column) hydrophobicity predictions for three peptides, each built with an
        /// empty modification dictionary, against fixed reference values with a tolerance of 0.01.
        /// </summary>
        public static void SimpleReversePhaseRetentionTimeTest()
        {
            Dictionary <string, double> peptidesAndHydrophobicities = new Dictionary <string, double>
            {
                { "QSHFANAEPEQK", 11.27 },
                { "SDLFENLQNYR", 30.44 },
                { "SLPSEFEPINLR", 33.12 }
            };

            SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);

            foreach (KeyValuePair <string, double> sequenceAndExpected in peptidesAndHydrophobicities)
            {
                var    peptide  = new PeptideWithSetModifications(sequenceAndExpected.Key, new Dictionary <string, Modification>());
                double expected = sequenceAndExpected.Value;
                double actual   = calc.ScoreSequence(peptide);

                // Actual value first, expected value inside the constraint, so a failure message
                // labels the two numbers correctly; the message names the offending peptide.
                Assert.That(actual, Is.EqualTo(expected).Within(0.01), "Peptide " + sequenceAndExpected.Key);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Builds, for every file path found in <paramref name="psms"/>, a lookup from retention-time bin to the
        /// average and standard deviation of the SSRCalc3-predicted hydrophobicities of the peptides in that bin.
        /// </summary>
        /// <param name="psms">PSMs to draw peptides from. Only non-decoy PSMs with a q-value of at most 0.01 are used.</param>
        /// <param name="computeHydrophobicitiesforModifiedPeptides">
        /// true: only peptides carrying at least one modification contribute; false: only peptides without modifications contribute.
        /// </param>
        /// <returns>Outer key: file path. Inner key: retention-time bin. Value: Item1 = average, Item2 = standard deviation.</returns>
        private static Dictionary <string, Dictionary <int, Tuple <double, double> > > ComputeHydrophobicityValues(List <PeptideSpectralMatch> psms, bool computeHydrophobicitiesforModifiedPeptides)
        {
            SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);

            //TODO change the tuple so the values have names
            Dictionary <string, Dictionary <int, Tuple <double, double> > > rtHydrophobicityAvgDev = new Dictionary <string, Dictionary <int, Tuple <double, double> > >();

            // NOTE(review): a PSM whose FullFilePath is null puts null into this list, and adding a null key
            // to rtHydrophobicityAvgDev at the end of the loop throws. Confirm whether null paths can occur.
            List <string> filenames = psms.Select(f => f.FullFilePath).Distinct().ToList();

            foreach (string filename in filenames)
            {
                Dictionary <int, List <double> > hydrophobicities = new Dictionary <int, List <double> >();

                foreach (PeptideSpectralMatch psm in psms.Where(f => (f.FullFilePath == filename || f.FullFilePath == null) && f.FdrInfo.QValue <= 0.01 && !f.IsDecoy))
                {
                    //here i'm grouping this in 2 minute increments because there are cases where you get too few data points to get a good standard deviation and average. This is for stability.
                    int possibleKey = (int)(2 * Math.Round(psm.ScanRetentionTime / 2d, 0));

                    // Each distinct full sequence of a PSM is considered once.
                    HashSet <string> fullSequences = new HashSet <string>();

                    foreach ((int notch, PeptideWithSetModifications pwsm) in psm.BestMatchingPeptides)
                    {
                        if (!fullSequences.Add(pwsm.FullSequence))
                        {
                            continue;
                        }

                        // Modified peptides are collected only when asked for, unmodified ones only when not.
                        if (pwsm.AllModsOneIsNterminus.Any() != computeHydrophobicitiesforModifiedPeptides)
                        {
                            continue;
                        }

                        // Scored only after the filters above so discarded peptides cost nothing.
                        double predictedHydrophobicity = calc.ScoreSequence(pwsm);

                        if (!hydrophobicities.TryGetValue(possibleKey, out List <double> binValues))
                        {
                            binValues = new List <double>();
                            hydrophobicities.Add(possibleKey, binValues);
                        }
                        binValues.Add(predictedHydrophobicity);
                    }
                }

                Dictionary <int, Tuple <double, double> > averagesCommaStandardDeviations = new Dictionary <int, Tuple <double, double> >();
                List <double> allSquaredHydrophobicityDifferences = new List <double>();

                foreach (KeyValuePair <int, List <double> > bin in hydrophobicities)
                {
                    //TODO consider using inner-quartile range instead of standard deviation
                    double averageHydrophobicity = bin.Value.Average();
                    averagesCommaStandardDeviations.Add(bin.Key, new Tuple <double, double>(averageHydrophobicity, bin.Value.StandardDeviation()));

                    foreach (double hydrophobicity in bin.Value)
                    {
                        // Only strictly positive differences feed the global deviation; the comparison is also false for NaN.
                        double difference = Math.Abs(hydrophobicity - averageHydrophobicity);
                        if (difference > 0)
                        {
                            allSquaredHydrophobicityDifferences.Add(Math.Pow(difference, 2));
                        }
                    }
                }

                //some standard deviations are too small or too large because of random reasons, so we replace those small numbers of oddballs with reasonable numbers.
                double globalStDev = 1;
                if (allSquaredHydrophobicityDifferences.Count > 1)
                {
                    globalStDev = Math.Sqrt(allSquaredHydrophobicityDifferences.Sum() / (allSquaredHydrophobicityDifferences.Count - 1));
                }

                //add stability. not allowing stdevs that are too small or too large at one position relative to the global stdev.
                //The keys are snapshotted first so entries can be replaced while looping.
                foreach (int retentionTimeBin in averagesCommaStandardDeviations.Keys.ToList())
                {
                    Tuple <double, double> current = averagesCommaStandardDeviations[retentionTimeBin];
                    if (double.IsNaN(current.Item2) || current.Item2 < 0.5 || (current.Item2 / globalStDev) > 3)
                    {
                        averagesCommaStandardDeviations[retentionTimeBin] = new Tuple <double, double>(current.Item1, globalStDev);
                    }
                }

                rtHydrophobicityAvgDev.Add(filename, averagesCommaStandardDeviations);
            }
            return(rtHydrophobicityAvgDev);
        }
        //Determines whether each peptide is unique or shared and generates the in silico peptide objects.
        //Peptides are grouped by full sequence when userParams.TreatModifiedPeptidesAsDifferent is true and by
        //base sequence otherwise. A group whose peptides all come from one protein is unique; any other group is shared.
        //Returns the generated InSilicoPep objects keyed by the protein each peptide belongs to.
        Dictionary <Protein, List <InSilicoPep> > DeterminePeptideStatus(string databaseName, Dictionary <Protein, List <PeptideWithSetModifications> > databasePeptides, Parameters userParams)
        {
            SSRCalc3 RTPrediction = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
            Dictionary <Protein, List <InSilicoPep> > inSilicoPeptides = new Dictionary <Protein, List <InSilicoPep> >();

            bool groupByFullSequence = userParams.TreatModifiedPeptidesAsDifferent == true;

            var peptidesBySequence = databasePeptides.Values
                                     .SelectMany(peptides => peptides)
                                     .GroupBy(p => groupByFullSequence ? p.FullSequence : p.BaseSequence);

            foreach (var sequenceGroup in peptidesBySequence)
            {
                // Unique means every peptide with this sequence maps to the same protein.
                bool isUnique = sequenceGroup.Select(p => p.Protein).Distinct().Count() == 1;

                foreach (var peptide in sequenceGroup)
                {
                    var inSilicoPep = new InSilicoPep(peptide.BaseSequence, peptide.FullSequence, peptide.PreviousAminoAcid, peptide.NextAminoAcid, isUnique,
                                                      RTPrediction.ScoreSequence(peptide), GetCifuentesMobility(peptide), peptide.Length, peptide.MonoisotopicMass, databaseName,
                                                      peptide.Protein.Accession, peptide.OneBasedStartResidueInProtein, peptide.OneBasedEndResidueInProtein, peptide.DigestionParams.Protease.Name);

                    if (!inSilicoPeptides.TryGetValue(peptide.Protein, out List <InSilicoPep> peptidesOfProtein))
                    {
                        peptidesOfProtein = new List <InSilicoPep>();
                        inSilicoPeptides.Add(peptide.Protein, peptidesOfProtein);
                    }
                    peptidesOfProtein.Add(inSilicoPep);
                }
            }

            return(inSilicoPeptides);
        }
Beispiel #6
0
        /// <summary>
        /// Compares ScoreSequence output for the 300A column with the expected value of every row in _peptides300A.
        /// </summary>
        public void ScoreSequence_300A_Test()
        {
            var scorer = new SSRCalc3(RetentionTimeRegression.SSRCALC_300_A, SSRCalc3.Column.A300);

            int row = 0;
            while (row < _peptides300A.GetLength(0))
            {
                var    sequence = (string)_peptides300A[row, 0];
                var    target   = (double)_peptides300A[row, 1];
                double raw      = scorer.ScoreSequence(sequence) ?? 0;

                // Two-decimal rounding matches the values presented in the supporting
                // information of the SSRCalc 3 publication. The value goes through float
                // first: with full double precision 12.805 rounded to 12.80 rather than
                // 12.81, leaving a diff of 0.005000000000002558 against 12.81.
                double rounded = Math.Round((float)raw, 2);

                // The assertion only runs when the raw score is off by more than 0.005,
                // which makes issues easier to debug.
                double deviation = Math.Abs(target - raw);
                if (deviation > 0.005)
                {
                    Assert.AreEqual(target, rounded, "Peptide {0}", sequence);
                }

                row++;
            }
        }
Beispiel #7
0
        /// <summary>
        /// Adds a scatter plot of the PSMs in allPsms to privateModel. PSMs with identified sequence variations go
        /// into a separate dark red series; all others go into a blue series.
        /// </summary>
        /// <param name="plotType">
        /// 1: precursor ppm error vs. retention time; 2: fragment ppm error vs. retention time;
        /// 3: predicted vs. observed retention time. Any other value adds only two untitled axes.
        /// </param>
        private void linePlot(int plotType)
        {
            string        yAxisTitle = "";
            string        xAxisTitle = "";
            ScatterSeries series     = new ScatterSeries
            {
                MarkerFill          = OxyColors.Blue,
                MarkerSize          = 0.5,
                TrackerFormatString = "{1}: {2:0.###}\n{3}: {4:0.###}\nFull sequence: {Tag}"
            };
            ScatterSeries variantSeries = new ScatterSeries
            {
                MarkerFill          = OxyColors.DarkRed,
                MarkerSize          = 1.5,
                MarkerType          = MarkerType.Circle,
                TrackerFormatString = "{1}: {2:0.###}\n{3}: {4:0.###}\nFull sequence: {Tag}"
            };

            // In both lists Item1 becomes the point's y value, Item2 its x value and Item3 its tag.
            List <Tuple <double, double, string> > xy        = new List <Tuple <double, double, string> >();
            List <Tuple <double, double, string> > variantxy = new List <Tuple <double, double, string> >();

            switch (plotType)
            {
            case 1:     // Precursor PPM Error vs. RT
                yAxisTitle = "Precursor error (ppm)";
                xAxisTitle = "Retention time";

                // Only PSMs with a single mass difference (no '|' separator) that rounds to 0 Da are plotted.
                // The filter lives in this case because no other plot type uses it.
                foreach (var psm in allPsms.Where(p => !p.MassDiffDa.Contains("|") && Math.Round(double.Parse(p.MassDiffDa), 0) == 0))
                {
                    var  point     = new Tuple <double, double, string>(double.Parse(psm.MassDiffPpm), (double)psm.RetentionTime, psm.FullSequence);
                    bool isVariant = !(psm.IdentifiedSequenceVariations == null || psm.IdentifiedSequenceVariations.Equals(""));
                    (isVariant ? variantxy : xy).Add(point);
                }
                break;

            case 2:     // Fragment PPM Error vs. RT
                yAxisTitle = "Retention time";
                xAxisTitle = "Fragment error (ppm)";
                foreach (var psm in allPsms)
                {
                    foreach (var ion in psm.MatchedIons)
                    {
                        xy.Add(new Tuple <double, double, string>((double)psm.RetentionTime, ion.MassErrorPpm, psm.FullSequence));
                    }
                }
                break;

            case 3:     // Predicted RT vs. Observed RT
                yAxisTitle = "Predicted retention time";
                xAxisTitle = "Observed retention time";
                SSRCalc3 sSRCalc3 = new SSRCalc3("A100", SSRCalc3.Column.A100);
                foreach (var psm in allPsms)
                {
                    // Ambiguous base sequences are '|'-separated; the first one is used for the prediction.
                    var point = new Tuple <double, double, string>(sSRCalc3.ScoreSequence(new PeptideWithSetModifications(psm.BaseSeq.Split('|')[0], null)),
                                                                   (double)psm.RetentionTime, psm.FullSequence);
                    bool isVariant = !(psm.IdentifiedSequenceVariations == null || psm.IdentifiedSequenceVariations.Equals(""));
                    (isVariant ? variantxy : xy).Add(point);
                }
                break;
            }

            AddSeriesWithLegendEntry(series, xy, "non-variant PSMs");
            AddSeriesWithLegendEntry(variantSeries, variantxy, "variant PSMs");

            privateModel.Axes.Add(new LinearAxis {
                Title = xAxisTitle, Position = AxisPosition.Bottom
            });
            privateModel.Axes.Add(new LinearAxis {
                Title = yAxisTitle, Position = AxisPosition.Left
            });
        }

        // Fills the series with the points ordered by Item1 and adds it to privateModel, followed by an empty,
        // titled series of the same fill color. Does nothing when there are no points.
        private void AddSeriesWithLegendEntry(ScatterSeries targetSeries, List <Tuple <double, double, string> > points, string legendTitle)
        {
            if (points.Count == 0)
            {
                return;
            }

            foreach (var val in points.OrderBy(p => p.Item1))
            {
                targetSeries.Points.Add(new ScatterPoint(val.Item2, val.Item1, tag: val.Item3));
            }
            privateModel.Series.Add(targetSeries);

            // add series displayed in legend, the real series will show up with a tiny dot for the symbol
            privateModel.Series.Add(new ScatterSeries {
                Title = legendTitle, MarkerFill = targetSeries.MarkerFill
            });
        }
        /// <summary>
        /// Builds, for every file path found in <paramref name="psms"/>, a lookup from retention time (rounded to
        /// the nearest integer) to the average and standard deviation of the SSRCalc3-predicted hydrophobicities
        /// of the peptides observed at that time.
        /// </summary>
        /// <param name="psms">PSMs to draw peptides from. Only PSMs with a q-value of at most 0.01 are used.</param>
        /// <param name="computeHydrophobicitiesforModifiedPeptides">
        /// true: only peptides carrying at least one modification contribute; false: only peptides without modifications contribute.
        /// </param>
        /// <returns>Outer key: file path. Inner key: rounded retention time. Value: Item1 = average, Item2 = standard deviation.</returns>
        private static Dictionary <string, Dictionary <int, Tuple <double, double> > > ComputeHydrophobicityValues(List <PeptideSpectralMatch> psms, bool computeHydrophobicitiesforModifiedPeptides)
        {
            SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
            Dictionary <string, Dictionary <int, Tuple <double, double> > > rtHydrophobicityAvgDev = new Dictionary <string, Dictionary <int, Tuple <double, double> > >();

            List <string> filenames = psms.Select(f => f.FullFilePath).Distinct().ToList();

            foreach (string filename in filenames)
            {
                Dictionary <int, List <double> >          hydrophobicities = new Dictionary <int, List <double> >();
                Dictionary <int, Tuple <double, double> > averagesCommaStandardDeviations = new Dictionary <int, Tuple <double, double> >();

                foreach (PeptideSpectralMatch psm in psms.Where(f => (f.FullFilePath == filename || f.FullFilePath == null) && f.FdrInfo.QValue <= 0.01))
                {
                    int possibleKey = (int)Math.Round(psm.ScanRetentionTime, 0);

                    foreach ((int notch, PeptideWithSetModifications pwsm) in psm.BestMatchingPeptides)
                    {
                        // Keep modified peptides when modified peptides were requested and unmodified ones otherwise.
                        // The previous conditions were inverted relative to the flag's name, so the "modified"
                        // statistics were built from unmodified peptides and vice versa.
                        if (pwsm.AllModsOneIsNterminus.Any() != computeHydrophobicitiesforModifiedPeptides)
                        {
                            continue;
                        }

                        double predictedHydrophobicity = calc.ScoreSequence(pwsm);

                        if (!hydrophobicities.TryGetValue(possibleKey, out List <double> valuesAtTime))
                        {
                            valuesAtTime = new List <double>();
                            hydrophobicities.Add(possibleKey, valuesAtTime);
                        }
                        valuesAtTime.Add(predictedHydrophobicity);
                    }
                }

                foreach (KeyValuePair <int, List <double> > timeAndValues in hydrophobicities)
                {
                    //TODO consider using inner-quartile range instead of standard deviation
                    averagesCommaStandardDeviations.Add(timeAndValues.Key, new Tuple <double, double>(timeAndValues.Value.Average(), timeAndValues.Value.StandardDeviation()));
                }

                rtHydrophobicityAvgDev.Add(filename, averagesCommaStandardDeviations);
            }
            return(rtHydrophobicityAvgDev);
        }
        //Determines whether peptides are unique or shared within the specific database they came from (pooled analysis is done later).
        //Peptides are keyed by full sequence when userParams.TreatModifiedPeptidesAsDifferent is true and by base sequence otherwise.
        //A sequence seen in exactly one protein is unique; a sequence seen in two or more proteins is shared.
        //Returns the generated InSilicoPeptide objects grouped by their Protein; shared peptides are generated before unique ones.
        Dictionary <Protein, List <InSilicoPeptide> > DeterminePeptideStatus(Dictionary <Protein, List <PeptideWithSetModifications> > databasePeptides, Parameters userParams)
        {
            SSRCalc3 RTPrediction = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
            bool     treatModPeptidesAsDifferent = userParams.TreatModifiedPeptidesAsDifferent;

            // sequence -> (every peptide with that sequence, every protein that sequence was digested from)
            Dictionary <string, (List <PeptideWithSetModifications>, HashSet <Protein>)> peptidesToProteins = new Dictionary <string, (List <PeptideWithSetModifications>, HashSet <Protein>)>();

            foreach (var proteinAndPeptides in databasePeptides)
            {
                foreach (var peptide in proteinAndPeptides.Value)
                {
                    string sequenceKey = treatModPeptidesAsDifferent ? peptide.FullSequence : peptide.BaseSequence;

                    if (!peptidesToProteins.TryGetValue(sequenceKey, out (List <PeptideWithSetModifications>, HashSet <Protein>) entry))
                    {
                        entry = (new List <PeptideWithSetModifications>(), new HashSet <Protein>());
                        peptidesToProteins.Add(sequenceKey, entry);
                    }

                    // The tuple is a copy, but both of its members reference the collections stored in the dictionary.
                    entry.Item1.Add(peptide);
                    entry.Item2.Add(proteinAndPeptides.Key);
                }
            }

            var sharedPeptides = peptidesToProteins.Values.Where(p => p.Item2.Count >= 2).SelectMany(p => p.Item1);
            var uniquePeptides = peptidesToProteins.Values.Where(p => p.Item2.Count == 1).SelectMany(p => p.Item1);
            List <InSilicoPeptide> inSilicoPeptides = new List <InSilicoPeptide>();

            foreach (var peptide in sharedPeptides)
            {
                inSilicoPeptides.Add(CreateInSilicoPeptide(peptide, false, RTPrediction));
            }
            foreach (var peptide in uniquePeptides)
            {
                inSilicoPeptides.Add(CreateInSilicoPeptide(peptide, true, RTPrediction));
            }

            return(inSilicoPeptides.GroupBy(p => p.Protein).ToDictionary(group => group.Key, group => group.ToList()));
        }

        //Copies a digested peptide into an InSilicoPeptide flagged as unique or shared, then stores its predicted
        //hydrophobicity (scored on the new object) and electrophoretic mobility (computed from the source peptide).
        InSilicoPeptide CreateInSilicoPeptide(PeptideWithSetModifications peptide, bool isUnique, SSRCalc3 rtPrediction)
        {
            var pep = new InSilicoPeptide(peptide.Protein, peptide.DigestionParams, peptide.OneBasedStartResidueInProtein, peptide.OneBasedEndResidueInProtein,
                                          CleavageSpecificity.Full, peptide.PeptideDescription, peptide.MissedCleavages, peptide.AllModsOneIsNterminus, peptide.NumFixedMods,
                                          peptide.BaseSequence, isUnique);
            var hydrophob = rtPrediction.ScoreSequence(pep);
            var em        = GetCifuentesMobility(peptide);

            pep.SetHydrophobicity(hydrophob);
            pep.SetElectrophoreticMobility(em);
            return(pep);
        }