Ejemplo n.º 1
0
        /// <summary>
        /// Renders a peptide sequence as text, annotating modified residues in square brackets.
        /// </summary>
        /// <remarks>
        /// Terminal and ordinary residue modifications are written as "[mass]" with zero decimal places.
        /// A residue whose position (index + 1) has an entry in <paramref name="localizationProbabilities"/>
        /// is written as "[mass(probability%)]" using the scored amino acid's mass delta from
        /// <paramref name="config"/> instead; when that probability is not positive nothing is written
        /// for the residue, even if the peptide carries a modification there.
        /// </remarks>
        /// <param name="peptide">Peptide whose sequence and modification map are rendered.</param>
        /// <param name="localizationProbabilities">Site probabilities, looked up by sequence position.</param>
        /// <param name="config">Supplies the mass delta shown for sites that have a probability.</param>
        /// <returns>The annotated sequence string.</returns>
        private string PeptideToString(proteome.Peptide peptide, IList <phosphoRS.PTMSiteProbability> localizationProbabilities, PhosphoRSConfig config)
        {
            var probabilityBySite = localizationProbabilities.ToDictionary(o => o.SequencePosition, o => o.Probability);

            // Expands to "[{0:f0}]": a bracketed mass with no decimals.
            string massFormat = String.Format("[{{0:f{0}}}]", 0);
            var annotated = new StringBuilder();

            // N-terminal modification precedes the first residue.
            if (peptide.modifications().ContainsKey(proteome.ModificationMap.NTerminus()))
            {
                annotated.AppendFormat(massFormat, peptide.modifications()[proteome.ModificationMap.NTerminus()].monoisotopicDeltaMass());
            }

            for (int offset = 0; offset < peptide.sequence.Length; offset++)
            {
                annotated.Append(peptide.sequence[offset]);

                int site = offset + 1;
                if (!probabilityBySite.ContainsKey(site))
                {
                    // No localization result for this residue: fall back to the peptide's own modification.
                    if (peptide.modifications().ContainsKey(offset))
                    {
                        double deltaMass = peptide.modifications()[offset].monoisotopicDeltaMass();
                        annotated.AppendFormat(massFormat, deltaMass);
                    }
                    continue;
                }

                // Scored site: only sites with a positive probability are annotated.
                if (probabilityBySite[site] > 0)
                {
                    annotated.AppendFormat("[{0:f0}({1:f0}%)]", config.scoredAA.MassDelta, probabilityBySite[site] * 100);
                }
            }

            // C-terminal modification follows the last residue.
            if (peptide.modifications().ContainsKey(proteome.ModificationMap.CTerminus()))
            {
                annotated.AppendFormat(massFormat, peptide.modifications()[proteome.ModificationMap.CTerminus()].monoisotopicDeltaMass());
            }

            return annotated.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Samples up to 1000 of the currently filtered peptide-spectrum matches, loads each spectrum
        /// from its source file, and fills the precursor and charge-reduced scatter plots with
        /// (peak m/z - reference m/z, peak intensity / TIC) points.
        /// </summary>
        /// <remarks>
        /// Peaks that match a singly charged theoretical fragment (within 0.03 m/z) are added with the
        /// ion series index as the point's Z value and a descriptive tag; afterwards every peak of the
        /// spectrum is added with Z = 0. The graph is refreshed through <c>Invoke</c> every 100 spectra
        /// and once more at the end.
        /// </remarks>
        /// <returns>Always a new empty list; the plots are the real output of this method.</returns>
        /// <exception cref="System.IO.FileNotFoundException">A spectrum source could not be located.</exception>
        private List <double> getPeakStatistics()
        {
            IList <object[]> queryRows;

            lock (session)
            {
                string[] randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                     .List <long>()
                                     .Shuffle()
                                     .Take(1000)
                                     .OrderBy(o => o)
                                     .Select(o => o.ToString())
                                     .ToArray();

                if (randomIds.Length == 0)
                {
                    // Nothing passed the filter: avoid issuing a query with an empty "IN ()" list.
                    queryRows = new List <object[]>();
                }
                else
                {
                    string randomIdSet = String.Join(",", randomIds);
                    queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                    "FROM PeptideSpectrumMatch psm " +
                                                    "LEFT JOIN psm.Modifications pm " +
                                                    "LEFT JOIN pm.Modification mod " +
                                                    "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                    "GROUP BY psm.Spectrum.id ")
                                .List <object[]>();
                }
            }

            // Ordering by source name lets each source file be opened only once in the loop below.
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            precursorScatterPlot.Clear();
            chargeReducedScatterPlot.Clear();

            int spectraCount = 0;

            string spectrumListFilters = String.Empty;

            // Control state is read (and the graph refreshed) through Invoke.
            Invoke(new MethodInvoker(() =>
            {
                spectrumListFilters = spectrumFiltersTextBox.Text;
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));

            string currentSourceName = null;

            msdata.MSData msd = null;

            // Settings that do not vary between spectra are set up once.
            const int fragmentCharge = 1; // only singly charged fragments are matched
            bool plotMatchedPeaks   = true;
            bool removeMatchedPeaks = false;
            double tolerance = 0.03;
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast <IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            try
            {
                lock (owner)
                {
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            currentSourceName = row.SourceName;
                            string currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                            if (String.IsNullOrEmpty(currentSourcePath))
                            {
                                throw new System.IO.FileNotFoundException("source file not found");
                            }

                            // Release the previously opened source before switching to the next one.
                            var previousSource = msd as IDisposable;
                            if (previousSource != null)
                            {
                                previousSource.Dispose();
                            }
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string label = String.Format("{0}/{1}\n{2}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID), row.ModifiedSequence);

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var    pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var    pointMap     = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                        double tic          = pointMap.Values.Sum();

                        // A spectrum without signal cannot be normalized by its TIC; dividing by
                        // zero would only add NaN points to the plots.
                        if (!(tic > 0))
                        {
                            continue;
                        }

                        double precursorMz = row.Spectrum.PrecursorMZ;
                        double chargeReducedPrecursorMz = precursorMz * row.PeptideSpectrumMatch.Charge;

                        for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                        {
                            string NTermFragment = row.ModifiedSequence.Substring(0, length);

                            foreach (IonSeries series in ionSeries)
                            {
                                // These series are not evaluated for the full-length peptide.
                                if ((series == IonSeries.c || series == IonSeries.cMinus1 || series == IonSeries.x) &&
                                    length == pwizPeptide.sequence.Length)
                                {
                                    continue;
                                }

                                seems.PointMap.Enumerator itr = pointMap.FindNear(fragmentMass(fragmentation, series, length, fragmentCharge), tolerance);
                                if (itr != null && itr.IsValid)
                                {
                                    if (plotMatchedPeaks)
                                    {
                                        precursorScatterPlot.AddPoint(new PointPair(itr.Current.Key - precursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, precursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                        chargeReducedScatterPlot.AddPoint(new PointPair(itr.Current.Key - chargeReducedPrecursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, chargeReducedPrecursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                    }

                                    if (removeMatchedPeaks)
                                    {
                                        pointMap.Remove(itr);
                                    }
                                }
                            }
                        }

                        // Every peak still in the map is plotted without a series (Z = 0).
                        foreach (var pair in pointMap)
                        {
                            precursorScatterPlot.AddPoint(new PointPair(pair.Key - precursorMz, pair.Value / tic, 0, label));
                            chargeReducedScatterPlot.AddPoint(new PointPair(pair.Key - chargeReducedPrecursorMz, pair.Value / tic, 0, label));
                        }

                        // Periodic refresh so progress is visible while the sample is processed.
                        if ((spectraCount % 100) == 0)
                        {
                            Invoke(new MethodInvoker(() =>
                            {
                                zedGraphControl.MasterPane.AxisChange();
                                zedGraphControl.Refresh();
                            }));
                        }
                    }
                }
            }
            finally
            {
                // Release the last opened source even when processing fails part-way.
                var lastSource = msd as IDisposable;
                if (lastSource != null)
                {
                    lastSource.Dispose();
                }
            }

            Invoke(new MethodInvoker(() =>
            {
                if (!lockZoomCheckBox.Checked)
                {
                    zedGraphControl.ZoomOutAll(zedGraphControl.GraphPane);
                }
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));
            return(new List <double>()); //percentTicBySpectrumByFragmentType[1];
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Samples up to 1000 of the currently filtered peptide-spectrum matches and, for each spectrum
        /// and ion series, computes the percentage of TIC matched, the percentage of peaks matched and
        /// the mean m/z error of singly charged theoretical fragments, adding one jittered point per
        /// spectrum to the corresponding graph panes.
        /// </summary>
        /// <remarks>
        /// The fragment tolerance and spectrum list filters are read from the form's controls through
        /// <c>Invoke</c>. After all spectra are processed, a six-number summary is added per ion series
        /// for every list holding at least five values.
        /// </remarks>
        /// <exception cref="FileNotFoundException">A spectrum source could not be located.</exception>
        private void getFragmentationStatistics()
        {
            IList <object[]> queryRows;

            lock (session)
            {
                string[] randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                     .List <long>()
                                     .Shuffle()
                                     .Take(1000)
                                     .OrderBy(o => o)
                                     .Select(o => o.ToString())
                                     .ToArray();

                if (randomIds.Length == 0)
                {
                    // Nothing passed the filter: avoid issuing a query with an empty "IN ()" list.
                    queryRows = new List <object[]>();
                }
                else
                {
                    string randomIdSet = String.Join(",", randomIds);
                    queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                    "FROM PeptideSpectrumMatch psm " +
                                                    "LEFT JOIN psm.Modifications pm " +
                                                    "LEFT JOIN pm.Modification mod " +
                                                    "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                    "GROUP BY psm.Spectrum.id ")
                                .List <object[]>();
                }
            }

            // Ordering by source name lets each source file be opened only once in the loop below.
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            var percentTicBySpectrumByFragmentType       = new List <PointPairList>();
            var percentPeakCountBySpectrumByFragmentType = new List <PointPairList>();
            var meanMzErrorBySpectrumByFragmentType      = new List <PointPairList>();
            var percentTicListByFragmentType             = new List <List <double> >();
            var percentPeakCountListByFragmentType       = new List <List <double> >();
            var meanMzErrorListByFragmentType            = new List <List <double> >();

            foreach (var graphControl in graphControls)
            {
                graphControl.MasterPane.PaneList.ForEach(o => o.CurveList.ForEach(c => c.Clear()));
            }

            // One point list per ion series, taken from curve 3 of pane (series index + 1) of each graph.
            for (int i = 0; i < (int)IonSeries.Count; ++i)
            {
                percentTicBySpectrumByFragmentType.Add(percentTicGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentPeakCountBySpectrumByFragmentType.Add(percentPeakCountGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                meanMzErrorBySpectrumByFragmentType.Add(meanMzErrorGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentTicListByFragmentType.Add(new List <double>());
                percentPeakCountListByFragmentType.Add(new List <double>());
                meanMzErrorListByFragmentType.Add(new List <double>());
            }

            int spectraCount = 0;

            maxPercentTic       = 10;
            maxPercentPeakCount = 10;
            maxMeanMzError      = 0.1;
            var tolerance = fragmentTolerance;

            string spectrumListFilters = String.Empty;

            Invoke(new MethodInvoker(() =>
            {
                tolerance.value = Convert.ToDouble(fragmentToleranceTextBox.Text);
                tolerance.units = (MZTolerance.Units)fragmentToleranceUnitsComboBox.SelectedIndex;
                meanMzErrorGraphControl.GraphPane.YAxis.Title.Text = "Mean m/z error (" + tolerance.units.ToString() + ")";

                spectrumListFilters = spectrumFiltersTextBox.Text;
                setAutomaticScales();
            }));

            string currentSourceName = null;
            string currentSourcePath = null;

            msdata.MSData msd = null;

            // Shared across all spectra: a generator created per spectrum can be seeded identically
            // when rows are processed within the same clock tick, giving repeated jitter values.
            var rng = new Random();

            // The set of ion series does not change between spectra.
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast <IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            try
            {
                lock (owner)
                {
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            currentSourceName = row.SourceName;
                            currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                            if (String.IsNullOrEmpty(currentSourcePath))
                            {
                                throw new FileNotFoundException("source file not found");
                            }

                            // Release the previously opened source before switching to the next one.
                            var previousSource = msd as IDisposable;
                            if (previousSource != null)
                            {
                                previousSource.Dispose();
                            }
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string spectrumId = String.Format("{0}/{1}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID));

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var    pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var    pointMap     = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                        double tic          = pointMap.Values.Sum();

                        // Without peaks or signal the percentages below would be NaN, and Math.Max
                        // would then propagate NaN into the running axis maxima.
                        if (pointMap.Count == 0 || !(tic > 0))
                        {
                            continue;
                        }

                        var percentTicByFragmentType       = new List <double>(Enumerable.Repeat(0.0, (int)IonSeries.Count));
                        var percentPeakCountByFragmentType = new List <double>(Enumerable.Repeat(0.0, (int)IonSeries.Count));
                        var matchCountByFragmentType       = new List <int>(Enumerable.Repeat(0, (int)IonSeries.Count));
                        var meanMzErrorByFragmentType      = new List <double>(Enumerable.Repeat(Double.NaN, (int)IonSeries.Count));

                        seems.PointMap.Enumerator itr;
                        double      expected;

                        // Only singly charged fragments are considered.
                        for (int z = 1; z <= 1; ++z)
                        {
                            for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                            {
                                foreach (IonSeries series in ionSeries)
                                {
                                    // c and x ions are not evaluated for the full-length peptide.
                                    if ((series == IonSeries.c || series == IonSeries.x) &&
                                        length == pwizPeptide.sequence.Length)
                                    {
                                        continue;
                                    }

                                    expected = fragmentMass(fragmentation, series, length, z);
                                    // expected - (expected - tolerance) yields the tolerance as a plain
                                    // m/z distance at this mass — presumably to resolve relative units; TODO confirm.
                                    itr      = pointMap.FindNear(expected, expected - (expected - tolerance));
                                    if (itr != null && itr.IsValid)
                                    {
                                        percentTicByFragmentType[(int)series] += itr.Current.Value;
                                        ++percentPeakCountByFragmentType[(int)series];
                                        ++matchCountByFragmentType[(int)series];
                                        // NaN marks "no match yet"; switch to 0 before accumulating.
                                        if (Double.IsNaN(meanMzErrorByFragmentType[(int)series]))
                                        {
                                            meanMzErrorByFragmentType[(int)series] = 0;
                                        }
                                        meanMzErrorByFragmentType[(int)series] += mzError(itr.Current.Key, expected);
                                    }
                                }
                            }
                        }

                        for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                        {
                            // convert sum to mean
                            if (percentPeakCountByFragmentType[i] > 0)
                            {
                                meanMzErrorByFragmentType[i] /= matchCountByFragmentType[i];
                            }

                            // convert to percentages
                            percentTicByFragmentType[i]       /= tic / 100;
                            percentPeakCountByFragmentType[i] /= pointMap.Count / 100.0;

                            maxPercentTic       = Math.Max(maxPercentTic, percentTicByFragmentType[i]);
                            maxPercentPeakCount = Math.Max(maxPercentPeakCount, percentPeakCountByFragmentType[i]);

                            // Horizontal jitter in [-0.5, 0.5) spreads the points of one series apart.
                            double jitter = (rng.NextDouble() - 0.5);
                            percentTicBySpectrumByFragmentType[i].Add(jitter, percentTicByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentTicByFragmentType[i], matchCountByFragmentType[i]));
                            percentPeakCountBySpectrumByFragmentType[i].Add(jitter, percentPeakCountByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentPeakCountByFragmentType[i], matchCountByFragmentType[i]));

                            percentTicListByFragmentType[i].Add(percentTicByFragmentType[i]);
                            percentPeakCountListByFragmentType[i].Add(percentPeakCountByFragmentType[i]);

                            // Series without any match keep NaN and contribute no mean-error point.
                            if (!Double.IsNaN(meanMzErrorByFragmentType[i]))
                            {
                                maxMeanMzError = Math.Max(maxMeanMzError, Math.Abs(meanMzErrorByFragmentType[i]));
                                meanMzErrorBySpectrumByFragmentType[i].Add(jitter, meanMzErrorByFragmentType[i], String.Format("{0}: {1:G4}%", spectrumId, meanMzErrorByFragmentType[i]));
                                meanMzErrorListByFragmentType[i].Add(meanMzErrorByFragmentType[i]);
                            }
                        }

                        // NOTE(review): called here without Invoke, unlike the call above — confirm
                        // setAutomaticScales marshals to the UI thread itself.
                        if ((spectraCount % 100) == 0)
                        {
                            setAutomaticScales();
                        }
                    }// for each spectrum row
                }
            }
            finally
            {
                // Release the last opened source even when processing fails part-way.
                var lastSource = msd as IDisposable;
                if (lastSource != null)
                {
                    lastSource.Dispose();
                }
            }

            Invoke(new MethodInvoker(() =>
            {
                for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                {
                    // Too few values for a meaningful summary.
                    if (percentTicListByFragmentType[i].Count < 5)
                    {
                        continue;
                    }
                    percentTicListByFragmentType[i].Sort();
                    percentPeakCountListByFragmentType[i].Sort();
                    addSixNumberSummary(percentTicGraphControl.MasterPane.PaneList[i + 1], percentTicListByFragmentType[i]);
                    addSixNumberSummary(percentPeakCountGraphControl.MasterPane.PaneList[i + 1], percentPeakCountListByFragmentType[i]);

                    if (meanMzErrorListByFragmentType[i].Count < 5)
                    {
                        continue;
                    }
                    meanMzErrorListByFragmentType[i].Sort();
                    addSixNumberSummary(meanMzErrorGraphControl.MasterPane.PaneList[i + 1], meanMzErrorListByFragmentType[i]);
                }
            }));
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Verifies that the peptide-spectrum matches stored in the session have the expected peptide,
        /// analysis, charge, rank, modifications, mass errors and scores for a handful of
        /// source/spectrum/analysis/rank combinations.
        /// </summary>
        public void TestPeptideSpectrumMatches()
        {
            var ss1 = session.UniqueResult<SpectrumSource>(o => o.Name == "Source 1");
            var ss4 = session.UniqueResult<SpectrumSource>(o => o.Name == "Source 4");
            var a1 = session.UniqueResult<Analysis>(o => o.Name == "Engine 1 1.0");
            var a2 = session.UniqueResult<Analysis>(o => o.Name == "Engine 2 1.0");

            // Source 1, first spectrum, engine 1, rank 1: [C2H2O1]PEPTIDE
            var ss1s1psm1 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                            o.Spectrum.Index == 0 &&
                                                                            o.Analysis.Id == a1.Id &&
                                                                            o.Rank == 1);
            var pwizPeptide = new PwizPeptide("(C2H2O1)PEPTIDE", proteome.ModificationParsing.ModificationParsing_ByFormula);
            Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "PEPTIDE"), ss1s1psm1.Peptide);
            Assert.AreEqual(a1, ss1s1psm1.Analysis);
            Assert.IsTrue(a1.Matches.Contains(ss1s1psm1));
            Assert.AreEqual(1, ss1s1psm1.Modifications.Count);
            Assert.AreEqual("C2H2O1", ss1s1psm1.Modifications[0].Modification.Formula);
            Assert.AreEqual(int.MinValue, ss1s1psm1.Modifications[0].Offset);
            Assert.AreEqual(2, ss1s1psm1.Charge);
            // Observed mass minus the stored error must reproduce the theoretical mass.
            Assert.AreEqual(pwizPeptide.monoisotopicMass(), ss1s1psm1.ObservedNeutralMass - ss1s1psm1.MonoisotopicMassError, 1e-12);
            Assert.AreEqual(pwizPeptide.molecularWeight(), ss1s1psm1.ObservedNeutralMass - ss1s1psm1.MolecularWeightError, 1e-12);
            Assert.AreEqual(12.0, ss1s1psm1.Scores["score1"], 1e-12);
            Assert.AreEqual(1 / 12.0, ss1s1psm1.Scores["score2"], 1e-12);

            // Same spectrum, engine 2, rank 1: unmodified TIDERPEPTIDEK
            var ss1s1psm1e2 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                              o.Spectrum.Index == 0 &&
                                                                              o.Analysis.Id == a2.Id &&
                                                                              o.Rank == 1);
            pwizPeptide = new PwizPeptide("TIDERPEPTIDEK");
            Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "TIDERPEPTIDEK"), ss1s1psm1e2.Peptide);
            Assert.AreEqual(a2, ss1s1psm1e2.Analysis);
            Assert.IsTrue(a2.Matches.Contains(ss1s1psm1e2));
            Assert.AreEqual(0, ss1s1psm1e2.Modifications.Count);
            Assert.AreEqual(4, ss1s1psm1e2.Charge);
            Assert.AreEqual(pwizPeptide.monoisotopicMass(), ss1s1psm1e2.ObservedNeutralMass - ss1s1psm1e2.MonoisotopicMassError, 1e-12);
            Assert.AreEqual(pwizPeptide.molecularWeight(), ss1s1psm1e2.ObservedNeutralMass - ss1s1psm1e2.MolecularWeightError, 1e-12);
            Assert.AreEqual(120.0, ss1s1psm1e2.Scores["score1"], 1e-12);
            Assert.AreEqual(1 / 120.0, ss1s1psm1e2.Scores["score2"], 1e-12);

            // Same spectrum, engine 1, rank 2
            var ss1s1psm2 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                            o.Spectrum.Index == 0 &&
                                                                            o.Analysis.Id == a1.Id &&
                                                                            o.Rank == 2);
            Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "TIDERPEPTIDEK"), ss1s1psm2.Peptide);
            // Check the rank-2 match itself (previously re-checked the rank-1 match by mistake).
            Assert.AreEqual(a1, ss1s1psm2.Analysis);
            Assert.AreEqual(4, ss1s1psm2.Charge);
            Assert.AreEqual(2, ss1s1psm2.Rank);
            Assert.AreEqual(12.0 / 2, ss1s1psm2.Scores["score1"], 1e-12);
            Assert.AreEqual(1 / (12.0 / 2), ss1s1psm2.Scores["score2"], 1e-12);

            // Same spectrum, engine 1, rank 3
            var ss1s1psm3 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                            o.Spectrum.Index == 0 &&
                                                                            o.Analysis.Id == a1.Id &&
                                                                            o.Rank == 3);
            Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "EPPIER"), ss1s1psm3.Peptide);
            Assert.AreEqual(a1, ss1s1psm3.Analysis);
            Assert.AreEqual(1, ss1s1psm3.Charge);


            // Source 2, first spectrum, engine 1, rank 2: E[H-2O-1]DIT[P1O4]PEPR
            var ss2s1psm2 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Name == "Source 2" &&
                                                                            o.Spectrum.Index == 0 &&
                                                                            o.Analysis.Id == a1.Id &&
                                                                            o.Rank == 2);
            pwizPeptide = new PwizPeptide("E(H-2O-1)DIT(P1O4)PEPR", proteome.ModificationParsing.ModificationParsing_ByFormula);
            Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "EDITPEPR"), ss2s1psm2.Peptide);
            Assert.AreEqual(2, ss2s1psm2.Modifications.Count);
            Assert.AreEqual("H-2O-1", ss2s1psm2.Modifications[0].Modification.Formula);
            Assert.AreEqual(0, ss2s1psm2.Modifications[0].Offset);
            Assert.AreEqual("O4P1", ss2s1psm2.Modifications[1].Modification.Formula);
            Assert.AreEqual(3, ss2s1psm2.Modifications[1].Offset);
            Assert.AreEqual(pwizPeptide.monoisotopicMass(), ss2s1psm2.ObservedNeutralMass - ss2s1psm2.MonoisotopicMassError, 1e-12);
            Assert.AreEqual(pwizPeptide.molecularWeight(), ss2s1psm2.ObservedNeutralMass - ss2s1psm2.MolecularWeightError, 1e-12);

            // Source 4, first spectrum, engine 1, rank 1
            var ss4s1psm1 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss4.Id &&
                                                                            o.Spectrum.Index == 0 &&
                                                                            o.Analysis.Id == a1.Id &&
                                                                            o.Rank == 1);
            Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "TIDERPEPTIDEK"), ss4s1psm1.Peptide);
            Assert.AreEqual(a1, ss4s1psm1.Analysis);
            Assert.AreEqual(4, ss4s1psm1.Charge);
        }
Ejemplo n.º 5
0
        public static void CreateTestData (NHibernate.ISession session, IList<SpectrumTuple> testPsmSummary)
        {
            var dbGroups = new Map<string, SpectrumSourceGroup>();
            foreach (var ssg in session.Query<SpectrumSourceGroup>())
                dbGroups[ssg.Name] = ssg;

            var dbSources = new Map<long, SpectrumSource>();
            foreach (var ss in session.Query<SpectrumSource>())
                dbSources[ss.Id.Value] = ss;

            var dbAnalyses = new Map<long, Analysis>();
            foreach (var a in session.Query<Analysis>())
                dbAnalyses[a.Id.Value] = a;

            var dbPeptides = new Map<string, Peptide>();
            foreach (var pep in session.Query<Peptide>())
                dbPeptides[pep.Sequence] = pep;

            var bulkInserter = new BulkInserter(session.Connection);

            long lastPsmId = session.CreateQuery("SELECT MAX(Id) FROM PeptideSpectrumMatch").UniqueResult<long?>().GetValueOrDefault();
            long lastModId = session.CreateQuery("SELECT MAX(Id) FROM Modification").UniqueResult<long?>().GetValueOrDefault();
            long lastPmId = session.CreateQuery("SELECT MAX(Id) FROM PeptideModification").UniqueResult<long?>().GetValueOrDefault();
            long lastGroupId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroup").UniqueResult<long?>().GetValueOrDefault();
            long lastSourceId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSource").UniqueResult<long?>().GetValueOrDefault();
            long lastSglId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroupLink").UniqueResult<long?>().GetValueOrDefault();

            foreach (SpectrumTuple row in testPsmSummary)
            {
                string groupName = row.Group;
                string sourceName = "Source " + row.Source;
                string analysisId = "Engine " + row.Analysis;
                string peptideTuples = row.PeptideTuples;

                SpectrumSourceGroup group = dbGroups[groupName];
                if (String.IsNullOrEmpty(group.Name))
                {
                    group.Id = ++lastGroupId;
                    group.Name = groupName;
                    bulkInserter.Add(group);
                }

                SpectrumSource source = dbSources[row.Source];
                if (String.IsNullOrEmpty(source.Name))
                {
                    source.Id = ++lastSourceId;
                    source.Name = sourceName;
                    source.Group = group;
                    source.Spectra = new List<Spectrum>();
                    bulkInserter.Add(source);

                    // add a source group link for the source's immediate group
                    bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++lastSglId, Group = group, Source = source });

                    #region add source group links for all of the immediate group's parent groups

                    if (groupName != "/")
                    {
                        string parentGroupName = groupName.Substring(0, groupName.LastIndexOf("/"));
                        while (true)
                        {
                            if (String.IsNullOrEmpty(parentGroupName))
                                parentGroupName = "/";

                            // add the parent group if it doesn't exist yet
                            SpectrumSourceGroup parentGroup = session.UniqueResult<SpectrumSourceGroup>(o => o.Name == parentGroupName);
                            if (parentGroup == null)
                            {
                                parentGroup = new SpectrumSourceGroup() { Id = ++lastGroupId, Name = parentGroupName };
                                bulkInserter.Add(parentGroup);
                            }

                            bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++lastSglId, Group = parentGroup, Source = source });

                            if (parentGroupName == "/")
                                break;
                            parentGroupName = parentGroupName.Substring(0, parentGroupName.LastIndexOf("/"));
                        }
                    }

                    #endregion
                }

                Spectrum spectrum = source.Spectra.SingleOrDefault(o => o.Source.Id == source.Id &&
                                                                        o.Index == row.Spectrum - 1);
                if (spectrum == null)
                {
                    spectrum = new Spectrum()
                                   {
                                       Id = source.Id * 10000 + row.Spectrum,
                                       Index = row.Spectrum - 1,
                                       NativeID = "scan=" + row.Spectrum,
                                       Source = source,
                                       PrecursorMZ = 42
                                   };
                    source.Spectra.Add(spectrum);
                    bulkInserter.Add(spectrum);
                }

                Analysis analysis = dbAnalyses[row.Analysis];
                if (String.IsNullOrEmpty(analysis.Name))
                {
                    analysis.Id = dbAnalyses.Max(o => o.Value.Id).GetValueOrDefault() + 1;
                    analysis.Name = analysisId + " 1.0";
                    analysis.Software = new AnalysisSoftware() {Name = analysisId, Version = "1.0"};
                    analysis.StartTime = DateTime.Today.AddHours(row.Analysis);
                    analysis.Type = AnalysisType.DatabaseSearch;

                    analysis.Parameters = new SortedSet<AnalysisParameter>()
                    {
                        new AnalysisParameter()
                        {
                            Id = analysis.Id * 10000,
                            Analysis = analysis,
                            Name = "Parameter 1",
                            Value = "Value 1"
                        }
                    };

                    bulkInserter.Add(analysis);
                }

                // make sure peptides are sorted by their score divider (which will determine rank)
                var peptideList = new SortedList<int, List<PeptideTuple>>();
                foreach (string tuple in peptideTuples.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries))
                {
                    var peptideTuple = new PeptideTuple()
                                           {
                                               Sequence = tuple.Split('@', '/')[0],
                                               Charge = Convert.ToInt32(tuple.Split('@', '/')[1]),
                                               ScoreDivider = Convert.ToInt32(tuple.Split('@', '/')[2])
                                           };
                    if (!peptideList.ContainsKey(peptideTuple.ScoreDivider))
                        peptideList[peptideTuple.ScoreDivider] = new List<PeptideTuple>();
                    peptideList[peptideTuple.ScoreDivider].Add(peptideTuple);
                }

                int rank = 1;
                int lastDivider = 1;
                foreach (var peptideTupleList in peptideList.Values)
                    foreach (var peptideTuple in peptideTupleList)
                    {
                        using (PwizPeptide pwizPeptide = new PwizPeptide(peptideTuple.Sequence, ModParsing.ModificationParsing_Auto, ModDelimiter.ModificationDelimiter_Brackets))
                        {
                            Peptide peptide = dbPeptides[pwizPeptide.sequence];
                            if (String.IsNullOrEmpty(peptide.Sequence))
                            {
                                peptide = new TestPeptide(pwizPeptide.sequence);
                                peptide.Id = dbPeptides.Max(o => o.Value.Id).GetValueOrDefault() + 1;
                                peptide.MonoisotopicMass = pwizPeptide.monoisotopicMass(false);
                                peptide.MolecularWeight = pwizPeptide.molecularWeight(false);
                                dbPeptides[pwizPeptide.sequence] = peptide;
                                bulkInserter.Add(peptide);
                                createTestPeptideInstances(session, bulkInserter, peptide);
                            }

                            double neutralPrecursorMass = (spectrum.PrecursorMZ*peptideTuple.Charge) - (peptideTuple.Charge*Proton.Mass);

                            var psm = new PeptideSpectrumMatch()
                                          {
                                              Id = ++lastPsmId,
                                              Peptide = peptide,
                                              Spectrum = spectrum,
                                              Analysis = analysis,
                                              ObservedNeutralMass = neutralPrecursorMass,
                                              MonoisotopicMassError = neutralPrecursorMass - pwizPeptide.monoisotopicMass(),
                                              MolecularWeightError = neutralPrecursorMass - pwizPeptide.molecularWeight(),
                                              Charge = peptideTuple.Charge,
                                              Rank = (peptideTuple.ScoreDivider == lastDivider ? rank : ++rank),
                                              QValue = (rank == 1 ? row.QValue : PeptideSpectrumMatch.DefaultQValue),
                                          };

                            if (row.Score != null)
                                psm.Scores = new Dictionary<string, double>()
                                                 {
                                                     {"score1", (double) row.Score/peptideTuple.ScoreDivider},
                                                     {"score2", 1/((double) row.Score/peptideTuple.ScoreDivider)}
                                                 };

                            bulkInserter.Add(psm);
                            lastDivider = peptideTuple.ScoreDivider;

                            // add PeptideModifications and Modifications
                            foreach (KeyValuePair<int, ModList> itr in pwizPeptide.modifications())
                            {
                                foreach (PwizMod pwizMod in itr.Value)
                                {
                                    Modification mod = session.UniqueResult<Modification>(o => o.Formula == pwizMod.formula());
                                    if (mod == null)
                                    {
                                        mod = new Modification()
                                                  {
                                                      Id = ++lastModId,
                                                      Formula = pwizMod.formula(),
                                                      MonoMassDelta = pwizMod.monoisotopicDeltaMass(),
                                                      AvgMassDelta = pwizMod.averageDeltaMass(),
                                                      Name = pwizMod.formula()
                                                  };
                                        bulkInserter.Add(mod);
                                    }

                                    bulkInserter.Add(new PeptideModification()
                                                         {
                                                             Id = ++lastPmId,
                                                             PeptideSpectrumMatch = psm,
                                                             Modification = mod,
                                                             Offset = itr.Key == ModMap.NTerminus() ? int.MinValue
                                                                    : itr.Key == ModMap.CTerminus() ? int.MaxValue
                                                                    : itr.Key
                                                         });
                                }
                            }
                        }
                    }
            }
            bulkInserter.Execute();
            bulkInserter.Reset("");
        }
Ejemplo n.º 6
0
            /// <summary>
            /// Builds an attestation row from one raw query result row.
            /// </summary>
            /// <param name="queryRow">
            /// Columns read by index: [0] PSM id, [1] spectrum id, [2] source name, [3] spectrum native ID,
            /// [4] precursor m/z, [5] charge, [6] comma-separated "id:massDelta:offset" modification triplets
            /// (may be null or empty), [7] unmodified peptide sequence, [8] decoy state.
            /// </param>
            public PhosphoPeptideAttestationRow(object[] queryRow)
            {
                // Modification masses travel as machine-readable text (parsed from the query result and
                // written into the bracketed sequence string), so they must not depend on the UI culture.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                PSMId = (long)queryRow[0];
                SpectrumId = (long)queryRow[1];
                SourceName = (string)queryRow[2];
                SpectrumNativeID = (string)queryRow[3];
                PrecursorMZ = Convert.ToDouble(queryRow[4]);
                Charge = Convert.ToInt32(queryRow[5]);

                // Build the peptide sequence with modifications. Leave the phospho sites out of the string. They
                // will be reunited with the string right before the PSM is submitted to phosphoRS. This is necessary
                // because phosphoRS requires all phospho sites marked with a single numerical representation across all
                // PSMs.
                OriginalPhosphoSites = new SortedDictionary<int, long>();
                var mods = new Dictionary<int, List<double>>();
                string peptideSequence = (string)queryRow[7];
                Peptide = new proteome.Peptide(peptideSequence);
                var pwizMods = Peptide.modifications();

                string modificationTriplets = (string)queryRow[6];
                if (!String.IsNullOrEmpty(modificationTriplets))
                {
                    foreach (var triplet in modificationTriplets.Split(','))
                    {
                        var tokens = triplet.Split(':');
                        long pmId = Convert.ToInt64(tokens[0], invariant);
                        double deltaMass = Convert.ToDouble(tokens[1], invariant);
                        int roundedDeltaMass = (int) Math.Round(deltaMass);
                        int offset = Convert.ToInt32(tokens[2], invariant);
                        pwizMods[offset].Add(new proteome.Modification(deltaMass, deltaMass));

                        // Terminal modifications carry offsets outside the sequence (int.MinValue / int.MaxValue
                        // are handled as termini below), so only index the sequence for real residue positions.
                        bool isResidueOffset = offset >= 0 && offset < peptideSequence.Length;
                        bool isPhosphoSite = roundedDeltaMass == 80 &&
                                             isResidueOffset &&
                                             "STY".IndexOf(peptideSequence[offset]) >= 0;

                        if (isPhosphoSite)
                        {
                            OriginalPhosphoSites[offset] = pmId;
                        }
                        else
                        {
                            List<double> massDeltas;
                            if (!mods.TryGetValue(offset, out massDeltas))
                            {
                                massDeltas = new List<double>();
                                mods[offset] = massDeltas;
                            }
                            massDeltas.Add(deltaMass);
                        }
                    }
                }

                // Insert from the highest offset down so that earlier insertions do not shift later positions.
                const string format = "[{0:f4}]";
                StringBuilder sb = new StringBuilder(peptideSequence);
                foreach (var mod in mods.OrderByDescending(m => m.Key))
                {
                    foreach (var massDelta in mod.Value)
                    {
                        if (mod.Key == int.MinValue)
                        {
                            sb.Insert(0, String.Format(invariant, format, massDelta));
                        }
                        else if (mod.Key == int.MaxValue || mod.Key >= sb.Length)
                        {
                            sb.AppendFormat(invariant, format, massDelta);
                        }
                        else
                        {
                            sb.Insert(mod.Key + 1, String.Format(invariant, format, massDelta));
                        }
                    }
                }
                UnphosphorylatedSequence = sb.ToString();
                DecoyState = Convert.ToInt16(queryRow[8]);

                // Determine the location of phosphorylation sites
                PossiblePhosphoSites = new List<int>();
                for (int residueIndex = 0; residueIndex < peptideSequence.Length; ++residueIndex)
                {
                    if ("STY".IndexOf(peptideSequence[residueIndex]) >= 0)
                    {
                        PossiblePhosphoSites.Add(residueIndex);
                    }
                }
            }
Ejemplo n.º 7
0
        /// <summary>
        /// Converts an attestation row into the peptide-spectrum match object that is handed to phosphoRS.
        /// </summary>
        /// <param name="config">Supplies the phosphorylation symbol, the scored phospho modification and the pwiz phospho modification.</param>
        /// <param name="variant">Row providing the unphosphorylated sequence, original phospho site offsets, ids, charge, precursor m/z and peaks.</param>
        /// <returns>A phosphoRS PSM built from the variant's spectrum data and its annotated amino acid sequence.</returns>
        private phosphoRS.PeptideSpectrumMatch getPhosphoRS_PSM(PhosphoRSConfig config, PhosphoPeptideAttestationRow variant)
        {
            // Get the phosphorylated peptide and add all modifications to the base sequence.
            proteome.Peptide phosphoPeptide = new proteome.Peptide(variant.UnphosphorylatedSequence, proteome.ModificationParsing.ModificationParsing_Auto, proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
            proteome.ModificationMap variantPeptideMods = phosphoPeptide.modifications();
            foreach (int location in variant.OriginalPhosphoSites.Keys)
            {
                variantPeptideMods[location].Add(config.pwizMod);
            }

            // This modification ID is used to tell phosphoRS how to modify the sequence.
            // Every non-phospho ("unknown") modification shares this one ID.
            // NOTE(review): modifications with different masses end up under the same ID - confirm phosphoRS tolerates that.
            int modificationID = config.phosphorylationSymbol + 1;
            string otherModText = modificationID.ToString();
            char otherModSymbol = otherModText[0];

            // Build a string representation of all modifications in a peptide for phosphoRS
            // "0.00011000000000.0" : 1 is the ID of the modification. All phosphos in a data
            // set need to have one ID. This ID is used by the PhosphoRS to figure out which
            // mods need to be scored.
            var ptmRepresentation = new StringBuilder();

            // Store all modifications in phosphoRS modification objects
            var modifications = new List<phosphoRS.AminoAcidModification>();

            // Get the n-terminal modifications. The modification object must carry the same ID
            // that is written into the position string, so derive both from modificationID.
            if (variantPeptideMods.ContainsKey(proteome.ModificationMap.NTerminus()))
            {
                var nTerminalMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none", variantPeptideMods[proteome.ModificationMap.NTerminus()].monoisotopicDeltaMass(), 0.0, null);
                modifications.Add(nTerminalMod);
                ptmRepresentation.Append(otherModText + ".");
            }
            else
            {
                ptmRepresentation.Append("0.");
            }

            // Process all other modifications.
            for (int aaIndex = 0; aaIndex < phosphoPeptide.sequence.Length; ++aaIndex)
            {
                if (!variantPeptideMods.ContainsKey(aaIndex))
                {
                    ptmRepresentation.Append("0");
                    continue;
                }

                if (variant.OriginalPhosphoSites.ContainsKey(aaIndex))
                {
                    // If phosphorylation, use the existing scoredAA variable.
                    modifications.Add(config.scoredAA);
                    ptmRepresentation.Append(config.phosphorylationSymbol.ToString()[0]);
                }
                else
                {
                    // Otherwise, make an "unknown" modification with the shared non-phospho ID.
                    var otherMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none", variantPeptideMods[aaIndex].monoisotopicDeltaMass(), 0.0, phosphoRS.AminoAcidSequence.ParseAASequence("" + phosphoPeptide.sequence[aaIndex]));
                    modifications.Add(otherMod);
                    ptmRepresentation.Append(otherModText);
                }
            }

            // Process any c-terminal modifications.
            if (variantPeptideMods.ContainsKey(proteome.ModificationMap.CTerminus()))
            {
                var cTerminalMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none", variantPeptideMods[proteome.ModificationMap.CTerminus()].monoisotopicDeltaMass(), 0.0, null);
                modifications.Add(cTerminalMod);
                ptmRepresentation.Append("." + otherModText);
            }
            else
            {
                ptmRepresentation.Append(".0");
            }

            // Get the phosphoRS peptide sequence.
            // Assign spectrum ID, amino acid sequence, list of all modifications, and a so-called 'modification position string':
            // every digit represents an amino acid within the peptide sequence, '0' indicates not modified, values != '0' indicate
            // the identifier of the amino acid's modification; the first digit represents the n-terminus, the last digit the c-terminus.
            var AAS = phosphoRS.AminoAcidSequence.Create((int)variant.SpectrumId, phosphoPeptide.sequence, modifications, ptmRepresentation.ToString());

            // Make a phosphoRS peptide-spectrum match.
            return new phosphoRS.PeptideSpectrumMatch((int)variant.PSMId, variant.SpectrumType, variant.Charge, variant.PrecursorMZ, variant.Peaks, AAS);
        }
Ejemplo n.º 8
0
            /// <summary>
            /// Builds an attestation row from one raw query result row.
            /// </summary>
            /// <param name="queryRow">
            /// Columns read by index: [0] PSM id, [1] spectrum id, [2] source name, [3] spectrum native ID,
            /// [4] precursor m/z, [5] charge, [6] "id:massDelta:offset" modification triplets joined by the
            /// configured group-concat separator (may be null or empty), [7] unmodified peptide sequence,
            /// [8] decoy state.
            /// </param>
            public PhosphoPeptideAttestationRow(object[] queryRow)
            {
                // Modification masses travel as machine-readable text (parsed from the query result and
                // written into the bracketed sequence string), so they must not depend on the UI culture.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                PSMId            = (long)queryRow[0];
                SpectrumId       = (long)queryRow[1];
                SourceName       = (string)queryRow[2];
                SpectrumNativeID = (string)queryRow[3];
                PrecursorMZ      = Convert.ToDouble(queryRow[4]);
                Charge           = Convert.ToInt32(queryRow[5]);

                // Build the peptide sequence with modifications. Leave the phospho sites out of the string. They
                // will be reunited with the string right before the PSM is submitted to phosphoRS. This is necessary
                // because phosphoRS requires all phospho sites marked with a single numerical representation across all
                // PSMs.
                OriginalPhosphoSites = new SortedDictionary <int, long>();
                var    mods            = new Dictionary <int, List <double> >();
                string peptideSequence = (string)queryRow[7];

                Peptide = new proteome.Peptide(peptideSequence);
                var pwizMods = Peptide.modifications();

                string modificationTriplets = (string)queryRow[6];
                if (!String.IsNullOrEmpty(modificationTriplets))
                {
                    char separator = Properties.Settings.Default.GroupConcatSeparator[0];
                    foreach (var triplet in modificationTriplets.Split(separator))
                    {
                        var    tokens           = triplet.Split(':');
                        long   pmId             = Convert.ToInt64(tokens[0], invariant);
                        double deltaMass        = Convert.ToDouble(tokens[1], invariant);
                        int    roundedDeltaMass = (int)Math.Round(deltaMass);
                        int    offset           = Convert.ToInt32(tokens[2], invariant);
                        pwizMods[offset].Add(new proteome.Modification(deltaMass, deltaMass));

                        // Terminal modifications carry offsets outside the sequence (int.MinValue / int.MaxValue
                        // are handled as termini below), so only index the sequence for real residue positions.
                        bool isResidueOffset = offset >= 0 && offset < peptideSequence.Length;
                        bool isPhosphoSite   = roundedDeltaMass == 80 &&
                                               isResidueOffset &&
                                               "STY".IndexOf(peptideSequence[offset]) >= 0;

                        if (isPhosphoSite)
                        {
                            OriginalPhosphoSites[offset] = pmId;
                        }
                        else
                        {
                            List <double> massDeltas;
                            if (!mods.TryGetValue(offset, out massDeltas))
                            {
                                massDeltas   = new List <double>();
                                mods[offset] = massDeltas;
                            }
                            massDeltas.Add(deltaMass);
                        }
                    }
                }

                // Insert from the highest offset down so that earlier insertions do not shift later positions.
                const string  format = "[{0:f4}]";
                StringBuilder sb     = new StringBuilder(peptideSequence);

                foreach (var mod in mods.OrderByDescending(m => m.Key))
                {
                    foreach (var massDelta in mod.Value)
                    {
                        if (mod.Key == int.MinValue)
                        {
                            sb.Insert(0, String.Format(invariant, format, massDelta));
                        }
                        else if (mod.Key == int.MaxValue || mod.Key >= sb.Length)
                        {
                            sb.AppendFormat(invariant, format, massDelta);
                        }
                        else
                        {
                            sb.Insert(mod.Key + 1, String.Format(invariant, format, massDelta));
                        }
                    }
                }
                UnphosphorylatedSequence = sb.ToString();
                DecoyState = Convert.ToInt16(queryRow[8]);

                // Determine the location of phosphorylation sites
                PossiblePhosphoSites = new List <int>();
                for (int residueIndex = 0; residueIndex < peptideSequence.Length; ++residueIndex)
                {
                    if ("STY".IndexOf(peptideSequence[residueIndex]) >= 0)
                    {
                        PossiblePhosphoSites.Add(residueIndex);
                    }
                }
            }
Ejemplo n.º 9
0
        /// <summary>
        /// Converts an attestation row into the peptide-spectrum match object that is handed to phosphoRS.
        /// </summary>
        /// <param name="config">Supplies the phosphorylation symbol, the scored phospho modification and the pwiz phospho modification.</param>
        /// <param name="variant">Row providing the unphosphorylated sequence, original phospho site offsets, ids, charge, precursor m/z and peaks.</param>
        /// <returns>A phosphoRS PSM built from the variant's spectrum data and its annotated amino acid sequence.</returns>
        private phosphoRS.PeptideSpectrumMatch getPhosphoRS_PSM(PhosphoRSConfig config, PhosphoPeptideAttestationRow variant)
        {
            // Get the phosphorylated peptide and add all modifications to the base sequence.
            proteome.Peptide         phosphoPeptide     = new proteome.Peptide(variant.UnphosphorylatedSequence, proteome.ModificationParsing.ModificationParsing_Auto, proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
            proteome.ModificationMap variantPeptideMods = phosphoPeptide.modifications();
            foreach (int location in variant.OriginalPhosphoSites.Keys)
            {
                variantPeptideMods[location].Add(config.pwizMod);
            }

            // This modification ID is used to tell phosphoRS how to modify the sequence.
            // Every non-phospho ("unknown") modification shares this one ID.
            // NOTE(review): modifications with different masses end up under the same ID - confirm phosphoRS tolerates that.
            int    modificationID = config.phosphorylationSymbol + 1;
            string otherModText   = modificationID.ToString();
            char   otherModSymbol = otherModText[0];

            // Build a string representation of all modifications in a peptide for phosphoRS
            // "0.00011000000000.0" : 1 is the ID of the modification. All phosphos in a data
            // set need to have one ID. This ID is used by the PhosphoRS to figure out which
            // mods need to be scored.
            var ptmRepresentation = new StringBuilder();

            // Store all modifications in phosphoRS modification objects
            var modifications = new List <phosphoRS.AminoAcidModification>();

            // Get the n-terminal modifications. The modification object must carry the same ID
            // that is written into the position string, so derive both from modificationID.
            if (variantPeptideMods.ContainsKey(proteome.ModificationMap.NTerminus()))
            {
                var nTerminalMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none", variantPeptideMods[proteome.ModificationMap.NTerminus()].monoisotopicDeltaMass(), 0.0, null);
                modifications.Add(nTerminalMod);
                ptmRepresentation.Append(otherModText + ".");
            }
            else
            {
                ptmRepresentation.Append("0.");
            }

            // Process all other modifications.
            for (int aaIndex = 0; aaIndex < phosphoPeptide.sequence.Length; ++aaIndex)
            {
                if (!variantPeptideMods.ContainsKey(aaIndex))
                {
                    ptmRepresentation.Append("0");
                    continue;
                }

                if (variant.OriginalPhosphoSites.ContainsKey(aaIndex))
                {
                    // If phosphorylation, use the existing scoredAA variable.
                    modifications.Add(config.scoredAA);
                    ptmRepresentation.Append(config.phosphorylationSymbol.ToString()[0]);
                }
                else
                {
                    // Otherwise, make an "unknown" modification with the shared non-phospho ID.
                    var otherMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none", variantPeptideMods[aaIndex].monoisotopicDeltaMass(), 0.0, phosphoRS.AminoAcidSequence.ParseAASequence("" + phosphoPeptide.sequence[aaIndex]));
                    modifications.Add(otherMod);
                    ptmRepresentation.Append(otherModText);
                }
            }

            // Process any c-terminal modifications.
            if (variantPeptideMods.ContainsKey(proteome.ModificationMap.CTerminus()))
            {
                var cTerminalMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none", variantPeptideMods[proteome.ModificationMap.CTerminus()].monoisotopicDeltaMass(), 0.0, null);
                modifications.Add(cTerminalMod);
                ptmRepresentation.Append("." + otherModText);
            }
            else
            {
                ptmRepresentation.Append(".0");
            }

            // Get the phosphoRS peptide sequence.
            // Assign spectrum ID, amino acid sequence, list of all modifications, and a so-called 'modification position string':
            // every digit represents an amino acid within the peptide sequence, '0' indicates not modified, values != '0' indicate
            // the identifier of the amino acid's modification; the first digit represents the n-terminus, the last digit the c-terminus.
            var AAS = phosphoRS.AminoAcidSequence.Create((int)variant.SpectrumId, phosphoPeptide.sequence, modifications, ptmRepresentation.ToString());

            // Make a phosphoRS peptide-spectrum match.
            return new phosphoRS.PeptideSpectrumMatch((int)variant.PSMId, variant.SpectrumType, variant.Charge, variant.PrecursorMZ, variant.Peaks, AAS);
        }
        private void getFragmentationStatistics ()
        {
            IList<object[]> queryRows;
            lock (session)
            {
                var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                       .List<long>()
                                       .Shuffle()
                                       .Take(1000)
                                       .OrderBy(o => o);
                string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
                queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                "FROM PeptideSpectrumMatch psm " +
                                                "LEFT JOIN psm.Modifications pm " + 
                                                "LEFT JOIN pm.Modification mod " +
                                                "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                "GROUP BY psm.Spectrum.id ")
                                   .List<object[]>();
            }
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            var percentTicBySpectrumByFragmentType = new List<PointPairList>();
            var percentPeakCountBySpectrumByFragmentType = new List<PointPairList>();
            var meanMzErrorBySpectrumByFragmentType = new List<PointPairList>();
            var percentTicListByFragmentType = new List<List<double>>();
            var percentPeakCountListByFragmentType = new List<List<double>>();
            var meanMzErrorListByFragmentType = new List<List<double>>();

            foreach(var graphControl in graphControls)
                graphControl.MasterPane.PaneList.ForEach(o => o.CurveList.ForEach(c => c.Clear()));

            for (int i = 0; i < (int) IonSeries.Count; ++i)
            {
                percentTicBySpectrumByFragmentType.Add(percentTicGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentPeakCountBySpectrumByFragmentType.Add(percentPeakCountGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                meanMzErrorBySpectrumByFragmentType.Add(meanMzErrorGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentTicListByFragmentType.Add(new List<double>());
                percentPeakCountListByFragmentType.Add(new List<double>());
                meanMzErrorListByFragmentType.Add(new List<double>());
            }

            int spectraCount = 0;
            maxPercentTic = 10;
            maxPercentPeakCount = 10;
            maxMeanMzError = 0.1;
            var tolerance = fragmentTolerance;

            string spectrumListFilters = String.Empty;
            Invoke(new MethodInvoker(() =>
            {
                tolerance.value = Convert.ToDouble(fragmentToleranceTextBox.Text);
                tolerance.units = (MZTolerance.Units) fragmentToleranceUnitsComboBox.SelectedIndex;
                meanMzErrorGraphControl.GraphPane.YAxis.Title.Text = "Mean m/z error (" + tolerance.units.ToString() + ")";

                spectrumListFilters = spectrumFiltersTextBox.Text;
                setAutomaticScales();
            }));

            var points = new PointPairList();

            string currentSourceName = null;
            string currentSourcePath = null;
            msdata.MSData msd = null;

            lock (owner)
            foreach (var row in spectrumRows)
            {
                if (row.SourceName != currentSourceName)
                {
                    currentSourceName = row.SourceName;
                    currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                    if (String.IsNullOrEmpty(currentSourcePath))
                        throw new FileNotFoundException("source file not found");
                    msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                    //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                    //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                    SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }

                string spectrumId = String.Format("{0}/{1}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID));

                var spectrumList = msd.run.spectrumList;

                ++spectraCount;

                var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                       proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                var fragmentation = pwizPeptide.fragmentation(true, true);

                var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                double tic = pointMap.Values.Sum();

                var percentTicByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int) IonSeries.Count));
                var percentPeakCountByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int) IonSeries.Count));
                var matchCountByFragmentType = new List<int>(Enumerable.Repeat(0, (int) IonSeries.Count));
                var meanMzErrorByFragmentType = new List<double>(Enumerable.Repeat(Double.NaN, (int) IonSeries.Count));

                seems.PointMap.Enumerator itr;
                double expected;
                IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

                for (int z = 1; z <= 1; ++z)
                for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                foreach (IonSeries series in ionSeries)
                {
                    if ((series == IonSeries.c || series == IonSeries.x) &&
                        length == pwizPeptide.sequence.Length)
                        continue;

                    expected = fragmentMass(fragmentation, series, length, z);
                    itr = pointMap.FindNear(expected, expected - (expected - tolerance));
                    if (itr != null && itr.IsValid)
                    {
                        percentTicByFragmentType[(int)series] += itr.Current.Value;
                        ++percentPeakCountByFragmentType[(int)series];
                        ++matchCountByFragmentType[(int)series];
                        if (Double.IsNaN(meanMzErrorByFragmentType[(int)series])) meanMzErrorByFragmentType[(int)series] = 0;
                        meanMzErrorByFragmentType[(int)series] += mzError(itr.Current.Key, expected);
                    }
                }

                var rng = new Random();

                for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                {
                    // convert sum to mean
                    if (percentPeakCountByFragmentType[i] > 0)
                        meanMzErrorByFragmentType[i] /= matchCountByFragmentType[i];

                    // convert to percentages
                    percentTicByFragmentType[i] /= tic / 100;
                    percentPeakCountByFragmentType[i] /= pointMap.Count / 100.0;

                    maxPercentTic = Math.Max(maxPercentTic, percentTicByFragmentType[i]);
                    maxPercentPeakCount = Math.Max(maxPercentPeakCount, percentPeakCountByFragmentType[i]);

                    double jitter = (rng.NextDouble() - 0.5);
                    percentTicBySpectrumByFragmentType[i].Add(jitter, percentTicByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentTicByFragmentType[i], matchCountByFragmentType[i]));
                    percentPeakCountBySpectrumByFragmentType[i].Add(jitter, percentPeakCountByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentPeakCountByFragmentType[i], matchCountByFragmentType[i]));

                    percentTicListByFragmentType[i].Add(percentTicByFragmentType[i]);
                    percentPeakCountListByFragmentType[i].Add(percentPeakCountByFragmentType[i]);

                    if (!Double.IsNaN(meanMzErrorByFragmentType[i]))
                    {
                        maxMeanMzError = Math.Max(maxMeanMzError, Math.Abs(meanMzErrorByFragmentType[i]));
                        meanMzErrorBySpectrumByFragmentType[i].Add(jitter, meanMzErrorByFragmentType[i], String.Format("{0}: {1:G4}%", spectrumId, meanMzErrorByFragmentType[i]));
                        meanMzErrorListByFragmentType[i].Add(meanMzErrorByFragmentType[i]);
                    }
                }

                if ((spectraCount % 100) == 0)
                    setAutomaticScales();
            } // for each spectrum row

            Invoke(new MethodInvoker(() =>
            {
                for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                {
                    if (percentTicListByFragmentType[i].Count < 5)
                        continue;
                    percentTicListByFragmentType[i].Sort();
                    percentPeakCountListByFragmentType[i].Sort();
                    addSixNumberSummary(percentTicGraphControl.MasterPane.PaneList[i + 1], percentTicListByFragmentType[i]);
                    addSixNumberSummary(percentPeakCountGraphControl.MasterPane.PaneList[i + 1], percentPeakCountListByFragmentType[i]);

                    if (meanMzErrorListByFragmentType[i].Count < 5)
                        continue;
                    meanMzErrorListByFragmentType[i].Sort();
                    addSixNumberSummary(meanMzErrorGraphControl.MasterPane.PaneList[i + 1], meanMzErrorListByFragmentType[i]);
                }
            }));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Samples up to 1000 peptide-spectrum matches from the current view filter, loads
        /// each matched spectrum from its source file, and adds every peak to the precursor
        /// and charge-reduced scatter plots (m/z offset on X, intensity / TIC on Y).
        /// Peaks that match a theoretical fragment are additionally added with the ion series
        /// index as the point's Z value.
        /// </summary>
        /// <remarks>
        /// Runs UI updates through <c>Invoke</c>, so it is expected to be called from a
        /// non-UI thread. Each spectrum source file is disposed as soon as the next source
        /// is opened, and the last one is disposed when the method exits (normally or not).
        /// </remarks>
        /// <returns>Always an empty list; the results are delivered through the scatter plots.</returns>
        private List<double> getPeakStatistics ()
        {
            IList<object[]> queryRows;
            lock (session)
            {
                // Materialize the sample exactly once so the shuffled id set cannot change between uses.
                string[] randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                            .List<long>()
                                            .Shuffle()
                                            .Take(1000)
                                            .OrderBy(o => o)
                                            .Select(o => o.ToString())
                                            .ToArray();

                if (randomIds.Length == 0)
                {
                    // Nothing passed the filter: avoid issuing a query with an empty "IN ()" list.
                    queryRows = new List<object[]>();
                }
                else
                {
                    string randomIdSet = String.Join(",", randomIds);
                    queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                    "FROM PeptideSpectrumMatch psm " +
                                                    "LEFT JOIN psm.Modifications pm " +
                                                    "LEFT JOIN pm.Modification mod " +
                                                    "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                    "GROUP BY psm.Spectrum.id ")
                                       .List<object[]>();
                }
            }

            // Ordering by source name lets each source file be opened only once in the loop below.
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            precursorScatterPlot.Clear();
            chargeReducedScatterPlot.Clear();

            int spectraCount = 0;

            // The filter text belongs to a control, so it is read on the UI thread.
            string spectrumListFilters = String.Empty;
            Invoke(new MethodInvoker(() =>
            {
                spectrumListFilters = spectrumFiltersTextBox.Text;
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));

            // Loop-invariant settings.
            bool plotMatchedPeaks = true;
            bool removeMatchedPeaks = false;
            double tolerance = 0.03;
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            string currentSourceName = null;
            msdata.MSData msd = null;

            lock (owner)
            {
                try
                {
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            // Release the previous source file before opening the next one.
                            if (msd != null)
                            {
                                msd.Dispose();
                                msd = null;
                            }

                            currentSourceName = row.SourceName;
                            string currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string label = String.Format("{0}/{1}\n{2}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID), row.ModifiedSequence);

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                        double tic = pointMap.Values.Sum();

                        double precursorMz = row.Spectrum.PrecursorMZ;
                        double chargeReducedPrecursorMz = precursorMz * row.PeptideSpectrumMatch.Charge;

                        // Only fragment charge state 1 is considered.
                        for (int z = 1; z <= 1; ++z)
                        {
                            for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                            {
                                // NOTE(review): this prefix is taken from the modified sequence string, which
                                // presumably contains bracketed modification text - confirm it is the intended label.
                                string NTermFragment = row.ModifiedSequence.Substring(0, length);

                                foreach (IonSeries series in ionSeries)
                                {
                                    // These series are skipped for the full-length fragment.
                                    if ((series == IonSeries.c || series == IonSeries.cMinus1 || series == IonSeries.x) &&
                                        length == pwizPeptide.sequence.Length)
                                    {
                                        continue;
                                    }

                                    seems.PointMap.Enumerator itr = pointMap.FindNear(fragmentMass(fragmentation, series, length, z), tolerance);
                                    if (itr == null || !itr.IsValid)
                                    {
                                        continue;
                                    }

                                    if (plotMatchedPeaks)
                                    {
                                        precursorScatterPlot.AddPoint(new PointPair(itr.Current.Key - precursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, precursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                        chargeReducedScatterPlot.AddPoint(new PointPair(itr.Current.Key - chargeReducedPrecursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, chargeReducedPrecursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                    }

                                    if (removeMatchedPeaks)
                                    {
                                        pointMap.Remove(itr);
                                    }
                                }
                            }
                        }

                        // Every peak still in the map is plotted with Z = 0.
                        foreach (var pair in pointMap)
                        {
                            precursorScatterPlot.AddPoint(new PointPair(pair.Key - precursorMz, pair.Value / tic, 0, label));
                            chargeReducedScatterPlot.AddPoint(new PointPair(pair.Key - chargeReducedPrecursorMz, pair.Value / tic, 0, label));
                        }

                        // Refresh the graph periodically so progress is visible while loading.
                        if ((spectraCount % 100) == 0)
                        {
                            Invoke(new MethodInvoker(() =>
                            {
                                zedGraphControl.MasterPane.AxisChange();
                                zedGraphControl.Refresh();
                            }));
                        }
                    }
                }
                finally
                {
                    // Release the last opened source file, even if loading a spectrum threw.
                    if (msd != null)
                    {
                        msd.Dispose();
                    }
                }
            }

            Invoke(new MethodInvoker(() =>
            {
                if (!lockZoomCheckBox.Checked)
                {
                    zedGraphControl.ZoomOutAll(zedGraphControl.GraphPane);
                }
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));
            return new List<double>(); //percentTicBySpectrumByFragmentType[1];
        }