/// <summary>
/// Renders a peptide as an annotated string: every residue is written out, followed by a
/// bracketed annotation when the residue carries a localization probability or a modification.
/// </summary>
/// <param name="peptide">Peptide whose sequence and modification map are rendered.</param>
/// <param name="localizationProbabilities">
/// Per-site probabilities; each entry is keyed by its SequencePosition and looked up with
/// (residue index + 1).
/// </param>
/// <param name="config">Supplies the mass delta (config.scoredAA.MassDelta) printed for scored sites.</param>
/// <returns>The annotated sequence, e.g. residues followed by "[mass]" or "[mass(probability%)]".</returns>
private string PeptideToString(proteome.Peptide peptide, IList<phosphoRS.PTMSiteProbability> localizationProbabilities, PhosphoRSConfig config)
{
    var probabilityBySite = localizationProbabilities.ToDictionary(o => o.SequencePosition, o => o.Probability);

    // Plain modification masses are printed with zero decimal places.
    string massFormat = String.Format("[{{0:f{0}}}]", 0);

    var modifications = peptide.modifications();
    string sequence = peptide.sequence;
    var nTerminus = proteome.ModificationMap.NTerminus();
    var cTerminus = proteome.ModificationMap.CTerminus();

    var annotated = new StringBuilder();

    if (modifications.ContainsKey(nTerminus))
    {
        annotated.AppendFormat(massFormat, modifications[nTerminus].monoisotopicDeltaMass());
    }

    for (int residueIndex = 0; residueIndex < sequence.Length; ++residueIndex)
    {
        annotated.Append(sequence[residueIndex]);

        int sitePosition = residueIndex + 1;
        if (probabilityBySite.ContainsKey(sitePosition))
        {
            // A scored site is only annotated when its probability is positive; a scored
            // site never falls through to the plain modification annotation below.
            if (probabilityBySite[sitePosition] > 0)
            {
                annotated.AppendFormat("[{0:f0}({1:f0}%)]", config.scoredAA.MassDelta, probabilityBySite[sitePosition] * 100);
            }
            continue;
        }

        if (modifications.ContainsKey(residueIndex))
        {
            double modMass = modifications[residueIndex].monoisotopicDeltaMass();
            annotated.AppendFormat(massFormat, modMass);
        }
    }

    if (modifications.ContainsKey(cTerminus))
    {
        annotated.AppendFormat(massFormat, modifications[cTerminus].monoisotopicDeltaMass());
    }

    return annotated.ToString();
}
/// <summary>
/// Samples up to 1000 filtered peptide-spectrum matches, loads their spectra from the source
/// files and plots every peak relative to the precursor m/z (precursorScatterPlot) and relative
/// to precursor m/z multiplied by the PSM charge (chargeReducedScatterPlot). Peaks that match a
/// theoretical fragment ion are additionally plotted tagged with their ion series.
/// </summary>
/// <returns>Always an empty list; the results are delivered through the scatter plots.</returns>
/// <exception cref="System.IO.FileNotFoundException">
/// Thrown when the file for a spectrum source cannot be located.
/// </exception>
private List<double> getPeakStatistics()
{
    IList<object[]> queryRows;
    lock (session)
    {
        // Pick a random subset of the filtered PSM ids, then fetch the full rows for that subset.
        var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                               .List<long>()
                               .Shuffle()
                               .Take(1000)
                               .OrderBy(o => o);
        string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
        queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                        "FROM PeptideSpectrumMatch psm " +
                                        "LEFT JOIN psm.Modifications pm " +
                                        "LEFT JOIN pm.Modification mod " +
                                        "WHERE psm.Id IN (" + randomIdSet + ") " +
                                        "GROUP BY psm.Spectrum.id ")
                           .List<object[]>();
    }

    // Ordering by source name lets each source file be opened once for a run of rows.
    var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

    precursorScatterPlot.Clear();
    chargeReducedScatterPlot.Clear();

    int spectraCount = 0;

    // Controls are read and refreshed through Invoke.
    string spectrumListFilters = String.Empty;
    Invoke(new MethodInvoker(() =>
    {
        spectrumListFilters = spectrumFiltersTextBox.Text;
        zedGraphControl.MasterPane.AxisChange();
        zedGraphControl.Refresh();
    }));

    string currentSourceName = null;
    string currentSourcePath = null;
    msdata.MSData msd = null;

    lock (owner)
    {
        foreach (var row in spectrumRows)
        {
            if (row.SourceName != currentSourceName)
            {
                currentSourceName = row.SourceName;
                currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());

                // Fail with a clear error instead of handing an empty path to MSDataFile.
                if (String.IsNullOrEmpty(currentSourcePath))
                {
                    throw new System.IO.FileNotFoundException("source file not found");
                }

                msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                // The semicolon-separated filters from spectrumFiltersTextBox are applied to every source.
                SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
            }

            string label = String.Format("{0}/{1}\n{2}",
                                         row.SourceName,
                                         msdata.id.abbreviate(row.Spectrum.NativeID),
                                         row.ModifiedSequence);

            var spectrumList = msd.run.spectrumList;

            ++spectraCount;

            var pwizPeptide = new proteome.Peptide(row.ModifiedSequence,
                                                   proteome.ModificationParsing.ModificationParsing_Auto,
                                                   proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
            var fragmentation = pwizPeptide.fragmentation(true, true);

            var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
            var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
            double tic = pointMap.Values.Sum();

            double precursorMz = row.Spectrum.PrecursorMZ;
            double chargeReducedPrecursorMz = precursorMz * row.PeptideSpectrumMatch.Charge;

            // Fixed switches and the fixed matching tolerance passed to PointMap.FindNear.
            bool plotMatchedPeaks = true;
            bool removeMatchedPeaks = false;
            double tolerance = 0.03;

            seems.PointMap.Enumerator itr;
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            // Only singly charged fragments are considered.
            for (int z = 1; z <= 1; ++z)
            {
                for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                {
                    // NOTE(review): the tooltip always shows this N-terminal prefix of the modified
                    // sequence string, for every ion series.
                    string NTermFragment = row.ModifiedSequence.Substring(0, length);

                    foreach (IonSeries series in ionSeries)
                    {
                        // c, c-1 and x ions are skipped for the full-length fragment.
                        if ((series == IonSeries.c || series == IonSeries.cMinus1 || series == IonSeries.x) &&
                            length == pwizPeptide.sequence.Length)
                        {
                            continue;
                        }

                        itr = pointMap.FindNear(fragmentMass(fragmentation, series, length, z), tolerance);
                        if (itr != null && itr.IsValid)
                        {
                            if (plotMatchedPeaks)
                            {
                                precursorScatterPlot.AddPoint(new PointPair(itr.Current.Key - precursorMz,
                                                                            itr.Current.Value / tic,
                                                                            (int)series,
                                                                            String.Format("{0} {1}\n{2} {3} {4} {5}",
                                                                                          label,
                                                                                          precursorMz,
                                                                                          NTermFragment,
                                                                                          itr.Current.Key,
                                                                                          IonSeriesLabels[(int)series],
                                                                                          length)));

                                chargeReducedScatterPlot.AddPoint(new PointPair(itr.Current.Key - chargeReducedPrecursorMz,
                                                                                itr.Current.Value / tic,
                                                                                (int)series,
                                                                                String.Format("{0} {1}\n{2} {3} {4} {5}",
                                                                                              label,
                                                                                              chargeReducedPrecursorMz,
                                                                                              NTermFragment,
                                                                                              itr.Current.Key,
                                                                                              IonSeriesLabels[(int)series],
                                                                                              length)));
                            }

                            if (removeMatchedPeaks)
                            {
                                pointMap.Remove(itr);
                            }
                        }
                    }
                }
            }

            // Every peak still in the map is plotted with series index 0.
            foreach (var pair in pointMap)
            {
                precursorScatterPlot.AddPoint(new PointPair(pair.Key - precursorMz, pair.Value / tic, 0, label));
                chargeReducedScatterPlot.AddPoint(new PointPair(pair.Key - chargeReducedPrecursorMz, pair.Value / tic, 0, label));
            }

            // Refresh the graph after every 100 spectra.
            if ((spectraCount % 100) == 0)
            {
                Invoke(new MethodInvoker(() =>
                {
                    zedGraphControl.MasterPane.AxisChange();
                    zedGraphControl.Refresh();
                }));
            }
        }
    }

    Invoke(new MethodInvoker(() =>
    {
        if (!lockZoomCheckBox.Checked)
        {
            zedGraphControl.ZoomOutAll(zedGraphControl.GraphPane);
        }
        zedGraphControl.MasterPane.AxisChange();
        zedGraphControl.Refresh();
    }));

    return new List<double>();
}
/// <summary>
/// Samples up to 1000 filtered peptide-spectrum matches, matches theoretical fragment ions
/// against each spectrum and, per ion series, records the percentage of total ion current, the
/// percentage of peaks and the mean m/z error of the matched peaks. The per-spectrum values are
/// added (with horizontal jitter) to the three graph controls, followed by a summary per series.
/// </summary>
/// <exception cref="FileNotFoundException">
/// Thrown when the file for a spectrum source cannot be located.
/// </exception>
private void getFragmentationStatistics()
{
    IList<object[]> queryRows;
    lock (session)
    {
        // Pick a random subset of the filtered PSM ids, then fetch the full rows for that subset.
        var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                               .List<long>()
                               .Shuffle()
                               .Take(1000)
                               .OrderBy(o => o);
        string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
        queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                        "FROM PeptideSpectrumMatch psm " +
                                        "LEFT JOIN psm.Modifications pm " +
                                        "LEFT JOIN pm.Modification mod " +
                                        "WHERE psm.Id IN (" + randomIdSet + ") " +
                                        "GROUP BY psm.Spectrum.id ")
                           .List<object[]>();
    }

    // Ordering by source name lets each source file be opened once for a run of rows.
    var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

    // Point lists that live inside the graph panes (one per ion series).
    var percentTicBySpectrumByFragmentType = new List<PointPairList>();
    var percentPeakCountBySpectrumByFragmentType = new List<PointPairList>();
    var meanMzErrorBySpectrumByFragmentType = new List<PointPairList>();

    // Plain value lists used for the per-series summaries at the end.
    var percentTicListByFragmentType = new List<List<double>>();
    var percentPeakCountListByFragmentType = new List<List<double>>();
    var meanMzErrorListByFragmentType = new List<List<double>>();

    foreach (var graphControl in graphControls)
    {
        graphControl.MasterPane.PaneList.ForEach(o => o.CurveList.ForEach(c => c.Clear()));
    }

    for (int i = 0; i < (int)IonSeries.Count; ++i)
    {
        // Series i is drawn on pane i + 1, in the curve at index 3.
        percentTicBySpectrumByFragmentType.Add(percentTicGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
        percentPeakCountBySpectrumByFragmentType.Add(percentPeakCountGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
        meanMzErrorBySpectrumByFragmentType.Add(meanMzErrorGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);

        percentTicListByFragmentType.Add(new List<double>());
        percentPeakCountListByFragmentType.Add(new List<double>());
        meanMzErrorListByFragmentType.Add(new List<double>());
    }

    int spectraCount = 0;
    maxPercentTic = 10;
    maxPercentPeakCount = 10;
    maxMeanMzError = 0.1;

    var tolerance = fragmentTolerance;
    string spectrumListFilters = String.Empty;

    // Controls are read and updated through Invoke.
    Invoke(new MethodInvoker(() =>
    {
        tolerance.value = Convert.ToDouble(fragmentToleranceTextBox.Text);
        tolerance.units = (MZTolerance.Units)fragmentToleranceUnitsComboBox.SelectedIndex;
        meanMzErrorGraphControl.GraphPane.YAxis.Title.Text = "Mean m/z error (" + tolerance.units.ToString() + ")";
        spectrumListFilters = spectrumFiltersTextBox.Text;
        setAutomaticScales();
    }));

    string currentSourceName = null;
    string currentSourcePath = null;
    msdata.MSData msd = null;

    // One generator for the whole run: a time-seeded Random created per spectrum can repeat
    // the same jitter sequence for spectra processed within the same clock tick.
    var rng = new Random();

    lock (owner)
    {
        foreach (var row in spectrumRows)
        {
            if (row.SourceName != currentSourceName)
            {
                currentSourceName = row.SourceName;
                currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                if (String.IsNullOrEmpty(currentSourcePath))
                {
                    throw new FileNotFoundException("source file not found");
                }

                msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                // The semicolon-separated filters from spectrumFiltersTextBox are applied to every source.
                SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
            }

            string spectrumId = String.Format("{0}/{1}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID));

            var spectrumList = msd.run.spectrumList;

            ++spectraCount;

            var pwizPeptide = new proteome.Peptide(row.ModifiedSequence,
                                                   proteome.ModificationParsing.ModificationParsing_Auto,
                                                   proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
            var fragmentation = pwizPeptide.fragmentation(true, true);

            var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
            var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
            double tic = pointMap.Values.Sum();

            // Per-series accumulators for this spectrum; the mean error stays NaN until a match is found.
            var percentTicByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int)IonSeries.Count));
            var percentPeakCountByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int)IonSeries.Count));
            var matchCountByFragmentType = new List<int>(Enumerable.Repeat(0, (int)IonSeries.Count));
            var meanMzErrorByFragmentType = new List<double>(Enumerable.Repeat(Double.NaN, (int)IonSeries.Count));

            seems.PointMap.Enumerator itr;
            double expected;
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            // Only singly charged fragments are considered.
            for (int z = 1; z <= 1; ++z)
            {
                for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                {
                    foreach (IonSeries series in ionSeries)
                    {
                        // c and x ions are skipped for the full-length fragment.
                        if ((series == IonSeries.c || series == IonSeries.x) &&
                            length == pwizPeptide.sequence.Length)
                        {
                            continue;
                        }

                        expected = fragmentMass(fragmentation, series, length, z);

                        // expected - (expected - tolerance) yields the tolerance as an m/z distance at this mass.
                        itr = pointMap.FindNear(expected, expected - (expected - tolerance));
                        if (itr != null && itr.IsValid)
                        {
                            percentTicByFragmentType[(int)series] += itr.Current.Value;
                            ++percentPeakCountByFragmentType[(int)series];
                            ++matchCountByFragmentType[(int)series];
                            if (Double.IsNaN(meanMzErrorByFragmentType[(int)series]))
                            {
                                meanMzErrorByFragmentType[(int)series] = 0;
                            }
                            meanMzErrorByFragmentType[(int)series] += mzError(itr.Current.Key, expected);
                        }
                    }
                }
            }

            for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
            {
                // convert sum to mean
                if (percentPeakCountByFragmentType[i] > 0)
                {
                    meanMzErrorByFragmentType[i] /= matchCountByFragmentType[i];
                }

                // convert to percentages
                percentTicByFragmentType[i] /= tic / 100;
                percentPeakCountByFragmentType[i] /= pointMap.Count / 100.0;

                maxPercentTic = Math.Max(maxPercentTic, percentTicByFragmentType[i]);
                maxPercentPeakCount = Math.Max(maxPercentPeakCount, percentPeakCountByFragmentType[i]);

                // The same horizontal jitter in [-0.5, 0.5) is used for this series on all three graphs.
                double jitter = (rng.NextDouble() - 0.5);

                percentTicBySpectrumByFragmentType[i].Add(jitter,
                                                          percentTicByFragmentType[i],
                                                          String.Format("{0}: {1:G4}% ({2} matches)",
                                                                        spectrumId,
                                                                        percentTicByFragmentType[i],
                                                                        matchCountByFragmentType[i]));
                percentPeakCountBySpectrumByFragmentType[i].Add(jitter,
                                                                percentPeakCountByFragmentType[i],
                                                                String.Format("{0}: {1:G4}% ({2} matches)",
                                                                              spectrumId,
                                                                              percentPeakCountByFragmentType[i],
                                                                              matchCountByFragmentType[i]));

                percentTicListByFragmentType[i].Add(percentTicByFragmentType[i]);
                percentPeakCountListByFragmentType[i].Add(percentPeakCountByFragmentType[i]);

                if (!Double.IsNaN(meanMzErrorByFragmentType[i]))
                {
                    maxMeanMzError = Math.Max(maxMeanMzError, Math.Abs(meanMzErrorByFragmentType[i]));

                    // The error is labelled with the tolerance units, matching the Y axis title.
                    meanMzErrorBySpectrumByFragmentType[i].Add(jitter,
                                                               meanMzErrorByFragmentType[i],
                                                               String.Format("{0}: {1:G4} {2}",
                                                                             spectrumId,
                                                                             meanMzErrorByFragmentType[i],
                                                                             tolerance.units));
                    meanMzErrorListByFragmentType[i].Add(meanMzErrorByFragmentType[i]);
                }
            }

            // Rescale the graphs after every 100 spectra.
            if ((spectraCount % 100) == 0)
            {
                setAutomaticScales();
            }
        } // for each spectrum row
    }

    Invoke(new MethodInvoker(() =>
    {
        for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
        {
            // Summaries are only drawn for series with at least 5 values.
            if (percentTicListByFragmentType[i].Count < 5)
            {
                continue;
            }

            percentTicListByFragmentType[i].Sort();
            percentPeakCountListByFragmentType[i].Sort();
            addSixNumberSummary(percentTicGraphControl.MasterPane.PaneList[i + 1], percentTicListByFragmentType[i]);
            addSixNumberSummary(percentPeakCountGraphControl.MasterPane.PaneList[i + 1], percentPeakCountListByFragmentType[i]);

            if (meanMzErrorListByFragmentType[i].Count < 5)
            {
                continue;
            }

            meanMzErrorListByFragmentType[i].Sort();
            addSixNumberSummary(meanMzErrorGraphControl.MasterPane.PaneList[i + 1], meanMzErrorListByFragmentType[i]);
        }
    }));
}
/// <summary>
/// Verifies the peptide-spectrum matches stored in the session: peptide, analysis, charge, rank,
/// modifications, mass errors and scores are checked for a selection of PSMs from
/// "Source 1", "Source 2" and "Source 4".
/// </summary>
public void TestPeptideSpectrumMatches()
{
    var ss1 = session.UniqueResult<SpectrumSource>(o => o.Name == "Source 1");
    var ss4 = session.UniqueResult<SpectrumSource>(o => o.Name == "Source 4");
    var a1 = session.UniqueResult<Analysis>(o => o.Name == "Engine 1 1.0");
    var a2 = session.UniqueResult<Analysis>(o => o.Name == "Engine 2 1.0");

    // Source 1, first spectrum, engine 1, rank 1: [C2H2O1]PEPTIDE
    var ss1s1psm1 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                    o.Spectrum.Index == 0 &&
                                                                    o.Analysis.Id == a1.Id &&
                                                                    o.Rank == 1);
    var pwizPeptide = new PwizPeptide("(C2H2O1)PEPTIDE", proteome.ModificationParsing.ModificationParsing_ByFormula);
    Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "PEPTIDE"), ss1s1psm1.Peptide);
    Assert.AreEqual(a1, ss1s1psm1.Analysis);
    Assert.IsTrue(a1.Matches.Contains(ss1s1psm1));
    Assert.AreEqual(1, ss1s1psm1.Modifications.Count);
    Assert.AreEqual("C2H2O1", ss1s1psm1.Modifications[0].Modification.Formula);
    // the N-terminal modification is stored with offset int.MinValue
    Assert.AreEqual(int.MinValue, ss1s1psm1.Modifications[0].Offset);
    Assert.AreEqual(2, ss1s1psm1.Charge);
    Assert.AreEqual(pwizPeptide.monoisotopicMass(), ss1s1psm1.ObservedNeutralMass - ss1s1psm1.MonoisotopicMassError, 1e-12);
    Assert.AreEqual(pwizPeptide.molecularWeight(), ss1s1psm1.ObservedNeutralMass - ss1s1psm1.MolecularWeightError, 1e-12);
    Assert.AreEqual(12.0, ss1s1psm1.Scores["score1"], 1e-12);
    Assert.AreEqual(1 / 12.0, ss1s1psm1.Scores["score2"], 1e-12);

    // Source 1, first spectrum, engine 2, rank 1: TIDERPEPTIDEK
    var ss1s1psm1e2 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                      o.Spectrum.Index == 0 &&
                                                                      o.Analysis.Id == a2.Id &&
                                                                      o.Rank == 1);
    pwizPeptide = new PwizPeptide("TIDERPEPTIDEK");
    Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "TIDERPEPTIDEK"), ss1s1psm1e2.Peptide);
    Assert.AreEqual(a2, ss1s1psm1e2.Analysis);
    Assert.IsTrue(a2.Matches.Contains(ss1s1psm1e2));
    Assert.AreEqual(0, ss1s1psm1e2.Modifications.Count);
    Assert.AreEqual(4, ss1s1psm1e2.Charge);
    Assert.AreEqual(pwizPeptide.monoisotopicMass(), ss1s1psm1e2.ObservedNeutralMass - ss1s1psm1e2.MonoisotopicMassError, 1e-12);
    Assert.AreEqual(pwizPeptide.molecularWeight(), ss1s1psm1e2.ObservedNeutralMass - ss1s1psm1e2.MolecularWeightError, 1e-12);
    Assert.AreEqual(120.0, ss1s1psm1e2.Scores["score1"], 1e-12);
    Assert.AreEqual(1 / 120.0, ss1s1psm1e2.Scores["score2"], 1e-12);

    // Source 1, first spectrum, engine 1, rank 2: TIDERPEPTIDEK
    var ss1s1psm2 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                    o.Spectrum.Index == 0 &&
                                                                    o.Analysis.Id == a1.Id &&
                                                                    o.Rank == 2);
    Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "TIDERPEPTIDEK"), ss1s1psm2.Peptide);
    // check the rank-2 match itself (previously re-checked the rank-1 match by mistake)
    Assert.AreEqual(a1, ss1s1psm2.Analysis);
    Assert.AreEqual(4, ss1s1psm2.Charge);
    Assert.AreEqual(2, ss1s1psm2.Rank);
    Assert.AreEqual(12.0 / 2, ss1s1psm2.Scores["score1"], 1e-12);
    Assert.AreEqual(1 / (12.0 / 2), ss1s1psm2.Scores["score2"], 1e-12);

    // Source 1, first spectrum, engine 1, rank 3: EPPIER
    var ss1s1psm3 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss1.Id &&
                                                                    o.Spectrum.Index == 0 &&
                                                                    o.Analysis.Id == a1.Id &&
                                                                    o.Rank == 3);
    Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "EPPIER"), ss1s1psm3.Peptide);
    Assert.AreEqual(a1, ss1s1psm3.Analysis);
    Assert.AreEqual(1, ss1s1psm3.Charge);

    // Source 2, first spectrum, engine 1, rank 2: E[H-2O-1]DIT[P1O4]PEPR
    var ss2s1psm2 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Name == "Source 2" &&
                                                                    o.Spectrum.Index == 0 &&
                                                                    o.Analysis.Id == a1.Id &&
                                                                    o.Rank == 2);
    pwizPeptide = new PwizPeptide("E(H-2O-1)DIT(P1O4)PEPR", proteome.ModificationParsing.ModificationParsing_ByFormula);
    Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "EDITPEPR"), ss2s1psm2.Peptide);
    Assert.AreEqual(2, ss2s1psm2.Modifications.Count);
    Assert.AreEqual("H-2O-1", ss2s1psm2.Modifications[0].Modification.Formula);
    Assert.AreEqual(0, ss2s1psm2.Modifications[0].Offset);
    Assert.AreEqual("O4P1", ss2s1psm2.Modifications[1].Modification.Formula);
    Assert.AreEqual(3, ss2s1psm2.Modifications[1].Offset);
    Assert.AreEqual(pwizPeptide.monoisotopicMass(), ss2s1psm2.ObservedNeutralMass - ss2s1psm2.MonoisotopicMassError, 1e-12);
    Assert.AreEqual(pwizPeptide.molecularWeight(), ss2s1psm2.ObservedNeutralMass - ss2s1psm2.MolecularWeightError, 1e-12);

    // Source 4, first spectrum, engine 1, rank 1: TIDERPEPTIDEK
    var ss4s1psm1 = session.UniqueResult<PeptideSpectrumMatch>(o => o.Spectrum.Source.Id == ss4.Id &&
                                                                    o.Spectrum.Index == 0 &&
                                                                    o.Analysis.Id == a1.Id &&
                                                                    o.Rank == 1);
    Assert.AreEqual(session.UniqueResult<Peptide>(o => o.Sequence == "TIDERPEPTIDEK"), ss4s1psm1.Peptide);
    Assert.AreEqual(a1, ss4s1psm1.Analysis);
    Assert.AreEqual(4, ss4s1psm1.Charge);
}
/// <summary>
/// Populates the database behind <paramref name="session"/> with test data described by
/// <paramref name="testPsmSummary"/>: source groups, sources, group links, spectra, analyses,
/// peptides, peptide-spectrum matches and their modifications. New rows are queued on a
/// BulkInserter, which is executed once at the end.
/// </summary>
/// <param name="session">Session used to read existing entities and to obtain the connection.</param>
/// <param name="testPsmSummary">
/// One tuple per (group, source, spectrum, analysis); its PeptideTuples string holds
/// space-separated entries whose fields are separated by '@' and '/':
/// sequence, charge, score divider.
/// </param>
public static void CreateTestData (NHibernate.ISession session, IList<SpectrumTuple> testPsmSummary)
{
    // Index the entities already stored in the database.
    var dbGroups = new Map<string, SpectrumSourceGroup>();
    foreach (var existingGroup in session.Query<SpectrumSourceGroup>())
    {
        dbGroups[existingGroup.Name] = existingGroup;
    }

    var dbSources = new Map<long, SpectrumSource>();
    foreach (var existingSource in session.Query<SpectrumSource>())
    {
        dbSources[existingSource.Id.Value] = existingSource;
    }

    var dbAnalyses = new Map<long, Analysis>();
    foreach (var existingAnalysis in session.Query<Analysis>())
    {
        dbAnalyses[existingAnalysis.Id.Value] = existingAnalysis;
    }

    var dbPeptides = new Map<string, Peptide>();
    foreach (var existingPeptide in session.Query<Peptide>())
    {
        dbPeptides[existingPeptide.Sequence] = existingPeptide;
    }

    var bulkInserter = new BulkInserter(session.Connection);

    // Highest ids currently in use; new rows continue counting from these.
    long lastPsmId = session.CreateQuery("SELECT MAX(Id) FROM PeptideSpectrumMatch").UniqueResult<long?>().GetValueOrDefault();
    long lastModId = session.CreateQuery("SELECT MAX(Id) FROM Modification").UniqueResult<long?>().GetValueOrDefault();
    long lastPmId = session.CreateQuery("SELECT MAX(Id) FROM PeptideModification").UniqueResult<long?>().GetValueOrDefault();
    long lastGroupId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroup").UniqueResult<long?>().GetValueOrDefault();
    long lastSourceId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSource").UniqueResult<long?>().GetValueOrDefault();
    long lastSglId = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroupLink").UniqueResult<long?>().GetValueOrDefault();

    foreach (SpectrumTuple row in testPsmSummary)
    {
        string groupName = row.Group;
        string sourceName = "Source " + row.Source;
        string analysisId = "Engine " + row.Analysis;
        string peptideTuples = row.PeptideTuples;

        // An entry fetched from a Map with an empty Name is treated as new and filled in.
        SpectrumSourceGroup group = dbGroups[groupName];
        if (String.IsNullOrEmpty(group.Name))
        {
            group.Id = ++lastGroupId;
            group.Name = groupName;
            bulkInserter.Add(group);
        }

        SpectrumSource source = dbSources[row.Source];
        if (String.IsNullOrEmpty(source.Name))
        {
            source.Id = ++lastSourceId;
            source.Name = sourceName;
            source.Group = group;
            source.Spectra = new List<Spectrum>();
            bulkInserter.Add(source);

            // add a source group link for the source's immediate group
            bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++lastSglId, Group = group, Source = source });

            // add source group links for all of the immediate group's parent groups,
            // walking up one path segment at a time until the root group "/" has been linked
            if (groupName != "/")
            {
                string parentGroupName = groupName;
                do
                {
                    parentGroupName = parentGroupName.Substring(0, parentGroupName.LastIndexOf("/"));
                    if (String.IsNullOrEmpty(parentGroupName))
                    {
                        parentGroupName = "/";
                    }

                    // add the parent group if it doesn't exist yet
                    SpectrumSourceGroup parentGroup = session.UniqueResult<SpectrumSourceGroup>(o => o.Name == parentGroupName);
                    if (parentGroup == null)
                    {
                        parentGroup = new SpectrumSourceGroup() { Id = ++lastGroupId, Name = parentGroupName };
                        bulkInserter.Add(parentGroup);
                    }

                    bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++lastSglId, Group = parentGroup, Source = source });
                }
                while (parentGroupName != "/");
            }
        }

        Spectrum spectrum = source.Spectra.SingleOrDefault(o => o.Source.Id == source.Id &&
                                                                o.Index == row.Spectrum - 1);
        if (spectrum == null)
        {
            spectrum = new Spectrum()
            {
                Id = source.Id * 10000 + row.Spectrum,
                Index = row.Spectrum - 1,
                NativeID = "scan=" + row.Spectrum,
                Source = source,
                PrecursorMZ = 42
            };
            source.Spectra.Add(spectrum);
            bulkInserter.Add(spectrum);
        }

        Analysis analysis = dbAnalyses[row.Analysis];
        if (String.IsNullOrEmpty(analysis.Name))
        {
            analysis.Id = dbAnalyses.Max(o => o.Value.Id).GetValueOrDefault() + 1;
            analysis.Name = analysisId + " 1.0";
            analysis.Software = new AnalysisSoftware() {Name = analysisId, Version = "1.0"};
            analysis.StartTime = DateTime.Today.AddHours(row.Analysis);
            analysis.Type = AnalysisType.DatabaseSearch;
            analysis.Parameters = new SortedSet<AnalysisParameter>()
            {
                new AnalysisParameter()
                {
                    Id = analysis.Id * 10000,
                    Analysis = analysis,
                    Name = "Parameter 1",
                    Value = "Value 1"
                }
            };
            bulkInserter.Add(analysis);
        }

        // make sure peptides are sorted by their score divider (which will determine rank)
        var peptideList = new SortedList<int, List<PeptideTuple>>();
        foreach (string tuple in peptideTuples.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries))
        {
            string[] fields = tuple.Split('@', '/');
            var peptideTuple = new PeptideTuple()
            {
                Sequence = fields[0],
                Charge = Convert.ToInt32(fields[1]),
                ScoreDivider = Convert.ToInt32(fields[2])
            };

            List<PeptideTuple> sameDividerTuples;
            if (!peptideList.TryGetValue(peptideTuple.ScoreDivider, out sameDividerTuples))
            {
                sameDividerTuples = new List<PeptideTuple>();
                peptideList[peptideTuple.ScoreDivider] = sameDividerTuples;
            }
            sameDividerTuples.Add(peptideTuple);
        }

        int rank = 1;
        int lastDivider = 1;
        foreach (var peptideTupleList in peptideList.Values)
        {
            foreach (var peptideTuple in peptideTupleList)
            {
                using (PwizPeptide pwizPeptide = new PwizPeptide(peptideTuple.Sequence,
                                                                 ModParsing.ModificationParsing_Auto,
                                                                 ModDelimiter.ModificationDelimiter_Brackets))
                {
                    Peptide peptide = dbPeptides[pwizPeptide.sequence];
                    if (String.IsNullOrEmpty(peptide.Sequence))
                    {
                        peptide = new TestPeptide(pwizPeptide.sequence);
                        peptide.Id = dbPeptides.Max(o => o.Value.Id).GetValueOrDefault() + 1;
                        peptide.MonoisotopicMass = pwizPeptide.monoisotopicMass(false);
                        peptide.MolecularWeight = pwizPeptide.molecularWeight(false);
                        dbPeptides[pwizPeptide.sequence] = peptide;
                        bulkInserter.Add(peptide);
                        createTestPeptideInstances(session, bulkInserter, peptide);
                    }

                    double neutralPrecursorMass = (spectrum.PrecursorMZ*peptideTuple.Charge) - (peptideTuple.Charge*Proton.Mass);

                    // A change of score divider moves on to the next rank; the first PSM stays at
                    // rank 1 only when its divider equals the initial lastDivider of 1.
                    if (peptideTuple.ScoreDivider != lastDivider)
                    {
                        ++rank;
                    }

                    var psm = new PeptideSpectrumMatch()
                    {
                        Id = ++lastPsmId,
                        Peptide = peptide,
                        Spectrum = spectrum,
                        Analysis = analysis,
                        ObservedNeutralMass = neutralPrecursorMass,
                        MonoisotopicMassError = neutralPrecursorMass - pwizPeptide.monoisotopicMass(),
                        MolecularWeightError = neutralPrecursorMass - pwizPeptide.molecularWeight(),
                        Charge = peptideTuple.Charge,
                        Rank = rank,
                        QValue = (rank == 1 ? row.QValue : PeptideSpectrumMatch.DefaultQValue),
                    };

                    if (row.Score != null)
                    {
                        double dividedScore = (double) row.Score/peptideTuple.ScoreDivider;
                        psm.Scores = new Dictionary<string, double>()
                        {
                            {"score1", dividedScore},
                            {"score2", 1/(dividedScore)}
                        };
                    }

                    bulkInserter.Add(psm);
                    lastDivider = peptideTuple.ScoreDivider;

                    // add PeptideModifications and Modifications
                    foreach (KeyValuePair<int, ModList> itr in pwizPeptide.modifications())
                    {
                        // terminal positions are stored as int.MinValue / int.MaxValue offsets
                        int storedOffset;
                        if (itr.Key == ModMap.NTerminus())
                        {
                            storedOffset = int.MinValue;
                        }
                        else if (itr.Key == ModMap.CTerminus())
                        {
                            storedOffset = int.MaxValue;
                        }
                        else
                        {
                            storedOffset = itr.Key;
                        }

                        foreach (PwizMod pwizMod in itr.Value)
                        {
                            Modification mod = session.UniqueResult<Modification>(o => o.Formula == pwizMod.formula());
                            if (mod == null)
                            {
                                mod = new Modification()
                                {
                                    Id = ++lastModId,
                                    Formula = pwizMod.formula(),
                                    MonoMassDelta = pwizMod.monoisotopicDeltaMass(),
                                    AvgMassDelta = pwizMod.averageDeltaMass(),
                                    Name = pwizMod.formula()
                                };
                                bulkInserter.Add(mod);
                            }

                            bulkInserter.Add(new PeptideModification()
                            {
                                Id = ++lastPmId,
                                PeptideSpectrumMatch = psm,
                                Modification = mod,
                                Offset = storedOffset
                            });
                        }
                    }
                }
            }
        }
    }

    bulkInserter.Execute();
    bulkInserter.Reset("");
}
/// <summary>
/// Builds a row from a raw query result.
/// </summary>
/// <param name="queryRow">
/// Columns read by index: 0 PSM id, 1 spectrum id, 2 source name, 3 spectrum native id,
/// 4 precursor m/z, 5 charge, 6 comma-separated "id:massDelta:offset" modification triplets
/// (may be empty), 7 peptide sequence, 8 decoy state.
/// </param>
public PhosphoPeptideAttestationRow(object[] queryRow)
{
    // The triplets and the bracketed sequence are machine-readable text, so numbers are parsed
    // and written with the invariant culture (always '.' as decimal separator).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const string phosphoAcceptors = "STY";

    PSMId = (long)queryRow[0];
    SpectrumId = (long)queryRow[1];
    SourceName = (string)queryRow[2];
    SpectrumNativeID = (string)queryRow[3];
    PrecursorMZ = Convert.ToDouble(queryRow[4]);
    Charge = Convert.ToInt32(queryRow[5]);

    // Build the peptide sequence with modifications. Leave the phospho sites out of the string. They
    // will be reunited with the string right before the PSM is submitted to phosphoRS. This is necessary
    // because phosphoRS requires all phospho sites marked with a single numerical representation across all
    // PSMs.
    OriginalPhosphoSites = new SortedDictionary<int, long>();
    var mods = new Dictionary<int, List<double>>();
    string peptideSequence = (string)queryRow[7];
    Peptide = new proteome.Peptide(peptideSequence);
    var pwizMods = Peptide.modifications();

    if (!String.IsNullOrEmpty((string)queryRow[6]))
    {
        var IdMassDeltaAndOffsetTriplets = ((string)queryRow[6]).Split(',');
        foreach (var triplet in IdMassDeltaAndOffsetTriplets)
        {
            var tokens = triplet.Split(':');
            long pmId = Convert.ToInt64(tokens[0], invariant);
            double deltaMass = Convert.ToDouble(tokens[1], invariant);
            int roundedDeltaMass = (int)Math.Round(deltaMass);
            int offset = Convert.ToInt32(tokens[2], invariant);

            pwizMods[offset].Add(new proteome.Modification(deltaMass, deltaMass));

            // Terminal modifications use int.MinValue / int.MaxValue as offset; those must not be
            // used to index the sequence, so only offsets inside the sequence can be phospho sites.
            bool isResidueOffset = offset >= 0 && offset < peptideSequence.Length;
            bool isPhosphoSite = roundedDeltaMass == 80 &&
                                 isResidueOffset &&
                                 phosphoAcceptors.IndexOf(peptideSequence[offset]) >= 0;

            if (isPhosphoSite)
            {
                OriginalPhosphoSites[offset] = pmId;
            }
            else
            {
                if (!mods.ContainsKey(offset))
                {
                    mods[offset] = new List<double>();
                }
                mods[offset].Add(deltaMass);
            }
        }
    }

    // Non-phospho modifications are written as "[mass]" with 4 decimal places.
    string format = String.Format("[{{0:f{0}}}]", 4);
    StringBuilder sb = new StringBuilder(peptideSequence);

    // Insert from the highest offset downwards so earlier insert positions stay valid.
    foreach (var mod in (from m in mods orderby m.Key descending select m))
    {
        foreach (var massDelta in mod.Value)
        {
            if (mod.Key == int.MinValue)
            {
                sb.Insert(0, String.Format(invariant, format, massDelta));
            }
            else if (mod.Key == int.MaxValue || mod.Key >= sb.Length)
            {
                sb.AppendFormat(invariant, format, massDelta);
            }
            else
            {
                sb.Insert(mod.Key + 1, String.Format(invariant, format, massDelta));
            }
        }
    }
    UnphosphorylatedSequence = sb.ToString();

    DecoyState = Convert.ToInt16(queryRow[8]);

    // Determine the location of phosphorylation sites: every S, T or Y residue.
    PossiblePhosphoSites = new List<int>();
    for (int residueIndex = 0; residueIndex < peptideSequence.Length; ++residueIndex)
    {
        if (phosphoAcceptors.IndexOf(peptideSequence[residueIndex]) >= 0)
        {
            PossiblePhosphoSites.Add(residueIndex);
        }
    }
}
/// <summary>
/// Converts an attestation row into a phosphoRS peptide-spectrum match: the original phospho
/// sites are added back onto the unphosphorylated peptide, and every modification is described
/// both as a phosphoRS modification object and as one character in the modification position
/// string ("n-term.residues.c-term").
/// </summary>
/// <param name="config">Supplies the phospho modification (pwizMod, scoredAA) and its symbol.</param>
/// <param name="variant">Row providing sequence, phospho sites, spectrum and PSM details.</param>
/// <returns>The phosphoRS peptide-spectrum match built from <paramref name="variant"/>.</returns>
private phosphoRS.PeptideSpectrumMatch getPhosphoRS_PSM(PhosphoRSConfig config, PhosphoPeptideAttestationRow variant)
{
    // Get the phosphorylated peptide and add all modifications to the base sequence.
    proteome.Peptide phosphoPeptide = new proteome.Peptide(variant.UnphosphorylatedSequence,
                                                           proteome.ModificationParsing.ModificationParsing_Auto,
                                                           proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
    proteome.ModificationMap variantPeptideMods = phosphoPeptide.modifications();
    foreach (int location in variant.OriginalPhosphoSites.Keys)
    {
        variantPeptideMods[location].Add(config.pwizMod);
    }

    // This modification ID is used to tell phosphoRS how to modify the sequence.
    int modificationID = config.phosphorylationSymbol + 1;

    // Every non-phospho modification shares this one symbol. The same character is used for the
    // modification objects and for the position string, so the string always holds exactly one
    // character per position.
    char otherModSymbol = modificationID.ToString()[0];

    // Build a string representation of all modifications in a peptide for phosphoRS
    // "0.00011000000000.0" : 1 is the ID of the modification. All phosphos in a data
    // set need to have one ID. This ID is used by phosphoRS to figure out which
    // mods need to be scored.
    var ptmRepresentation = new StringBuilder();

    // Store all modifications in phosphoRS modification objects
    var modifications = new List<phosphoRS.AminoAcidModification>();

    // Get the n-terminal modifications.
    if (variantPeptideMods.ContainsKey(proteome.ModificationMap.NTerminus()))
    {
        var otherMod = new phosphoRS.AminoAcidModification(otherModSymbol,
                                                           "unknown",
                                                           "unk",
                                                           "none",
                                                           variantPeptideMods[proteome.ModificationMap.NTerminus()].monoisotopicDeltaMass(),
                                                           0.0,
                                                           null);
        modifications.Add(otherMod);
        ptmRepresentation.Append(otherModSymbol);
        ptmRepresentation.Append('.');
    }
    else
    {
        ptmRepresentation.Append("0.");
    }

    // Process all other modifications.
    for (int aaIndex = 0; aaIndex < phosphoPeptide.sequence.Length; ++aaIndex)
    {
        if (!variantPeptideMods.ContainsKey(aaIndex))
        {
            ptmRepresentation.Append("0");
            continue;
        }

        if (variant.OriginalPhosphoSites.ContainsKey(aaIndex))
        {
            // If phosphorylation, use the existing scoredAA variable.
            modifications.Add(config.scoredAA);
            ptmRepresentation.Append(config.phosphorylationSymbol.ToString()[0]);
        }
        else
        {
            // Otherwise, make an "unknown" modification with a separate modification ID.
            var otherMod = new phosphoRS.AminoAcidModification(otherModSymbol,
                                                               "unknown",
                                                               "unk",
                                                               "none",
                                                               variantPeptideMods[aaIndex].monoisotopicDeltaMass(),
                                                               0.0,
                                                               phosphoRS.AminoAcidSequence.ParseAASequence("" + phosphoPeptide.sequence[aaIndex]));
            modifications.Add(otherMod);
            ptmRepresentation.Append(otherModSymbol);
        }
    }

    // Process any c-terminal modifications.
    if (variantPeptideMods.ContainsKey(proteome.ModificationMap.CTerminus()))
    {
        var otherMod = new phosphoRS.AminoAcidModification(otherModSymbol,
                                                           "unknown",
                                                           "unk",
                                                           "none",
                                                           variantPeptideMods[proteome.ModificationMap.CTerminus()].monoisotopicDeltaMass(),
                                                           0.0,
                                                           null);
        modifications.Add(otherMod);
        ptmRepresentation.Append('.');
        ptmRepresentation.Append(otherModSymbol);
    }
    else
    {
        ptmRepresentation.Append(".0");
    }

    // Get the phosphoRS peptide sequence.
    // Assign spectrum ID, amino acid sequence, list of all modifications, a so-called 'modification position string'
    // (here every digit represents an amino acid within the peptide sequence; '0' indicates not modified, values != '0'
    // indicate the unique identifier of the amino acid's modification; the first digit represents the n-terminus,
    // the last digit represents the c-terminus)
    var AAS = phosphoRS.AminoAcidSequence.Create((int)variant.SpectrumId, phosphoPeptide.sequence, modifications, ptmRepresentation.ToString());

    // Make a phosphoRS peptide-spectrum match.
    return new phosphoRS.PeptideSpectrumMatch((int)variant.PSMId, variant.SpectrumType, variant.Charge, variant.PrecursorMZ, variant.Peaks, AAS);
}
/// <summary>
/// Builds an attestation row from one query result row.
/// </summary>
/// <remarks>
/// Phosphorylations (delta mass rounding to 80 on S, T or Y) are recorded in
/// <c>OriginalPhosphoSites</c> and left out of <c>UnphosphorylatedSequence</c>; every other
/// modification is written into that sequence as a bracketed mass with four decimals.
/// </remarks>
/// <param name="queryRow">
/// Columns read: [0] PSM id, [1] spectrum id, [2] source name, [3] spectrum native ID,
/// [4] precursor m/z, [5] charge, [6] separator-joined "id:massDelta:offset" triplets
/// (may be null or empty), [7] unmodified peptide sequence, [8] decoy state.
/// </param>
public PhosphoPeptideAttestationRow(object[] queryRow)
{
    // The triplet tokens are assembled by the SQL layer and the bracketed masses are parsed
    // again later as modification annotations, so neither may depend on the UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const string phosphoResidues = "STY";

    PSMId = (long)queryRow[0];
    SpectrumId = (long)queryRow[1];
    SourceName = (string)queryRow[2];
    SpectrumNativeID = (string)queryRow[3];
    PrecursorMZ = Convert.ToDouble(queryRow[4]);
    Charge = Convert.ToInt32(queryRow[5]);

    // Build the peptide sequence with modifications, leaving the phospho sites out of the string.
    // They are reunited with the string right before the PSM is submitted to phosphoRS, because
    // phosphoRS requires all phospho sites to be marked with one numerical symbol across all PSMs.
    OriginalPhosphoSites = new SortedDictionary<int, long>();
    var mods = new Dictionary<int, List<double>>();
    string peptideSequence = (string)queryRow[7];
    Peptide = new proteome.Peptide(peptideSequence);
    var pwizMods = Peptide.modifications();

    string concatenatedMods = (string)queryRow[6];
    if (!String.IsNullOrEmpty(concatenatedMods))
    {
        var idMassDeltaAndOffsetTriplets = concatenatedMods.Split(Properties.Settings.Default.GroupConcatSeparator[0]);
        foreach (var triplet in idMassDeltaAndOffsetTriplets)
        {
            var tokens = triplet.Split(':');
            long pmId = Convert.ToInt64(tokens[0], invariant);
            double deltaMass = Convert.ToDouble(tokens[1], invariant);
            int roundedDeltaMass = (int)Math.Round(deltaMass);
            int offset = Convert.ToInt32(tokens[2], invariant);

            pwizMods[offset].Add(new proteome.Modification(deltaMass, deltaMass));

            // Terminal modifications use int.MinValue / int.MaxValue as offsets (see the insert
            // logic below); those must never be used to index the sequence.
            bool isResidueOffset = offset >= 0 && offset < peptideSequence.Length;
            bool isPhosphorylation = roundedDeltaMass == 80 &&
                                     isResidueOffset &&
                                     phosphoResidues.IndexOf(peptideSequence[offset]) >= 0;

            if (isPhosphorylation)
            {
                OriginalPhosphoSites[offset] = pmId;
            }
            else
            {
                if (!mods.ContainsKey(offset))
                {
                    mods[offset] = new List<double>();
                }
                mods[offset].Add(deltaMass);
            }
        }
    }

    // Insert annotations from the highest offset down so that earlier insert positions stay valid.
    const string format = "[{0:f4}]";
    StringBuilder sb = new StringBuilder(peptideSequence);
    foreach (var mod in mods.OrderByDescending(m => m.Key))
    {
        foreach (var massDelta in mod.Value)
        {
            string annotation = String.Format(invariant, format, massDelta);
            if (mod.Key == int.MinValue)
            {
                sb.Insert(0, annotation);
            }
            else if (mod.Key == int.MaxValue || mod.Key >= sb.Length)
            {
                sb.Append(annotation);
            }
            else
            {
                sb.Insert(mod.Key + 1, annotation);
            }
        }
    }
    UnphosphorylatedSequence = sb.ToString();
    DecoyState = Convert.ToInt16(queryRow[8]);

    // Every S, T or Y residue is a candidate phosphorylation site.
    PossiblePhosphoSites = new List<int>();
    for (int residueIndex = 0; residueIndex < peptideSequence.Length; ++residueIndex)
    {
        if (phosphoResidues.IndexOf(peptideSequence[residueIndex]) >= 0)
        {
            PossiblePhosphoSites.Add(residueIndex);
        }
    }
}
/// <summary>
/// Converts one attestation row into the peptide-spectrum match object consumed by phosphoRS.
/// </summary>
/// <param name="config">Supplies the phosphorylation symbol, the scored phosphoRS modification and
/// the pwiz modification that is re-attached to the original phospho sites.</param>
/// <param name="variant">Row holding the bracket-annotated sequence without phospho groups, the
/// original phospho site offsets, and the spectrum data (ids, type, charge, precursor m/z, peaks).</param>
/// <returns>A phosphoRS PSM built from the row's spectrum and the re-phosphorylated peptide.</returns>
private phosphoRS.PeptideSpectrumMatch getPhosphoRS_PSM(PhosphoRSConfig config, PhosphoPeptideAttestationRow variant)
{
    // Parse the bracket-annotated sequence, then put the phospho groups back on their original sites.
    var phosphoPeptide = new proteome.Peptide(variant.UnphosphorylatedSequence,
                                              proteome.ModificationParsing.ModificationParsing_Auto,
                                              proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
    proteome.ModificationMap variantPeptideMods = phosphoPeptide.modifications();
    foreach (var phosphoSite in variant.OriginalPhosphoSites.Keys.ToList())
    {
        variantPeptideMods[phosphoSite].Add(config.pwizMod);
    }

    // Every non-phospho modification shares a single ID: the value right after the phospho symbol.
    int modificationID = config.phosphorylationSymbol + 1;
    string otherModLabel = modificationID.ToString();
    char otherModSymbol = otherModLabel[0];

    string residues = phosphoPeptide.sequence;

    // phosphoRS modification objects, in N-terminus -> residues -> C-terminus order.
    var modifications = new List<phosphoRS.AminoAcidModification>();

    // Position string handed to phosphoRS, e.g. "0.00011000000000.0": the part before the first
    // dot is the N-terminus, the part after the last dot is the C-terminus, and each character in
    // between is one residue ('0' = unmodified, anything else = modification ID).
    var ptmRepresentation = new StringBuilder();

    // N-terminal modification.
    var nTerminus = proteome.ModificationMap.NTerminus();
    if (variantPeptideMods.ContainsKey(nTerminus))
    {
        // NOTE(review): the symbol is the literal '2' here while the position string uses
        // modificationID; presumably these agree only when phosphorylationSymbol is 1 - confirm.
        modifications.Add(new phosphoRS.AminoAcidModification('2', "unknown", "unk", "none",
                                                              variantPeptideMods[nTerminus].monoisotopicDeltaMass(),
                                                              0.0, null));
        ptmRepresentation.Append(otherModLabel + ".");
    }
    else
    {
        ptmRepresentation.Append("0.");
    }

    // Residue modifications.
    for (int position = 0; position < residues.Length; ++position)
    {
        if (!variantPeptideMods.ContainsKey(position))
        {
            ptmRepresentation.Append("0");
            continue;
        }

        if (variant.OriginalPhosphoSites.ContainsKey(position))
        {
            // Phosphorylation: reuse the scored modification from the configuration.
            modifications.Add(config.scoredAA);
            ptmRepresentation.Append(config.phosphorylationSymbol.ToString()[0]);
            continue;
        }

        // Anything else becomes an "unknown" modification restricted to this residue.
        modifications.Add(new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                              variantPeptideMods[position].monoisotopicDeltaMass(),
                                                              0.0,
                                                              phosphoRS.AminoAcidSequence.ParseAASequence(residues[position].ToString())));
        ptmRepresentation.Append(otherModLabel);
    }

    // C-terminal modification.
    var cTerminus = proteome.ModificationMap.CTerminus();
    if (variantPeptideMods.ContainsKey(cTerminus))
    {
        modifications.Add(new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                              variantPeptideMods[cTerminus].monoisotopicDeltaMass(),
                                                              0.0, null));
        ptmRepresentation.Append("." + otherModLabel);
    }
    else
    {
        ptmRepresentation.Append(".0");
    }

    // Bundle spectrum ID, bare sequence, modification list and position string for phosphoRS.
    var AAS = phosphoRS.AminoAcidSequence.Create((int)variant.SpectrumId, residues, modifications, ptmRepresentation.ToString());

    return new phosphoRS.PeptideSpectrumMatch((int)variant.PSMId, variant.SpectrumType, variant.Charge,
                                              variant.PrecursorMZ, variant.Peaks, AAS);
}
/// <summary>
/// Recomputes the per-ion-series fragmentation statistics (percent of TIC matched, percent of
/// peaks matched, mean m/z error) for a shuffled sample of at most 1000 filtered PSMs and adds
/// the results to the three graph controls.
/// </summary>
/// <remarks>
/// Reads the fragment tolerance and spectrum list filters from the form controls via
/// <c>Invoke</c>, and resets <c>maxPercentTic</c>, <c>maxPercentPeakCount</c> and
/// <c>maxMeanMzError</c> before accumulating new maxima.
/// </remarks>
/// <exception cref="FileNotFoundException">A spectrum source could not be located.</exception>
private void getFragmentationStatistics ()
{
    IList<object[]> queryRows;
    lock (session)
    {
        var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                               .List<long>()
                               .Shuffle()
                               .Take(1000)
                               .OrderBy(o => o);
        string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
        queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                        "FROM PeptideSpectrumMatch psm " +
                                        "LEFT JOIN psm.Modifications pm " +
                                        "LEFT JOIN pm.Modification mod " +
                                        "WHERE psm.Id IN (" + randomIdSet + ") " +
                                        "GROUP BY psm.Spectrum.id ")
                           .List<object[]>();
    }

    // Ordering by source name lets each source file be opened only once in the loop below.
    var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

    var percentTicBySpectrumByFragmentType = new List<PointPairList>();
    var percentPeakCountBySpectrumByFragmentType = new List<PointPairList>();
    var meanMzErrorBySpectrumByFragmentType = new List<PointPairList>();
    var percentTicListByFragmentType = new List<List<double>>();
    var percentPeakCountListByFragmentType = new List<List<double>>();
    var meanMzErrorListByFragmentType = new List<List<double>>();

    foreach (var graphControl in graphControls)
    {
        graphControl.MasterPane.PaneList.ForEach(o => o.CurveList.ForEach(c => c.Clear()));
    }

    // Pane i + 1 belongs to ion series i; its curve 3 receives the per-spectrum points.
    for (int i = 0; i < (int) IonSeries.Count; ++i)
    {
        percentTicBySpectrumByFragmentType.Add(percentTicGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
        percentPeakCountBySpectrumByFragmentType.Add(percentPeakCountGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
        meanMzErrorBySpectrumByFragmentType.Add(meanMzErrorGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
        percentTicListByFragmentType.Add(new List<double>());
        percentPeakCountListByFragmentType.Add(new List<double>());
        meanMzErrorListByFragmentType.Add(new List<double>());
    }

    int spectraCount = 0;
    maxPercentTic = 10;
    maxPercentPeakCount = 10;
    maxMeanMzError = 0.1;
    var tolerance = fragmentTolerance;
    string spectrumListFilters = String.Empty;

    // Control state is read through Invoke.
    Invoke(new MethodInvoker(() =>
    {
        tolerance.value = Convert.ToDouble(fragmentToleranceTextBox.Text);
        tolerance.units = (MZTolerance.Units) fragmentToleranceUnitsComboBox.SelectedIndex;
        meanMzErrorGraphControl.GraphPane.YAxis.Title.Text = "Mean m/z error (" + tolerance.units.ToString() + ")";
        spectrumListFilters = spectrumFiltersTextBox.Text;
        setAutomaticScales();
    }));

    string currentSourceName = null;
    string currentSourcePath = null;
    msdata.MSData msd = null;

    IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

    // One generator for the whole run: a clock-seeded Random constructed per spectrum can repeat
    // the same sequence for spectra processed within the same tick, stacking their points.
    var rng = new Random();

    lock (owner)
    {
        foreach (var row in spectrumRows)
        {
            if (row.SourceName != currentSourceName)
            {
                currentSourceName = row.SourceName;
                currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                if (String.IsNullOrEmpty(currentSourcePath))
                    throw new FileNotFoundException("source file not found");
                msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
            }

            string spectrumId = String.Format("{0}/{1}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID));

            var spectrumList = msd.run.spectrumList;

            ++spectraCount;

            var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto, proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
            var fragmentation = pwizPeptide.fragmentation(true, true);

            var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
            var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
            double tic = pointMap.Values.Sum();

            var percentTicByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int) IonSeries.Count));
            var percentPeakCountByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int) IonSeries.Count));
            var matchCountByFragmentType = new List<int>(Enumerable.Repeat(0, (int) IonSeries.Count));
            var meanMzErrorByFragmentType = new List<double>(Enumerable.Repeat(Double.NaN, (int) IonSeries.Count));

            // Only singly charged fragments are matched.
            for (int z = 1; z <= 1; ++z)
            {
                for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                {
                    foreach (IonSeries series in ionSeries)
                    {
                        // Full-length c and x fragments are skipped.
                        if ((series == IonSeries.c || series == IonSeries.x) &&
                            length == pwizPeptide.sequence.Length)
                            continue;

                        double expected = fragmentMass(fragmentation, series, length, z);

                        // expected - (expected - tolerance) is the tolerance as an absolute m/z width.
                        seems.PointMap.Enumerator itr = pointMap.FindNear(expected, expected - (expected - tolerance));
                        if (itr != null && itr.IsValid)
                        {
                            percentTicByFragmentType[(int)series] += itr.Current.Value;
                            ++percentPeakCountByFragmentType[(int)series];
                            ++matchCountByFragmentType[(int)series];
                            if (Double.IsNaN(meanMzErrorByFragmentType[(int)series]))
                                meanMzErrorByFragmentType[(int)series] = 0;
                            meanMzErrorByFragmentType[(int)series] += mzError(itr.Current.Key, expected);
                        }
                    }
                }
            }

            double ticPerPercent = tic / 100;
            double peaksPerPercent = pointMap.Count / 100.0;

            for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
            {
                // convert sum to mean
                if (percentPeakCountByFragmentType[i] > 0)
                    meanMzErrorByFragmentType[i] /= matchCountByFragmentType[i];

                // Convert to percentages. A spectrum with zero TIC or no peaks would give 0/0 = NaN,
                // and Math.Max would then turn the running maxima into NaN; report 0% instead.
                percentTicByFragmentType[i] = ticPerPercent != 0 ? percentTicByFragmentType[i] / ticPerPercent : 0;
                percentPeakCountByFragmentType[i] = peaksPerPercent != 0 ? percentPeakCountByFragmentType[i] / peaksPerPercent : 0;

                maxPercentTic = Math.Max(maxPercentTic, percentTicByFragmentType[i]);
                maxPercentPeakCount = Math.Max(maxPercentPeakCount, percentPeakCountByFragmentType[i]);

                // Horizontal jitter in [-0.5, 0.5) spreads the points of one series apart.
                double jitter = (rng.NextDouble() - 0.5);
                percentTicBySpectrumByFragmentType[i].Add(jitter, percentTicByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentTicByFragmentType[i], matchCountByFragmentType[i]));
                percentPeakCountBySpectrumByFragmentType[i].Add(jitter, percentPeakCountByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentPeakCountByFragmentType[i], matchCountByFragmentType[i]));

                percentTicListByFragmentType[i].Add(percentTicByFragmentType[i]);
                percentPeakCountListByFragmentType[i].Add(percentPeakCountByFragmentType[i]);

                // The mean error stays NaN when the series had no match; such spectra are not plotted.
                if (!Double.IsNaN(meanMzErrorByFragmentType[i]))
                {
                    maxMeanMzError = Math.Max(maxMeanMzError, Math.Abs(meanMzErrorByFragmentType[i]));
                    meanMzErrorBySpectrumByFragmentType[i].Add(jitter, meanMzErrorByFragmentType[i], String.Format("{0}: {1:G4}%", spectrumId, meanMzErrorByFragmentType[i]));
                    meanMzErrorListByFragmentType[i].Add(meanMzErrorByFragmentType[i]);
                }
            }

            if ((spectraCount % 100) == 0)
                setAutomaticScales();
        } // for each spectrum row
    }

    Invoke(new MethodInvoker(() =>
    {
        for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
        {
            // Summaries are only added for series with at least five values.
            if (percentTicListByFragmentType[i].Count < 5)
                continue;

            percentTicListByFragmentType[i].Sort();
            percentPeakCountListByFragmentType[i].Sort();
            addSixNumberSummary(percentTicGraphControl.MasterPane.PaneList[i + 1], percentTicListByFragmentType[i]);
            addSixNumberSummary(percentPeakCountGraphControl.MasterPane.PaneList[i + 1], percentPeakCountListByFragmentType[i]);

            if (meanMzErrorListByFragmentType[i].Count < 5)
                continue;

            meanMzErrorListByFragmentType[i].Sort();
            addSixNumberSummary(meanMzErrorGraphControl.MasterPane.PaneList[i + 1], meanMzErrorListByFragmentType[i]);
        }
    }));
}
/// <summary>
/// Refills the precursor and charge-reduced scatter plots from a shuffled sample of at most
/// 1000 filtered PSMs: every peak is plotted by its m/z offset from the precursor m/z (and from
/// precursor m/z times charge) against its intensity divided by the spectrum's total intensity.
/// </summary>
/// <returns>Always an empty list.</returns>
/// <exception cref="System.IO.FileNotFoundException">A spectrum source could not be located.</exception>
private List<double> getPeakStatistics ()
{
    IList<object[]> queryRows;
    lock (session)
    {
        var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                               .List<long>()
                               .Shuffle()
                               .Take(1000)
                               .OrderBy(o => o);
        string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
        queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                        "FROM PeptideSpectrumMatch psm " +
                                        "LEFT JOIN psm.Modifications pm " +
                                        "LEFT JOIN pm.Modification mod " +
                                        "WHERE psm.Id IN (" + randomIdSet + ") " +
                                        "GROUP BY psm.Spectrum.id ")
                           .List<object[]>();
    }

    // Ordering by source name lets each source file be opened only once in the loop below.
    var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

    precursorScatterPlot.Clear();
    chargeReducedScatterPlot.Clear();

    int spectraCount = 0;

    string spectrumListFilters = String.Empty;
    Invoke(new MethodInvoker(() =>
    {
        spectrumListFilters = spectrumFiltersTextBox.Text;
        zedGraphControl.MasterPane.AxisChange();
        zedGraphControl.Refresh();
    }));

    string currentSourceName = null;
    string currentSourcePath = null;
    msdata.MSData msd = null;

    IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

    lock (owner)
    {
        foreach (var row in spectrumRows)
        {
            if (row.SourceName != currentSourceName)
            {
                currentSourceName = row.SourceName;
                currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());

                // Fail with a clear error instead of handing a missing path to the file reader.
                if (String.IsNullOrEmpty(currentSourcePath))
                    throw new System.IO.FileNotFoundException("source file not found");

                msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
            }

            string label = String.Format("{0}/{1}\n{2}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID), row.ModifiedSequence);

            var spectrumList = msd.run.spectrumList;

            ++spectraCount;

            var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto, proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
            var fragmentation = pwizPeptide.fragmentation(true, true);

            var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
            var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
            double tic = pointMap.Values.Sum();

            double precursorMz = row.Spectrum.PrecursorMZ;
            double chargeReducedPrecursorMz = precursorMz * row.PeptideSpectrumMatch.Charge;

            // Local switches: matched fragment peaks are plotted and are not removed from the map.
            bool plotMatchedPeaks = true;
            bool removeMatchedPeaks = false;

            double tolerance = 0.03;

            // Only singly charged fragments are matched.
            for (int z = 1; z <= 1; ++z)
            {
                for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                {
                    string NTermFragment = row.ModifiedSequence.Substring(0, length);

                    foreach (IonSeries series in ionSeries)
                    {
                        // Full-length c, c-1 and x fragments are skipped.
                        if ((series == IonSeries.c || series == IonSeries.cMinus1 || series == IonSeries.x) &&
                            length == pwizPeptide.sequence.Length)
                            continue;

                        seems.PointMap.Enumerator itr = pointMap.FindNear(fragmentMass(fragmentation, series, length, z), tolerance);
                        if (itr != null && itr.IsValid)
                        {
                            if (plotMatchedPeaks)
                            {
                                precursorScatterPlot.AddPoint(new PointPair(itr.Current.Key - precursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, precursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                chargeReducedScatterPlot.AddPoint(new PointPair(itr.Current.Key - chargeReducedPrecursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, chargeReducedPrecursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                            }

                            if (removeMatchedPeaks)
                                pointMap.Remove(itr);
                        }
                    }
                }
            }

            // Every peak still in the map is added with Z value 0.
            foreach (var pair in pointMap)
            {
                precursorScatterPlot.AddPoint(new PointPair(pair.Key - precursorMz, pair.Value / tic, 0, label));
                chargeReducedScatterPlot.AddPoint(new PointPair(pair.Key - chargeReducedPrecursorMz, pair.Value / tic, 0, label));
            }

            // Redraw every 100 spectra.
            if ((spectraCount % 100) == 0)
            {
                Invoke(new MethodInvoker(() =>
                {
                    zedGraphControl.MasterPane.AxisChange();
                    zedGraphControl.Refresh();
                }));
            }
        }
    }

    Invoke(new MethodInvoker(() =>
    {
        if (!lockZoomCheckBox.Checked)
        {
            zedGraphControl.ZoomOutAll(zedGraphControl.GraphPane);
        }
        zedGraphControl.MasterPane.AxisChange();
        zedGraphControl.Refresh();
    }));

    return new List<double>();
}